Merge branch 'master' of https://git01.codeplex.com/cntk into amitaga/separate1bitDataParallelSGD
This commit is contained in:
Коммит
18528f15b4
2
Makefile
2
Makefile
|
@ -162,7 +162,7 @@ ifeq ("$(BUILDTYPE)","debug")
|
|||
CXXFLAGS += -g
|
||||
LDFLAGS += -rdynamic
|
||||
CPPFLAGS += -D_DEBUG
|
||||
CUFLAGS += -O0 -use_fast_math -lineinfo $(GENCODE_FLAGS)
|
||||
CUFLAGS += -O0 -g -use_fast_math -lineinfo $(GENCODE_FLAGS)
|
||||
endif
|
||||
|
||||
ifeq ("$(BUILDTYPE)","release")
|
||||
|
|
|
@ -47,7 +47,7 @@ using namespace std;
|
|||
L"PastValue(dims, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'PastValue' ; inputs = input ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
|
||||
L"FutureValue(dims, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'FutureValue' ; inputs = input ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
|
||||
// TODO: ^^ DelayedValues no longer need to know their dimension. That is inferred in Validation.
|
||||
L"Shift(input, fromOffset, boundaryValue, boundaryMode=-1/*context*/, dim=-1, numSteps=1, insertedDim=0, tag='') = new ComputationNode [ operation = 'Shift' ; inputs = (input : boundaryValue) /*plus the function args*/ ]\n"
|
||||
L"Shift(input, fromOffset, boundaryValue, boundaryMode=-1/*context*/, dim=-1, tag='') = new ComputationNode [ operation = 'Shift' ; inputs = (input : boundaryValue) /*plus the function args*/ ]\n"
|
||||
L"RowSlice(startIndex, numRows, input, needGradient = false, tag='') = new ComputationNode [ operation = 'RowSlice' ; inputs = input /*plus the function args*/ ]\n"
|
||||
L"RowRepeat(input, numRepeats, needGradient = false, tag='') = new ComputationNode [ operation = 'RowRepeat' ; inputs = input /*plus the function args*/ ]\n"
|
||||
L"RowStack(inputs, tag='') = new ComputationNode [ operation = 'RowStack' /*plus the function args*/ ]\n"
|
||||
|
|
|
@ -345,6 +345,9 @@ void PrintBuiltInfo()
|
|||
#ifdef _CUB_PATH_
|
||||
fprintf(stderr, "\t\tCUB_PATH: %s\n", _CUB_PATH_);
|
||||
#endif
|
||||
#ifdef _CUDNN_PATH_
|
||||
fprintf(stderr, "\t\tCUDNN_PATH: %s\n", _CUDNN_PATH_);
|
||||
#endif
|
||||
#ifdef _GIT_EXIST
|
||||
fprintf(stderr, "\t\tBuild Branch: %s\n", _BUILDBRANCH_);
|
||||
fprintf(stderr, "\t\tBuild SHA1: %s\n", _BUILDSHA1_);
|
||||
|
@ -568,7 +571,7 @@ int wmainOldCNTKConfig(int argc, wchar_t* argv[]) // called from wmain which i
|
|||
RedirectStdErr(logpath);
|
||||
}
|
||||
|
||||
PrintBuiltInfo();
|
||||
PrintBuiltInfo(); // this one goes to log file
|
||||
std::string timestamp = TimeDateStamp();
|
||||
|
||||
//dump config info
|
||||
|
@ -643,10 +646,11 @@ int wmainOldCNTKConfig(int argc, wchar_t* argv[]) // called from wmain which i
|
|||
// main wrapper that catches C++ exceptions and prints them
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
int wmain1(int argc, wchar_t* argv[]) // called from wmain which is a wrapper that catches & repots Win32 exceptions
|
||||
int wmain1(int argc, wchar_t* argv[]) // called from wmain which is a wrapper that catches & reports Win32 exceptions
|
||||
{
|
||||
try
|
||||
{
|
||||
PrintBuiltInfo(); // print build info directly in case that user provides zero argument (convenient for checking build type)
|
||||
if (argc <= 1)
|
||||
InvalidArgument("No command-line argument given.");
|
||||
// detect legacy CNTK configuration
|
||||
|
@ -684,6 +688,8 @@ void terminate_this() { fprintf(stderr, "terminate_this: aborting\n"), fflush(st
|
|||
int wmain(int argc, wchar_t* argv[]) // wmain wrapper that reports Win32 exceptions
|
||||
{
|
||||
set_terminate (terminate_this); // insert a termination handler to ensure stderr gets flushed before actually terminating
|
||||
_set_error_mode(_OUT_TO_STDERR); // make sure there are no CRT prompts when CNTK is executing
|
||||
|
||||
// Note: this does not seem to work--processes with this seem to just hang instead of terminating
|
||||
__try
|
||||
{
|
||||
|
|
|
@ -100,7 +100,7 @@ template <typename ElemType>
|
|||
void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigParamList& params)
|
||||
{
|
||||
std::string name = p_name;
|
||||
if (EqualInsensitive(name, "CreateModel")) //create a blank model
|
||||
if (EqualInsensitive(name, "CreateModel")) // create a blank model
|
||||
{
|
||||
size_t numFixedParams = 0, numOptionalParams = 0;
|
||||
if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams)
|
||||
|
@ -109,7 +109,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
|
|||
auto cn = make_shared<ComputationNetwork>(CPUDEVICE);
|
||||
OverrideModelNameAndSetDefaultModel(cn);
|
||||
}
|
||||
if (EqualInsensitive(name, "CreateModelWithName")) //create a blank model
|
||||
if (EqualInsensitive(name, "CreateModelWithName")) // create a blank model
|
||||
{
|
||||
size_t numFixedParams = 1, numOptionalParams = 0;
|
||||
if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams)
|
||||
|
@ -139,6 +139,16 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
|
|||
std::wstring modelFormat = GetOptionalModelFormat(params, numFixedParams);
|
||||
|
||||
auto cn = make_shared<ComputationNetwork>(CPUDEVICE);
|
||||
#if 1 // support for a specific kind of legacy format, for the sole purpose of allowing users to convert (=load & save) them
|
||||
if (modelFormat == L"cntk_legacy_no_tensorlib")
|
||||
{
|
||||
cn->Read<ElemType>(params[1]);
|
||||
for (auto node : cn->FeatureNodes())
|
||||
node->SetDims(TensorShape(node->GetNumRows()), 0); // pre-tensorlib InputValues had incorrect tensor dimensions
|
||||
cn->CompileNetwork();
|
||||
}
|
||||
else
|
||||
#endif
|
||||
cn->Load<ElemType>(params[1]);
|
||||
OverrideModelNameAndSetDefaultModel(cn, params[0]);
|
||||
}
|
||||
|
@ -189,8 +199,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
|
|||
|
||||
// validate the network before we save it out
|
||||
ProcessNDLScript(m_netNdlDefault, ndlPassAll, true);
|
||||
|
||||
cn->Save(fileName);
|
||||
cn->SaveEdited(fileName);
|
||||
}
|
||||
else if (EqualInsensitive(name, "SaveModel"))
|
||||
{
|
||||
|
@ -209,7 +218,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
|
|||
|
||||
// validate and finish the second pass through NDL if any in-line NDL was defined
|
||||
ProcessNDLScript(netNdl, ndlPassAll, true);
|
||||
netNdl->cn->Save(fileName);
|
||||
netNdl->cn->SaveEdited(fileName);
|
||||
}
|
||||
else if (EqualInsensitive(name, "SetDefaultModel"))
|
||||
{
|
||||
|
|
|
@ -443,6 +443,10 @@ public:
|
|||
{
|
||||
modelFormat = L"cntk";
|
||||
}
|
||||
else if (EqualInsensitive(value, "cntk_legacy_no_tensorlib")) // model of late 2015 which had a bug in setting InputValue's tensor dimensions
|
||||
{
|
||||
modelFormat = L"cntk_legacy_no_tensorlib";
|
||||
}
|
||||
else
|
||||
{
|
||||
RuntimeError("Invalid optional parameter value %s, valid values are: format=(cntk)", value.c_str());
|
||||
|
|
|
@ -2423,9 +2423,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
Matrix<ElemType> priorVals = ReadMatrixFromDbnFile(fstream, std::string("Pu"));
|
||||
assert(priorVals.GetNumCols() == 1 && priorVals.GetNumRows() == m_outputLayerSize);
|
||||
|
||||
w = builder.Mean(label, L"Prior");
|
||||
static_pointer_cast<PreComputedNode<ElemType>>(w)->SideLoadFromMatrix(priorVals);
|
||||
w->SetParameterUpdateRequired(false);
|
||||
prior = builder.Mean(label, L"Prior");
|
||||
static_pointer_cast<PreComputedNode<ElemType>>(prior)->SideLoadFromMatrix(priorVals);
|
||||
prior->SetParameterUpdateRequired(false);
|
||||
}
|
||||
else // pretrained network - need to add output layer, initialize
|
||||
{
|
||||
|
@ -2465,7 +2465,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
if (layerType == "perceptron" || m_needPrior)
|
||||
{
|
||||
input = builder.Log(pcNodePtr, L"LogOfPrior");
|
||||
input = builder.Log(prior, L"LogOfPrior");
|
||||
|
||||
//the following two lines are needed only if true probability is needed
|
||||
//output = builder.Softmax(output);
|
||||
|
|
|
@ -33,6 +33,16 @@ if "%cuda_path%" == "" (
|
|||
echo #define _CUDA_PATH_ "%cuda_path:\=\\%" >> buildinfo.h$$
|
||||
)
|
||||
|
||||
if not "%cudnn_path%" == "" (
|
||||
echo #define _CUDNN_PATH_ "%cudnn_path:\=\\%" >> buildinfo.h$$
|
||||
)
|
||||
|
||||
if not "%cub_path%" == "" (
|
||||
echo #define _CUB_PATH_ "%cub_path:\=\\%" >> buildinfo.h$$
|
||||
)
|
||||
|
||||
|
||||
|
||||
echo #endif >> buildinfo.h$$
|
||||
|
||||
::: update file only if it changed (otherwise CNTK.cpp will get rebuilt each time)
|
||||
|
|
|
@ -84,6 +84,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
ptrdiff_t tBegin; // first time index in this minibatch. Note that this may be negative if the sequence started before this MB.
|
||||
size_t tEnd; // end = first frame index after final frame. May be beyond the minibatch if the real sequence is longer than the MB.
|
||||
bool operator==(const SequenceInfo & other) const { return seqId == other.seqId && s == other.s && tBegin == other.tBegin && tEnd == other.tEnd; }
|
||||
size_t GetNumTimeSteps() const { return (size_t)(tEnd - tBegin); }
|
||||
};
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
|
@ -270,6 +271,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// I'd love to start with all-gaps, but that would require setting flags upfront and then clearing them.
|
||||
void AddGap(size_t s, ptrdiff_t beginTime, size_t endTime) { if ((ptrdiff_t)endTime > beginTime) AddSequence(GAP_SEQUENCE_ID, s, beginTime, endTime); }
|
||||
|
||||
// Find a sequence by its id.
// Scans m_sequences in order and returns a reference to the first entry whose seqId equals 'seqId'.
// If no entry matches, the failure is reported through LogicError() with the requested id
// (there is no return statement after that call, so LogicError() presumably does not return -- TODO confirm).
|
||||
const SequenceInfo & FindSequence(UniqueSequenceId seqId) const
|
||||
{
|
||||
for (const auto & seqInfo : m_sequences)
|
||||
if (seqInfo.seqId == seqId)
|
||||
return seqInfo;
|
||||
LogicError("FindSequence: Requested sequence (id %u) not found.", (unsigned int) seqId);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// inquire about gaps or boundaries
|
||||
// -------------------------------------------------------------------
|
||||
|
@ -427,6 +437,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
public: // TODO: make private (currently used from masking and DataFor) ; TODO: rename all members with m_ prefix
|
||||
size_t timeIdxInSeq; // start frame; SIZE_MAX = all frames in MB
|
||||
ptrdiff_t m_timeOffset; // this is added to timeIdxInSeq wherever it is used
|
||||
size_t m_timeRange; // use this to describe a custom range > 1 frame
|
||||
size_t seqIndex; // parallel-sequence index; SIZE_MAX = all sequences in MB (most common case) --TODO: Bad name, 'sequence' and 'parallel sequence' are two different things
|
||||
MBLayoutPtr m_pMBLayout; // layout associated with this
|
||||
bool m_broadcastAllowed; // frame range may be broadcast from outer layout (e.g. a matrix with NULL layout and 1 column is acceptable to this frame range)
|
||||
|
@ -434,7 +445,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
public:
|
||||
// can construct from a single size_t -> a single-frame range
|
||||
FrameRange(MBLayoutPtr pMBLayout, size_t timeIdxInSeq) : timeIdxInSeq(timeIdxInSeq), m_timeOffset(0), seqIndex(SIZE_MAX), m_pMBLayout(pMBLayout), m_broadcastAllowed(false), parent(nullptr) {}
|
||||
FrameRange(MBLayoutPtr pMBLayout, size_t timeIdxInSeq) : timeIdxInSeq(timeIdxInSeq), m_timeOffset(0), m_timeRange(1), seqIndex(SIZE_MAX), m_pMBLayout(pMBLayout), m_broadcastAllowed(false), parent(nullptr) {}
|
||||
|
||||
// or without arguments -> entire minibatch / no frame-range
|
||||
FrameRange(MBLayoutPtr pMBLayout) : FrameRange(pMBLayout, SIZE_MAX) {}
|
||||
|
@ -471,7 +482,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
// create a FrameRange with a time offset
|
||||
// Note: This currently does not work in conjunction with IsAllFrames(). This would be a nice-to have, but tricky w.r.t. out-of-bounds accesses.
|
||||
// If IsAllFrames() then this will cause out-of-bounds slices.
|
||||
FrameRange WithTimeOffset(ptrdiff_t offset) const
|
||||
{
|
||||
FrameRange ret = *this;
|
||||
|
@ -479,6 +490,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
return ret;
|
||||
}
|
||||
|
||||
// create a FrameRange with a time range > 1
// Returns a copy of this FrameRange whose m_timeRange is set to 'range'.
// If IsAllFrames() is true for this FrameRange, the copy is returned unchanged and 'range' is ignored.
|
||||
FrameRange WithTimeRange(size_t range) const
|
||||
{
|
||||
FrameRange ret = *this;
|
||||
if (!IsAllFrames())
|
||||
ret.m_timeRange = range;
|
||||
return ret;
|
||||
}
|
||||
|
||||
// dimension we are iterating over; -1 means time dimension; 0 means no layout
// Currently the result depends only on whether m_pMBLayout is set: 0 if it is null, -1 otherwise.
|
||||
int GetIterationDimension() const
|
||||
{
|
||||
if (!m_pMBLayout)
|
||||
return 0;
|
||||
else
|
||||
return -1; // TODO: allow user to specify other dimensions
|
||||
}
|
||||
|
||||
class IndexIteration // range for range-based for over sequences
|
||||
{
|
||||
size_t m_beginIndex, m_endIndex;
|
||||
|
@ -753,7 +782,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (startColumn >= numCols)
|
||||
LogicError("DataFor: FrameRange specifies a time index that is out of range.");
|
||||
if (fr.seqIndex == SIZE_MAX)
|
||||
return std::pair<size_t, size_t>(startColumn, numParallelSequences);
|
||||
return std::pair<size_t, size_t>(startColumn, numParallelSequences * fr.m_timeRange);
|
||||
else if (fr.m_timeRange != 1)
|
||||
LogicError("DataFor: FrameRange only support per-sequence time ranges with tensor slices, not matrix slices.");
|
||||
else
|
||||
return std::pair<size_t, size_t>(startColumn + fr.seqIndex, 1);
|
||||
}
|
||||
|
@ -778,7 +809,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// TensorSliceWithMBLayoutFor() -- Return tensor slice for a FrameRange with specified number of columns with a given MBLayout
|
||||
// This implements the logic of interpreting the FrameRange object.
|
||||
// Unlike the matrix version above, this supports iteration indices other than time.
|
||||
// TODO: This ^^. Still missing is a field to identify the index.
|
||||
// TODO: This ^^. FrameRange still missing is a field to identify the index.
|
||||
// This function happily returns tensor bounds that are out of bounds, assuming caller will do the right thing.
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
template<class DimensionVector> // e.g. std::vector<size_t> or SmallVector<size_t>
|
||||
|
@ -787,6 +819,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
const MBLayoutPtr & pMBLayout/*the MB layout of 'data'*/)
|
||||
{
|
||||
std::pair<DimensionVector, DimensionVector> result;
|
||||
typedef decltype(result.first[0]) ElemType;
|
||||
|
||||
// this creates a slice for the entire matrix, which we will then narrow down
|
||||
result.first.resize(shape.size(), 0);
|
||||
|
@ -795,8 +828,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// get position of time and sequence index
|
||||
// These are only valid if we have a layout.
|
||||
// In the future, the 'timeDim' will be identified by the FrameRange.
|
||||
int iterDimParam = fr.GetIterationDimension();
|
||||
size_t iterDim = iterDimParam > 0 ? iterDimParam - 1/*regular dimensions are specified as 1-based*/ : shape.size() + iterDimParam/*-1 for time dimension*/;
|
||||
size_t sequenceDim = shape.size() - 2; // TODO: In case of multiple time dims, this must be adjusted.
|
||||
size_t timeDim = sequenceDim + 1; // TODO: Get this from the FrameRange object.
|
||||
|
||||
// MBLayout of data and of FrameRange must be identical pointers,
|
||||
// or in case of broadcasting, respective parent pointers.
|
||||
|
@ -819,28 +853,33 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// but as a reference (e.g. it cannot be resized)
|
||||
else if (!pMBLayout || fr.IsAllFrames())
|
||||
{
|
||||
if (fr.m_timeOffset != 0) // entire minibatch with non-zero offset exceeds bounds on at least one side
|
||||
LogicError("DataFor: Iteration offset must not be specified for FrameRanges that reference the entire minibatch.");
|
||||
// TODO: Can we allow this? Semantics would be different, it would crop frames outside.
|
||||
if (fr.m_timeOffset)
|
||||
{
|
||||
if (iterDim >= result.first.size())
|
||||
LogicError("DataFor: Time offset cannot be applied to tensors that have no time dimension.");
|
||||
result.first[iterDim] += (ElemType)fr.m_timeOffset; // Note: If we have an offset, this is guaranteed to yield a slice that is out of bounds.
|
||||
result.second[iterDim] += (ElemType)fr.m_timeOffset;
|
||||
if (result.first[iterDim] > result.second[iterDim])
|
||||
LogicError("DataFor: Numeric wraparound. You used a size_t vector where an int vector would be needed.");
|
||||
}
|
||||
}
|
||||
// FrameRange refers to a time slice -> return that
|
||||
else if (result.second[timeDim] > 1) // (if time dim is broadcasting then always return that one independent of requested index)
|
||||
else if (result.second[iterDim] > 1) // (if time dim is broadcasting then always return that one independent of requested index)
|
||||
{
|
||||
size_t t = fr.timeIdxInSeq + fr.m_timeOffset;
|
||||
if (t >= result.second[timeDim])
|
||||
LogicError("DataFor: FrameRange specifies an iteration index that is out of range.");
|
||||
result.first[timeDim] = t;
|
||||
result.second[timeDim] = t + 1;
|
||||
size_t ts = fr.timeIdxInSeq + fr.m_timeOffset;
|
||||
size_t te = ts + fr.m_timeRange;
|
||||
result.first[iterDim] = (ElemType)ts;
|
||||
result.second[iterDim] = (ElemType)te;
|
||||
}
|
||||
|
||||
|
||||
// sequence index
|
||||
if (fr.seqIndex != SIZE_MAX/*sequence requested*/ && pMBLayout/*have sequences*/ && result.second[sequenceDim] > 1/*>1 sequence (not broadcasting)*/)
|
||||
{
|
||||
size_t s = fr.seqIndex;
|
||||
if (s >= result.second[sequenceDim])
|
||||
LogicError("DataFor: FrameRange specifies a paralllel-sequence index that is out of range.");
|
||||
result.first[sequenceDim] = s;
|
||||
result.second[sequenceDim] = s + 1;
|
||||
result.first[sequenceDim] = (ElemType)s;
|
||||
result.second[sequenceDim] = (ElemType)s + 1;
|
||||
}
|
||||
|
||||
return result;
|
||||
|
|
|
@ -104,7 +104,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
void resize(size_t sz, const T & val) { if (sz < m_size) m_size = sz; else while (m_size < sz) push_back(val); }
|
||||
void assign(size_t sz, const T & val) { clear(); resize(sz, val); }
|
||||
template<class ITER>
|
||||
void append(ITER beg, const ITER & end) { while (beg != end) push_back(*beg++); }
|
||||
void append(ITER beg, const ITER & end) { while (beg != end) push_back((T)*beg++); } // typecast allows signed/unsigned conversions
|
||||
template<class ITER>
|
||||
void assign(ITER beg, const ITER & end) { clear(); append(beg,end); }
|
||||
void operator=(const SmallVector & other) { m_size = other.m_size; memcpy(m_data, other.m_data, other.m_size * sizeof(T)); }
|
||||
|
@ -180,8 +180,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// boilerplate
|
||||
bool operator==(const TensorShape & other) const { return m_dims == other.m_dims; }
|
||||
|
||||
void Invalidate() { m_dims.assign(3, SIZE_MAX); } // TODO: clean up the valid/invalid situation (this is currently done inconsistently). Also this object is immutable.
|
||||
|
||||
// verify that this refers to a dense matrix (no strides)
|
||||
void VerifyIsDense() const
|
||||
{
|
||||
|
@ -374,7 +372,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (size() != bounds.first.size() || size() != bounds.second.size())
|
||||
LogicError("NarrowedTo: Bounds parameter must have same rank as tensor.");
|
||||
for (size_t k = 0; k < size(); k++)
|
||||
if (bounds.second[k] <= bounds.first[k] || bounds.second[k] > m_dims[k])
|
||||
if (bounds.second[k] <= bounds.first[k] || (size_t)bounds.second[k] > m_dims[k])
|
||||
LogicError("NarrowedTo: Invalid bounds parameter, dimensions must be at least one.");
|
||||
for (size_t k = 0; k < size(); k++)
|
||||
{
|
||||
|
|
|
@ -51,6 +51,7 @@ enum mbrclassdefinition // used to identify definition of class in minimum b
|
|||
// ===========================================================================
|
||||
class lattice
|
||||
{
|
||||
mutable int verbosity;
|
||||
struct header_v1_v2
|
||||
{
|
||||
size_t numnodes : 32;
|
||||
|
@ -567,11 +568,13 @@ private:
|
|||
std::vector<size_t> backptroffsets; // TODO: we could change this to 'unsigned int' to save some transfer time
|
||||
std::vector<unsigned short> backptrstorage; // CPU-side versions use this as the traceback buffer; CUDA code has its CUDA-side buffer
|
||||
size_t numofstates; // per sil hmm
|
||||
int verbosity;
|
||||
public:
|
||||
backpointers (const lattice & L, const msra::asr::simplesenonehmm & hset) : numofstates(0)
|
||||
backpointers (const lattice & L, const msra::asr::simplesenonehmm & hset, int verbosity=0) : numofstates(0)
|
||||
{
|
||||
size_t edgeswithsilence = 0; // (diagnostics only: number of edges with at least one /sil/)
|
||||
size_t backptrbufsize = 0; // number of entries in buffer for silence backpointer array, used as cursor as we build it
|
||||
|
||||
backptroffsets.resize (L.edges.size() + 1); // +1, so that the final entry determines the overall size of the allocated buffer
|
||||
const size_t silUnitId = hset.gethmmid ("sil");
|
||||
numofstates = hset.gethmm (silUnitId).getnumstates();
|
||||
|
@ -595,15 +598,18 @@ private:
|
|||
#if 1 // multiple /sil/ -> log this (as we are not sure whether this is actually proper--probably it is)
|
||||
if (numsilunits > 1)
|
||||
{
|
||||
fprintf (stderr, "backpointers: lattice '%S', edge %d has %d /sil/ phonemes\n", L.getkey(), j, (int)numsilunits);
|
||||
fprintf (stderr, "alignments: :");
|
||||
foreach_index (a, aligntokens)
|
||||
if (verbosity)
|
||||
{
|
||||
const auto & unit = aligntokens[a];
|
||||
const auto & hmm = hset.gethmm (unit.unit);
|
||||
fprintf (stderr, "%s,%.2f:", hmm.getname(), unit.frames / 100.0f);
|
||||
fprintf(stderr, "backpointers: lattice '%S', edge %d has %d /sil/ phonemes\n", L.getkey(), j, (int)numsilunits);
|
||||
fprintf(stderr, "alignments: :");
|
||||
foreach_index(a, aligntokens)
|
||||
{
|
||||
const auto & unit = aligntokens[a];
|
||||
const auto & hmm = hset.gethmm(unit.unit);
|
||||
fprintf(stderr, "%s,%.2f:", hmm.getname(), unit.frames / 100.0f);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
fprintf (stderr, "\n");
|
||||
}
|
||||
#endif
|
||||
if (numsilunits > 0)
|
||||
|
@ -611,7 +617,8 @@ private:
|
|||
backptrbufsize += maxsilframes * numofstates;
|
||||
}
|
||||
backptroffsets[L.edges.size()] = backptrbufsize; // (TODO: remove if not actually needed)
|
||||
fprintf (stderr, "backpointers: %.1f%% edges have at least one /sil/ unit inside\n", 100.0f * ((float) edgeswithsilence / L.edges.size()));
|
||||
if (verbosity)
|
||||
fprintf (stderr, "backpointers: %.1f%% edges have at least one /sil/ unit inside\n", 100.0f * ((float) edgeswithsilence / L.edges.size()));
|
||||
}
|
||||
// CUDA support
|
||||
const std::vector<size_t> & getbackptroffsets() const { return backptroffsets; }
|
||||
|
@ -1002,6 +1009,10 @@ public:
|
|||
|
||||
std::wstring key; // (keep our own name (key) so we can identify ourselves for diagnostics messages)
|
||||
const wchar_t * getkey() const { return key.c_str(); }
|
||||
|
||||
// Set the verbosity level stored in this object to 'veb'.
// NOTE(review): the method is const yet assigns to 'verbosity'; this relies on 'verbosity' being declared mutable in the enclosing class -- confirm.
void setverbosity(int veb) const{
|
||||
verbosity = veb;
|
||||
}
|
||||
};
|
||||
|
||||
// ===========================================================================
|
||||
|
@ -1016,6 +1027,8 @@ class archive
|
|||
// set of lattice archive files referenced
|
||||
// Note that .toc files can be concatenated, i.e. one .toc file can reference multiple archive files.
|
||||
std::vector<std::wstring> archivepaths; // [archiveindex] -> archive path
|
||||
std::wstring prefixPathInToc; // prefix path in a toc; using this to avoid pushd some path before start training
|
||||
mutable int verbosity;
|
||||
size_t getarchiveindex (const std::wstring & path) // get index of a path in archivepaths[]; create new entry if needed
|
||||
{
|
||||
auto iter = std::find (archivepaths.begin(), archivepaths.end(), path);
|
||||
|
@ -1042,7 +1055,8 @@ class archive
|
|||
{ // need to read the map and establish the mapping
|
||||
// get the symlist file
|
||||
const std::wstring symlistpath = archivepaths[archiveindex] + L".symlist";
|
||||
fprintf (stderr, "getcachedidmap: reading '%S'\n", symlistpath.c_str());
|
||||
if (verbosity>0)
|
||||
fprintf (stderr, "getcachedidmap: reading '%S'\n", symlistpath.c_str());
|
||||
std::vector<char> textbuffer;
|
||||
auto lines = msra::files::fgetfilelines (symlistpath, textbuffer);
|
||||
// establish mapping of each entry to the corresponding id in 'symmap'; this should fail if the symbol is not found
|
||||
|
@ -1092,19 +1106,25 @@ class archive
|
|||
public:
|
||||
// construct = open the archive
|
||||
//archive() : currentarchiveindex (SIZE_MAX) {}
|
||||
|
||||
// Set the verbosity level stored in this object to 'veb'.
// The method is const yet assigns to 'verbosity', which works because the member is declared 'mutable int verbosity'.
void setverbosity(int veb) const
|
||||
{
|
||||
verbosity = veb;
|
||||
}
|
||||
// test if this object is loaded with anything (if not, an empty set of TOC paths was passed--meaning disable lattice mode)
|
||||
bool empty() const { return archivepaths.empty(); }
|
||||
|
||||
// construct from a list of TOC files
|
||||
archive (const std::vector<std::wstring> & tocpaths, const std::unordered_map<std::string,size_t> & modelsymmap) : currentarchiveindex (SIZE_MAX), modelsymmap (modelsymmap)
|
||||
archive (const std::vector<std::wstring> & tocpaths, const std::unordered_map<std::string,size_t> & modelsymmap, const std::wstring prefixPath=L"")
|
||||
: currentarchiveindex(SIZE_MAX), modelsymmap(modelsymmap), prefixPathInToc(prefixPath), verbosity(0)
|
||||
{
|
||||
if (tocpaths.empty()) // nothing to read--keep silent
|
||||
return;
|
||||
fprintf (stderr, "archive: opening %d lattice-archive TOC files ('%S' etc.)..", (int)tocpaths.size(), tocpaths[0].c_str());
|
||||
size_t onepercentage = tocpaths.size() / 100 ? tocpaths.size()/100 : 1;
|
||||
foreach_index (i, tocpaths)
|
||||
{
|
||||
fprintf (stderr, ".");
|
||||
if ( (i % onepercentage) == 0)
|
||||
fprintf (stderr, ".");
|
||||
open (tocpaths[i]);
|
||||
}
|
||||
fprintf (stderr, " %d total lattices referenced in %d archive files\n", (int)toc.size(), (int)archivepaths.size());
|
||||
|
@ -1135,7 +1155,11 @@ public:
|
|||
RuntimeError("open: invalid TOC line (no [): %s", line);
|
||||
if (q != p)
|
||||
{
|
||||
const std::wstring archivepath = msra::strfun::utf16 (std::string (p, q - p));
|
||||
std::wstring archivepath = msra::strfun::utf16 (std::string (p, q - p));
|
||||
if (!prefixPathInToc.empty())
|
||||
{
|
||||
archivepath = prefixPathInToc + L"/" + archivepath;
|
||||
}
|
||||
// TODO: should we allow paths relative to TOC file?
|
||||
archiveindex = getarchiveindex (archivepath);
|
||||
}
|
||||
|
@ -1207,6 +1231,7 @@ public:
|
|||
fsetpos (f, offset);
|
||||
// get it
|
||||
L.fread (f, idmap, spunit);
|
||||
L.setverbosity(verbosity);
|
||||
#ifdef HACK_IN_SILENCE // hack to simulate DEL in the lattice
|
||||
const size_t silunit = getid (modelsymmap, "sil");
|
||||
const bool addsp = true;
|
||||
|
|
|
@ -23,10 +23,11 @@ public:
|
|||
class latticesource
|
||||
{
|
||||
const msra::lattices::archive numlattices, denlattices;
|
||||
int verbosity;
|
||||
public:
|
||||
typedef msra::dbn::latticepair latticepair;
|
||||
latticesource (std::pair<std::vector<std::wstring>,std::vector<std::wstring>> latticetocs, const std::unordered_map<std::string,size_t> & modelsymmap)
|
||||
: numlattices (latticetocs.first, modelsymmap), denlattices (latticetocs.second, modelsymmap) {}
|
||||
latticesource (std::pair<std::vector<std::wstring>,std::vector<std::wstring>> latticetocs, const std::unordered_map<std::string,size_t> & modelsymmap, std::wstring RootPathInToc)
|
||||
: numlattices (latticetocs.first, modelsymmap, RootPathInToc), denlattices (latticetocs.second, modelsymmap, RootPathInToc), verbosity(0) {}
|
||||
|
||||
bool empty() const
|
||||
{
|
||||
|
@ -52,6 +53,12 @@ public:
|
|||
denlattices.getlattice (key, LP->second, expectedframes); // this loads the lattice from disk, using the existing L.second object
|
||||
L = LP;
|
||||
}
|
||||
|
||||
// Set the verbosity level of this lattice source to 'veb' and forward the same value
// to both lattice archives (numlattices and denlattices).
// Both archives are const members, so this depends on their setverbosity() being a const method.
void setverbosity(int veb)
|
||||
{
|
||||
verbosity = veb;
|
||||
numlattices.setverbosity(veb); denlattices.setverbosity(veb);
|
||||
}
|
||||
};
|
||||
|
||||
}}
|
|
@ -296,6 +296,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
CreateMatrixIfNull(m_value);
|
||||
m_value->SetValue(value);
|
||||
m_hasComputed = true;
|
||||
SetDims(TensorShape(value.GetNumRows()), value.GetNumCols());
|
||||
}
|
||||
public:
|
||||
bool m_hasComputed;
|
||||
|
|
|
@ -62,6 +62,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// break cycles
|
||||
// BUGBUG: This only works if nodes are not shared across networks.
|
||||
// Once we allow that (BrainScript editing), we need proper cycle detectors. Luckily, we know our cycles, so it won't be too hard.
|
||||
// Or just use weak ptrs.
|
||||
for (auto & iter : m_nameToNodeMap)
|
||||
iter.second->DetachInputs();
|
||||
|
||||
|
@ -74,8 +75,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// serialization
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// Save a network after editing--the network is possibly not validated/compiled.
// Calls CompileNetwork() first if IsCompiled() returns false, then delegates to Save() with the same arguments.
// Unlike Save(), this method is non-const (it may call CompileNetwork()).
|
||||
void ComputationNetwork::SaveEdited(const wstring& fileName, const FileOptions fileFormat)
|
||||
{
|
||||
if (!IsCompiled())
|
||||
CompileNetwork();
|
||||
Save(fileName, fileFormat);
|
||||
}
|
||||
|
||||
void ComputationNetwork::Save(const wstring& fileName, const FileOptions fileFormat) const
|
||||
{
|
||||
VerifyIsCompiled("Save");
|
||||
// In case of parallel training only the main node should be saving the model to prevent
|
||||
// the parallel training nodes from colliding to write the same file
|
||||
// TODO: This does not belong here.
|
||||
|
@ -182,7 +192,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// load the section of nodes that contain persistable parameters
|
||||
// This is used for reloading a model without recreating it, e.g. during training.
|
||||
// TODO: Why not just reload it? Because SGD::Train() holds pointers to the parameters directly? That should be fixed.
|
||||
template<class ElemType> void ComputationNetwork::LoadPersistableParameters(File & fstream, bool create)
|
||||
template<class ElemType> void ComputationNetwork::ReadPersistableParameters(File & fstream, bool create)
|
||||
{
|
||||
fstream.GetMarker(FileMarker::fileMarkerBeginSection, L"BCN");
|
||||
|
||||
|
@ -221,47 +231,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ENodeList");
|
||||
}
|
||||
|
||||
template<class ElemType> void ComputationNetwork::Load(const wstring& fileName, const FileOptions fileFormat, const bool /*bAllowNoCriterionNode --unused*/, ComputationNetwork* anotherNetwork)
|
||||
// deserialize the model
|
||||
// This does not post-process the model (CompileNetwork()). Use Load() instead.
|
||||
template<class ElemType> void ComputationNetwork::Read(const wstring& fileName, const FileOptions fileFormat, const bool /*bAllowNoCriterionNode --unused*/, ComputationNetwork* anotherNetwork)
|
||||
{
|
||||
ClearNetwork();
|
||||
|
||||
File fstream(fileName, fileFormat | FileOptions::fileOptionsRead);
|
||||
|
||||
#if 1
|
||||
LoadPersistableParameters<ElemType>(fstream, true);
|
||||
#else
|
||||
fstream.GetMarker(FileMarker::fileMarkerBeginSection, L"BCN");
|
||||
|
||||
// model version
|
||||
size_t modelVersion = CNTK_MODEL_VERSION_1; //if version info is not there it is version 1
|
||||
if (fstream.TryGetMarker(FileMarker::fileMarkerBeginSection, L"BVersion"))
|
||||
{
|
||||
fstream >> modelVersion;
|
||||
fstream.GetMarker(FileMarker::fileMarkerEndSection, L"EVersion");
|
||||
}
|
||||
|
||||
size_t numNodes;
|
||||
fstream >> numNodes;
|
||||
|
||||
// get all node info first
|
||||
fstream.GetMarker(FileMarker::fileMarkerBeginSection, L"BNodeList");
|
||||
for (size_t i = 0; i < numNodes; i++)
|
||||
{
|
||||
wstring opName, nodeName;
|
||||
fstream >> opName >> nodeName;
|
||||
|
||||
auto newNode = ComputationNetworkBuilder<ElemType>::NewNode(opName, m_deviceId, nodeName);
|
||||
|
||||
if (!newNode)
|
||||
{
|
||||
fprintf(stderr, "Unknown ComputationNode type %ls (node name %ls)\n", opName.c_str(), nodeName.c_str());
|
||||
InvalidArgument("Invalid node type.");
|
||||
}
|
||||
newNode->Load(fstream, modelVersion);
|
||||
AddNodeToNet(newNode);
|
||||
}
|
||||
fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ENodeList");
|
||||
#endif
|
||||
ReadPersistableParameters<ElemType>(fstream, true);
|
||||
|
||||
size_t numNodes = m_nameToNodeMap.size();
|
||||
|
||||
|
@ -277,9 +255,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
vector<wstring> childrenNames;
|
||||
childrenNames.resize(numChildren);
|
||||
for (size_t j = 0; j < numChildren; j++)
|
||||
{
|
||||
fstream >> childrenNames[j];
|
||||
}
|
||||
|
||||
// TODO: how does the file distinguish float from double?
|
||||
ComputationNodeBasePtr nodePtr = GetNodeFromName(nodeName);
|
||||
|
@ -288,42 +264,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
for (int j = 0; j < numChildren; j++)
|
||||
childrenNodes[j] = GetNodeFromName(childrenNames[j], anotherNetwork);
|
||||
|
||||
//if (nodePtr->OperationName() == OperationNameOf(RowStackNode))
|
||||
//{
|
||||
// allow for variable input nodes
|
||||
nodePtr->AttachInputs(childrenNodes);
|
||||
//}
|
||||
//else
|
||||
//{
|
||||
// // fixed input nodes
|
||||
// // TODO: Use the variable-length AttachInputs() as well. This is a refactoring left-over.
|
||||
// switch (numChildren)
|
||||
// {
|
||||
// case 1:
|
||||
// nodePtr->AttachInputs(childrenNodes[0]);
|
||||
// break;
|
||||
// case 2:
|
||||
// nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1]);
|
||||
// break;
|
||||
// case 3:
|
||||
// nodePtr->AttachInputs(childrenNodes[0],childrenNodes[1], childrenNodes[2]);
|
||||
// break;
|
||||
// case 4:
|
||||
// nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1], childrenNodes[2], childrenNodes[3]);
|
||||
// break;
|
||||
// case 5:
|
||||
// nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1], childrenNodes[2], childrenNodes[3], childrenNodes[4]);
|
||||
// break;
|
||||
// case 6:
|
||||
// nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1], childrenNodes[2], childrenNodes[3], childrenNodes[4], childrenNodes[5]);
|
||||
// break;
|
||||
// default:
|
||||
// LogicError("Invalid number of children.");
|
||||
// }
|
||||
//}
|
||||
nodePtr->AttachInputs(childrenNodes);
|
||||
}
|
||||
}
|
||||
|
||||
fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ERelation");
|
||||
|
||||
fstream.GetMarker(FileMarker::fileMarkerBeginSection, L"BRootNodes");
|
||||
|
@ -340,7 +283,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
fstream >> nodeName;
|
||||
m_features.push_back(GetNodeFromName(nodeName));
|
||||
}
|
||||
|
||||
fstream.GetMarker(FileMarker::fileMarkerEndSection, L"EFeatureNodes");
|
||||
}
|
||||
|
||||
|
@ -353,7 +295,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
m_labels.push_back(GetNodeFromName(nodeName));
|
||||
}
|
||||
}
|
||||
|
||||
// BUGBUG: Should this be inside the block?
|
||||
fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ELabelNodes");
|
||||
|
||||
if (fstream.TryGetMarker(FileMarker::fileMarkerBeginSection, L"BCriterionNodes") ||
|
||||
|
@ -372,13 +314,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
}
|
||||
|
||||
// TODO: this section is defunct
|
||||
// TODO: this section is defunct, skip over
|
||||
if (fstream.TryGetMarker(FileMarker::fileMarkerBeginSection, L"BNodesReqMultiSeqHandling"))
|
||||
{
|
||||
fprintf(stderr, "WARNING: Ignoring defunct 'BNodesReqMultiSeqHandling' section in input file.\n");
|
||||
fstream >> num;
|
||||
for (size_t i = 0; i < num; i++)
|
||||
fstream >> nodeName;
|
||||
fstream >> nodeName; // dummy
|
||||
fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ENodesReqMultiSeqHandling");
|
||||
}
|
||||
|
||||
|
@ -415,13 +357,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
fstream.GetMarker(FileMarker::fileMarkerEndSection, L"EPairNodes");
|
||||
}
|
||||
}
|
||||
|
||||
fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ERootNodes");
|
||||
|
||||
fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ECN");
|
||||
|
||||
// perform all further post-processing, caching, etc.
|
||||
CompileNetwork();
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
|
@ -622,9 +560,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
//set sequence training parameters, e.g. smoothing weight, frame drop threshold
|
||||
template<class ElemType>
|
||||
void ComputationNetwork::SetSeqParam(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign)
|
||||
void ComputationNetwork::SetSeqParam(ComputationNetworkPtr net,
|
||||
const ComputationNodeBasePtr criterionNode,
|
||||
const double& hsmoothingWeight,
|
||||
const double& frameDropThresh,
|
||||
const bool& doreferencealign,
|
||||
const double& amf /*= 14.0f*/,
|
||||
const double& lmf /*= 14.0f*/,
|
||||
const double& wp /*= 0.0f*/,
|
||||
const double& bMMIfactor /*= 0.0f*/,
|
||||
const bool& sMBR /*= false*/
|
||||
)
|
||||
{
|
||||
fprintf(stderr, "Setting Hsmoothing weight to %.8g and frame-dropping threshhold to %.8g\n", hsmoothingWeight, frameDropThresh);
|
||||
fprintf(stderr, "Setting SeqGammar-related parameters: amf=%.2f, lmf=%.2f, wp=%.2f, bMMIFactor=%.2f, usesMBR=%s\n",
|
||||
amf, lmf, wp, bMMIfactor, sMBR ? "true" : "false");
|
||||
list<ComputationNodeBasePtr> seqNodes = net->GetNodesWithType(OperationNameOf(SequenceWithSoftmaxNode), criterionNode);
|
||||
if (seqNodes.size() == 0)
|
||||
{
|
||||
|
@ -638,6 +588,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
node->SetSmoothWeight(hsmoothingWeight);
|
||||
node->SetFrameDropThresh(frameDropThresh);
|
||||
node->SetReferenceAlign(doreferencealign);
|
||||
node->SetGammarCalculationParam(amf, lmf, wp, bMMIfactor, sMBR);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1114,18 +1065,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
template void ComputationNetwork::InitLearnableParameters<float>(const ComputationNodeBasePtr& node, const bool uniformInit, const unsigned long randomSeed, const float initValueScale, bool initOnCPUOnly);
|
||||
template void ComputationNetwork::Load<float>(const wstring& fileName, const FileOptions fileFormat, const bool bAllowNoCriterionNode, ComputationNetwork* anotherNetwork);
|
||||
template void ComputationNetwork::LoadPersistableParameters<float>(File & fstream, bool create);
|
||||
template void ComputationNetwork::Read<float>(const wstring& fileName, const FileOptions fileFormat, const bool bAllowNoCriterionNode, ComputationNetwork* anotherNetwork);
|
||||
template void ComputationNetwork::ReadPersistableParameters<float>(File & fstream, bool create);
|
||||
template void ComputationNetwork::PerformSVDecomposition<float>(const map<wstring, float>& SVDConfig, size_t alignedsize);
|
||||
template /*static*/void ComputationNetwork::SetDropoutRate<float>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed);
|
||||
template void ComputationNetwork::SetSeqParam<float>(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign);
|
||||
template void ComputationNetwork::SetSeqParam<float>(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, const double& hsmoothingWeight, const double& frameDropThresh, const bool& doreferencealign,
|
||||
const double& amf, const double& lmf, const double& wp, const double& bMMIfactor, const bool& sMBR);
|
||||
|
||||
template void ComputationNetwork::InitLearnableParameters<double>(const ComputationNodeBasePtr& node, const bool uniformInit, const unsigned long randomSeed, const double initValueScale, bool initOnCPUOnly);
|
||||
template void ComputationNetwork::Load<double>(const wstring& fileName, const FileOptions fileFormat, const bool bAllowNoCriterionNode, ComputationNetwork* anotherNetwork);
|
||||
template void ComputationNetwork::LoadPersistableParameters<double>(File & fstream, bool create);
|
||||
template void ComputationNetwork::Read<double>(const wstring& fileName, const FileOptions fileFormat, const bool bAllowNoCriterionNode, ComputationNetwork* anotherNetwork);
|
||||
template void ComputationNetwork::ReadPersistableParameters<double>(File & fstream, bool create);
|
||||
template void ComputationNetwork::PerformSVDecomposition<double>(const map<wstring, float>& SVDConfig, size_t alignedsize);
|
||||
template /*static*/void ComputationNetwork::SetDropoutRate<double>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed);
|
||||
template void ComputationNetwork::SetSeqParam<double>(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign);
|
||||
template void ComputationNetwork::SetSeqParam<double>(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, const double& hsmoothingWeight, const double& frameDropThresh, const bool& doreferencealign,
|
||||
const double& amf, const double& lmf, const double& wp, const double& bMMIfactor, const bool& sMBR);
|
||||
|
||||
// register ComputationNetwork with the ScriptableObject system
|
||||
ScriptableObjects::ConfigurableRuntimeTypeRegister::Add<ComputationNetwork> registerComputationNetwork(L"ComputationNetwork");
|
||||
|
|
|
@ -78,24 +78,33 @@ public:
|
|||
// -----------------------------------------------------------------------
|
||||
|
||||
void Save(const std::wstring& fileName, const FileOptions fileFormat = FileOptions::fileOptionsBinary) const;
|
||||
void SaveEdited(const std::wstring& fileName, const FileOptions fileFormat = FileOptions::fileOptionsBinary);
|
||||
private:
|
||||
void SaveToFileImpl(const std::wstring& fileName, const FileOptions fileFormat) const;
|
||||
public:
|
||||
|
||||
template<class ElemType>
|
||||
void LoadPersistableParameters(File & fstream, bool create);
|
||||
void ReadPersistableParameters(File & fstream, bool create);
|
||||
// reload node content only, e.g. used by SGD::Train() when going back to an older model that had better training objective
|
||||
template<class ElemType>
|
||||
void ReloadPersistableParameters(const std::wstring& fileName)
|
||||
void RereadPersistableParameters(const std::wstring& fileName)
|
||||
{
|
||||
File fstream(fileName, FileOptions::fileOptionsBinary | FileOptions::fileOptionsRead);
|
||||
LoadPersistableParameters<ElemType>(fstream, false);
|
||||
ReadPersistableParameters<ElemType>(fstream, false);
|
||||
}
|
||||
// design BUGBUG: binary files do not know whether they are float or double.
|
||||
// TODO: modify file format to know this; then eliminate the <ElemType> dependency (and in some future, allow nodes to be different)
|
||||
template<class ElemType>
|
||||
void Read(const std::wstring& fileName, const FileOptions fileFormat = FileOptions::fileOptionsBinary,
|
||||
const bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr);
|
||||
template<class ElemType>
|
||||
void Load(const std::wstring& fileName, const FileOptions fileFormat = FileOptions::fileOptionsBinary,
|
||||
const bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr);
|
||||
const bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr)
|
||||
{
|
||||
Read<ElemType>(fileName, fileFormat, bAllowNoCriterionNode, anotherNetwork);
|
||||
// perform all further post-processing, caching, etc.
|
||||
CompileNetwork();
|
||||
}
|
||||
|
||||
// static helper to instantiate a network from a file
|
||||
template<class ElemType>
|
||||
|
@ -159,9 +168,11 @@ public:
|
|||
private:
|
||||
void ValidateNodes(list<ComputationNodeBasePtr> nodes, bool isFinalValidationPass, size_t & todo);
|
||||
void ValidateSubNetwork(const ComputationNodeBasePtr& rootNode);
|
||||
void MarkValueNonSharableNodes();
|
||||
private:
|
||||
void DetermineSetOfAllRoots();
|
||||
void CollectInputAndLearnableParameters(const ComputationNodeBasePtr& rootNode);
|
||||
bool IsCompiled() const { return m_isCompiled; }
|
||||
void VerifyIsCompiled(const char * where) const;
|
||||
//bool BuiltAndValidatedSubNetwork(const ComputationNodeBasePtr & rootNode);
|
||||
public:
|
||||
|
@ -411,8 +422,20 @@ public:
|
|||
|
||||
template<class ElemType>
|
||||
static void SetDropoutRate(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed);
|
||||
|
||||
|
||||
|
||||
template<class ElemType>
|
||||
static void SetSeqParam(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign);
|
||||
static void SetSeqParam(ComputationNetworkPtr net,
|
||||
const ComputationNodeBasePtr criterionNode,
|
||||
const double& hsmoothingWeight,
|
||||
const double& frameDropThresh,
|
||||
const bool& doreferencealign,
|
||||
const double& amf=14.0f,
|
||||
const double& lmf=14.0f,
|
||||
const double& wp=0.0f,
|
||||
const double& bMMIfactor=0.0f,
|
||||
const bool& sMBR=false);
|
||||
static void SetMaxTempMemSizeForCNN(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const size_t maxTempMemSizeInSamples);
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
|
|
|
@ -30,6 +30,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
std::wstring toName,
|
||||
const CopyNodeFlags flags)
|
||||
{
|
||||
InvalidateCompiledNetwork();
|
||||
|
||||
if (toName == L"")
|
||||
toName = fromName;
|
||||
|
||||
|
@ -50,11 +52,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
else
|
||||
{
|
||||
//node already exists
|
||||
|
||||
// node already exists
|
||||
pToNode = GetNodeFromName(toName);
|
||||
|
||||
//same node. no copy needed
|
||||
// same node. no copy needed
|
||||
if (pFromNode == pToNode)
|
||||
LogicError("CopyNode: You are copying the node to the same network with same node name.");
|
||||
else
|
||||
|
@ -69,6 +70,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
const std::wstring fromName, std::wstring toNamePrefix,
|
||||
const CopyNodeFlags flags)
|
||||
{
|
||||
InvalidateCompiledNetwork();
|
||||
|
||||
if (!(flags & CopyNodeFlags::copyNodeValue))
|
||||
LogicError("CopySubTree: you cannot copy a tree without copying the node values.");
|
||||
|
||||
|
@ -103,7 +106,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// nodeNameNew - new node name
|
||||
void ComputationNetwork::RenameNode(const std::wstring& nodeNameOrig, const std::wstring& nodeNameNew)
|
||||
{
|
||||
// so that renamed node will not be referenced
|
||||
InvalidateCompiledNetwork();
|
||||
|
||||
ComputationNodeBasePtr nodeToRename = GetNodeFromName(nodeNameOrig);
|
||||
|
@ -128,7 +130,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
void ComputationNetwork::DeleteNode(const std::wstring & nodeName)
|
||||
{
|
||||
// so that deleted node will not be referenced
|
||||
InvalidateCompiledNetwork();
|
||||
|
||||
ComputationNodeBasePtr nodeToDelete = GetNodeFromName(nodeName);
|
||||
|
@ -172,6 +173,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// need to update all the mappings as well as the children
|
||||
void ComputationNetwork::ChangeNode(wstring nodeName, ComputationNodeBasePtr newNode)
|
||||
{
|
||||
InvalidateCompiledNetwork();
|
||||
|
||||
ComputationNodeBasePtr oldNode = GetNodeFromName(nodeName);
|
||||
if (oldNode->OperationName() != newNode->OperationName())
|
||||
InvalidArgument("newNode must have the same type as the old node.");
|
||||
|
@ -204,6 +207,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// need to update those nodes who use oldNode as their child
|
||||
void ComputationNetwork::ReplaceLeafNode(wstring oldNodeName, ComputationNodeBasePtr newNode)
|
||||
{
|
||||
InvalidateCompiledNetwork();
|
||||
|
||||
ComputationNodeBasePtr oldNode = GetNodeFromName(oldNodeName);
|
||||
|
||||
// change the input of those nodes whose child is oldNode
|
||||
|
@ -223,6 +228,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
void ComputationNetwork::ReplaceFinalCriterionNode(wstring oldNodeName, ComputationNodeBasePtr newNode)
|
||||
{
|
||||
InvalidateCompiledNetwork();
|
||||
|
||||
// Checks if the node is a criterion node.
|
||||
int index = -1;
|
||||
for (int i = 0; i < m_finalCriteria.size(); ++i)
|
||||
|
@ -251,6 +258,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
void ComputationNetwork::AddFeatureNode(ComputationNodeBasePtr featureNode)
|
||||
{
|
||||
InvalidateCompiledNetwork();
|
||||
|
||||
wstring nodeName = featureNode->NodeName();
|
||||
if (NodeNameExists(nodeName))
|
||||
RuntimeError("AddFeatureNode: feature node already exists.");
|
||||
|
@ -261,12 +270,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// We only remove the node, not delete it.
|
||||
void ComputationNetwork::RemoveFeatureNode(ComputationNodeBasePtr featureNode)
|
||||
{
|
||||
InvalidateCompiledNetwork();
|
||||
|
||||
wstring nodeName = featureNode->NodeName();
|
||||
if (!NodeNameExists(nodeName))
|
||||
RuntimeError("RemoveFeatureNode: feature node does not exist.");
|
||||
|
||||
InvalidateCompiledNetwork();
|
||||
|
||||
// Removes links.
|
||||
for (auto nodeIter = m_nameToNodeMap.begin(); nodeIter != m_nameToNodeMap.end(); ++nodeIter)
|
||||
{
|
||||
|
|
|
@ -10,11 +10,13 @@
|
|||
#include "ComputationNode.h"
|
||||
#include "ComputationNetwork.h"
|
||||
#include "RecurrentNodes.h"
|
||||
#include "InputAndParamNodes.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <set>
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
@ -365,7 +367,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// verify that network has undergone CompileNetwork()
|
||||
void ComputationNetwork::VerifyIsCompiled(const char * where) const
|
||||
{
|
||||
if (!m_isCompiled)
|
||||
if (!IsCompiled())
|
||||
LogicError("%s: A compiled network was expected.", where);
|
||||
}
|
||||
|
||||
|
@ -712,6 +714,63 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// -----------------------------------------------------------------------
|
||||
// memory allocation
|
||||
// -----------------------------------------------------------------------
|
||||
// mark nodes that are purely induced by parameters as non-sharable and create space for value if null
|
||||
void ComputationNetwork::MarkValueNonSharableNodes()
|
||||
{
|
||||
const auto & nodes = GetEvalOrder(nullptr);
|
||||
std::map<wstring, bool> allLeafDescendentsAreParameters;
|
||||
std::list<ComputationNodeBasePtr> allLearnableParameters = GetNodesWithType(OperationNameOf(LearnableParameter));
|
||||
// Note: we cannot use m_learnableParameters because we need all parameter nodes, regardless of whether they require an update or not
|
||||
|
||||
for (auto& node : nodes)
|
||||
{
|
||||
auto children = node->GetInputs();
|
||||
wstring myname = node->NodeName();
|
||||
bool allParameters = true;
|
||||
|
||||
if (children.size()) // we don't do the check for leaf node, cause all the possible leaf nodes (input/parameters/precompute node) are marked as non-sharable already
|
||||
{
|
||||
for (auto child : children)
|
||||
{
|
||||
wstring ChildName = child->NodeName();
|
||||
if (allLeafDescendentsAreParameters.find(ChildName) == allLeafDescendentsAreParameters.end())
|
||||
{
|
||||
// not found: since we iterate in eval order, this child is expected to be a leaf node (or part of a loop)
|
||||
assert(child->IsLeaf() || child->IsPartOfLoop());
|
||||
if (std::find(allLearnableParameters.begin(), allLearnableParameters.end(), child)!= allLearnableParameters.end())
|
||||
{
|
||||
allLeafDescendentsAreParameters[ChildName] = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
allParameters = false;
|
||||
allLeafDescendentsAreParameters[ChildName] = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (allLeafDescendentsAreParameters[ChildName] == false)
|
||||
{
|
||||
allParameters = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
allLeafDescendentsAreParameters[myname] = allParameters;
|
||||
if (allParameters)
|
||||
{
|
||||
node->MarkValueNonSharable();
|
||||
}
|
||||
else
|
||||
{
|
||||
node->MarkValueSharable();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
// this function will need to be called before actual validation and execution to
|
||||
// predetermine how to share matrices to reduce memory usage.
|
||||
|
@ -726,9 +785,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
VerifyIsCompiled("AllocateAllMatrices");
|
||||
|
||||
// Due to special topology, if a node is solely induced by parameters, its function value should not be shared
|
||||
MarkValueNonSharableNodes();
|
||||
|
||||
bool performingBackPropagation = (trainRootNode != nullptr);
|
||||
|
||||
// Create a composite Eval order with the specfied nodes as roots
|
||||
// Create a composite Eval order with the specified nodes as roots
|
||||
std::vector<ComputationNodeBasePtr> forwardPropRoots;
|
||||
forwardPropRoots.insert(forwardPropRoots.end(), evalRootNodes.begin(), evalRootNodes.end());
|
||||
forwardPropRoots.insert(forwardPropRoots.end(), outValueRootNodes.begin(), outValueRootNodes.end());
|
||||
|
|
|
@ -136,7 +136,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
typedef std::shared_ptr<INodeState> NodeStatePtr;
|
||||
virtual NodeStatePtr ExportState() = 0;
|
||||
virtual void ImportState(NodeStatePtr && state) = 0;
|
||||
virtual void ImportState(const NodeStatePtr & state) = 0;
|
||||
};
|
||||
typedef IStatefulNode::NodeStatePtr NodeStatePtr;
|
||||
|
||||
|
@ -151,7 +151,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
friend class ComputationNetwork;
|
||||
|
||||
ComputationNetworkOwnedNodeState() :
|
||||
m_needsGradient(false)
|
||||
m_needsGradient(false), m_valueSharable(true)
|
||||
{
|
||||
PurgeStateForFormingRecurrentLoops();
|
||||
m_isPartOfLoop = false;
|
||||
|
@ -166,10 +166,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
bool IsPartOfLoop() const { return m_isPartOfLoop; }
|
||||
|
||||
virtual void MarkValueNonSharable(){ m_valueSharable = false; }
|
||||
virtual void MarkValueSharable() { m_valueSharable = true; }
|
||||
bool isValueSharable() const { return m_valueSharable; }
|
||||
|
||||
protected: // TODO: should be fully encapsulated here
|
||||
|
||||
bool m_needsGradient; // true if this node or any children need a gradient to be computed (for own consumption or propagation to somewhere in the child tree)
|
||||
|
||||
bool m_valueSharable; // a flag is needed for memory share.
|
||||
// If it is false (e.g., learnableParameters/InputValue and nodes that are solely induced by learnableParameters),
|
||||
// it will never be released to memory pool
|
||||
private:
|
||||
|
||||
bool m_isPartOfLoop; // true if this loop is part of a recurrent loop
|
||||
|
@ -250,7 +257,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
m_deviceId(deviceId), m_outputNeededDuringBackprop(true),
|
||||
m_parameterUpdateRequired(false), m_gradientInitialized(false),
|
||||
m_nodeName(name == L"" ? CreateUniqNodeName() : name),
|
||||
m_numRows(0), m_numCols(0)
|
||||
m_numRows(0), m_numCols(0)
|
||||
{ }
|
||||
virtual ~ComputationNodeBase(){}
|
||||
|
||||
|
@ -348,9 +355,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
const TensorShape & GetSampleLayout() const { return m_sampleLayout; }
|
||||
bool HasSampleLayout() const { return m_sampleLayout.GetRank() != 1; } // meaning does it have a layout that is not just a vector
|
||||
TensorShape GetTensorShape(size_t rank) const; // form the actual tensor that describes the full object
|
||||
protected:
|
||||
size_t DetermineElementwiseTensorRank() const; // determine tensor rank when considering all inputs with padding
|
||||
TensorShape GetTensorShape(size_t rank) const; // form the actual tensor that describes the full object
|
||||
TensorShape GetTensorSliceFor(size_t rank, const FrameRange & fr) const; // form tensor shape of the slice referenced by FrameRange
|
||||
public:
|
||||
// access to element(0,0) without having to type-cast
|
||||
|
@ -455,6 +462,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
LogicError("VerifyNumParallelSequences: value inconsistent with MB layout");
|
||||
}
|
||||
|
||||
|
||||
protected:
|
||||
public: // ...the following should be protected, but nodes inquire about their children, requiring public access
|
||||
|
||||
|
@ -537,7 +545,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
void SetOutputNeededDuringBackprop(bool f) { m_outputNeededDuringBackprop = f; }
|
||||
bool IsOutputNeededDuringBackprop() const
|
||||
{
|
||||
return !g_shareNodeValueMatrices || m_outputNeededDuringBackprop;
|
||||
return !g_shareNodeValueMatrices || m_outputNeededDuringBackprop ;
|
||||
}
|
||||
|
||||
const size_t GetNumInputs() const { return m_inputs.size(); }
|
||||
|
@ -769,6 +777,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
bool m_parameterUpdateRequired; // update parameters? Only used for LearnableParameters. --TODO: Should we make this a member of LearnableParameters actually? And require a type cast? Currently it is read out for all leaves.
|
||||
bool m_gradientInitialized; // indicates whether the gradient matrix has been resized and initialized to 0
|
||||
bool m_outputNeededDuringBackprop; // indicates whether the output value of the node is needed during backprop
|
||||
|
||||
};
|
||||
typedef ComputationNodeBase::ComputationNodeBasePtr ComputationNodeBasePtr;
|
||||
|
||||
|
@ -902,7 +911,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
//don't release matrices that need to be used in the gradient computation
|
||||
virtual void ReleaseMatricesAfterForwardProp(MatrixPool& matrixPool)
|
||||
{
|
||||
if (!IsOutputNeededDuringBackprop() && (m_value->GetMatrixType() != SPARSE))
|
||||
if (!IsOutputNeededDuringBackprop() && (m_value->GetMatrixType() != SPARSE) && isValueSharable())
|
||||
ReleaseMatrixToPool(m_value, matrixPool);
|
||||
}
|
||||
|
||||
|
@ -931,7 +940,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
// Release the Value matrix only if the output value is needed during backprop
|
||||
// since in the case it isn't used, we release it during forward prop itself
|
||||
if (IsOutputNeededDuringBackprop() && m_value->GetMatrixType() != SPARSE)
|
||||
if (IsOutputNeededDuringBackprop() && m_value->GetMatrixType() != SPARSE && isValueSharable())
|
||||
ReleaseMatrixToPool(m_value, matrixPool);
|
||||
}
|
||||
}
|
||||
|
@ -1317,6 +1326,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
CreateMatrixIfNull(m_gradient);
|
||||
}
|
||||
|
||||
void MarkValueNonSharable() override
|
||||
{
|
||||
m_valueSharable = false;
|
||||
CreateMatrixIfNull(m_value);
|
||||
}
|
||||
|
||||
|
||||
protected:
|
||||
|
||||
// this function is used to create matrices for those needed before matrix pool is available
|
||||
|
@ -1532,7 +1548,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
#define UsingComputationNodeMembers /*without OperationName; needed to support inconsistent pattern of InputValue--TODO: This comment it out of date. */ \
|
||||
protected: \
|
||||
typedef shared_ptr<ComputationNode<ElemType>> ComputationNodePtr; \
|
||||
using Base::m_deviceId; using Base::GetDeviceId; using Base::SetDims; using Base::SetDims1; using Base::SetNumCols; using Base::GetNumRows; using Base::GetNumCols; using Base::UpdateFunctionValuesSize; using Base::LoadValue; \
|
||||
using Base::m_deviceId; using Base::shared_from_this; using Base::GetDeviceId; using Base::SetDims; using Base::SetDims1; using Base::SetNumCols; \
|
||||
using Base::GetNumRows; using Base::GetNumCols; using Base::GetTensorShape; using Base::UpdateFunctionValuesSize; using Base::LoadValue; \
|
||||
using Base::m_pMBLayout; using Base::GetNumTimeSteps; using Base::GetNumParallelSequences; \
|
||||
using Base::MaskMissingColumnsToZero; using Base::MaskMissingValueColumnsToZero; using Base::MaskMissingGradientColumnsToZero; using Base::InvalidateMissingValueColumns; using Base::InvalidateMissingGradientColumns; \
|
||||
using Base::DataFor; using Base::ValueFor; using Base::Gradient; using Base::GradientFor; \
|
||||
|
|
|
@ -813,9 +813,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
void SetEvalMode(bool bnEvalMode)
|
||||
{
|
||||
m_eval = bnEvalMode;
|
||||
m_eval = bnEvalMode;
|
||||
}
|
||||
|
||||
private:
|
||||
struct VersionInfo
|
||||
{
|
||||
|
|
|
@ -41,6 +41,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
Base(deviceId, name)
|
||||
{
|
||||
m_parameterUpdateRequired = true;
|
||||
this->m_valueSharable = false;
|
||||
SetDims(TensorShape(), 0);
|
||||
}
|
||||
LearnableParameter(DEVICEID_TYPE deviceId, const wstring & name, const TensorShape & shape) :
|
||||
|
@ -48,6 +49,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
m_parameterUpdateRequired = true;
|
||||
CreateMatrixIfNull(m_value);
|
||||
this->m_valueSharable = false;
|
||||
// for now we split off the trailing dimension into the matrix column dimension
|
||||
// TODO: This is for compat, but it is inconsistent. Decide what a sample layout means for a node without MBLayout w.r.t. non-tensor ops.
|
||||
auto dims = shape.GetDims();
|
||||
|
@ -197,6 +199,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
PrintNodeValuesToFile(printValues, fstream);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#if 0
|
||||
|
@ -261,6 +264,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
SetDims(sampleLayout, 0);
|
||||
UpdateFunctionValuesSize(); // we must allocate the matrix so that the readers get objects with valid row dimensions (some readers expect that)
|
||||
m_parameterUpdateRequired = false;
|
||||
this->m_valueSharable = false;
|
||||
}
|
||||
protected:
|
||||
InputValueBase(DEVICEID_TYPE deviceId, const wstring & name, const TensorShape & sampleLayout, bool isSparse) :
|
||||
|
|
|
@ -44,7 +44,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & fr) override
|
||||
{
|
||||
static int c = 0; if (c++ == 0) { fprintf(stderr, "#NLop%d#\n", (int)opForward); }
|
||||
//static int c = 0; if (c++ == 0) { fprintf(stderr, "#NLop%d#\n", (int)opForward); }
|
||||
|
||||
size_t rank = DetermineElementwiseTensorRank();
|
||||
auto result = ValueTensorFor(rank, fr);
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "Matrix.h"
|
||||
#include "TensorShape.h"
|
||||
#include "ComputationNode.h"
|
||||
#include "Sequences.h"
|
||||
|
||||
#include <unordered_set>
|
||||
#include <map>
|
||||
|
@ -26,7 +27,7 @@
|
|||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// ShiftNode (input, fromOffset, boundaryValue, dim=-1, numSteps=1, insertDim=0) -- delay and rolling window
|
||||
// ShiftNode (input, fromOffset, boundaryValue, dim=-1) -- delay and rolling window
|
||||
//
|
||||
// This shifts the input by (-fromOffset) steps. In other words, output(t) will be input(t+fromOffset).
|
||||
// E.g. for fromOffset=-1, this gives the past value.
|
||||
|
@ -34,36 +35,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
//
|
||||
// This node can be used in a recurrent loop. This requires special handling by the ComputationNetwork,
|
||||
// for both execution (sequential execution) and creation (avoiding circular references).
|
||||
// TODO: When outside a recurrent loop and used with frame randomization, this will communicate to the reader
|
||||
// that additional frames are needed, which will then return a frame range. TODO: This will not match
|
||||
// the labels, which are still 1 frame. Think through which dimension this should go in.
|
||||
//
|
||||
// Values shifted in from beyond sequence boundaries will be copied from boundaryValue.
|
||||
// Normally, this is a scalar Constant(). However, it can be any node, which will be indexed from the end
|
||||
// (e.g. for fromOffset=-1, the last frame of boundaryValue will be used). This can implement
|
||||
// sequence-to-sequence models. Broadcasting is supported, so it can be e.g. a single output-dimension vector
|
||||
// (e.g. for fromOffset=-1, the last frame of boundaryValue will be used). This can implement the basic
|
||||
// sequence-to-sequence model. Broadcasting is supported, so it can be e.g. a single output-dimension vector
|
||||
// applied to all sequences.
|
||||
//
|
||||
// To delay (past value), use negative fromOffset. To access future value, use positive fromOffset.
|
||||
//
|
||||
// To pull in multiple offsets, use offsetRange>1. This will pull in offsetRange consecutive offsets starting
|
||||
// with fromOffset. This implements a rolling window. A new dimension will be inserted at multiOffsetDim
|
||||
// (default 0 means after the last sample dimension). Special considerations:
|
||||
// - If the boundaryValue is not wide enough, the sequence will be dropped (e.g. if you pull in 5 history frames,
|
||||
// but the sequence in boundaryValue only has 4 samples).
|
||||
// - If you feed back such an expanded output into this node in a loop, you get an inconsistency
|
||||
// and will eventually fail. You must pull the dimensions apart.
|
||||
// - If the current time step (offset 0) is included in the range (e.g. fromOffset=-1, offsetRange=3) then
|
||||
// this node cannot participate in a recurrence.
|
||||
//
|
||||
// By default, this shifts over the time dimension, but you can choose to shift over any
|
||||
// sample tensor dimension instead using 'dim' (-1 stands for time). This will only work, however,
|
||||
// when all involved nodes are implemented using the tensor library. Nodes implemented using
|
||||
// Matrix slices can only support iterating over time.
|
||||
//
|
||||
// If the boundaryValue has 0 elements, the sequence will be trimmed (frames reaching beyond the boundary
|
||||
// are dropped). This will initially not be implemented for the time dimension (as it would require
|
||||
// change of MBLayout).
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
template<class ElemType>
|
||||
|
@ -74,24 +58,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
public:
|
||||
enum BoundaryMode : int // how to fill frames at boundaries
|
||||
{
|
||||
reachAcross = -1, // go across the boundary: use boundaryValue. This is for recurrence.
|
||||
duplicate = 0, // duplicate frame at boundary, e.g. duplicate first frame. Non-recurrent mode only.
|
||||
trim = 1 // drop frames. Non-recurrent mode only.
|
||||
reachAcross = -1, // go across the boundary: use boundaryValue
|
||||
duplicate = 0 // duplicate frame at boundary, e.g. duplicate first frame. Non-recurrent mode only.
|
||||
};
|
||||
ShiftNode(DEVICEID_TYPE deviceId, const wstring & name, int fromOffset, BoundaryMode boundaryMode, int shiftDimension, size_t numSteps, int insertedDimParam) :
|
||||
Base(deviceId, name), m_fromOffset(fromOffset), m_numSteps(numSteps),
|
||||
ShiftNode(DEVICEID_TYPE deviceId, const wstring & name, int fromOffset, BoundaryMode boundaryMode, int shiftDimParam) :
|
||||
Base(deviceId, name), m_fromOffset(fromOffset),
|
||||
m_boundaryMode(boundaryMode),
|
||||
m_shiftDimension(shiftDimension), m_insertedDimParam(insertedDimParam),
|
||||
m_insertExpandShapeAt(SIZE_MAX/*uninitialized at this point*/)
|
||||
m_shiftDimParam(shiftDimParam),
|
||||
m_shiftDim(SIZE_MAX),
|
||||
m_state(deviceId)
|
||||
{
|
||||
CreateMatrixIfNull(m_value);
|
||||
SetDims(TensorShape(), 0); // empty for now
|
||||
}
|
||||
ShiftNode(DEVICEID_TYPE deviceId, const wstring & name) :
|
||||
ShiftNode(deviceId, name, 1, BoundaryMode::reachAcross, -1, 1, 0)
|
||||
ShiftNode(deviceId, name, 1, BoundaryMode::reachAcross, -1)
|
||||
{ }
|
||||
ShiftNode(const ScriptableObjects::IConfigRecordPtr configp) :
|
||||
ShiftNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"fromOffset"), (BoundaryMode)(int)configp->Get(L"boundaryMode"), configp->Get(L"dim"), configp->Get(L"numSteps"), configp->Get(L"insertedDim"))
|
||||
ShiftNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"fromOffset"), (BoundaryMode)(int)configp->Get(L"boundaryMode"), configp->Get(L"dim"))
|
||||
{
|
||||
// We do NOT attach the inputs, as we cannot resolve the main input without causing a circular reference.
|
||||
// Instead, we capture them in a lambda, which will be called by ComputationNetwork during the build process through LateAttachInputs() below.
|
||||
|
@ -111,19 +95,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
void Save(File& fstream) const
|
||||
{
|
||||
Base::Save(fstream);
|
||||
fstream << m_fromOffset << m_numSteps << m_boundaryMode << m_shiftDimension << m_insertedDimParam;
|
||||
fstream << m_fromOffset << m_boundaryMode << m_shiftDimParam;
|
||||
}
|
||||
|
||||
virtual void Load(File& fstream, size_t modelVersion) override
|
||||
{
|
||||
Base::Load(fstream, modelVersion);
|
||||
fstream >> m_fromOffset >> m_numSteps >> m_boundaryMode >> m_shiftDimension >> m_insertedDimParam;
|
||||
}
|
||||
|
||||
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & fr) override
|
||||
{
|
||||
assert(inputIndex == 0); inputIndex;
|
||||
fr;
|
||||
fstream >> m_fromOffset >> m_boundaryMode >> m_shiftDimParam;
|
||||
}
|
||||
|
||||
// Always reports false for this node.
// NOTE(review): presumably this tells the network that BackpropTo() does not read this node's
// own output value -- confirm against the base-class contract.
virtual bool OutputUsedInComputingInputNodesGradients() const override
{
    return false;
}
|
||||
|
@ -133,6 +111,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
Base::BeginForwardProp();
|
||||
|
||||
// TODO: If we have a truncated-BPTT state then verify that the sequence indices match with m_state->m_sequences, and the tensor dimensions.
|
||||
|
||||
// in case of trimming, narrow the layout
|
||||
// We actually do not drop content, only reduce the range of sequences.
|
||||
// This is meant to optimize for the case where we have multiple sequences concatenated while trimming a small amount only.
|
||||
|
@ -142,34 +122,216 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
Base::EndForwardProp();
|
||||
|
||||
// In BPTT, we carry over left-to-right state across minibatches.
|
||||
// In truncated BPTT, we carry over left-to-right state across minibatches.
|
||||
// The necessary frames are stored in m_state->m_delayedValue.
|
||||
|
||||
// Only if layout has anything exceeding the MB.
|
||||
if (GetMBLayout()->HasSequenceBeyondEnd()) // only if layout has any sequence that has ends beyond this minibatch
|
||||
{
|
||||
}
|
||||
else
|
||||
m_state.clear();
|
||||
}
|
||||
private:
|
||||
typedef std::pair<SmallVector<int>, SmallVector<int>> SliceBounds; // slice bounds for dimension k are [first[k], second[k]) (think STL begin/end)
|
||||
|
||||
// Creates a TensorView onto 'data' that is restricted to the region described by 'slice'.
//  - data:  matrix that holds the values
//  - shape: original tensor shape of 'data'; received by value, so the caller's shape is not modified
//  - slice: bounds to narrow to (see SliceBounds); handed to TensorShape::NarrowTo() as is
// Returns the view constructed from 'data' and the narrowed shape.
TensorView<ElemType> DataTensorFor(Matrix<ElemType> & data, TensorShape shape/*original shape of 'data'*/, SliceBounds slice)
{
    // narrow our private copy of the shape, then wrap the matrix with it
    shape.NarrowTo(slice);
    TensorView<ElemType> view(data, shape);
    return view;
}
|
||||
|
||||
// This function assumes BeginForwardProp/EndForwardProp() to be called before/after the iteration loop.
|
||||
// helper to shift dimension 'm_shiftDim' of SliceBounds by an offset (a common operation below)
|
||||
SliceBounds ShiftDim(const SliceBounds & in, int shiftBy)
|
||||
{
|
||||
SliceBounds result = in;
|
||||
result.first [m_shiftDim] += shiftBy;
|
||||
result.second[m_shiftDim] += shiftBy;
|
||||
return result;
|
||||
}
|
||||
|
||||
// Copies the dimensions of 'shape' into a vector of signed integers.
// Signed values are needed because slice bounds derived from these dimensions may be
// shifted into negative ranges.
static SmallVector<int> ToIntDims(const TensorShape & shape)
{
    const auto & unsignedDims = shape.GetDims();
    SmallVector<int> result;
    result.append(unsignedDims.begin(), unsignedDims.end());
    return result;
}
|
||||
|
||||
// determine shapes and slices to move
|
||||
// This is used for both forward and backprop.
|
||||
// 'In' below refers to Input(0) where 'Out' refers to the output of *this.
|
||||
void DetermineSlices(size_t rank, const FrameRange & fr,
|
||||
TensorShape & inShape, TensorShape & outShape, // our MB's shape
|
||||
SliceBounds & inSliceLogical, SliceBounds & outSliceLogical) // the logical ranges to shift
|
||||
{
|
||||
// get the slice bounds for the given FrameRange
|
||||
outShape = GetTensorShape(rank); // describes the full tensor including sequence and time dimensions
|
||||
inShape = Input(0)->GetTensorShape(rank);
|
||||
|
||||
// determine the logical in and out slices
|
||||
// This may now have bounds that fall outside, which we need to split off next.
|
||||
outSliceLogical = TensorSliceWithMBLayoutFor(ToIntDims(outShape), fr, GetMBLayout());
|
||||
inSliceLogical = TensorSliceWithMBLayoutFor(ToIntDims(inShape), fr.WithTimeOffset(m_fromOffset), GetMBLayout()); // apply the offset
|
||||
}
|
||||
|
||||
// determine stripes to move w.r.t. main storage and from/to state
|
||||
// For efficiency:
|
||||
// - this function assumes that the return values have been freshly constructed (it won't reset them)
|
||||
// - it may return a slice with end < begin which indicates an empty slice
|
||||
void PartitionSlices(const SliceBounds & inSliceLogical, const SliceBounds & outSliceLogical, // the move we want to make
|
||||
int T, // our actual size
|
||||
SliceBounds & inSliceMain, SliceBounds & outSliceMain, // the part that goes main-to-main
|
||||
SliceBounds & inSliceState, SliceBounds & outSliceState) // the part that goes from/to state
|
||||
{
|
||||
inSliceMain = inSliceLogical;
|
||||
outSliceMain = outSliceLogical;
|
||||
if (inSliceMain.first[m_shiftDim] < 0)
|
||||
{
|
||||
assert(inSliceMain.second[m_shiftDim] < T);
|
||||
if (!m_state.empty()) // truncated BPTT case
|
||||
{
|
||||
// determine range that lives in state
|
||||
SliceBounds inSliceOutside = inSliceMain; // beginning falls to the left of the MB
|
||||
if (inSliceOutside.second[m_shiftDim] > 0)
|
||||
inSliceOutside.second[m_shiftDim] = 0; // trim end; e.g. [-2,97) -> [-2,0), but [-2,-1) remains
|
||||
// now inSliceOutside represents only the region that falls outside
|
||||
|
||||
// map to dimensions of our saved state
|
||||
SliceBounds inSliceState = ShiftDim(inSliceOutside, m_state.m_shape[m_shiftDim]);
|
||||
// E.g. for offset = -4, m_state will be 4 elements, so [-2,0) -> [2,4), and [-2,-1) -> [2,3)
|
||||
|
||||
// map to target dimensions
|
||||
SliceBounds outSliceState = ShiftDim(inSliceOutside, -m_fromOffset);
|
||||
assert(inSliceState == outSliceState); // (when we fall out on the left, both must be the same)
|
||||
}
|
||||
// else: no truncated BPTT means we must have a proper boundary. So don't write those values here, they will be initialized with boundary values below.
|
||||
|
||||
// and trim main (if 'from' is entirely outside, such as in the common single-frame case, we get begin >= end)
|
||||
outSliceMain.first[m_shiftDim] += -inSliceMain.first[m_shiftDim];
|
||||
inSliceMain.first[m_shiftDim] += -inSliceMain.first[m_shiftDim];
|
||||
assert(inSliceMain.first[m_shiftDim] == 0);
|
||||
}
|
||||
else if (inSliceMain.second[m_shiftDim] > T)
|
||||
{
|
||||
if (!m_state.empty())
|
||||
{
|
||||
// determine range to get from state
|
||||
SliceBounds inSliceOutside = inSliceMain;
|
||||
if (inSliceOutside.first[m_shiftDim] < T)
|
||||
inSliceOutside.first[m_shiftDim] = T; // trim end; e.g. [2,102) -> [100,102), but [101,102) remains
|
||||
// now inSliceOutside is where we should copy from, with indices completely out of bounds
|
||||
|
||||
// map to dimensions of our saved state
|
||||
SliceBounds inSliceState = ShiftDim(inSliceOutside, -T);
|
||||
// E.g. for offset = 4, m_state will be 4 elements, so [100,102) -> [0,2), and [101,102) -> [1,2)
|
||||
|
||||
// map to target dimensions
|
||||
SliceBounds outSliceState = ShiftDim(inSliceOutside, T - m_fromOffset);
|
||||
// E.g. [0,2) -> [96,98), and [1,2) -> [97,98)
|
||||
}
|
||||
// and trim main (if 'from' is entirely outside, such as in the common single-frame case, we get begin >= end)
|
||||
outSliceMain.first[m_shiftDim] -= (inSliceMain.second[m_shiftDim] - T);
|
||||
inSliceMain.second[m_shiftDim] -= (inSliceMain.second[m_shiftDim] - T);
|
||||
assert(inSliceMain.second[m_shiftDim] == T);
|
||||
}
|
||||
}
|
||||
public:
|
||||
virtual void ForwardProp(const FrameRange & fr) override
|
||||
{
|
||||
if (fr.GetIterationDimension() != m_shiftDimParam)
|
||||
LogicError("ShiftNode::ForwardProp(): FrameRange not iterating over user-specified dimension.");
|
||||
|
||||
// for debugging, invalidate the output region, so we will catch if we missed to update something
|
||||
#ifdef _DEBUG
|
||||
ValueFor(fr).Invalidate();
|
||||
#endif
|
||||
|
||||
// STEP 1: whole-sale copy a shifted version of the input to the output
|
||||
// - consider the saved parts from the last minibatch as part of the input at dimensions beyond the bounds
|
||||
// - ignore boundary conditions for now
|
||||
// - ignore boundary conditions at this point (will be fixed subsequently)
|
||||
// This will copy a little too much in case of multiple concatenated sequences within a single parallel sequence.
|
||||
|
||||
// get the tensors without shift
|
||||
// get the logical ranges we want to shift
|
||||
TensorShape inShape, outShape; // expanded tensor shapes of input and output
|
||||
SliceBounds inSliceLogical, outSliceLogical; // the logical ranges to shift
|
||||
size_t rank = DetermineElementwiseTensorRank();
|
||||
auto result = ValueTensorFor(rank, fr);
|
||||
auto input = Input(0)->ValueTensorFor(rank, fr);
|
||||
DetermineSlices(rank, fr, inShape, outShape, inSliceLogical, outSliceLogical);
|
||||
|
||||
// shift the dimension in the input
|
||||
// now copy the two stripes--one that is main-to-main, and one that pulls in data from previous state (truncated BPTT only)
|
||||
// This correctly handles if input is a tensor with strides. This is currently not the case, but may be if we support in-place.
|
||||
|
||||
SliceBounds inSliceMain, outSliceMain; // main-to-main
|
||||
SliceBounds inSliceState, outSliceState; // from state
|
||||
PartitionSlices(inSliceLogical, outSliceLogical, outShape[m_shiftDim], inSliceMain, outSliceMain, inSliceState, outSliceState);
|
||||
|
||||
if (!inSliceState.first.empty() && inSliceState.second[m_shiftDim] > inSliceState.first[m_shiftDim])
|
||||
{
|
||||
// Note: If all sequences begin at the start of the range, this would copy invalid values which would be overwritten below.
|
||||
// This is prevented in that m_state will be set to empty in the previous MB if all sequences ended, which will in turn return an empty slice.
|
||||
auto from = DataTensorFor(m_state.m_delayedValue, m_state.m_shape, inSliceState);
|
||||
auto to = DataTensorFor(Value(), outShape, outSliceState);
|
||||
to.AssignCopyOf(from);
|
||||
}
|
||||
if (inSliceMain.second[m_shiftDim] > inSliceMain.first[m_shiftDim])
|
||||
{
|
||||
auto from = DataTensorFor(Input(0)->Value(), inShape, inSliceMain);
|
||||
auto to = DataTensorFor( Value(), outShape, outSliceMain);
|
||||
to.AssignCopyOf(from);
|
||||
}
|
||||
// We have now pulled anything from within the logical bounds.
|
||||
// Any frame that pulls from outside contains invalid values (either not initialized or copied from incorrect source), which must be fixed next.
|
||||
|
||||
// STEP 2: fix up the boundary conditions
|
||||
// - fill in xxx
|
||||
// - fill in all frames that are too close to boundary and must be filled from context (recurrent) or by replication (non-recurrent only)
|
||||
|
||||
// turn selected frame and shifted frame into a tensor
|
||||
if (fr.IsAllFrames() || GetMBLayout()->IsBeyondStartOrEnd(fr.WithTimeOffset(m_fromOffset))) // short-cut test whether there is anything to do
|
||||
{
|
||||
auto ts = outSliceLogical.first[m_shiftDim];
|
||||
auto te = outSliceLogical.second[m_shiftDim];
|
||||
//size_t sequenceDim = outShape.size() - 2; // TODO: In case of multiple time dims, this must be adjusted. Code dup from TensorSliceWithMBLayoutFor(). Encapsulate this.
|
||||
// iterate over all sequences in this batch and handle all that overlap with the target region
|
||||
for (const auto & seq : GetMBLayout()->GetAllSequences())
|
||||
{
|
||||
if (seq.tEnd <= ts || seq.tBegin >= te) // no overlap--skip
|
||||
continue;
|
||||
|
||||
// copy all that's in range
|
||||
// get tensor to fill in. This may be out of bounds, and may only partially overlap with [ts,te)
|
||||
auto seqLen = abs(m_fromOffset);
|
||||
auto seqBegin = m_fromOffset < 0 ? seq.tBegin : seq.tBegin + seq.GetNumTimeSteps() - seqLen; // e.g. m_fromOffset = -4 -> [0,4) , +4 -> [Len-4,Len)
|
||||
auto outSliceFill = TensorSliceWithMBLayoutFor(ToIntDims(outShape), fr.WithTimeOffset(seqBegin).WithTimeRange(seqLen).Sequence(seq.s), GetMBLayout());
|
||||
|
||||
// fix up all that is not
|
||||
// get tensor to fill from
|
||||
// We fill either from the provided boundary node or from ourselves (BoundaryMode::duplicate = clamp).
|
||||
bool clamp = m_boundaryMode == BoundaryMode::duplicate;
|
||||
ComputationNodeBasePtr boundaryNode = clamp ? shared_from_this() : Input(0);
|
||||
auto boundaryShape = boundaryNode->GetTensorShape(rank);
|
||||
auto fromSeq = clamp ?
|
||||
seq.s :
|
||||
boundaryNode->HasMBLayout() ?
|
||||
boundaryNode->GetMBLayout()->FindSequence(seq.seqId).seqId :
|
||||
SIZE_MAX;
|
||||
auto fromBegin = 0;
|
||||
auto boundarySliceLogical = TensorSliceWithMBLayoutFor(ToIntDims(boundaryShape), fr.WithTimeOffset(fromBegin).WithTimeRange(seqLen).Sequence(fromSeq), GetMBLayout());
|
||||
|
||||
boundarySliceLogical;
|
||||
|
||||
//inSliceLogical = TensorSliceWithMBLayoutFor(ToIntDims(inShape), fr.WithTimeOffset(m_fromOffset), GetMBLayout()); // apply the offset
|
||||
|
||||
|
||||
|
||||
// clip against [ts,te)
|
||||
// copy
|
||||
sin(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & fr) override
|
||||
{
|
||||
// To allow for bulk gradient computation, we will clear out any gradient that should not be propagated.
|
||||
// We do that directly to our incoming output gradient. This is OK because we own this, and it is no longer used after this operation
|
||||
// (it is invalid to call BackpropTo() multiple times since it adds to the outgoing Input() gradient).
|
||||
assert(inputIndex == 0); inputIndex;
|
||||
fr;
|
||||
}
|
||||
|
||||
virtual void /*ComputationNodeBase::*/Validate(bool isFinalValidationPass) override
|
||||
|
@ -177,46 +339,29 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
assert(m_inputs.size() == 2);
|
||||
ComputationNodeBase::Validate(isFinalValidationPass);
|
||||
|
||||
if (isFinalValidationPass)
|
||||
sin(1.0f);
|
||||
|
||||
// MBLayout is just inherited
|
||||
m_pMBLayout = Input(0)->GetMBLayout();
|
||||
if (isFinalValidationPass && !m_pMBLayout)
|
||||
InvalidArgument("%ls %ls operation must operate on data (must have an MB Layout).", NodeName().c_str(), OperationName().c_str());
|
||||
|
||||
// determine final sample layout
|
||||
auto inputSampleLayout = Input(0)->GetSampleLayout();
|
||||
auto inputDims = inputSampleLayout.GetDims();
|
||||
if (m_insertedDimParam < 0)
|
||||
InvalidArgument("%ls %ls operation: Specified insertion location %d refers to a time dimension, but this is not allowed.",
|
||||
NodeName().c_str(), OperationName().c_str(), m_insertedDimParam);
|
||||
m_insertExpandShapeAt = m_numSteps > 1 ? 0 : (m_insertedDimParam > 0 ? m_insertedDimParam - 1 : inputDims.size());
|
||||
if (m_insertExpandShapeAt > inputDims.size())
|
||||
if (isFinalValidationPass)
|
||||
InvalidArgument("%ls %ls operation: Specified insertion location %d beyond end of input sample layout [%s].",
|
||||
NodeName().c_str(), OperationName().c_str(), m_insertedDimParam, string(inputSampleLayout).c_str());
|
||||
else
|
||||
m_insertExpandShapeAt = inputDims.size(); // this may be an error, but we want to catch that only in the final pass
|
||||
SmallVector<size_t> dims;
|
||||
if (m_numSteps > 1 && inputDims.size() + 1 > dims.capacity())
|
||||
InvalidArgument("%ls %ls operation: Too many dimensions. Did you feed back output of this node without stripping the extra dimensions?",
|
||||
NodeName().c_str(), OperationName().c_str());
|
||||
dims.append(inputDims.begin(), inputDims.begin() + m_insertExpandShapeAt);
|
||||
if (m_numSteps > 1) // insert the new dimension if we expand into more than one step
|
||||
dims.push_back(m_numSteps);
|
||||
dims.append(inputDims.begin() + m_insertExpandShapeAt, inputDims.end());
|
||||
auto sampleLayout = TensorShape(dims);
|
||||
// as is the sample layout
|
||||
SetDims(Input(0));
|
||||
|
||||
SetDims(sampleLayout, 0);
|
||||
// determine the dimension that is to be shifted (convert user-specified as a zero-based index)
|
||||
if (isFinalValidationPass)
|
||||
{
|
||||
size_t rank = DetermineElementwiseTensorRank();
|
||||
auto valueShape = GetTensorShape(rank); // bounds of the Value()
|
||||
m_shiftDim = m_shiftDimParam > 0 ? m_shiftDimParam - 1/*regular dimensions are specified as 1-based*/ : valueShape.size() + m_shiftDimParam/*-1 for time dimension*/;
|
||||
}
|
||||
}
|
||||
|
||||
// special interface for use by loop detection
|
||||
virtual int /*IRecurrentNode::*/GetRecurrenceSteppingDirection() const override
|
||||
{
|
||||
if (m_boundaryMode != BoundaryMode::reachAcross)
|
||||
if (m_boundaryMode != BoundaryMode::reachAcross) // duplicating boundary frames cannot be done with recurrence
|
||||
return 0;
|
||||
else if (m_fromOffset + (int)m_numSteps <= 0)
|
||||
else if (m_fromOffset < 0)
|
||||
return +1;
|
||||
else if (m_fromOffset > 0)
|
||||
return -1;
|
||||
|
@ -231,48 +376,61 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
auto node = dynamic_pointer_cast<ShiftNode<ElemType>>(nodeP);
|
||||
node->m_fromOffset = m_fromOffset;
|
||||
node->m_numSteps = m_numSteps;
|
||||
node->m_boundaryMode = m_boundaryMode;
|
||||
node->m_shiftDimension = m_shiftDimension;
|
||||
node->m_insertedDimParam = m_insertedDimParam;
|
||||
node->m_insertExpandShapeAt = m_insertExpandShapeAt;
|
||||
node->m_shiftDimParam = m_shiftDimParam;
|
||||
node->m_shiftDim = m_shiftDim;
|
||||
node->m_state = m_state;
|
||||
}
|
||||
}
|
||||
|
||||
class ShiftNodeState : public INodeState
|
||||
{
|
||||
Matrix<ElemType> m_delayedValue; // saves the activation of the previous step that this node points to
|
||||
vector<MBLayout::SequenceInfo> m_delayedSequences; // and associated sequence info. This is only used for consistency checking (it must match).
|
||||
public:
|
||||
Matrix<ElemType> m_delayedValue; // saves the activation of the previous step that this node points to
|
||||
TensorShape m_shape; // tensor shape that describes m_delayedValue
|
||||
vector<MBLayout::SequenceInfo> m_delayedSequences; // and associated sequence info. This is only used for consistency checking (it must match).
|
||||
ShiftNodeState(DEVICEID_TYPE deviceId) : m_delayedValue(deviceId) { }
|
||||
bool empty() const { return m_delayedSequences.empty(); }
|
||||
void clear() { m_delayedValue.Resize(0, 0); m_shape = TensorShape(); m_delayedSequences.clear(); }
|
||||
};
|
||||
typedef std::shared_ptr<ShiftNodeState> ShiftNodeStatePtr;
|
||||
|
||||
// state export/import
|
||||
// This is done with a shared_ptr. The moment state is exported, the internal state is cleared; ownership is transferred to the exporting entity.
|
||||
// This way, the next invocation does not overwrite the exported state, but is required to create a new one if needed.
|
||||
// On the other hand, once imported, the state object is owned by the node and will be overwritten with the next state.
|
||||
virtual NodeStatePtr ExportState() { return std::move(m_state); }
|
||||
virtual void ImportState(NodeStatePtr && state) override
|
||||
// This is done with a shared_ptr. The current state is exported, the internal state is cleared.
|
||||
// Ownership of members is logically transferred to the exporting entity.
|
||||
// Physically, however, since we often transfer between CPU and GPU, activation data is merely copied,
|
||||
// and the GPU or CPU object resized to (0,0) without giving up the memory.
|
||||
virtual NodeStatePtr ExportState() // TODO: can we instead pass the shared_ptr object in? So we don't need to create a new one all the time? Or should we still take ownership of the ptr?
|
||||
{
|
||||
m_state = dynamic_pointer_cast<ShiftNodeState>(state);
|
||||
if (state && !m_state)
|
||||
auto state = make_shared<ShiftNodeState>(CPUDEVICE);
|
||||
state->m_delayedValue.SetValue(m_state.m_delayedValue); // note: this will transfer from GPU to CPU
|
||||
m_state.m_delayedValue.Resize(0, 0);
|
||||
state->m_shape = std::move(m_state.m_shape);
|
||||
state->m_delayedSequences = std::move(m_state.m_delayedSequences);
|
||||
return state;
|
||||
}
|
||||
virtual void ImportState(const NodeStatePtr & statep) override
|
||||
{
|
||||
ShiftNodeStatePtr state = dynamic_pointer_cast<ShiftNodeState>(statep);
|
||||
if (!state)
|
||||
LogicError("ImportState: Wrong state object passed (wrong type).");
|
||||
m_state.m_delayedValue.SetValue(state->m_delayedValue); // note: this will transfer from CPU to GPU
|
||||
state->m_delayedValue.Resize(0, 0);
|
||||
m_state.m_shape = std::move(state->m_shape);
|
||||
m_state.m_delayedSequences = std::move(state->m_delayedSequences);
|
||||
}
|
||||
protected:
|
||||
// parameters remembered from construction
|
||||
int m_fromOffset; // offset to pull from
|
||||
int m_numSteps; // offset range
|
||||
BoundaryMode m_boundaryMode; // how to fill at the boundary (reach across, duplicate, or trim)
|
||||
int m_shiftDimension; // dimension to shift (default: time)
|
||||
int m_insertedDimParam; // in case of multiple steps, this is where a new dimension will be inserted
|
||||
int m_fromOffset; // offset to pull from
|
||||
BoundaryMode m_boundaryMode; // how to fill at the boundary (reach across or duplicate)
|
||||
int m_shiftDimParam; // dimension to shift (default: time)
|
||||
|
||||
// derived params set up in Validate()
|
||||
size_t m_insertExpandShapeAt; // at which dimension to insert (internal 0-based index)
|
||||
size_t m_shiftDim; // m_shiftDimParam matched to the real tensor index
|
||||
|
||||
ShiftNodeStatePtr m_state; // saves the activation of the previous step that this node points to
|
||||
ShiftNodeState m_state; // state that is carried over across evaluations
|
||||
// Note: The version held by this node lives in the GPU, whereas the versions being exported carry CPU-side copies
|
||||
|
||||
function<void()> m_attachInputsFn; // for late expansion of inputs (scripting)
|
||||
function<void()> m_attachInputsFn; // for late expansion of inputs (scripting)
|
||||
};
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
|
@ -333,7 +491,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// - ranges of neighbor frames as a secondary tensor dimension (i.e. can be used to implement a rolling window)
|
||||
// - full support/efficiency of non-recurrent use (in which case the range can be from negative to positive, e.g. a symmetric rolling window)
|
||||
// - denoting which tensor dimension to loop over (this may not be completed, but I will plant a seed)
|
||||
// - support for Yongqiang's sub-minibatching with BPTT (export/import state)
|
||||
// - support for Yongqiang's sub-minibatching with truncated BPTT (export/import state)
|
||||
// - more efficient storage of carried-over state (only store the needed frames, not a full copy of the previous MB as currently; which will on the other hand also allow windows that reach back beyond a minibatch)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
|
@ -486,7 +644,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
virtual void EndForwardProp() override // called after last iteration step of ForwardProp()
|
||||
{
|
||||
// In BPTT, we carry over left-to-right state across minibatches.
|
||||
// In truncated BPTT, we carry over left-to-right state across minibatches.
|
||||
// It is kept in m_delayedValue, m_delayedActivationMBLayout.
|
||||
// This could be optimized as follows:
|
||||
// - only keep the required number of frames (m_timeStep)
|
||||
|
@ -620,27 +778,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
if (dir == -1) // we look into past
|
||||
{
|
||||
#if 0
|
||||
bool allAtBoundary = true;
|
||||
// if the current last frames are all sentence end or no feature , there is no need to carry on state info
|
||||
if (m_pMBLayout->Is(FrameRange(nT-1), MinibatchPackingFlags::SequenceEnd | MinibatchPackingFlags::NoFeature))
|
||||
{
|
||||
for (size_t u = 0; u < nU; u++)
|
||||
{
|
||||
if (!m_pMBLayout->Is(FrameRange(nT - 1).Sequence(u), MinibatchPackingFlags::SequenceEnd | MinibatchPackingFlags::NoFeature))
|
||||
{
|
||||
allAtBoundary = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
allAtBoundary = false;
|
||||
}
|
||||
|
||||
if (allAtBoundary)
|
||||
#endif
|
||||
if (!m_pMBLayout->HasSequenceBeyondEnd()) // only need to export state if anything crosses the MB boundary
|
||||
{
|
||||
auto pState = make_shared<DelayedValueNodeState<ElemType>>(m_deviceId);
|
||||
|
@ -655,26 +792,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
pExportedState = pState;
|
||||
}
|
||||
}
|
||||
if (dir == 1) // we look into future
|
||||
else if (dir == 1) // we look into future
|
||||
{
|
||||
#if 0
|
||||
// TODO: check whether all at boundary and don't carry state if it is the case
|
||||
size_t nT = m_pMBLayout->GetNumTimeSteps();
|
||||
size_t nU = m_pMBLayout->GetNumParallelSequences();
|
||||
bool allAtBoundary = true;
|
||||
if (m_pMBLayout->Is(FrameRange(nullptr, 0), MinibatchPackingFlags::NoFeature | MinibatchPackingFlags::SequenceStart))
|
||||
{
|
||||
for (size_t u = 0; u < nU; u++)
|
||||
{
|
||||
if (!m_pMBLayout->Is(FrameRange(nullptr, 0).Sequence(u), MinibatchPackingFlags::SequenceStart | MinibatchPackingFlags::NoFeature))
|
||||
{
|
||||
allAtBoundary = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (allAtBoundary)
|
||||
#endif
|
||||
if (!m_pMBLayout->HasSequenceBeyondBegin()) // only need to export state if anything crosses the MB boundary
|
||||
{
|
||||
auto pState = make_shared<DelayedValueNodeState<ElemType>>(m_deviceId);
|
||||
|
@ -689,19 +808,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
pExportedState = pState;
|
||||
}
|
||||
}
|
||||
if (dir != -1 && dir != 1)
|
||||
else
|
||||
{
|
||||
RuntimeError("Unrecognized direction in DelayedValueNodeBase");
|
||||
LogicError("Unrecognized direction in DelayedValueNodeBase");
|
||||
}
|
||||
return pExportedState;
|
||||
}
|
||||
|
||||
virtual void /*IStatefulNode::*/ImportState(NodeStatePtr && pImportedState) override
|
||||
virtual void /*IStatefulNode::*/ImportState(const NodeStatePtr & pImportedState) override
|
||||
{
|
||||
DelayedNodeStatePtr pState = dynamic_pointer_cast<DelayedValueNodeState<ElemType>> (pImportedState);
|
||||
|
||||
if (!pState)
|
||||
RuntimeError("Expecting DelayValueNodeState after down casting");
|
||||
LogicError("Expecting DelayValueNodeState after downcasting");
|
||||
|
||||
pState->ExportDelayedMBLayout(m_delayedActivationMBLayout); // pstate copy to m_delayedActivationMBLayout
|
||||
if (pState->IsEmpty())
|
||||
|
@ -715,18 +834,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
int dir = direction;
|
||||
if (dir == -1) // looking backward
|
||||
{
|
||||
m_delayedValue.SetColumnSlice(delayedActivation, (nT - 1)*nU, nU);
|
||||
}
|
||||
if (dir == 1)
|
||||
{
|
||||
//m_delayedValue.CopyColumnsStrided(delayedActivation, nU, 1, nT);
|
||||
else if (dir == 1)
|
||||
m_delayedValue.SetColumnSlice(delayedActivation, 0, nU);
|
||||
}
|
||||
if (dir != -1 && dir == 1)
|
||||
{// it is really a compile error ?
|
||||
RuntimeError("Unrecognized direction in DelayedValueNodeBase");
|
||||
}
|
||||
else
|
||||
LogicError("Unrecognized direction in DelayedValueNodeBase");
|
||||
}
|
||||
protected:
|
||||
|
||||
|
|
|
@ -1234,8 +1234,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
else if (inputIndex == 1)
|
||||
{
|
||||
BackpropToRight(*m_softmaxOfRight, Input(0)->Value(), Input(inputIndex)->Gradient(),
|
||||
Gradient(), *m_gammaFromLattice, m_fsSmoothingWeight, m_frameDropThreshold);
|
||||
FrameRange fr(Input(0)->GetMBLayout());
|
||||
BackpropToRight(*m_softmaxOfRight, Input(0)->Value(), Input(inputIndex)->Gradient(),
|
||||
Gradient(), *m_gammaFromLattice, m_fsSmoothingWeight, m_frameDropThreshold);
|
||||
MaskMissingColumnsToZero(Input(inputIndex)->Gradient(), Input(0)->GetMBLayout(), fr);
|
||||
|
||||
#ifdef _DEBUG
|
||||
Input(inputIndex)->InvalidateMissingGradientColumns(FrameRange(Input(inputIndex)->GetMBLayout()));
|
||||
#endif
|
||||
|
@ -1368,14 +1371,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
RequestMatrixFromPool(m_gammaFromLattice, matrixPool);
|
||||
}
|
||||
|
||||
// Release gradient and temp matrices that are no longer needed after all the children's gradients are computed.
// Calls the base-class implementation, then returns m_logSoftmaxOfRight, m_softmaxOfRight and m_gammaFromLattice to matrixPool.
|
||||
virtual void ReleaseMatricesAfterBackprop(MatrixPool& matrixPool)
|
||||
{
|
||||
Base::ReleaseMatricesAfterBackprop(matrixPool);
|
||||
ReleaseMatrixToPool(m_logSoftmaxOfRight, matrixPool);
|
||||
ReleaseMatrixToPool(m_softmaxOfRight, matrixPool);
|
||||
ReleaseMatrixToPool(m_gammaFromLattice, matrixPool);
|
||||
}
|
||||
// release the temp matrices held by this node (m_logSoftmaxOfRight, m_softmaxOfRight, m_gammaFromLattice)
// back to the matrix pool after backprop; the base-class implementation is called first
|
||||
virtual void ReleaseMatricesAfterBackprop(MatrixPool& matrixPool)
|
||||
{
|
||||
Base::ReleaseMatricesAfterBackprop(matrixPool);
|
||||
ReleaseMatrixToPool(m_logSoftmaxOfRight, matrixPool);
|
||||
ReleaseMatrixToPool(m_softmaxOfRight, matrixPool);
|
||||
ReleaseMatrixToPool(m_gammaFromLattice, matrixPool);
|
||||
}
|
||||
|
||||
// TODO: method names should be CamelCase
|
||||
std::vector<shared_ptr<const msra::dbn::latticepair>> * getLatticePtr()
|
||||
|
@ -1415,6 +1418,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
m_doReferenceAlignment = doreferencealign;
|
||||
}
|
||||
|
||||
// Packs the gamma-calculation settings into a msra::lattices::SeqGammarCalParam and passes it to m_gammaCalculator.
// Each argument is copied unchanged into the field of the same name (sMBR is stored in sMBRmode).
// NOTE(review): amf/lmf/wp look like acoustic-model scale, language-model scale and word penalty -- confirm against SeqGammarCalParam.
void SetGammarCalculationParam(const double& amf, const double& lmf, const double& wp, const double& bMMIfactor, const bool& sMBR)
|
||||
{
|
||||
msra::lattices::SeqGammarCalParam param;
|
||||
param.amf = amf;
|
||||
param.lmf = lmf;
|
||||
param.wp = wp;
|
||||
param.bMMIfactor = bMMIfactor;
|
||||
param.sMBRmode = sMBR;
|
||||
m_gammaCalculator.SetGammarCalculationParams(param);
|
||||
}
|
||||
|
||||
void gettime(unsigned long long &gammatime, unsigned long long &partialtime)
|
||||
{
|
||||
gammatime = m_gammatime;
|
||||
|
@ -1427,6 +1441,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
shared_ptr<Matrix<ElemType>> m_gammaFromLattice;
|
||||
double m_frameDropThreshold;
|
||||
double m_fsSmoothingWeight; // frame-sequence criterion interpolation weight --TODO: can this be done outside?
|
||||
double m_seqGammarAMF;
|
||||
double m_seqGammarLMF;
|
||||
double m_seqGammarWP;
|
||||
double m_seqGammarbMMIFactor;
|
||||
double m_seqGammarUsesMBR;
|
||||
bool m_doReferenceAlignment;
|
||||
std::vector<shared_ptr<const msra::dbn::latticepair>> m_lattices;
|
||||
msra::asr::simplesenonehmm m_hmm;
|
||||
|
|
|
@ -74,7 +74,7 @@
|
|||
<AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>ComputationNetworkLib.lib; Math.lib; kernel32.lib; user32.lib; shell32.lib; SequenceTrainingLib.lib; %(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)\; "c:\Program Files\NVIDIA Corporation\GDK\gdk_win7_amd64_release\nvml\lib"</AdditionalLibraryDirectories>
|
||||
|
@ -102,7 +102,7 @@
|
|||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
|
|
|
@ -137,7 +137,7 @@ struct GridDim
|
|||
std::vector<cudaDeviceProp> props(numDevices);
|
||||
for (int i = 0; i < numDevices; i++)
|
||||
CUDA_CALL(cudaGetDeviceProperties(&props[i], i));
|
||||
#if 1 // on Linux, maxGridSize[0] gets reported as 0
|
||||
#if 0 // on Linux, maxGridSize[0] gets reported as 0
|
||||
for (int i = 0; i < numDevices; i++)
|
||||
fprintf(stderr, "%d procs %d warps %d %d %d max grid on %s\n", (int)props[i].multiProcessorCount, (int)props[i].warpSize, (int)props[i].maxGridSize[0], (int)props[i].maxGridSize[1], (int)props[i].maxGridSize[2], props[i].name);
|
||||
#endif
|
||||
|
|
|
@ -2246,7 +2246,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
slice.m_computeDevice = m_computeDevice;
|
||||
slice.m_numRows = m_numRows;
|
||||
slice.m_numCols = numCols;
|
||||
slice.m_nz = SecondaryIndexValueAt(startColumn + numCols) - SecondaryIndexValueAt(startColumn);
|
||||
slice.m_nz = ( numCols == m_numCols ) ? m_nz : SecondaryIndexValueAt(startColumn + numCols) - SecondaryIndexValueAt(startColumn);
|
||||
slice.m_elemSizeAllocated = m_elemSizeAllocated;
|
||||
slice.m_totalBufferSizeAllocated = m_totalBufferSizeAllocated;
|
||||
slice.m_pArray = m_pArray;
|
||||
|
|
|
@ -87,9 +87,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
return (MajorIndexLocation() + (m_format == matrixFormatSparseCSC ? SecondaryIndexValueAt(0) : 0));
|
||||
}
|
||||
|
||||
// TODO: Comment these methods more thoroughly, e.g., why it uses numNZ instead of m_elemSizeAllocated.
|
||||
size_t MajorIndexCount() const
|
||||
{
|
||||
return MajorIndexCount(m_numRows, m_numCols, m_elemSizeAllocated, m_format);
|
||||
return MajorIndexCount(m_numRows, m_numCols, m_nz, m_format);
|
||||
}
|
||||
size_t MajorIndexCount(const size_t numRows, const size_t numCols, const size_t numNZ, const MatrixFormat format) const
|
||||
{
|
||||
|
@ -113,6 +114,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
return MajorIndexLocation() + m_numRows;
|
||||
else
|
||||
return MajorIndexLocation() + m_elemSizeAllocated + m_sliceViewOffset;
|
||||
//return MajorIndexLocation() + m_elemSizeAllocated + m_sliceViewOffset;
|
||||
}
|
||||
size_t SecondaryIndexCount(const size_t numRows, const size_t numCols, const size_t numNZReserved, const MatrixFormat format) const
|
||||
{
|
||||
|
|
|
@ -79,7 +79,7 @@
|
|||
<TreatWarningAsError>true</TreatWarningAsError>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>libacml_mp_dll.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)\</AdditionalLibraryDirectories>
|
||||
|
@ -127,7 +127,7 @@
|
|||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
|
|
|
@ -91,7 +91,7 @@
|
|||
<TreatWarningAsError>true</TreatWarningAsError>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>cudart.lib;cublas.lib;cusparse.lib;curand.lib;libacml_mp_dll.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<Profile>true</Profile>
|
||||
|
|
|
@ -1383,17 +1383,62 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
template<class ElemType>
|
||||
void Matrix<ElemType>::NormalGrad(Matrix<ElemType>& gradients, Matrix<ElemType>& functionValues, const ElemType learnRatePerSample, const ElemType momentum)
|
||||
void Matrix<ElemType>::NormalGrad(Matrix<ElemType>& gradients,
|
||||
Matrix<ElemType>& functionValues,
|
||||
const ElemType learnRatePerSample,
|
||||
const ElemType momentum,
|
||||
const bool useNesterovMomentum
|
||||
)
|
||||
{
|
||||
DecideAndMoveToRightDevice(*this, gradients, functionValues);
|
||||
|
||||
DISPATCH_MATRIX_ON_FLAG(&gradients,
|
||||
|
||||
if (!useNesterovMomentum)
|
||||
{
|
||||
DISPATCH_MATRIX_ON_FLAG(&gradients,
|
||||
nullptr,
|
||||
ScaleAndAdd((1-momentum) * learnRatePerSample, gradients, momentum, *this); functionValues -= *this,
|
||||
ScaleAndAdd((1-momentum) * learnRatePerSample, gradients, momentum, *this); functionValues -= *this,
|
||||
if (momentum != 0) gradients.m_CPUSparseMatrix->NormalGrad(*m_CPUMatrix, momentum); ScaleAndAdd(-learnRatePerSample, gradients, functionValues),
|
||||
if (momentum != 0) gradients.m_GPUSparseMatrix->NormalGrad(*m_GPUMatrix, momentum); ScaleAndAdd(-learnRatePerSample, gradients, functionValues)
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
DISPATCH_MATRIX_ON_FLAG(&gradients,
|
||||
nullptr,
|
||||
{/* CPU dense */
|
||||
ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
|
||||
ScaleAndAdd(-momentum, *this, functionValues);
|
||||
ScaleAndAdd(-(1 - momentum)*learnRatePerSample, gradients, functionValues);
|
||||
// w_t = w_{t-1} - momentum * v_{t-1} - (1-momentum) * learnRatePerSample * gradient
|
||||
},
|
||||
{/* GPU dense */
|
||||
ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
|
||||
ScaleAndAdd(-momentum, *this, functionValues);
|
||||
ScaleAndAdd(-(1 - momentum)*learnRatePerSample, gradients, functionValues);
|
||||
},
|
||||
{ /* CPU sparse */
|
||||
if (momentum != 0)
|
||||
{
|
||||
Matrix<ElemType> gradientCache(gradients.GetDeviceId());
|
||||
gradientCache.SetValue(gradients);
|
||||
gradients.m_CPUSparseMatrix->NormalGrad(*m_CPUMatrix, momentum);
|
||||
ScaleAndAdd(-momentum, *this, functionValues);
|
||||
ScaleAndAdd(-(1 - momentum)*learnRatePerSample, gradientCache, functionValues);
|
||||
}
|
||||
},
|
||||
{ /* GPU sparse */
|
||||
if (momentum != 0)
|
||||
{
|
||||
Matrix<ElemType> gradientCache(gradients.GetDeviceId());
|
||||
gradientCache.SetValue(gradients);
|
||||
gradients.m_GPUSparseMatrix->NormalGrad(*m_GPUMatrix, momentum);
|
||||
ScaleAndAdd(-momentum, *this, functionValues);
|
||||
ScaleAndAdd(-(1 - momentum)*learnRatePerSample, gradientCache, functionValues);
|
||||
}
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
//both this and gradients will be changed
|
||||
|
|
|
@ -164,7 +164,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
void ShiftBy(int numShift);
|
||||
|
||||
// TODO: all these scalars should be passed as doubles and cast down inside
|
||||
void NormalGrad(Matrix<ElemType>& gradients, Matrix<ElemType>& functionValues, const ElemType learnRatePerSample, const ElemType momentum);
|
||||
void NormalGrad(Matrix<ElemType>& gradients, Matrix<ElemType>& functionValues, const ElemType learnRatePerSample, const ElemType momentum, const bool useNAG);
|
||||
ElemType Adagrad(Matrix<ElemType>& gradients, const bool needAveMultiplier);
|
||||
void FSAdagrad(size_t mbSize, Matrix<ElemType>& gradients, Matrix<ElemType>& functionValues, const ElemType learnRatePerSample, const ElemType momentum);
|
||||
ElemType RmsProp(Matrix<ElemType>& gradients, ElemType RMS_GAMMA, ElemType RMS_WGT_INC, ElemType RMS_WGT_MAX, ElemType RMS_WGT_DEC, ElemType RMS_WGT_MIN, const bool needAveMultiplier);
|
||||
|
|
|
@ -237,8 +237,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template<class ElemType>
|
||||
void TensorView<ElemType>::DoUnaryOpOf(ElemType beta, const TensorView & a, ElemType alpha, ElementWiseOperator op)
|
||||
{
|
||||
static int cc = 0; if (cc++ == 0)
|
||||
fprintf(stderr, "Tensor Op: Op %d: %s -> %s\n", (int)op, string(a.GetShape()).c_str(), string(GetShape()).c_str());
|
||||
//static int cc = 0; if (cc++ == 0)
|
||||
// fprintf(stderr, "Tensor Op: Op %d: %s -> %s\n", (int)op, string(a.GetShape()).c_str(), string(GetShape()).c_str());
|
||||
|
||||
// prepare all tensor descriptor information as needed for execution
|
||||
array<size_t, 2> offsets;
|
||||
|
@ -257,8 +257,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template<class ElemType>
|
||||
void TensorView<ElemType>::DoBinaryOpOf(ElemType beta, const TensorView & a, const TensorView & b, ElemType alpha, ElementWiseOperator op)
|
||||
{
|
||||
static int cc = 0; if (cc++ == 0)
|
||||
fprintf(stderr, "Tensor Op: Op %d: %s op %s -> %s\n", (int)op, string(a.GetShape()).c_str(), string(b.GetShape()).c_str(), string(GetShape()).c_str());
|
||||
//static int cc = 0; if (cc++ == 0)
|
||||
// fprintf(stderr, "Tensor Op: Op %d: %s op %s -> %s\n", (int)op, string(a.GetShape()).c_str(), string(b.GetShape()).c_str(), string(GetShape()).c_str());
|
||||
|
||||
array<size_t, 3> offsets;
|
||||
array<SmallVector<ptrdiff_t>, 3> regularStrides, reducingStrides;
|
||||
|
@ -275,8 +275,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template<class ElemType>
|
||||
void TensorView<ElemType>::DoTernaryOpOf(ElemType beta, const TensorView & a, const TensorView & b, const TensorView & c, ElemType alpha, ElementWiseOperator op)
|
||||
{
|
||||
static int cc = 0; if (cc++ == 0)
|
||||
fprintf(stderr, "Tensor Op: Op %d: %s, %s, %s -> %s\n", (int)op, string(a.GetShape()).c_str(), string(b.GetShape()).c_str(), string(c.GetShape()).c_str(), string(GetShape()).c_str());
|
||||
//static int cc = 0; if (cc++ == 0)
|
||||
// fprintf(stderr, "Tensor Op: Op %d: %s, %s, %s -> %s\n", (int)op, string(a.GetShape()).c_str(), string(b.GetShape()).c_str(), string(c.GetShape()).c_str(), string(GetShape()).c_str());
|
||||
|
||||
array<size_t, 4> offsets;
|
||||
array<SmallVector<ptrdiff_t>, 4> regularStrides, reducingStrides;
|
||||
|
|
|
@ -356,26 +356,39 @@ struct latticefunctionskernels
|
|||
const size_t te = ts + numframes; // end time of current unit
|
||||
|
||||
size_t state1step0to1 = te; // inflection point from state 0 to 1, record in state 1
|
||||
//size_t state1stepm1to1 = te;
|
||||
size_t state2step0to1 = te; // inflection point from state 0 to 1, record in state 2
|
||||
//size_t state2stepm1to1 = te; // inflection point from state 0 to 1, record in state 2
|
||||
size_t state2step1to2 = te; // inflection point from state 1 to 2, record in state 2
|
||||
size_t state2step0to2 = te;
|
||||
|
||||
//now we only support transition from -1 to 0 or 2 for sil
|
||||
float pathscore0 = fwscore ; // log pp in state 0
|
||||
float pathscore1 = LOGZERO; // log pp in state 1
|
||||
float pathscore2 = LOGZERO; // log pp in state 2
|
||||
if(isSil)
|
||||
pathscore2 = fwscore;
|
||||
float pathscore0 = fwscore; // log pp in state 0
|
||||
float pathscore1 = fwscore; // log pp in state 1
|
||||
float pathscore2 = fwscore; // log pp in state 2
|
||||
|
||||
|
||||
|
||||
// first frame
|
||||
if (ts != te) // for t = ts, initialization
|
||||
{
|
||||
if (isSil) //for sil, -1 to 2 and -1 to 0 is permitted
|
||||
/* if (isSil) //for sil, -1 to 2 and -1 to 0 is permitted
|
||||
{
|
||||
pathscore0 += getlogtransp(transP,-1,0) + logLLs(senoneid0,ts);
|
||||
pathscore2 += getlogtransp(transP,-1,2) + logLLs(senoneid2,ts);
|
||||
}
|
||||
else //for others, only -1 to 0 is permitted
|
||||
pathscore0 += logLLs(senoneid0,ts); // Note: no need to incorporate LLs for state [1] and [2] because the path log LLs are LOGZERO anyway
|
||||
else //for others, only -1 to 0 is permitted
|
||||
{
|
||||
pathscore0 += getlogtransp(transP, -1, 0) + logLLs(senoneid0, ts);
|
||||
pathscore1 += getlogtransp(transP, -1, 1) + logLLs(senoneid1, ts);
|
||||
|
||||
}*/
|
||||
pathscore2 += getlogtransp(transP, -1, 2) + logLLs(senoneid2, ts);
|
||||
pathscore1 += getlogtransp(transP, -1, 1) + logLLs(senoneid1, ts);
|
||||
//state1stepm1to1 = ts;
|
||||
pathscore0 += getlogtransp(transP, -1, 0) + logLLs(senoneid0, ts);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -399,17 +412,22 @@ struct latticefunctionskernels
|
|||
{
|
||||
pathscore2 = pathscore12;
|
||||
state2step0to1 = state1step0to1; // record the inflection point
|
||||
//state2stepm1to1 = state1stepm1to1;
|
||||
state2step1to2 = t; // record the inflection point
|
||||
state2step0to2 = te;
|
||||
if (isSil)
|
||||
backptrmatrix (2, t-ts-1) = 1;
|
||||
}
|
||||
if (isSil) // only silence have path from 0 to 2
|
||||
//if (isSil) // only silence have path from 0 to 2
|
||||
{
|
||||
const float pathscore02 = pathscore0 + getlogtransp(transP,0,2); // log pp from state 0 to 2
|
||||
if (pathscore02 >= pathscore2) // if state 0->2
|
||||
{
|
||||
pathscore2 = pathscore02;
|
||||
backptrmatrix (2, t-ts-1) = 0;
|
||||
if (isSil)
|
||||
backptrmatrix (2, t-ts-1) = 0;
|
||||
state2step0to2 = t;
|
||||
state2step1to2 = te;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -422,9 +440,11 @@ struct latticefunctionskernels
|
|||
{
|
||||
pathscore1 = pathscore01;
|
||||
state1step0to1 = t; // record the inflection point
|
||||
//state1stepm1to1 = te;
|
||||
if (isSil)
|
||||
backptrmatrix (1, t-ts-1) = 0;
|
||||
}
|
||||
|
||||
if (isSil) // only silence have path from 2 to 1
|
||||
{
|
||||
const float pathscore21 = pathscore2last + getlogtransp(transP,2,1);
|
||||
|
@ -495,19 +515,35 @@ struct latticefunctionskernels
|
|||
|
||||
if (!isSil)
|
||||
{
|
||||
state2step0to1 += alignindex - ts; // convert to align measure
|
||||
state2step1to2 += alignindex - ts;
|
||||
for (size_t t = alignindex; t < alignindex + numframes; t++) // set the final alignment
|
||||
{
|
||||
size_t senoneid;
|
||||
if (t < state2step0to1) // in state 0
|
||||
senoneid = senoneid0;
|
||||
else if(t < state2step1to2) // in state 1
|
||||
senoneid = senoneid1;
|
||||
else // in state 2
|
||||
senoneid = senoneid2;
|
||||
alignresult[t] = (unsigned short) senoneid;
|
||||
}
|
||||
if (state2step0to2 < te) //from 0 to 2
|
||||
{
|
||||
state2step0to2 += alignindex - ts;
|
||||
for (size_t t = alignindex; t < alignindex + numframes; t++) // set the final alignment
|
||||
{
|
||||
size_t senoneid;
|
||||
if (t < state2step0to2) // in state 0
|
||||
senoneid = senoneid0;
|
||||
else // in state 2
|
||||
senoneid = senoneid2;
|
||||
alignresult[t] = (unsigned short)senoneid;
|
||||
}
|
||||
}
|
||||
else //from 1 to 2
|
||||
{
|
||||
state2step0to1 += alignindex - ts; // convert to align measure
|
||||
state2step1to2 += alignindex - ts;
|
||||
for (size_t t = alignindex; t < alignindex + numframes; t++) // set the final alignment
|
||||
{
|
||||
size_t senoneid;
|
||||
if (state2step0to1 <alignindex - ts + te && t < state2step0to1)
|
||||
senoneid = senoneid0;
|
||||
else if(t < state2step1to2) // in state 1
|
||||
senoneid = senoneid1;
|
||||
else // in state 2
|
||||
senoneid = senoneid2;
|
||||
alignresult[t] = (unsigned short) senoneid;
|
||||
}
|
||||
}
|
||||
}
|
||||
else // for silence
|
||||
{
|
||||
|
|
|
@ -70,7 +70,7 @@
|
|||
<TreatWarningAsError>true</TreatWarningAsError>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)\;..\..\Math\$(Platform)\$(Configuration);..\$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
|
||||
|
@ -91,7 +91,7 @@
|
|||
<TreatWarningAsError>true</TreatWarningAsError>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
|
|
|
@ -72,7 +72,7 @@
|
|||
<AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)\;..\..\Math\$(Platform)\$(Configuration);..\$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
|
||||
|
@ -93,7 +93,7 @@
|
|||
<TreatWarningAsError>true</TreatWarningAsError>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
|
|
|
@ -100,7 +100,7 @@
|
|||
<UseFullPaths>true</UseFullPaths>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalLibraryDirectories>$(VCInstallDir)UnitTest\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||
</Link>
|
||||
|
@ -115,7 +115,7 @@
|
|||
<UseFullPaths>true</UseFullPaths>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalLibraryDirectories>$(VCInstallDir)UnitTest\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>ucireader.lib;Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
|
@ -133,7 +133,7 @@
|
|||
<UseFullPaths>true</UseFullPaths>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
|
@ -152,7 +152,7 @@
|
|||
<UseFullPaths>true</UseFullPaths>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
|
|
|
@ -100,6 +100,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
vector<wstring> scriptpaths;
|
||||
vector<wstring> RootPathInScripts;
|
||||
wstring RootPathInLatticeTocs;
|
||||
vector<wstring> mlfpaths;
|
||||
vector<vector<wstring>>mlfpathsmulti;
|
||||
size_t firstfilesonly = SIZE_MAX; // set to a lower value for testing
|
||||
|
@ -263,7 +264,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
expand_wildcards(thisLattice(L"numLatTocFile"), paths);
|
||||
latticetocs.first.insert(latticetocs.first.end(), paths.begin(), paths.end());
|
||||
}
|
||||
|
||||
RootPathInLatticeTocs =(wstring) thisLattice(L"prefixPathInToc",L"");
|
||||
}
|
||||
|
||||
//get HMM related file names
|
||||
|
@ -448,7 +449,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (!_wcsicmp(readMethod.c_str(), L"blockRandomize"))
|
||||
{
|
||||
// construct all the parameters we don't need, but need to be passed to the constructor...
|
||||
m_lattices.reset(new msra::dbn::latticesource(latticetocs, m_hset.getsymmap()));
|
||||
|
||||
m_lattices.reset(new msra::dbn::latticesource(latticetocs, m_hset.getsymmap(), RootPathInLatticeTocs));
|
||||
m_lattices->setverbosity(m_verbosity);
|
||||
|
||||
// now get the frame source. This has better randomization and doesn't create temp files
|
||||
m_frameSource.reset(new msra::dbn::minibatchutterancesourcemulti(infilesmulti, labelsmulti, m_featDims, m_labelDims, numContextLeft, numContextRight, randomize, *m_lattices, m_latticeMap, m_frameMode));
|
||||
|
@ -941,6 +944,23 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
if (!skip)
|
||||
{
|
||||
// a stopgap
|
||||
if (m_numFramesToProcess[i] > 0 && m_latticeBufferMultiUtt[i] && m_latticeBufferMultiUtt[i]->getnumframes() != m_numFramesToProcess[i])
|
||||
{
|
||||
// BUGBUG: we just found that (due to some bugs yet to be tracked down),
|
||||
// the filled number of frames is inconsistent with the number of frames in lattices (though it rarely occurs)
|
||||
// This is just a stopgap, to be removed after the bugs are found and fixed
|
||||
bool needRenew = true;
|
||||
while (needRenew)
|
||||
{
|
||||
size_t framenum = m_numFramesToProcess[i];
|
||||
fprintf(stderr, "WARNING: mismatched number of frames filled in the reader: %d in data vs %d in lattices. Ignoring this utterance %ls\n",
|
||||
(int)framenum, (int)m_latticeBufferMultiUtt[i]->getnumframes(), m_latticeBufferMultiUtt[i]->getkey().c_str());
|
||||
ReNewBufferForMultiIO(i);
|
||||
needRenew = m_numFramesToProcess[i] > 0 && m_latticeBufferMultiUtt[i] && m_latticeBufferMultiUtt[i]->getnumframes() != m_numFramesToProcess[i];
|
||||
}
|
||||
|
||||
}
|
||||
m_numValidFrames[i] = m_numFramesToProcess[i];
|
||||
if (m_numValidFrames[i] > 0)
|
||||
{
|
||||
|
@ -972,49 +992,50 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
m_extraNumSeqs = 0;
|
||||
if (!m_frameMode)
|
||||
{
|
||||
// insert extra utterances to parallel sequences that have enough space left
|
||||
// As long as there is a gap at the end of any parallel sequence that is large enough for another utterance, fill it in.
|
||||
size_t nextMinibatchUttnum = 0;
|
||||
bool inserted;
|
||||
// The next utterances have already been prepared under parallel-sequence indices [i], in prep for the next MB.
|
||||
// For each, we will go through all parallel sequences [j] to see whether the entry currently held for the next [i] fits into [j].
|
||||
for (size_t i = 0; i < m_numSeqsPerMB; i++)
|
||||
for (size_t src = 0; src < m_numSeqsPerMB; )
|
||||
{
|
||||
while (nextMinibatchUttnum <= i)
|
||||
size_t framenum = m_numFramesToProcess[src];
|
||||
if (framenum == 0)
|
||||
{
|
||||
size_t framenum = m_numFramesToProcess[i];
|
||||
inserted = false;
|
||||
if (framenum > 0) // non-empty entry: see were it fits
|
||||
{
|
||||
// greedily search for a parallel sequence with enough space at the end to insert this utterance
|
||||
for (size_t j = 0; j < m_numSeqsPerMB; j++)
|
||||
{
|
||||
if (framenum + m_numValidFrames[j] < m_mbNumTimeSteps)
|
||||
{
|
||||
// enough space: insert it as parallel sequence [j] (instead of [i] in the next MB)
|
||||
m_extraSeqsPerMB.push_back(j);
|
||||
if (m_latticeBufferMultiUtt[i] != nullptr)
|
||||
{
|
||||
m_extraLatticeBufferMultiUtt.push_back(m_latticeBufferMultiUtt[i]);
|
||||
m_extraLabelsIDBufferMultiUtt.push_back(m_labelsIDBufferMultiUtt[i]);
|
||||
m_extraPhoneboundaryIDBufferMultiUtt.push_back(m_phoneboundaryIDBufferMultiUtt[i]);
|
||||
}
|
||||
fillOneUttDataforParallelmode(matrices, m_numValidFrames[j], framenum, j, i);
|
||||
m_pMBLayout->AddSequence(NEW_SEQUENCE_ID, j, m_numValidFrames[j], m_numValidFrames[j] + framenum);
|
||||
src++;
|
||||
continue;
|
||||
}
|
||||
if (m_latticeBufferMultiUtt[src]!=nullptr && m_latticeBufferMultiUtt[src]->getnumframes()!=framenum)
|
||||
{
|
||||
// BUGBUG: we just found that (due to some bugs yet to be tracked down),
|
||||
// the filled number of frames is inconsistent with the number of frames in lattices (though it rarely occurs)
|
||||
// This is just a stopgap, to be removed after the bugs are found and fixed
|
||||
fprintf(stderr, "WARNING: mismatched number of frames filled in the reader: %d in data vs %d in lattices. Ignoring this utterance %ls\n",
|
||||
(int)framenum, (int)m_latticeBufferMultiUtt[src]->getnumframes(), m_latticeBufferMultiUtt[src]->getkey().c_str());
|
||||
src++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// consume it
|
||||
ReNewBufferForMultiIO(i); // replace current [i] with a new one; then try again with this new one at [i]
|
||||
m_numValidFrames[j] += framenum;
|
||||
m_extraNumSeqs++;
|
||||
inserted = true;
|
||||
break;
|
||||
}
|
||||
bool slotFound = false;
|
||||
for (size_t des = 0; des < m_numSeqsPerMB; des++) // try to found a slot
|
||||
{
|
||||
if (framenum + m_numValidFrames[des] < m_mbNumTimeSteps)
|
||||
{ // found !
|
||||
m_extraSeqsPerMB.push_back(des);
|
||||
if (m_latticeBufferMultiUtt[src] != nullptr)
|
||||
{
|
||||
m_extraLatticeBufferMultiUtt.push_back(m_latticeBufferMultiUtt[src]);
|
||||
m_extraLabelsIDBufferMultiUtt.push_back(m_labelsIDBufferMultiUtt[src]);
|
||||
m_extraPhoneboundaryIDBufferMultiUtt.push_back(m_phoneboundaryIDBufferMultiUtt[src]);
|
||||
}
|
||||
fillOneUttDataforParallelmode(matrices, m_numValidFrames[des], framenum, des, src);
|
||||
m_pMBLayout->AddSequence(NEW_SEQUENCE_ID, des, m_numValidFrames[des], m_numValidFrames[des] + framenum);
|
||||
|
||||
ReNewBufferForMultiIO(src);
|
||||
m_numValidFrames[des] += framenum;
|
||||
m_extraNumSeqs++;
|
||||
slotFound = true;
|
||||
break;
|
||||
}
|
||||
if (!inserted)
|
||||
{
|
||||
nextMinibatchUttnum++; // didn't fit anywhere: done with entry [i]
|
||||
}
|
||||
}
|
||||
if (!slotFound)
|
||||
{
|
||||
src++; // done with this source; try next source;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -32,6 +32,9 @@ private:
|
|||
intargvector m_numSeqsPerMBForAllEpochs;
|
||||
size_t m_numSeqsPerMB; // requested number of parallel sequences
|
||||
size_t m_mbNumTimeSteps; // number of time steps to fill/filled (note: for frame randomization, this is the #frames, and not 1 as later reported)
|
||||
size_t m_mbMaxNumTimeSteps; // max time steps we take in a MB layout; any sentence longer than this max will be discarded (and a warning will be issued)
|
||||
// this is used to prevent CUDA out-of-memory errors
|
||||
|
||||
vector<size_t> m_numFramesToProcess; // [seq index] number of frames available (left to return) in each parallel sequence
|
||||
vector<size_t> m_switchFrame; /// TODO: something like the position where a new sequence starts; still supported?
|
||||
vector<size_t> m_numValidFrames; // [seq index] valid #frames in each parallel sequence. Frames (s, t) with t >= m_numValidFrames[s] are NoInput.
|
||||
|
|
|
@ -69,7 +69,7 @@
|
|||
<TreatWarningAsError>true</TreatWarningAsError>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
|
@ -87,7 +87,7 @@
|
|||
<TreatWarningAsError>true</TreatWarningAsError>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
|
|
|
@ -75,7 +75,7 @@
|
|||
<OpenMPSupport>true</OpenMPSupport>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>Math.lib;$(OpenCVLib);%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
|
|
|
@ -71,7 +71,7 @@
|
|||
<TreatWarningAsError>true</TreatWarningAsError>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)\;..\..\Math\$(Platform)\$(Configuration);..\$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
|
||||
|
@ -92,7 +92,7 @@
|
|||
<TreatWarningAsError>true</TreatWarningAsError>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
|
|
|
@ -71,7 +71,7 @@
|
|||
<TreatWarningAsError>true</TreatWarningAsError>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)\;..\..\Math\$(Platform)\$(Configuration);..\$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
|
||||
|
@ -92,7 +92,7 @@
|
|||
<TreatWarningAsError>true</TreatWarningAsError>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
|
|
|
@ -72,7 +72,7 @@
|
|||
<AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)\;..\..\Math\$(Platform)\$(Configuration);..\$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
|
||||
|
@ -93,7 +93,7 @@
|
|||
<TreatWarningAsError>true</TreatWarningAsError>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
|
|
|
@ -72,7 +72,7 @@
|
|||
<AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)\;..\..\Math\$(Platform)\$(Configuration);..\$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
|
||||
|
@ -93,7 +93,7 @@
|
|||
<TreatWarningAsError>true</TreatWarningAsError>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
|
|
|
@ -70,7 +70,7 @@
|
|||
<TreatWarningAsError>true</TreatWarningAsError>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)\;..\..\Math\$(Platform)\$(Configuration);..\$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
|
||||
|
@ -91,7 +91,7 @@
|
|||
<TreatWarningAsError>true</TreatWarningAsError>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
|
|
|
@ -91,7 +91,7 @@
|
|||
<SDLCheck>true</SDLCheck>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>..\..\Source\Math\$(Platform)\$(Configuration);..\$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
|
||||
|
@ -107,7 +107,7 @@
|
|||
<AdditionalIncludeDirectories>..\..\common\include;..\..\Source\Math</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)\;..\..\Source\Math\$(Platform)\$(Configuration);..\$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
|
||||
|
@ -124,7 +124,7 @@
|
|||
<SDLCheck>true</SDLCheck>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
|
@ -144,7 +144,7 @@
|
|||
<AdditionalIncludeDirectories>..\..\common\include;..\..\Source\Math</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
|
|
|
@ -63,6 +63,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
using SGDBase::m_L2RegWeight;
|
||||
using SGDBase::m_L1RegWeight;
|
||||
using SGDBase::m_needAveMultiplier;
|
||||
using SGDBase::m_useNesterovMomentum;
|
||||
using SGDBase::m_traceLevel;
|
||||
using SGDBase::m_numMBsToShowResult;
|
||||
using SGDBase::m_gradientCheckSigDigit;
|
||||
|
@ -392,8 +393,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
if (m_loadBestModel)
|
||||
{
|
||||
encoderNet->ReloadPersistableParameters<ElemType>(GetEncoderModelNameForEpoch(i - 1));
|
||||
decoderNet->ReloadPersistableParameters<ElemType>(GetDecoderModelNameForEpoch(i - 1));
|
||||
encoderNet->RereadPersistableParameters<ElemType>(GetEncoderModelNameForEpoch(i - 1));
|
||||
decoderNet->RereadPersistableParameters<ElemType>(GetDecoderModelNameForEpoch(i - 1));
|
||||
|
||||
size_t dummyMinibatchSize = 0;
|
||||
this->LoadCheckPointInfo(i - 1,
|
||||
|
@ -721,7 +722,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
//persist model and check-point info
|
||||
for (size_t k = 0; k < iNumNetworks; k++)
|
||||
{
|
||||
nets[k]->ReloadPersistableParameters<ElemType>(GetModelNameForEpoch(i, false, msra::strfun::wstrprintf(L".%d", k)));
|
||||
nets[k]->RereadPersistableParameters<ElemType>(GetModelNameForEpoch(i, false, msra::strfun::wstrprintf(L".%d", k)));
|
||||
nets[k]->ResetEvalTimeStamps();
|
||||
}
|
||||
|
||||
|
@ -930,7 +931,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
Matrix<ElemType>& smoothedGradient = (*smoothedGradientIter);
|
||||
|
||||
UpdateWeights(node, smoothedGradient, learnRatePerSample, GetMomentumPerSample(epochNumber/*BUGBUG workaround:*/, dataReader[0]->GetNumParallelSequences()), actualMBSize, m_L2RegWeight, m_L1RegWeight, m_needAveMultiplier);
|
||||
UpdateWeights(node, smoothedGradient, learnRatePerSample, GetMomentumPerSample(epochNumber/*BUGBUG workaround:*/, dataReader[0]->GetNumParallelSequences()), actualMBSize, m_L2RegWeight, m_L1RegWeight, m_needAveMultiplier, m_useNesterovMomentum);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -310,7 +310,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// likewise for sequence training parameters
|
||||
if (isSequenceTrainingCriterion)
|
||||
{
|
||||
ComputationNetwork::SetSeqParam<ElemType>(net, criterionNodes[0], m_hSmoothingWeight, m_frameDropThresh, m_doReferenceAlign);
|
||||
ComputationNetwork::SetSeqParam<ElemType>(net, criterionNodes[0], m_hSmoothingWeight, m_frameDropThresh, m_doReferenceAlign,
|
||||
m_seqGammarCalcAMF, m_seqGammarCalcLMF, m_seqGammarCalcWP, m_seqGammarCalcbMMIFactor, m_seqGammarCalcUsesMBR );
|
||||
}
|
||||
|
||||
// --- MAIN EPOCH LOOP
|
||||
|
@ -519,6 +520,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if ((m_parallelizationMethod == ParallelizationMethod::ModelAveragingSGD) && (g_mpi->NumNodesInUse() > 1))
|
||||
{
|
||||
g_mpi->Bcast(&epochCriterion, 1, g_mpi->MainNodeRank());
|
||||
g_mpi->Bcast(&lrControlCriterion, 1, g_mpi->MainNodeRank());
|
||||
}
|
||||
|
||||
bool loadedPrevModel = false;
|
||||
|
@ -543,7 +545,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
auto bestModelPath = GetModelNameForEpoch(i - m_learnRateAdjustInterval);
|
||||
fprintf(stderr, "Loading previous model with best training-criterion value: %ls.\n", bestModelPath.c_str());
|
||||
net->ReloadPersistableParameters<ElemType>(bestModelPath);
|
||||
net->RereadPersistableParameters<ElemType>(bestModelPath);
|
||||
LoadCheckPointInfo(i - m_learnRateAdjustInterval,
|
||||
/*out*/ totalSamplesSeen,
|
||||
/*out*/ learnRatePerSample,
|
||||
|
@ -771,13 +773,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// Sub-minibatching is used if a single minibatch is too large to fit into GPU RAM.
|
||||
DataReaderHelpers::SubminibatchDispatcher<ElemType> smbDispatcher;
|
||||
size_t numSubminibatchesNeeded = 0;
|
||||
if (m_maxSamplesInRAM < SIZE_MAX) // user-specified maximum number of samples that fit into GPU RAM; or 0 if not enabled
|
||||
if (m_maxSamplesInRAM < SIZE_MAX || m_numSubminiBatches > 1) // user-specified maximum number of samples that fit into GPU RAM; or 0 if not enabled
|
||||
{
|
||||
// into how many pieces would we need to break the minibatch?
|
||||
// TODO: The following calculation relies on the ill-devised definition of "minibatch" of the current truncated BPTT implementation. Adapt this once fixed.
|
||||
size_t numParallelSequences = trainSetDataReader->GetNumParallelSequences();
|
||||
size_t estimatedMBSize = tunedMBSize * numParallelSequences;
|
||||
numSubminibatchesNeeded = (size_t)std::ceil((float)estimatedMBSize / m_maxSamplesInRAM);
|
||||
if (m_maxSamplesInRAM < SIZE_MAX)
|
||||
{
|
||||
// into how many pieces would we need to break the minibatch?
|
||||
// TODO: The following calculation relies on the ill-devised definition of "minibatch" of the current truncated BPTT implementation. Adapt this once fixed.
|
||||
size_t numParallelSequences = trainSetDataReader->GetNumParallelSequences();
|
||||
size_t estimatedMBSize = tunedMBSize * numParallelSequences;
|
||||
numSubminibatchesNeeded = (size_t)std::ceil((float)estimatedMBSize / m_maxSamplesInRAM);
|
||||
}
|
||||
if (m_numSubminiBatches > 1)
|
||||
{
|
||||
numSubminibatchesNeeded = m_numSubminiBatches;
|
||||
}
|
||||
}
|
||||
// this is non-trivial, we need a manager object to handle this
|
||||
if (numSubminibatchesNeeded > 1)
|
||||
|
@ -807,7 +816,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
if (numSubminibatchesNeeded > 1)
|
||||
{
|
||||
fprintf(stderr, ", with maximum %d samples in RAM", (int)m_maxSamplesInRAM);
|
||||
if (m_maxSamplesInRAM < SIZE_MAX)
|
||||
fprintf(stderr, ", with maximum %d samples in RAM", (int)m_maxSamplesInRAM);
|
||||
else
|
||||
fprintf(stderr, ", with %d subminibatch", (int)numSubminibatchesNeeded);
|
||||
}
|
||||
fprintf(stderr, ".\n");
|
||||
|
||||
|
@ -998,7 +1010,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
UpdateWeights(node, smoothedGradient, learnRatePerSample,
|
||||
GetMomentumPerSample(epochNumber/*BUGBUG workaround:*/, net->GetMBLayoutPtr()->GetNumParallelSequences()), aggregateNumSamples,
|
||||
m_L2RegWeight, m_L1RegWeight,
|
||||
m_needAveMultiplier);
|
||||
m_needAveMultiplier, m_useNesterovMomentum);
|
||||
#ifdef _DEBUG
|
||||
if (dynamic_pointer_cast<ComputationNode<ElemType>>(node)->Value().HasNan("TrainOneEpoch/UpdateWeights(): "))
|
||||
LogicError("%ls %ls operation has NaNs in functionValues after parameter update.", node->NodeName().c_str(), node->OperationName().c_str());
|
||||
|
@ -1438,7 +1450,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
int baseModelEpoch = epochNumber - 1;
|
||||
net->ReloadPersistableParameters<ElemType>(GetModelNameForEpoch(baseModelEpoch));
|
||||
net->RereadPersistableParameters<ElemType>(GetModelNameForEpoch(baseModelEpoch));
|
||||
|
||||
double learnRate = learnRatePerSample;
|
||||
size_t dummyMinibatchSize = 0;
|
||||
|
@ -1598,7 +1610,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
int baseModelEpoch = epochNumber - 1;
|
||||
net->ReloadPersistableParameters<ElemType>(GetModelNameForEpoch(baseModelEpoch));
|
||||
net->RereadPersistableParameters<ElemType>(GetModelNameForEpoch(baseModelEpoch));
|
||||
|
||||
double dummyLearnRate;
|
||||
double dummtPrevCriterion;
|
||||
|
@ -2029,7 +2041,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
size_t actualMBSize,
|
||||
const double L2RegWeight,
|
||||
const double L1RegWeight,
|
||||
const bool needAveMultiplier)
|
||||
const bool needAveMultiplier,
|
||||
const bool useNesterovMomentum
|
||||
)
|
||||
{
|
||||
// we use simple linear (instead of log linear) scaling here
|
||||
const double momentum = MomentumPerMB(momentumPerSample, actualMBSize);
|
||||
|
@ -2070,7 +2084,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (adpType == GradientsUpdateType::None)
|
||||
{
|
||||
smoothedGradient.NormalGrad(gradientValues, functionValues,
|
||||
(ElemType)learnRatePerSample, (ElemType)momentum);
|
||||
(ElemType)learnRatePerSample, (ElemType)momentum, useNesterovMomentum);
|
||||
}
|
||||
else if (adpType == GradientsUpdateType::AdaGrad ||
|
||||
(adpType == GradientsUpdateType::RmsProp && gradientValues.GetMatrixType() == MatrixType::SPARSE) ||
|
||||
|
@ -2120,7 +2134,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
const double momentumPerSample,
|
||||
const size_t actualMBSize,
|
||||
const double L2RegWeight, const double L1RegWeight,
|
||||
const bool needAveMultiplier) const
|
||||
const bool needAveMultiplier,
|
||||
const bool useNesterovMomentum
|
||||
) const
|
||||
{
|
||||
#if DUMPOUTPUT
|
||||
fprintf(stderr, "Update_%ls\n", node->NodeName().c_str());
|
||||
|
@ -2131,7 +2147,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
UpdateWeightsS(this, dynamic_pointer_cast<ComputationNode<ElemType>>(node)->Value(), dynamic_pointer_cast<ComputationNode<ElemType>>(node)->Gradient(),
|
||||
smoothedGradient, learnRatePerSample, momentumPerSample,
|
||||
actualMBSize, L2RegWeight, L1RegWeight,
|
||||
needAveMultiplier);
|
||||
needAveMultiplier, m_useNesterovMomentum);
|
||||
node->BumpEvalTimeStamp();
|
||||
}
|
||||
|
||||
|
@ -2501,6 +2517,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
m_mbSize = configSGD(L"minibatchSize", ConfigRecordType::Array(intargvector(vector<int>{ 256 })));
|
||||
m_truncated = configSGD(L"truncated", false);
|
||||
m_maxSamplesInRAM = configSGD(L"maxSamplesInRAM", (size_t)SIZE_MAX);
|
||||
m_numSubminiBatches = configSGD(L"numSubminibatches", (size_t)1);
|
||||
|
||||
// the number of samples in each epoch (0 means, use all the samples in each epoch).
|
||||
m_epochSize = configSGD(L"epochSize", (size_t)0);
|
||||
|
@ -2520,6 +2537,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
floatargvector momentumPerMB = configSGD(L"momentumPerMB", ConfigRecordType::Array(floatargvector()));
|
||||
floatargvector momentumPerSample = configSGD(L"momentumPerSample", ConfigRecordType::Array(floatargvector()));
|
||||
floatargvector momentumAsTimeConstant = configSGD(L"momentumAsTimeConstant", ConfigRecordType::Array(floatargvector()));
|
||||
bool useNesterovMomentum = configSGD(L"useNAG", false);
|
||||
|
||||
|
||||
m_maxTempMemSizeInSamplesForCNN = configSGD(L"maxTempMemSizeInSamplesForCNN", (size_t)0);
|
||||
|
||||
|
@ -2534,6 +2553,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
m_hSmoothingWeight = configSGD(L"hSmoothingWeight", 0.95);
|
||||
m_frameDropThresh = configSGD(L"frameDropThresh", 1e-10);
|
||||
m_doReferenceAlign = configSGD(L"doReferenceAlign", false);
|
||||
m_seqGammarCalcUsesMBR = configSGD(L"seqGammarUsesMBR", false);
|
||||
m_seqGammarCalcAMF = configSGD(L"seqGammarAMF", 14.0);
|
||||
m_seqGammarCalcLMF = configSGD(L"seqGammarLMF", 14.0);
|
||||
m_seqGammarCalcbMMIFactor = configSGD(L"seqGammarBMMIFactor", 0.0);
|
||||
m_seqGammarCalcWP = configSGD(L"seqGammarWordPen", 0.0);
|
||||
|
||||
m_dropoutRates = configSGD(L"dropoutRate", ConfigRecordType::Array(floatargvector(vector<float>{ 0.0f })));
|
||||
|
||||
|
@ -2639,6 +2663,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
m_momentumParam = floatargvector(L"0.9");
|
||||
m_momentumSpecifiedForMBSize = m_mbSize;
|
||||
}
|
||||
m_useNesterovMomentum = useNesterovMomentum;
|
||||
|
||||
for (int i = 0; i < m_momentumParam.size(); i++)
|
||||
{
|
||||
if (m_momentumParam[i] >= 1.0 || m_momentumParam[i] < 0.0)
|
||||
|
|
|
@ -111,6 +111,7 @@ protected:
|
|||
intargvector m_learningRatesSpecifiedForMBSize; // 1 for per sample, m_mbSize[] for per MB
|
||||
floatargvector m_momentumParam;
|
||||
intargvector m_momentumSpecifiedForMBSize;
|
||||
bool m_useNesterovMomentum;
|
||||
|
||||
// Determine the MB size used for mapping a given learning-rate or momentum parameter to a per-sample value.
|
||||
// MB size is the number of samples across all time steps and parallel sequences.
|
||||
|
@ -157,7 +158,11 @@ protected:
|
|||
// To mitigate this issue, we adopt the sub-minibatch implementation, where
|
||||
// each m_mbSize[epoch] is divided into a few sub-minibatches, each of which will be no larger than m_maxSamplesInRAM
|
||||
// a forward-backward pass is performed for each sub-minibatch; a model update is performed after each minibatch
|
||||
|
||||
size_t m_numSubminiBatches;
|
||||
// alternative method to specify how to split minibatches into subminibatches
|
||||
// default is 1, which means no subminibatch is used
|
||||
// if m_maxSamplesInRAM = SIZE_MAX (which means users do not specify the option) and m_numSubminiBatches > 1
|
||||
// we divide one minibatch to m_numSubminiBatches subMinibatches
|
||||
|
||||
// the number of samples in each epoch (0 means, use all the samples in each epoch).
|
||||
size_t m_epochSize;
|
||||
|
@ -245,6 +250,11 @@ protected:
|
|||
double m_hSmoothingWeight;
|
||||
double m_frameDropThresh;
|
||||
bool m_doReferenceAlign;
|
||||
double m_seqGammarCalcAMF;
|
||||
double m_seqGammarCalcLMF;
|
||||
double m_seqGammarCalcWP;
|
||||
double m_seqGammarCalcbMMIFactor;
|
||||
bool m_seqGammarCalcUsesMBR;
|
||||
};
|
||||
|
||||
template<class ElemType> class IDistGradAggregator;
|
||||
|
@ -436,7 +446,9 @@ public:
|
|||
size_t actualMBSize,
|
||||
const double L2RegWeight,
|
||||
const double L1RegWeight,
|
||||
const bool needAveMultiplier);
|
||||
const bool needAveMultiplier,
|
||||
const bool useNesterovMomentum
|
||||
);
|
||||
|
||||
protected:
|
||||
// UpdateWeights - update the weights in
|
||||
|
@ -446,7 +458,8 @@ protected:
|
|||
const double momentumPerSample,
|
||||
const size_t actualMBSize,
|
||||
const double L2RegWeight, const double L1RegWeight,
|
||||
const bool needAveMultiplier) const;
|
||||
const bool needAveMultiplier,
|
||||
const bool useNesterovMomentum) const;
|
||||
|
||||
void ClipGradient(Matrix<ElemType>& gradient, const size_t actualMBSize) const;
|
||||
|
||||
|
|
|
@ -44,7 +44,7 @@
|
|||
<SDLCheck>true</SDLCheck>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
|
|
|
@ -11,6 +11,23 @@
|
|||
#pragma warning (disable: 4127) // conditional expression is constant
|
||||
|
||||
namespace msra { namespace lattices {
|
||||
|
||||
// Bundle of tunable parameters for the sequence-training gamma computation.
// A default-constructed instance carries the default value noted on each field.
// NOTE(review): field meanings are inferred from the field names and the SGD
// config keys (seqGammarAMF, seqGammarLMF, seqGammarWordPen, seqGammarBMMIFactor,
// seqGammarUsesMBR) -- confirm against the code that fills this struct.
struct SeqGammarCalParam
{
    double amf;        // default 14.0; presumably the acoustic-model scaling factor
    double lmf;        // default 14.0; presumably the language-model scaling factor
    double wp;         // default 0.0;  presumably the word penalty
    double bMMIfactor; // default 0.0;  copied into GammaCalculation::boostmmifactor
    bool sMBRmode;     // default false; copied into GammaCalculation::seqsMBRmode

    // Initializes every field to its default in one place.
    SeqGammarCalParam()
        : amf(14.0), lmf(14.0), wp(0.0), bMMIfactor(0.0), sMBRmode(false)
    {
    }
};
|
||||
|
||||
template<class ElemType>
|
||||
class GammaCalculation
|
||||
{
|
||||
|
@ -19,9 +36,9 @@ namespace msra { namespace lattices {
|
|||
GammaCalculation() : cpumode(false)
|
||||
{
|
||||
initialmark = false;
|
||||
lmf = 14.0f; // Note that 9 was best for Fisher --these should best be configurable
|
||||
lmf = 7.0f; // Note that 9 was best for Fisher --these should best be configurable
|
||||
wp = 0.0f;
|
||||
amf = 14.0f;
|
||||
amf = 7.0f;
|
||||
boostmmifactor = 0.0f;
|
||||
seqsMBRmode = false;
|
||||
}
|
||||
|
@ -30,6 +47,9 @@ namespace msra { namespace lattices {
|
|||
|
||||
}
|
||||
|
||||
//========================================
|
||||
// Sec. 1 init functions
|
||||
//========================================
|
||||
void init(msra::asr::simplesenonehmm hset, int DeviceId)
|
||||
{
|
||||
m_deviceid = DeviceId;
|
||||
|
@ -47,7 +67,21 @@ namespace msra { namespace lattices {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
//========================================
|
||||
// Sec. 2 set functions
|
||||
//========================================
|
||||
void SetGammarCalculationParams(const SeqGammarCalParam& gammarParam)
|
||||
{
|
||||
lmf = (float)gammarParam.lmf;
|
||||
amf = (float)gammarParam.amf;
|
||||
wp = (float)gammarParam.wp;
|
||||
seqsMBRmode = gammarParam.sMBRmode;
|
||||
boostmmifactor = (float)gammarParam.bMMIfactor;
|
||||
}
|
||||
|
||||
//========================================
|
||||
// Sec. 3 calculation functions
|
||||
//========================================
|
||||
void calgammaformb( Microsoft::MSR::CNTK::Matrix<ElemType>& functionValues,
|
||||
std::vector<shared_ptr<const msra::dbn::latticepair>> &lattices,
|
||||
const Microsoft::MSR::CNTK::Matrix<ElemType>& loglikelihood,
|
||||
|
|
|
@ -442,6 +442,7 @@ template<typename FLOAT> static bool islogzero (FLOAT v) { return v < LOGZERO/2;
|
|||
LogicError("invalid backpointer resulting in state index out of range");
|
||||
|
||||
int bp = (int) backpointers(j,t); // save the backpointer before overwriting it (gammas and backpointers are aliases of each other)
|
||||
//thisedgealignmentsj[t] = (unsigned short)hmm.getsenoneid(j - js);
|
||||
if (!returnsenoneids) // return binary gammas (for MMI; this mode is compatible with softalignmode)
|
||||
for (size_t i = js; i < je; i++)
|
||||
loggammas(i,t) = ((int) i == j) ? 0.0f : LOGZERO;
|
||||
|
|
|
@ -743,8 +743,8 @@ namespace msra { namespace lattices {
|
|||
double totalfwscore = 0.0f;
|
||||
if (!parallelstate->emulation)
|
||||
{
|
||||
|
||||
fprintf(stderr, "parallelforwardbackwardlattice: %d launches for forward, %d launches for backward\n", (int)batchsizeforward.size(), (int)batchsizebackward.size());
|
||||
if (verbosity>=2)
|
||||
fprintf(stderr, "parallelforwardbackwardlattice: %d launches for forward, %d launches for backward\n", (int)batchsizeforward.size(), (int)batchsizebackward.size());
|
||||
|
||||
const bool allocateframescorrect = (returnEframescorrect || boostingfactor != 0.0f);
|
||||
const bool copyuids = (returnEframescorrect || boostingfactor != 0.0f);
|
||||
|
|
|
@ -67,7 +67,7 @@ speechTrain = [
|
|||
|
||||
// LSTM cell
|
||||
# TODO: This is temporary test code for the new ShiftNode (until we switch PastValue() itself over)
|
||||
PastValueShift(dimDummy, input) = Shift(input, /*fromOffsets=*/-1, /*boundaryValue=*/Constant(0.1), dim=-1, numSteps=1, insertedDim=2)
|
||||
PastValueShift(dimDummy, input) = Shift(input, /*fromOffsets=*/-1, /*boundaryValue=*/Constant(0.1), dim=-1)
|
||||
PastValue1 = PastValue
|
||||
#PastValue1 = PastValueShift
|
||||
dh = PastValue1(outputDim, output); // hidden state(t-1)
|
||||
|
|
|
@ -56,6 +56,9 @@ makebuildinfo()
|
|||
if [ ! -z "$CUB_PATH" ]; then
|
||||
printf "#define _CUB_PATH_ \"%s\"\n" $CUB_PATH >> $target
|
||||
fi
|
||||
if [ ! -z "$CUDNN_PATH" ]; then
|
||||
printf "#define _CUDNN_PATH_ \"%s\"\n" $CUDNN_PATH >> $target
|
||||
fi
|
||||
printf "#define _BUILDTYPE_ \"%s\"\n" $BUILDTYPE >> $target
|
||||
printf "#endif\n" >> $target
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче