Made gcc happy again (mostly missing headers or wrong declaration order);
Makefile adapted to the new paths, but it does not yet build Network and SGD as separate libs
This commit is contained in:
Родитель
c0d0f1ff0b
Коммит
9aecb5649d
|
@ -4,6 +4,8 @@
|
|||
// </copyright>
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include "Basics.h"
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
@ -16,6 +18,7 @@
|
|||
#endif
|
||||
#include "fileutil.h" // for f{ge,pu}t{,Text}()
|
||||
#include <fstream> // for LoadMatrixFromTextFile() --TODO: change to using this File class
|
||||
#include <sstream>
|
||||
|
||||
namespace Microsoft{ namespace MSR { namespace CNTK {
|
||||
|
||||
|
|
|
@ -109,7 +109,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
size_t numRows = 0;
|
||||
size_t numCols = 0;
|
||||
auto array = File::LoadMatrixFromTextFile<ElemType>(msra::strfun::utf8(initFromFilePath), numRows, numCols); // TODO: change pathname to wstring
|
||||
FunctionValues().SetValue(numRows, numCols, array.data(), matrixFlagNormal, GetDeviceId());
|
||||
FunctionValues().SetValue(numRows, numCols, array.data(), matrixFlagNormal, m_deviceId);
|
||||
}
|
||||
|
||||
virtual const std::wstring OperationName() const {return TypeName();}
|
||||
|
|
|
@ -12,174 +12,260 @@ extern Microsoft::MSR::CNTK::MPIWrapper *g_mpi;
|
|||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
template<class ElemType>
|
||||
void DecimateMinibatch(std::map<std::wstring, MSR::CNTK::Matrix<ElemType>*>& mb, int numProcessor, int myID)
|
||||
{
|
||||
int rank = myID;
|
||||
int procs = numProcessor;
|
||||
|
||||
size_t rv = 0;
|
||||
if (procs > 1)
|
||||
template<class ElemType>
|
||||
void DecimateMinibatch(std::map<std::wstring, MSR::CNTK::Matrix<ElemType>*>& mb, int numProcessor, int myID)
|
||||
{
|
||||
for (auto it = mb.begin(); it != mb.end(); ++it)
|
||||
int rank = myID;
|
||||
int procs = numProcessor;
|
||||
|
||||
size_t rv = 0;
|
||||
if (procs > 1)
|
||||
{
|
||||
MSR::CNTK::Matrix<ElemType> &mat = *(it->second);
|
||||
size_t nCols = mat.GetNumCols();
|
||||
size_t col_start = (nCols * rank) / procs;
|
||||
size_t col_end = (nCols * (rank + 1)) / procs;
|
||||
if (col_end > nCols)
|
||||
for (auto it = mb.begin(); it != mb.end(); ++it)
|
||||
{
|
||||
// this shouldn't happen
|
||||
col_end = nCols;
|
||||
}
|
||||
|
||||
if (col_end == col_start)
|
||||
{
|
||||
MSR::CNTK::Matrix<ElemType> tmp(mat.GetNumRows(), 0, AUTOPLACEMATRIX, DENSE);
|
||||
mat.SetValue(tmp);
|
||||
}
|
||||
else
|
||||
{
|
||||
MSR::CNTK::Matrix<ElemType> tmp = mat.ColumnSlice(col_start, col_end - col_start);
|
||||
mat.SetValue(tmp);
|
||||
}
|
||||
|
||||
if (rv == 0)
|
||||
{
|
||||
rv = mat.GetNumCols();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rv != mat.GetNumCols())
|
||||
MSR::CNTK::Matrix<ElemType> &mat = *(it->second);
|
||||
size_t nCols = mat.GetNumCols();
|
||||
size_t col_start = (nCols * rank) / procs;
|
||||
size_t col_end = (nCols * (rank + 1)) / procs;
|
||||
if (col_end > nCols)
|
||||
{
|
||||
throw std::logic_error("Uneven number of columns among inputs.");
|
||||
// this shouldn't happen
|
||||
col_end = nCols;
|
||||
}
|
||||
|
||||
if (col_end == col_start)
|
||||
{
|
||||
MSR::CNTK::Matrix<ElemType> tmp(mat.GetNumRows(), 0, AUTOPLACEMATRIX, DENSE);
|
||||
mat.SetValue(tmp);
|
||||
}
|
||||
else
|
||||
{
|
||||
MSR::CNTK::Matrix<ElemType> tmp = mat.ColumnSlice(col_start, col_end - col_start);
|
||||
mat.SetValue(tmp);
|
||||
}
|
||||
|
||||
if (rv == 0)
|
||||
{
|
||||
rv = mat.GetNumCols();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rv != mat.GetNumCols())
|
||||
{
|
||||
throw std::logic_error("Uneven number of columns among inputs.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
size_t DecimateMinibatchWithSentences(std::map<std::wstring, MSR::CNTK::Matrix<ElemType>*> &mb, /* (input) matrix to be decimated */
|
||||
int rank, int numprocs, /* (input) rank info */
|
||||
size_t& nSlices, /* (input/output): on input, # parallel sentence total , on output, # paralel sentence in this node */
|
||||
Matrix<float>& SentenceBoundary, /* (output) nSlices X nMBsize matrix */
|
||||
vector<MinibatchPackingFlag>& PackingFlags, /* (output) 1 X nMBsize vector */
|
||||
IDataReader<ElemType>* trainDataReader) /* (input) to have access to reader */
|
||||
{
|
||||
// For RNN, a input Matrix is organized in the following way:
|
||||
// | x_t^1 x_t^2 ... x_t^N | .... | x_{t+T-1}^1 ... x_{t+T-1}^N |
|
||||
// |<---- block 1 ---->| .... |<------ block T ----->|
|
||||
// N is the nSlice (input)
|
||||
// The decimation here is to split each block to individual GPUs
|
||||
// So After decimation
|
||||
// | x_t^{st} ... x_t^{en-1}| .... | x_{t+T-1}^{st} ... x_{t+T-1}^{en-1} |
|
||||
// Each block now has nSlice/nProcs
|
||||
//
|
||||
// Correspondingly, the SentenceBoundary and PackingFlags will be revised
|
||||
trainDataReader->SetSentenceSegBatch(SentenceBoundary, PackingFlags);
|
||||
|
||||
size_t rv = 0;
|
||||
size_t nOrigParallelUtts = nSlices;
|
||||
static bool warned = false;
|
||||
if (numprocs > 1)
|
||||
template<class ElemType>
|
||||
size_t DecimateMinibatchWithSentences(std::map<std::wstring, MSR::CNTK::Matrix<ElemType>*> &mb, /* (input) matrix to be decimated */
|
||||
int rank, int numprocs, /* (input) rank info */
|
||||
size_t& nSlices, /* (input/output): on input, # parallel sentence total , on output, # paralel sentence in this node */
|
||||
Matrix<float>& SentenceBoundary, /* (output) nSlices X nMBsize matrix */
|
||||
vector<MinibatchPackingFlag>& PackingFlags, /* (output) 1 X nMBsize vector */
|
||||
IDataReader<ElemType>* trainDataReader) /* (input) to have access to reader */
|
||||
{
|
||||
// decide new parallel utterances
|
||||
size_t sent_start = 0;
|
||||
size_t sent_end = 0;
|
||||
if (nOrigParallelUtts % numprocs != 0)
|
||||
// For RNN, a input Matrix is organized in the following way:
|
||||
// | x_t^1 x_t^2 ... x_t^N | .... | x_{t+T-1}^1 ... x_{t+T-1}^N |
|
||||
// |<---- block 1 ---->| .... |<------ block T ----->|
|
||||
// N is the nSlice (input)
|
||||
// The decimation here is to split each block to individual GPUs
|
||||
// So After decimation
|
||||
// | x_t^{st} ... x_t^{en-1}| .... | x_{t+T-1}^{st} ... x_{t+T-1}^{en-1} |
|
||||
// Each block now has nSlice/nProcs
|
||||
//
|
||||
// Correspondingly, the SentenceBoundary and PackingFlags will be revised
|
||||
trainDataReader->SetSentenceSegBatch(SentenceBoundary, PackingFlags);
|
||||
|
||||
size_t rv = 0;
|
||||
size_t nOrigParallelUtts = nSlices;
|
||||
static bool warned = false;
|
||||
if (numprocs > 1)
|
||||
{
|
||||
if (!warned)
|
||||
// decide new parallel utterances
|
||||
size_t sent_start = 0;
|
||||
size_t sent_end = 0;
|
||||
if (nOrigParallelUtts % numprocs != 0)
|
||||
{
|
||||
/* give a warning of potential bandwidth wasting */
|
||||
fprintf(stderr, "WARNING: %d GPUs are used in model averaging, but the number of parallel utterances are %d, a potential training speed degradation.\n",
|
||||
(int)g_mpi->NumNodesInUse(), (int)nOrigParallelUtts);
|
||||
warned = true;
|
||||
}
|
||||
if (rank == numprocs - 1)
|
||||
{
|
||||
nSlices = nOrigParallelUtts - (nOrigParallelUtts / numprocs + 1) * (numprocs - 1);
|
||||
sent_start = (nOrigParallelUtts / numprocs + 1) * (numprocs - 1);
|
||||
sent_end = nOrigParallelUtts;
|
||||
if (!warned)
|
||||
{
|
||||
/* give a warning of potential bandwidth wasting */
|
||||
fprintf(stderr, "WARNING: %d GPUs are used in model averaging, but the number of parallel utterances are %d, a potential training speed degradation.\n",
|
||||
(int)g_mpi->NumNodesInUse(), (int)nOrigParallelUtts);
|
||||
warned = true;
|
||||
}
|
||||
if (rank == numprocs - 1)
|
||||
{
|
||||
nSlices = nOrigParallelUtts - (nOrigParallelUtts / numprocs + 1) * (numprocs - 1);
|
||||
sent_start = (nOrigParallelUtts / numprocs + 1) * (numprocs - 1);
|
||||
sent_end = nOrigParallelUtts;
|
||||
}
|
||||
else
|
||||
{
|
||||
nSlices = nOrigParallelUtts / numprocs + 1;
|
||||
sent_start = nSlices * rank;
|
||||
sent_end = nSlices * (rank + 1);
|
||||
if (sent_end > nOrigParallelUtts) sent_end = nOrigParallelUtts;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
nSlices = nOrigParallelUtts / numprocs + 1;
|
||||
sent_start = nSlices * rank;
|
||||
sent_end = nSlices * (rank + 1);
|
||||
nSlices = nOrigParallelUtts / numprocs;
|
||||
sent_start = rank*nSlices;
|
||||
sent_end = (rank + 1)*nSlices;
|
||||
if (sent_end > nOrigParallelUtts) sent_end = nOrigParallelUtts;
|
||||
}
|
||||
// decimate data
|
||||
for (auto it = mb.begin(); it != mb.end(); ++it)
|
||||
{
|
||||
MSR::CNTK::Matrix<ElemType> &mat = *(it->second);
|
||||
size_t nCols = mat.GetNumCols();
|
||||
|
||||
if (nCols % nOrigParallelUtts != 0)
|
||||
{
|
||||
// this should not happen for DNN, RNN with truncated BPTT, not sure about other special stuff ...
|
||||
RuntimeError("ERROR: minibatch size %d, but with %d parallel utterances\n", nCols, nOrigParallelUtts);
|
||||
}
|
||||
size_t nBlocks = nCols / nOrigParallelUtts;
|
||||
// for RNN, nBlocks is the size of truncated BPTT
|
||||
if (sent_end == sent_start)
|
||||
{
|
||||
// should never happen, print debug info
|
||||
RuntimeError("ERROR: in DecimateMinibatch, col_st=col_en=%d, nCol=%d, nBlock=%d, nParaUtts=%d, nGPU=%d\n",
|
||||
(int)sent_start, (int)nCols, (int)nBlocks, (int)nOrigParallelUtts, (int)numprocs);
|
||||
}
|
||||
|
||||
MSR::CNTK::Matrix<ElemType> tmp(mat.GetNumRows(), nSlices*nBlocks, mat.GetPreferredDeviceId(), mat.GetMatrixType());
|
||||
|
||||
// do the column slice for each block
|
||||
for (size_t iblock = 0; iblock < nBlocks; iblock++)
|
||||
{
|
||||
tmp.SetColumnSlice(mat.ColumnSlice(nOrigParallelUtts*iblock + sent_start, nSlices),
|
||||
iblock*nSlices, nSlices);
|
||||
}
|
||||
mat.SetValue(tmp);
|
||||
|
||||
// assert the cols are even among nodes
|
||||
if (0 == rv)
|
||||
{
|
||||
rv = mat.GetNumCols();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rv != mat.GetNumCols())
|
||||
throw std::logic_error("Uneven number of columns among inputs.");
|
||||
}
|
||||
}
|
||||
// revise sentence boundary and packing flags
|
||||
Matrix<float> newBoundary(CPUDEVICE); // TODO: change Matrix<float> to a typedef
|
||||
size_t nMBSize = PackingFlags.size();
|
||||
newBoundary.Resize(nSlices, nMBSize);
|
||||
newBoundary.AssignRowSliceValuesOf(SentenceBoundary, sent_start, nSlices);
|
||||
fill(PackingFlags.begin(), PackingFlags.end(), MinibatchPackingFlag::None);
|
||||
for (size_t nt = 0; nt < nMBSize; nt++)
|
||||
{
|
||||
for (size_t ns = 0; ns < nSlices; ns++)
|
||||
{
|
||||
if (newBoundary(ns, nt) == SEQUENCE_START)
|
||||
PackingFlags[nt] |= MinibatchPackingFlag::SequenceStart;
|
||||
if (newBoundary(ns, nt) == SEQUENCE_END)
|
||||
PackingFlags[nt] |= MinibatchPackingFlag::SequenceEnd;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
static AdaptationRegType ParseAdaptationRegType(wstring s)
|
||||
{
|
||||
msra::strfun::tolower_ascii(s);
|
||||
if (s == L"" || s == L"none")
|
||||
{
|
||||
return AdaptationRegType::None;
|
||||
}
|
||||
else if (s == L"kl" || s == L"klreg")
|
||||
{
|
||||
return AdaptationRegType::KL;
|
||||
}
|
||||
else
|
||||
{
|
||||
nSlices = nOrigParallelUtts / numprocs;
|
||||
sent_start = rank*nSlices;
|
||||
sent_end = (rank + 1)*nSlices;
|
||||
if (sent_end > nOrigParallelUtts) sent_end = nOrigParallelUtts;
|
||||
throw std::invalid_argument(
|
||||
"ParseAdaptationRegType: Invalid Adaptation Regularization Type. Valid values are "
|
||||
"(None | KL)");
|
||||
}
|
||||
// decimate data
|
||||
for (auto it = mb.begin(); it != mb.end(); ++it)
|
||||
{
|
||||
MSR::CNTK::Matrix<ElemType> &mat = *(it->second);
|
||||
size_t nCols = mat.GetNumCols();
|
||||
|
||||
if (nCols % nOrigParallelUtts != 0)
|
||||
{
|
||||
// this should not happen for DNN, RNN with truncated BPTT, not sure about other special stuff ...
|
||||
RuntimeError("ERROR: minibatch size %d, but with %d parallel utterances\n", nCols, nOrigParallelUtts);
|
||||
}
|
||||
size_t nBlocks = nCols / nOrigParallelUtts;
|
||||
// for RNN, nBlocks is the size of truncated BPTT
|
||||
if (sent_end == sent_start)
|
||||
{
|
||||
// should never happen, print debug info
|
||||
RuntimeError("ERROR: in DecimateMinibatch, col_st=col_en=%d, nCol=%d, nBlock=%d, nParaUtts=%d, nGPU=%d\n",
|
||||
(int)sent_start, (int)nCols, (int)nBlocks, (int)nOrigParallelUtts, (int)numprocs);
|
||||
}
|
||||
|
||||
MSR::CNTK::Matrix<ElemType> tmp(mat.GetNumRows(), nSlices*nBlocks, mat.GetPreferredDeviceId(), mat.GetMatrixType());
|
||||
|
||||
// do the column slice for each block
|
||||
for (size_t iblock = 0; iblock < nBlocks; iblock++)
|
||||
{
|
||||
tmp.SetColumnSlice(mat.ColumnSlice(nOrigParallelUtts*iblock + sent_start, nSlices),
|
||||
iblock*nSlices, nSlices);
|
||||
}
|
||||
mat.SetValue(tmp);
|
||||
|
||||
// assert the cols are even among nodes
|
||||
if (0 == rv)
|
||||
{
|
||||
rv = mat.GetNumCols();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rv != mat.GetNumCols())
|
||||
throw std::logic_error("Uneven number of columns among inputs.");
|
||||
}
|
||||
}
|
||||
// revise sentence boundary and packing flags
|
||||
Matrix<float> newBoundary(CPUDEVICE); // TODO: change Matrix<float> to a typedef
|
||||
size_t nMBSize = PackingFlags.size();
|
||||
newBoundary.Resize(nSlices, nMBSize);
|
||||
newBoundary.AssignRowSliceValuesOf(SentenceBoundary, sent_start, nSlices);
|
||||
fill(PackingFlags.begin(), PackingFlags.end(), MinibatchPackingFlag::None);
|
||||
for (size_t nt = 0; nt < nMBSize; nt++)
|
||||
{
|
||||
for (size_t ns = 0; ns < nSlices; ns++)
|
||||
{
|
||||
if (newBoundary(ns, nt) == SEQUENCE_START)
|
||||
PackingFlags[nt] |= MinibatchPackingFlag::SequenceStart;
|
||||
if (newBoundary(ns, nt) == SEQUENCE_END)
|
||||
PackingFlags[nt] |= MinibatchPackingFlag::SequenceEnd;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
// Maps a configuration string onto a GradientsUpdateType.
//   s - option text, taken by value because it is normalized locally
//       (presumably lower-cased in place by tolower_ascii -- the comparisons
//       below are all against lower-case literals; confirm against fileutil).
// Returns None for "", "none", "normal" or "simple"; AdaGrad for "adagrad";
// RmsProp for "rmsprop".
// Throws std::invalid_argument for any other value.
static GradientsUpdateType ParseGradUpdateType(wstring s)
{
    msra::strfun::tolower_ascii(s);

    const bool plainUpdate = (s == L"") || (s == L"none") || (s == L"normal") || (s == L"simple");
    if (plainUpdate)
        return GradientsUpdateType::None;

    if (s == L"adagrad")
        return GradientsUpdateType::AdaGrad;

    if (s == L"rmsprop")
        return GradientsUpdateType::RmsProp;

    // Nothing matched: reject the value rather than guessing a default.
    throw std::invalid_argument(
        "ParseGradUpdateType: Invalid Gradient Updating Type. Valid values are "
        "(None | AdaGrad | RmsProp )");
}
|
||||
|
||||
// Maps a configuration string onto a ParallelizationMethod.
//   s - option text, taken by value because it is normalized locally
//       (presumably lower-cased in place by tolower_ascii; confirm).
// Returns None for "" or "none", DataParallelSGD for "dataparallelsgd",
// ModelAveragingSGD for "modelaveragingsgd".
// Throws std::invalid_argument for any other value.
static ParallelizationMethod ParseParallelizationMethod(wstring s)
{
    msra::strfun::tolower_ascii(s);

    if (s == L"" || s == L"none")
        return ParallelizationMethod::None;

    if (s == L"dataparallelsgd")
        return ParallelizationMethod::DataParallelSGD;

    if (s == L"modelaveragingsgd")
        return ParallelizationMethod::ModelAveragingSGD;

    // Unknown method name.
    throw std::invalid_argument("ParseParallelizationMethod: Invalid Parallelization Method. Valid values are (None | DataParallelSGD | ModelAveragingSGD)");
}
|
||||
|
||||
// Maps a configuration string onto a LearningRateSearchAlgorithm.
//   s - option text, taken by value because it is normalized locally
//       (presumably lower-cased in place by tolower_ascii; confirm).
// Accepted spellings:
//   "false", "none"                               -> None
//   "searchbeforeepoch", "beforeepoch", "before"  -> SearchBeforeEpoch
//   "adjustafterepoch", "afterepoch", "after"     -> AdjustAfterEpoch
// Note that an empty string is not accepted here; like every other
// unrecognized value it raises std::invalid_argument.
static LearningRateSearchAlgorithm ParseLearningRateSearchType(wstring s)
{
    msra::strfun::tolower_ascii(s);

    if (s == L"false" || s == L"none")
        return LearningRateSearchAlgorithm::None;

    if (s == L"searchbeforeepoch" || s == L"beforeepoch" || s == L"before")
        return LearningRateSearchAlgorithm::SearchBeforeEpoch;

    if (s == L"adjustafterepoch" || s == L"afterepoch" || s == L"after")
        return LearningRateSearchAlgorithm::AdjustAfterEpoch;

    throw std::invalid_argument(
        "autoAdjustLR: Invalid learning rate search type. Valid values are "
        "(None | SearchBeforeEpoch | AdjustAfterEpoch)");
}
|
||||
|
||||
template<class ElemType>
|
||||
SGD<ElemType>::SGD(const ConfigParameters& configSGD)
|
||||
|
@ -594,7 +680,7 @@ size_t DecimateMinibatchWithSentences(std::map<std::wstring, MSR::CNTK::Matrix<E
|
|||
void SGD<ElemType>::Adapt(wstring origModelFileName, wstring refNodeName,
|
||||
IDataReader<ElemType>* trainSetDataReader,
|
||||
IDataReader<ElemType>* validationSetDataReader,
|
||||
const DEVICEID_TYPE deviceID, const bool makeMode = true)
|
||||
const DEVICEID_TYPE deviceID, const bool makeMode)
|
||||
{
|
||||
if (origModelFileName == L"" || trainSetDataReader == nullptr)
|
||||
InvalidArgument("origModel and trainSetDataReader should not be null.");
|
||||
|
@ -644,7 +730,7 @@ size_t DecimateMinibatchWithSentences(std::map<std::wstring, MSR::CNTK::Matrix<E
|
|||
template<class ElemType>
|
||||
void SGD<ElemType>::SequenceTrain(IComputationNetBuilder<ElemType>* netBuilder, wstring origModelFileName,
|
||||
IDataReader<ElemType>* trainSetDataReader, IDataReader<ElemType>* validationSetDataReader,
|
||||
const DEVICEID_TYPE deviceID, const bool makeMode = true)
|
||||
const DEVICEID_TYPE deviceID, const bool makeMode)
|
||||
{
|
||||
if (netBuilder == nullptr || origModelFileName == L"" || trainSetDataReader == nullptr)
|
||||
InvalidArgument("netBuilder, origModel and trainSetDataReader should not be null.");
|
||||
|
@ -711,11 +797,16 @@ size_t DecimateMinibatchWithSentences(std::map<std::wstring, MSR::CNTK::Matrix<E
|
|||
}
|
||||
}
|
||||
|
||||
// Returns momentumPerSample raised to the power minibatchSize.
// (Going by the names, this turns a per-sample momentum into the momentum
// that applies to a whole minibatch.)
//   momentumPerSample - base of the power
//   minibatchSize     - exponent; a size of 0 yields 1.0
static double MomentumPerMB(double momentumPerSample, size_t minibatchSize)
{
    // Convert the exponent explicitly so the call always resolves to
    // pow(double, double) instead of depending on the mixed-argument
    // promotion overloads, which not every <cmath> implementation provides.
    return pow(momentumPerSample, static_cast<double>(minibatchSize));
}
|
||||
|
||||
template<class ElemType>
|
||||
void SGD<ElemType>::Train(IComputationNetBuilder<ElemType>* netBuilder,
|
||||
IDataReader<ElemType>* trainSetDataReader,
|
||||
IDataReader<ElemType>* validationSetDataReader,
|
||||
const bool makeMode = true)
|
||||
const bool makeMode)
|
||||
{
|
||||
if (netBuilder == nullptr || trainSetDataReader == nullptr)
|
||||
InvalidArgument("netBuilder and trainSetDataReader should not be null.\n");
|
||||
|
@ -1449,7 +1540,7 @@ size_t DecimateMinibatchWithSentences(std::map<std::wstring, MSR::CNTK::Matrix<E
|
|||
/*out*/ double& epochCriterion,
|
||||
/*out*/ std::vector<double>& epochEvalErrors,
|
||||
/*out*/ size_t& totalSamplesSeen,
|
||||
std::string prefixMsg = "")
|
||||
std::string prefixMsg)
|
||||
{
|
||||
TrainOneEpoch(net, refNet, refNode, epochNumber, epochSize,
|
||||
trainSetDataReader, learnRatePerSample, minibatchSize, featureNodes,
|
||||
|
@ -1763,7 +1854,7 @@ size_t DecimateMinibatchWithSentences(std::map<std::wstring, MSR::CNTK::Matrix<E
|
|||
/*out*/ double& epochCriterion,
|
||||
/*out*/ std::vector<double>& epochEvalErrors,
|
||||
/*out*/ size_t& totalSamplesSeen,
|
||||
std::string prefixMsg = "")
|
||||
std::string prefixMsg)
|
||||
{
|
||||
// Since we are getting timing resolution of under microsecond we use double precision
|
||||
// to ensure that we have enough digits to represent small time measurements.
|
||||
|
@ -2511,7 +2602,7 @@ size_t DecimateMinibatchWithSentences(std::map<std::wstring, MSR::CNTK::Matrix<E
|
|||
}
|
||||
|
||||
template<class ElemType>
|
||||
wstring SGD<ElemType>::GetModelNameForEpoch(const int epoch, bool bLastModel = false)
|
||||
wstring SGD<ElemType>::GetModelNameForEpoch(const int epoch, bool bLastModel)
|
||||
{
|
||||
int epoch1Base = epoch + 1;
|
||||
if (epoch1Base == m_maxEpochs || bLastModel)
|
||||
|
@ -2557,108 +2648,6 @@ size_t DecimateMinibatchWithSentences(std::map<std::wstring, MSR::CNTK::Matrix<E
|
|||
return firstEpoch;
|
||||
}
|
||||
|
||||
// Maps a configuration string onto an AdaptationRegType.
//   s - option text, taken by value because it is normalized locally
//       (presumably lower-cased in place by tolower_ascii; confirm).
// Returns None for "" or "none", KL for "kl" or "klreg".
// Throws std::invalid_argument for any other value.
static AdaptationRegType ParseAdaptationRegType(wstring s)
{
    msra::strfun::tolower_ascii(s);

    const bool noRegularization = (s == L"") || (s == L"none");
    if (noRegularization)
        return AdaptationRegType::None;

    const bool klRegularization = (s == L"kl") || (s == L"klreg");
    if (klRegularization)
        return AdaptationRegType::KL;

    throw std::invalid_argument(
        "ParseAdaptationRegType: Invalid Adaptation Regularization Type. Valid values are "
        "(None | KL)");
}
|
||||
|
||||
// Converts an option string into the matching GradientsUpdateType.
// The argument is a by-value copy that tolower_ascii normalizes first
// (presumably to lower case, in place -- all literals below are lower case).
//   "", "none", "normal", "simple" -> None
//   "adagrad"                      -> AdaGrad
//   "rmsprop"                      -> RmsProp
// Anything else raises std::invalid_argument.
static GradientsUpdateType ParseGradUpdateType(wstring s)
{
    msra::strfun::tolower_ascii(s);

    if (s == L"adagrad")
        return GradientsUpdateType::AdaGrad;

    if (s == L"rmsprop")
        return GradientsUpdateType::RmsProp;

    if (s == L"" || s == L"none" || s == L"normal" || s == L"simple")
        return GradientsUpdateType::None;

    throw std::invalid_argument(
        "ParseGradUpdateType: Invalid Gradient Updating Type. Valid values are "
        "(None | AdaGrad | RmsProp )");
}
|
||||
|
||||
// Converts an option string into the matching ParallelizationMethod.
// The argument is a by-value copy that tolower_ascii normalizes first
// (presumably to lower case, in place -- confirm).
//   "", "none"          -> None
//   "dataparallelsgd"   -> DataParallelSGD
//   "modelaveragingsgd" -> ModelAveragingSGD
// Anything else raises std::invalid_argument.
static ParallelizationMethod ParseParallelizationMethod(wstring s)
{
    msra::strfun::tolower_ascii(s);

    if (s == L"dataparallelsgd")
        return ParallelizationMethod::DataParallelSGD;

    if (s == L"modelaveragingsgd")
        return ParallelizationMethod::ModelAveragingSGD;

    const bool disabled = (s == L"") || (s == L"none");
    if (disabled)
        return ParallelizationMethod::None;

    throw std::invalid_argument("ParseParallelizationMethod: Invalid Parallelization Method. Valid values are (None | DataParallelSGD | ModelAveragingSGD)");
}
|
||||
|
||||
// Converts an option string into the matching LearningRateSearchAlgorithm.
// The argument is a by-value copy that tolower_ascii normalizes first
// (presumably to lower case, in place -- confirm).
//   "false", "none"                               -> None
//   "searchbeforeepoch", "beforeepoch", "before"  -> SearchBeforeEpoch
//   "adjustafterepoch", "afterepoch", "after"     -> AdjustAfterEpoch
// Every other value, including the empty string, raises std::invalid_argument.
static LearningRateSearchAlgorithm ParseLearningRateSearchType(wstring s)
{
    msra::strfun::tolower_ascii(s);

    const bool searchOff    = (s == L"false") || (s == L"none");
    const bool searchBefore = (s == L"searchbeforeepoch") || (s == L"beforeepoch") || (s == L"before");
    const bool adjustAfter  = (s == L"adjustafterepoch") || (s == L"afterepoch") || (s == L"after");

    if (searchOff)
        return LearningRateSearchAlgorithm::None;
    if (searchBefore)
        return LearningRateSearchAlgorithm::SearchBeforeEpoch;
    if (adjustAfter)
        return LearningRateSearchAlgorithm::AdjustAfterEpoch;

    throw std::invalid_argument(
        "autoAdjustLR: Invalid learning rate search type. Valid values are "
        "(None | SearchBeforeEpoch | AdjustAfterEpoch)");
}
|
||||
|
||||
//GradientsUpdateType GradUpdateType() const
|
||||
//{
|
||||
// return m_gradType.mType;
|
||||
//}
|
||||
//
|
||||
//double GradientUpdateNoiseStd() const
|
||||
//{
|
||||
// return m_gradType.mGaussianNoiseInjectStd;
|
||||
//}
|
||||
|
||||
// Computes momentumPerSample ^ minibatchSize.
// (Judging by the names, the result is the per-minibatch momentum that
// corresponds to a given per-sample momentum.)
//   momentumPerSample - base of the power
//   minibatchSize     - exponent; 0 yields 1.0
static double MomentumPerMB(double momentumPerSample, size_t minibatchSize)
{
    // The explicit cast pins the call to pow(double, double); passing a
    // size_t exponent directly depends on the mixed-argument promotion
    // overloads and can be ambiguous with older <cmath> overload sets.
    return pow(momentumPerSample, static_cast<double>(minibatchSize));
}
|
||||
|
||||
// public:
|
||||
|
||||
#define EPSILON 1e-5
|
||||
|
||||
template<class ElemType>
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include <fstream>
|
||||
#include <queue>
|
||||
#include "Basics.h"
|
||||
#include "Helpers.h" // for foreach_column() macro
|
||||
#include "fileutil.h"
|
||||
#include "DataReader.h"
|
||||
#include "DataWriter.h"
|
||||
|
|
12
Makefile
12
Makefile
|
@ -50,7 +50,7 @@ endif
|
|||
# The actual compiler/linker flags added can be viewed by running 'mpic++ --showme:compile' and 'mpic++ --showme:link'
|
||||
CXX = mpic++
|
||||
|
||||
INCLUDEPATH:= Common/Include Math/Math MachineLearning/CNTK BrainScript
|
||||
INCLUDEPATH:= Common/Include Math/Math MachineLearning/CNTK MachineLearning/CNTKComputationNetworkLib MachineLearning/CNTKSGDLib BrainScript
|
||||
CPPFLAGS:= -D_POSIX_SOURCE -D_XOPEN_SOURCE=600 -D__USE_XOPEN2K
|
||||
CXXFLAGS:= -msse3 -std=c++0x -std=c++11 -fopenmp -fpermissive -fPIC -Werror
|
||||
LIBPATH:=
|
||||
|
@ -355,15 +355,17 @@ endif
|
|||
|
||||
CNTK_SRC =\
|
||||
MachineLearning/CNTK/CNTK.cpp \
|
||||
MachineLearning/CNTK/ComputationNode.cpp \
|
||||
MachineLearning/CNTK/ModelEditLanguage.cpp \
|
||||
MachineLearning/CNTK/NetworkDescriptionLanguage.cpp \
|
||||
MachineLearning/CNTK/Profiler.cpp \
|
||||
MachineLearning/CNTK/ComputationNetwork.cpp \
|
||||
MachineLearning/CNTK/ComputationNetworkBuilder.cpp \
|
||||
MachineLearning/CNTK/SimpleNetworkBuilder.cpp \
|
||||
MachineLearning/CNTK/SynchronousExecutionEngine.cpp \
|
||||
MachineLearning/CNTK/tests.cpp \
|
||||
MachineLearning/CNTKComputationNetworkLib/ComputationNode.cpp \
|
||||
MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp \
|
||||
MachineLearning/CNTKComputationNetworkLib/ComputationNetworkBuilder.cpp \
|
||||
MachineLearning/CNTKComputationNetworkLib/NetworkBuilderFromConfig.cpp \
|
||||
MachineLearning/CNTKSGDLib/Profiler.cpp \
|
||||
MachineLearning/CNTKSGDLib/SGD.cpp \
|
||||
MachineLearning/CNTKEval/CNTKEval.cpp \
|
||||
BrainScript/BrainScriptEvaluator.cpp \
|
||||
BrainScript/BrainScriptParser.cpp \
|
||||
|
|
|
@ -3,7 +3,10 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//
|
||||
|
||||
//helpful macros
|
||||
// TODO: the file's name is too general to be included from outside; MathHelpers.h?
|
||||
|
||||
//iterators
|
||||
#pragma once
|
||||
#undef foreach_row
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "Basics.h"
|
||||
#include "File.h"
|
||||
#include "CommonMatrix.h"
|
||||
#include <limits.h>
|
||||
|
||||
// This class is exported from the Math.dll
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
|
Загрузка…
Ссылка в новой задаче