Made gcc happy again (mostly missing headers or wrong declaration order);
Makefile adapted to the new paths, but not yet building Network and SGD as separate libs.
This commit is contained in:
Родитель
0a67d0c322
Коммит
54a6b1d2ec
|
@ -4,6 +4,8 @@
|
||||||
// </copyright>
|
// </copyright>
|
||||||
//
|
//
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include "Basics.h"
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
@ -16,6 +18,7 @@
|
||||||
#endif
|
#endif
|
||||||
#include "fileutil.h" // for f{ge,pu}t{,Text}()
|
#include "fileutil.h" // for f{ge,pu}t{,Text}()
|
||||||
#include <fstream> // for LoadMatrixFromTextFile() --TODO: change to using this File class
|
#include <fstream> // for LoadMatrixFromTextFile() --TODO: change to using this File class
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
namespace Microsoft{ namespace MSR { namespace CNTK {
|
namespace Microsoft{ namespace MSR { namespace CNTK {
|
||||||
|
|
||||||
|
|
|
@ -109,7 +109,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
||||||
size_t numRows = 0;
|
size_t numRows = 0;
|
||||||
size_t numCols = 0;
|
size_t numCols = 0;
|
||||||
auto array = File::LoadMatrixFromTextFile<ElemType>(msra::strfun::utf8(initFromFilePath), numRows, numCols); // TODO: change pathname to wstring
|
auto array = File::LoadMatrixFromTextFile<ElemType>(msra::strfun::utf8(initFromFilePath), numRows, numCols); // TODO: change pathname to wstring
|
||||||
FunctionValues().SetValue(numRows, numCols, array.data(), matrixFlagNormal, GetDeviceId());
|
FunctionValues().SetValue(numRows, numCols, array.data(), matrixFlagNormal, m_deviceId);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual const std::wstring OperationName() const {return TypeName();}
|
virtual const std::wstring OperationName() const {return TypeName();}
|
||||||
|
|
|
@ -12,174 +12,260 @@ extern Microsoft::MSR::CNTK::MPIWrapper *g_mpi;
|
||||||
|
|
||||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||||
|
|
||||||
template<class ElemType>
|
template<class ElemType>
|
||||||
void DecimateMinibatch(std::map<std::wstring, MSR::CNTK::Matrix<ElemType>*>& mb, int numProcessor, int myID)
|
void DecimateMinibatch(std::map<std::wstring, MSR::CNTK::Matrix<ElemType>*>& mb, int numProcessor, int myID)
|
||||||
{
|
|
||||||
int rank = myID;
|
|
||||||
int procs = numProcessor;
|
|
||||||
|
|
||||||
size_t rv = 0;
|
|
||||||
if (procs > 1)
|
|
||||||
{
|
{
|
||||||
for (auto it = mb.begin(); it != mb.end(); ++it)
|
int rank = myID;
|
||||||
|
int procs = numProcessor;
|
||||||
|
|
||||||
|
size_t rv = 0;
|
||||||
|
if (procs > 1)
|
||||||
{
|
{
|
||||||
MSR::CNTK::Matrix<ElemType> &mat = *(it->second);
|
for (auto it = mb.begin(); it != mb.end(); ++it)
|
||||||
size_t nCols = mat.GetNumCols();
|
|
||||||
size_t col_start = (nCols * rank) / procs;
|
|
||||||
size_t col_end = (nCols * (rank + 1)) / procs;
|
|
||||||
if (col_end > nCols)
|
|
||||||
{
|
{
|
||||||
// this shouldn't happen
|
MSR::CNTK::Matrix<ElemType> &mat = *(it->second);
|
||||||
col_end = nCols;
|
size_t nCols = mat.GetNumCols();
|
||||||
}
|
size_t col_start = (nCols * rank) / procs;
|
||||||
|
size_t col_end = (nCols * (rank + 1)) / procs;
|
||||||
if (col_end == col_start)
|
if (col_end > nCols)
|
||||||
{
|
|
||||||
MSR::CNTK::Matrix<ElemType> tmp(mat.GetNumRows(), 0, AUTOPLACEMATRIX, DENSE);
|
|
||||||
mat.SetValue(tmp);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
MSR::CNTK::Matrix<ElemType> tmp = mat.ColumnSlice(col_start, col_end - col_start);
|
|
||||||
mat.SetValue(tmp);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rv == 0)
|
|
||||||
{
|
|
||||||
rv = mat.GetNumCols();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (rv != mat.GetNumCols())
|
|
||||||
{
|
{
|
||||||
throw std::logic_error("Uneven number of columns among inputs.");
|
// this shouldn't happen
|
||||||
|
col_end = nCols;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (col_end == col_start)
|
||||||
|
{
|
||||||
|
MSR::CNTK::Matrix<ElemType> tmp(mat.GetNumRows(), 0, AUTOPLACEMATRIX, DENSE);
|
||||||
|
mat.SetValue(tmp);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
MSR::CNTK::Matrix<ElemType> tmp = mat.ColumnSlice(col_start, col_end - col_start);
|
||||||
|
mat.SetValue(tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rv == 0)
|
||||||
|
{
|
||||||
|
rv = mat.GetNumCols();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (rv != mat.GetNumCols())
|
||||||
|
{
|
||||||
|
throw std::logic_error("Uneven number of columns among inputs.");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
template<class ElemType>
|
template<class ElemType>
|
||||||
size_t DecimateMinibatchWithSentences(std::map<std::wstring, MSR::CNTK::Matrix<ElemType>*> &mb, /* (input) matrix to be decimated */
|
size_t DecimateMinibatchWithSentences(std::map<std::wstring, MSR::CNTK::Matrix<ElemType>*> &mb, /* (input) matrix to be decimated */
|
||||||
int rank, int numprocs, /* (input) rank info */
|
int rank, int numprocs, /* (input) rank info */
|
||||||
size_t& nSlices, /* (input/output): on input, # parallel sentence total , on output, # paralel sentence in this node */
|
size_t& nSlices, /* (input/output): on input, # parallel sentence total , on output, # paralel sentence in this node */
|
||||||
Matrix<float>& SentenceBoundary, /* (output) nSlices X nMBsize matrix */
|
Matrix<float>& SentenceBoundary, /* (output) nSlices X nMBsize matrix */
|
||||||
vector<MinibatchPackingFlag>& PackingFlags, /* (output) 1 X nMBsize vector */
|
vector<MinibatchPackingFlag>& PackingFlags, /* (output) 1 X nMBsize vector */
|
||||||
IDataReader<ElemType>* trainDataReader) /* (input) to have access to reader */
|
IDataReader<ElemType>* trainDataReader) /* (input) to have access to reader */
|
||||||
{
|
|
||||||
// For RNN, a input Matrix is organized in the following way:
|
|
||||||
// | x_t^1 x_t^2 ... x_t^N | .... | x_{t+T-1}^1 ... x_{t+T-1}^N |
|
|
||||||
// |<---- block 1 ---->| .... |<------ block T ----->|
|
|
||||||
// N is the nSlice (input)
|
|
||||||
// The decimation here is to split each block to individual GPUs
|
|
||||||
// So After decimation
|
|
||||||
// | x_t^{st} ... x_t^{en-1}| .... | x_{t+T-1}^{st} ... x_{t+T-1}^{en-1} |
|
|
||||||
// Each block now has nSlice/nProcs
|
|
||||||
//
|
|
||||||
// Correspondingly, the SentenceBoundary and PackingFlags will be revised
|
|
||||||
trainDataReader->SetSentenceSegBatch(SentenceBoundary, PackingFlags);
|
|
||||||
|
|
||||||
size_t rv = 0;
|
|
||||||
size_t nOrigParallelUtts = nSlices;
|
|
||||||
static bool warned = false;
|
|
||||||
if (numprocs > 1)
|
|
||||||
{
|
{
|
||||||
// decide new parallel utterances
|
// For RNN, a input Matrix is organized in the following way:
|
||||||
size_t sent_start = 0;
|
// | x_t^1 x_t^2 ... x_t^N | .... | x_{t+T-1}^1 ... x_{t+T-1}^N |
|
||||||
size_t sent_end = 0;
|
// |<---- block 1 ---->| .... |<------ block T ----->|
|
||||||
if (nOrigParallelUtts % numprocs != 0)
|
// N is the nSlice (input)
|
||||||
|
// The decimation here is to split each block to individual GPUs
|
||||||
|
// So After decimation
|
||||||
|
// | x_t^{st} ... x_t^{en-1}| .... | x_{t+T-1}^{st} ... x_{t+T-1}^{en-1} |
|
||||||
|
// Each block now has nSlice/nProcs
|
||||||
|
//
|
||||||
|
// Correspondingly, the SentenceBoundary and PackingFlags will be revised
|
||||||
|
trainDataReader->SetSentenceSegBatch(SentenceBoundary, PackingFlags);
|
||||||
|
|
||||||
|
size_t rv = 0;
|
||||||
|
size_t nOrigParallelUtts = nSlices;
|
||||||
|
static bool warned = false;
|
||||||
|
if (numprocs > 1)
|
||||||
{
|
{
|
||||||
if (!warned)
|
// decide new parallel utterances
|
||||||
|
size_t sent_start = 0;
|
||||||
|
size_t sent_end = 0;
|
||||||
|
if (nOrigParallelUtts % numprocs != 0)
|
||||||
{
|
{
|
||||||
/* give a warning of potential bandwidth wasting */
|
if (!warned)
|
||||||
fprintf(stderr, "WARNING: %d GPUs are used in model averaging, but the number of parallel utterances are %d, a potential training speed degradation.\n",
|
{
|
||||||
(int)g_mpi->NumNodesInUse(), (int)nOrigParallelUtts);
|
/* give a warning of potential bandwidth wasting */
|
||||||
warned = true;
|
fprintf(stderr, "WARNING: %d GPUs are used in model averaging, but the number of parallel utterances are %d, a potential training speed degradation.\n",
|
||||||
}
|
(int)g_mpi->NumNodesInUse(), (int)nOrigParallelUtts);
|
||||||
if (rank == numprocs - 1)
|
warned = true;
|
||||||
{
|
}
|
||||||
nSlices = nOrigParallelUtts - (nOrigParallelUtts / numprocs + 1) * (numprocs - 1);
|
if (rank == numprocs - 1)
|
||||||
sent_start = (nOrigParallelUtts / numprocs + 1) * (numprocs - 1);
|
{
|
||||||
sent_end = nOrigParallelUtts;
|
nSlices = nOrigParallelUtts - (nOrigParallelUtts / numprocs + 1) * (numprocs - 1);
|
||||||
|
sent_start = (nOrigParallelUtts / numprocs + 1) * (numprocs - 1);
|
||||||
|
sent_end = nOrigParallelUtts;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
nSlices = nOrigParallelUtts / numprocs + 1;
|
||||||
|
sent_start = nSlices * rank;
|
||||||
|
sent_end = nSlices * (rank + 1);
|
||||||
|
if (sent_end > nOrigParallelUtts) sent_end = nOrigParallelUtts;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
nSlices = nOrigParallelUtts / numprocs + 1;
|
nSlices = nOrigParallelUtts / numprocs;
|
||||||
sent_start = nSlices * rank;
|
sent_start = rank*nSlices;
|
||||||
sent_end = nSlices * (rank + 1);
|
sent_end = (rank + 1)*nSlices;
|
||||||
if (sent_end > nOrigParallelUtts) sent_end = nOrigParallelUtts;
|
if (sent_end > nOrigParallelUtts) sent_end = nOrigParallelUtts;
|
||||||
}
|
}
|
||||||
|
// decimate data
|
||||||
|
for (auto it = mb.begin(); it != mb.end(); ++it)
|
||||||
|
{
|
||||||
|
MSR::CNTK::Matrix<ElemType> &mat = *(it->second);
|
||||||
|
size_t nCols = mat.GetNumCols();
|
||||||
|
|
||||||
|
if (nCols % nOrigParallelUtts != 0)
|
||||||
|
{
|
||||||
|
// this should not happen for DNN, RNN with truncated BPTT, not sure about other special stuff ...
|
||||||
|
RuntimeError("ERROR: minibatch size %d, but with %d parallel utterances\n", nCols, nOrigParallelUtts);
|
||||||
|
}
|
||||||
|
size_t nBlocks = nCols / nOrigParallelUtts;
|
||||||
|
// for RNN, nBlocks is the size of truncated BPTT
|
||||||
|
if (sent_end == sent_start)
|
||||||
|
{
|
||||||
|
// should never happen, print debug info
|
||||||
|
RuntimeError("ERROR: in DecimateMinibatch, col_st=col_en=%d, nCol=%d, nBlock=%d, nParaUtts=%d, nGPU=%d\n",
|
||||||
|
(int)sent_start, (int)nCols, (int)nBlocks, (int)nOrigParallelUtts, (int)numprocs);
|
||||||
|
}
|
||||||
|
|
||||||
|
MSR::CNTK::Matrix<ElemType> tmp(mat.GetNumRows(), nSlices*nBlocks, mat.GetPreferredDeviceId(), mat.GetMatrixType());
|
||||||
|
|
||||||
|
// do the column slice for each block
|
||||||
|
for (size_t iblock = 0; iblock < nBlocks; iblock++)
|
||||||
|
{
|
||||||
|
tmp.SetColumnSlice(mat.ColumnSlice(nOrigParallelUtts*iblock + sent_start, nSlices),
|
||||||
|
iblock*nSlices, nSlices);
|
||||||
|
}
|
||||||
|
mat.SetValue(tmp);
|
||||||
|
|
||||||
|
// assert the cols are even among nodes
|
||||||
|
if (0 == rv)
|
||||||
|
{
|
||||||
|
rv = mat.GetNumCols();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (rv != mat.GetNumCols())
|
||||||
|
throw std::logic_error("Uneven number of columns among inputs.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// revise sentence boundary and packing flags
|
||||||
|
Matrix<float> newBoundary(CPUDEVICE); // TODO: change Matrix<float> to a typedef
|
||||||
|
size_t nMBSize = PackingFlags.size();
|
||||||
|
newBoundary.Resize(nSlices, nMBSize);
|
||||||
|
newBoundary.AssignRowSliceValuesOf(SentenceBoundary, sent_start, nSlices);
|
||||||
|
fill(PackingFlags.begin(), PackingFlags.end(), MinibatchPackingFlag::None);
|
||||||
|
for (size_t nt = 0; nt < nMBSize; nt++)
|
||||||
|
{
|
||||||
|
for (size_t ns = 0; ns < nSlices; ns++)
|
||||||
|
{
|
||||||
|
if (newBoundary(ns, nt) == SEQUENCE_START)
|
||||||
|
PackingFlags[nt] |= MinibatchPackingFlag::SequenceStart;
|
||||||
|
if (newBoundary(ns, nt) == SEQUENCE_END)
|
||||||
|
PackingFlags[nt] |= MinibatchPackingFlag::SequenceEnd;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
static AdaptationRegType ParseAdaptationRegType(wstring s)
|
||||||
|
{
|
||||||
|
msra::strfun::tolower_ascii(s);
|
||||||
|
if (s == L"" || s == L"none")
|
||||||
|
{
|
||||||
|
return AdaptationRegType::None;
|
||||||
|
}
|
||||||
|
else if (s == L"kl" || s == L"klreg")
|
||||||
|
{
|
||||||
|
return AdaptationRegType::KL;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
nSlices = nOrigParallelUtts / numprocs;
|
throw std::invalid_argument(
|
||||||
sent_start = rank*nSlices;
|
"ParseAdaptationRegType: Invalid Adaptation Regularization Type. Valid values are "
|
||||||
sent_end = (rank + 1)*nSlices;
|
"(None | KL)");
|
||||||
if (sent_end > nOrigParallelUtts) sent_end = nOrigParallelUtts;
|
|
||||||
}
|
}
|
||||||
// decimate data
|
|
||||||
for (auto it = mb.begin(); it != mb.end(); ++it)
|
|
||||||
{
|
|
||||||
MSR::CNTK::Matrix<ElemType> &mat = *(it->second);
|
|
||||||
size_t nCols = mat.GetNumCols();
|
|
||||||
|
|
||||||
if (nCols % nOrigParallelUtts != 0)
|
|
||||||
{
|
|
||||||
// this should not happen for DNN, RNN with truncated BPTT, not sure about other special stuff ...
|
|
||||||
RuntimeError("ERROR: minibatch size %d, but with %d parallel utterances\n", nCols, nOrigParallelUtts);
|
|
||||||
}
|
|
||||||
size_t nBlocks = nCols / nOrigParallelUtts;
|
|
||||||
// for RNN, nBlocks is the size of truncated BPTT
|
|
||||||
if (sent_end == sent_start)
|
|
||||||
{
|
|
||||||
// should never happen, print debug info
|
|
||||||
RuntimeError("ERROR: in DecimateMinibatch, col_st=col_en=%d, nCol=%d, nBlock=%d, nParaUtts=%d, nGPU=%d\n",
|
|
||||||
(int)sent_start, (int)nCols, (int)nBlocks, (int)nOrigParallelUtts, (int)numprocs);
|
|
||||||
}
|
|
||||||
|
|
||||||
MSR::CNTK::Matrix<ElemType> tmp(mat.GetNumRows(), nSlices*nBlocks, mat.GetPreferredDeviceId(), mat.GetMatrixType());
|
|
||||||
|
|
||||||
// do the column slice for each block
|
|
||||||
for (size_t iblock = 0; iblock < nBlocks; iblock++)
|
|
||||||
{
|
|
||||||
tmp.SetColumnSlice(mat.ColumnSlice(nOrigParallelUtts*iblock + sent_start, nSlices),
|
|
||||||
iblock*nSlices, nSlices);
|
|
||||||
}
|
|
||||||
mat.SetValue(tmp);
|
|
||||||
|
|
||||||
// assert the cols are even among nodes
|
|
||||||
if (0 == rv)
|
|
||||||
{
|
|
||||||
rv = mat.GetNumCols();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (rv != mat.GetNumCols())
|
|
||||||
throw std::logic_error("Uneven number of columns among inputs.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// revise sentence boundary and packing flags
|
|
||||||
Matrix<float> newBoundary(CPUDEVICE); // TODO: change Matrix<float> to a typedef
|
|
||||||
size_t nMBSize = PackingFlags.size();
|
|
||||||
newBoundary.Resize(nSlices, nMBSize);
|
|
||||||
newBoundary.AssignRowSliceValuesOf(SentenceBoundary, sent_start, nSlices);
|
|
||||||
fill(PackingFlags.begin(), PackingFlags.end(), MinibatchPackingFlag::None);
|
|
||||||
for (size_t nt = 0; nt < nMBSize; nt++)
|
|
||||||
{
|
|
||||||
for (size_t ns = 0; ns < nSlices; ns++)
|
|
||||||
{
|
|
||||||
if (newBoundary(ns, nt) == SEQUENCE_START)
|
|
||||||
PackingFlags[nt] |= MinibatchPackingFlag::SequenceStart;
|
|
||||||
if (newBoundary(ns, nt) == SEQUENCE_END)
|
|
||||||
PackingFlags[nt] |= MinibatchPackingFlag::SequenceEnd;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return rv;
|
static GradientsUpdateType ParseGradUpdateType(wstring s)
|
||||||
}
|
{
|
||||||
|
msra::strfun::tolower_ascii(s);
|
||||||
|
if (s == L"" || s == L"none" || s == L"normal" || s == L"simple")
|
||||||
|
{
|
||||||
|
return GradientsUpdateType::None;
|
||||||
|
}
|
||||||
|
else if (s == L"adagrad")
|
||||||
|
{
|
||||||
|
return GradientsUpdateType::AdaGrad;
|
||||||
|
}
|
||||||
|
else if (s == L"rmsprop")
|
||||||
|
{
|
||||||
|
return GradientsUpdateType::RmsProp;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
throw std::invalid_argument(
|
||||||
|
"ParseGradUpdateType: Invalid Gradient Updating Type. Valid values are "
|
||||||
|
"(None | AdaGrad | RmsProp )");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static ParallelizationMethod ParseParallelizationMethod(wstring s)
|
||||||
|
{
|
||||||
|
msra::strfun::tolower_ascii(s);
|
||||||
|
if ((s == L"") || (s == L"none"))
|
||||||
|
{
|
||||||
|
return ParallelizationMethod::None;
|
||||||
|
}
|
||||||
|
else if (s == L"dataparallelsgd")
|
||||||
|
{
|
||||||
|
return ParallelizationMethod::DataParallelSGD;
|
||||||
|
}
|
||||||
|
else if (s == L"modelaveragingsgd")
|
||||||
|
{
|
||||||
|
return ParallelizationMethod::ModelAveragingSGD;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
throw std::invalid_argument("ParseParallelizationMethod: Invalid Parallelization Method. Valid values are (None | DataParallelSGD | ModelAveragingSGD)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static LearningRateSearchAlgorithm ParseLearningRateSearchType(wstring s)
|
||||||
|
{
|
||||||
|
msra::strfun::tolower_ascii(s);
|
||||||
|
if (s == L"false" || s == L"none")
|
||||||
|
{
|
||||||
|
return LearningRateSearchAlgorithm::None;
|
||||||
|
}
|
||||||
|
else if (s == L"searchbeforeepoch" || s == L"beforeepoch" || s == L"before")
|
||||||
|
{
|
||||||
|
return LearningRateSearchAlgorithm::SearchBeforeEpoch;
|
||||||
|
}
|
||||||
|
else if (s == L"adjustafterepoch" || s == L"afterepoch" || s == L"after")
|
||||||
|
{
|
||||||
|
return LearningRateSearchAlgorithm::AdjustAfterEpoch;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
throw std::invalid_argument(
|
||||||
|
"autoAdjustLR: Invalid learning rate search type. Valid values are "
|
||||||
|
"(None | SearchBeforeEpoch | AdjustAfterEpoch)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template<class ElemType>
|
template<class ElemType>
|
||||||
SGD<ElemType>::SGD(const ConfigParameters& configSGD)
|
SGD<ElemType>::SGD(const ConfigParameters& configSGD)
|
||||||
|
@ -594,7 +680,7 @@ size_t DecimateMinibatchWithSentences(std::map<std::wstring, MSR::CNTK::Matrix<E
|
||||||
void SGD<ElemType>::Adapt(wstring origModelFileName, wstring refNodeName,
|
void SGD<ElemType>::Adapt(wstring origModelFileName, wstring refNodeName,
|
||||||
IDataReader<ElemType>* trainSetDataReader,
|
IDataReader<ElemType>* trainSetDataReader,
|
||||||
IDataReader<ElemType>* validationSetDataReader,
|
IDataReader<ElemType>* validationSetDataReader,
|
||||||
const DEVICEID_TYPE deviceID, const bool makeMode = true)
|
const DEVICEID_TYPE deviceID, const bool makeMode)
|
||||||
{
|
{
|
||||||
if (origModelFileName == L"" || trainSetDataReader == nullptr)
|
if (origModelFileName == L"" || trainSetDataReader == nullptr)
|
||||||
InvalidArgument("origModel and trainSetDataReader should not be null.");
|
InvalidArgument("origModel and trainSetDataReader should not be null.");
|
||||||
|
@ -644,7 +730,7 @@ size_t DecimateMinibatchWithSentences(std::map<std::wstring, MSR::CNTK::Matrix<E
|
||||||
template<class ElemType>
|
template<class ElemType>
|
||||||
void SGD<ElemType>::SequenceTrain(IComputationNetBuilder<ElemType>* netBuilder, wstring origModelFileName,
|
void SGD<ElemType>::SequenceTrain(IComputationNetBuilder<ElemType>* netBuilder, wstring origModelFileName,
|
||||||
IDataReader<ElemType>* trainSetDataReader, IDataReader<ElemType>* validationSetDataReader,
|
IDataReader<ElemType>* trainSetDataReader, IDataReader<ElemType>* validationSetDataReader,
|
||||||
const DEVICEID_TYPE deviceID, const bool makeMode = true)
|
const DEVICEID_TYPE deviceID, const bool makeMode)
|
||||||
{
|
{
|
||||||
if (netBuilder == nullptr || origModelFileName == L"" || trainSetDataReader == nullptr)
|
if (netBuilder == nullptr || origModelFileName == L"" || trainSetDataReader == nullptr)
|
||||||
InvalidArgument("netBuilder, origModel and trainSetDataReader should not be null.");
|
InvalidArgument("netBuilder, origModel and trainSetDataReader should not be null.");
|
||||||
|
@ -711,11 +797,16 @@ size_t DecimateMinibatchWithSentences(std::map<std::wstring, MSR::CNTK::Matrix<E
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static double MomentumPerMB(double momentumPerSample, size_t minibatchSize)
|
||||||
|
{
|
||||||
|
return pow(momentumPerSample, minibatchSize);
|
||||||
|
}
|
||||||
|
|
||||||
template<class ElemType>
|
template<class ElemType>
|
||||||
void SGD<ElemType>::Train(IComputationNetBuilder<ElemType>* netBuilder,
|
void SGD<ElemType>::Train(IComputationNetBuilder<ElemType>* netBuilder,
|
||||||
IDataReader<ElemType>* trainSetDataReader,
|
IDataReader<ElemType>* trainSetDataReader,
|
||||||
IDataReader<ElemType>* validationSetDataReader,
|
IDataReader<ElemType>* validationSetDataReader,
|
||||||
const bool makeMode = true)
|
const bool makeMode)
|
||||||
{
|
{
|
||||||
if (netBuilder == nullptr || trainSetDataReader == nullptr)
|
if (netBuilder == nullptr || trainSetDataReader == nullptr)
|
||||||
InvalidArgument("netBuilder and trainSetDataReader should not be null.\n");
|
InvalidArgument("netBuilder and trainSetDataReader should not be null.\n");
|
||||||
|
@ -1449,7 +1540,7 @@ size_t DecimateMinibatchWithSentences(std::map<std::wstring, MSR::CNTK::Matrix<E
|
||||||
/*out*/ double& epochCriterion,
|
/*out*/ double& epochCriterion,
|
||||||
/*out*/ std::vector<double>& epochEvalErrors,
|
/*out*/ std::vector<double>& epochEvalErrors,
|
||||||
/*out*/ size_t& totalSamplesSeen,
|
/*out*/ size_t& totalSamplesSeen,
|
||||||
std::string prefixMsg = "")
|
std::string prefixMsg)
|
||||||
{
|
{
|
||||||
TrainOneEpoch(net, refNet, refNode, epochNumber, epochSize,
|
TrainOneEpoch(net, refNet, refNode, epochNumber, epochSize,
|
||||||
trainSetDataReader, learnRatePerSample, minibatchSize, featureNodes,
|
trainSetDataReader, learnRatePerSample, minibatchSize, featureNodes,
|
||||||
|
@ -1763,7 +1854,7 @@ size_t DecimateMinibatchWithSentences(std::map<std::wstring, MSR::CNTK::Matrix<E
|
||||||
/*out*/ double& epochCriterion,
|
/*out*/ double& epochCriterion,
|
||||||
/*out*/ std::vector<double>& epochEvalErrors,
|
/*out*/ std::vector<double>& epochEvalErrors,
|
||||||
/*out*/ size_t& totalSamplesSeen,
|
/*out*/ size_t& totalSamplesSeen,
|
||||||
std::string prefixMsg = "")
|
std::string prefixMsg)
|
||||||
{
|
{
|
||||||
// Since we are getting timing resolution of under microsecond we use double precision
|
// Since we are getting timing resolution of under microsecond we use double precision
|
||||||
// to ensure that we have enough digits to represent small time measurements.
|
// to ensure that we have enough digits to represent small time measurements.
|
||||||
|
@ -2511,7 +2602,7 @@ size_t DecimateMinibatchWithSentences(std::map<std::wstring, MSR::CNTK::Matrix<E
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class ElemType>
|
template<class ElemType>
|
||||||
wstring SGD<ElemType>::GetModelNameForEpoch(const int epoch, bool bLastModel = false)
|
wstring SGD<ElemType>::GetModelNameForEpoch(const int epoch, bool bLastModel)
|
||||||
{
|
{
|
||||||
int epoch1Base = epoch + 1;
|
int epoch1Base = epoch + 1;
|
||||||
if (epoch1Base == m_maxEpochs || bLastModel)
|
if (epoch1Base == m_maxEpochs || bLastModel)
|
||||||
|
@ -2557,108 +2648,6 @@ size_t DecimateMinibatchWithSentences(std::map<std::wstring, MSR::CNTK::Matrix<E
|
||||||
return firstEpoch;
|
return firstEpoch;
|
||||||
}
|
}
|
||||||
|
|
||||||
static AdaptationRegType ParseAdaptationRegType(wstring s)
|
|
||||||
{
|
|
||||||
msra::strfun::tolower_ascii(s);
|
|
||||||
if (s == L"" || s == L"none")
|
|
||||||
{
|
|
||||||
return AdaptationRegType::None;
|
|
||||||
}
|
|
||||||
else if (s == L"kl" || s == L"klreg")
|
|
||||||
{
|
|
||||||
return AdaptationRegType::KL;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
throw std::invalid_argument(
|
|
||||||
"ParseAdaptationRegType: Invalid Adaptation Regularization Type. Valid values are "
|
|
||||||
"(None | KL)");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static GradientsUpdateType ParseGradUpdateType(wstring s)
|
|
||||||
{
|
|
||||||
msra::strfun::tolower_ascii(s);
|
|
||||||
if (s == L"" || s == L"none" || s == L"normal" || s == L"simple")
|
|
||||||
{
|
|
||||||
return GradientsUpdateType::None;
|
|
||||||
}
|
|
||||||
else if (s == L"adagrad")
|
|
||||||
{
|
|
||||||
return GradientsUpdateType::AdaGrad;
|
|
||||||
}
|
|
||||||
else if (s == L"rmsprop")
|
|
||||||
{
|
|
||||||
return GradientsUpdateType::RmsProp;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
throw std::invalid_argument(
|
|
||||||
"ParseGradUpdateType: Invalid Gradient Updating Type. Valid values are "
|
|
||||||
"(None | AdaGrad | RmsProp )");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static ParallelizationMethod ParseParallelizationMethod(wstring s)
|
|
||||||
{
|
|
||||||
msra::strfun::tolower_ascii(s);
|
|
||||||
if ((s == L"") || (s == L"none"))
|
|
||||||
{
|
|
||||||
return ParallelizationMethod::None;
|
|
||||||
}
|
|
||||||
else if (s == L"dataparallelsgd")
|
|
||||||
{
|
|
||||||
return ParallelizationMethod::DataParallelSGD;
|
|
||||||
}
|
|
||||||
else if (s == L"modelaveragingsgd")
|
|
||||||
{
|
|
||||||
return ParallelizationMethod::ModelAveragingSGD;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
throw std::invalid_argument("ParseParallelizationMethod: Invalid Parallelization Method. Valid values are (None | DataParallelSGD | ModelAveragingSGD)");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static LearningRateSearchAlgorithm ParseLearningRateSearchType(wstring s)
|
|
||||||
{
|
|
||||||
msra::strfun::tolower_ascii(s);
|
|
||||||
if (s == L"false" || s == L"none")
|
|
||||||
{
|
|
||||||
return LearningRateSearchAlgorithm::None;
|
|
||||||
}
|
|
||||||
else if (s == L"searchbeforeepoch" || s == L"beforeepoch" || s == L"before")
|
|
||||||
{
|
|
||||||
return LearningRateSearchAlgorithm::SearchBeforeEpoch;
|
|
||||||
}
|
|
||||||
else if (s == L"adjustafterepoch" || s == L"afterepoch" || s == L"after")
|
|
||||||
{
|
|
||||||
return LearningRateSearchAlgorithm::AdjustAfterEpoch;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
throw std::invalid_argument(
|
|
||||||
"autoAdjustLR: Invalid learning rate search type. Valid values are "
|
|
||||||
"(None | SearchBeforeEpoch | AdjustAfterEpoch)");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//GradientsUpdateType GradUpdateType() const
|
|
||||||
//{
|
|
||||||
// return m_gradType.mType;
|
|
||||||
//}
|
|
||||||
//
|
|
||||||
//double GradientUpdateNoiseStd() const
|
|
||||||
//{
|
|
||||||
// return m_gradType.mGaussianNoiseInjectStd;
|
|
||||||
//}
|
|
||||||
|
|
||||||
static double MomentumPerMB(double momentumPerSample, size_t minibatchSize)
|
|
||||||
{
|
|
||||||
return pow(momentumPerSample, minibatchSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
// public:
|
|
||||||
|
|
||||||
#define EPSILON 1e-5
|
#define EPSILON 1e-5
|
||||||
|
|
||||||
template<class ElemType>
|
template<class ElemType>
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
#include "Basics.h"
|
#include "Basics.h"
|
||||||
|
#include "Helpers.h" // for foreach_column() macro
|
||||||
#include "fileutil.h"
|
#include "fileutil.h"
|
||||||
#include "DataReader.h"
|
#include "DataReader.h"
|
||||||
#include "DataWriter.h"
|
#include "DataWriter.h"
|
||||||
|
|
12
Makefile
12
Makefile
|
@ -50,7 +50,7 @@ endif
|
||||||
# The actual compiler/linker flags added can be viewed by running 'mpic++ --showme:compile' and 'mpic++ --showme:link'
|
# The actual compiler/linker flags added can be viewed by running 'mpic++ --showme:compile' and 'mpic++ --showme:link'
|
||||||
CXX = mpic++
|
CXX = mpic++
|
||||||
|
|
||||||
INCLUDEPATH:= Common/Include Math/Math MachineLearning/CNTK BrainScript
|
INCLUDEPATH:= Common/Include Math/Math MachineLearning/CNTK MachineLearning/CNTKComputationNetworkLib MachineLearning/CNTKSGDLib BrainScript
|
||||||
CPPFLAGS:= -D_POSIX_SOURCE -D_XOPEN_SOURCE=600 -D__USE_XOPEN2K
|
CPPFLAGS:= -D_POSIX_SOURCE -D_XOPEN_SOURCE=600 -D__USE_XOPEN2K
|
||||||
CXXFLAGS:= -msse3 -std=c++0x -std=c++11 -fopenmp -fpermissive -fPIC -Werror
|
CXXFLAGS:= -msse3 -std=c++0x -std=c++11 -fopenmp -fpermissive -fPIC -Werror
|
||||||
LIBPATH:=
|
LIBPATH:=
|
||||||
|
@ -355,15 +355,17 @@ endif
|
||||||
|
|
||||||
CNTK_SRC =\
|
CNTK_SRC =\
|
||||||
MachineLearning/CNTK/CNTK.cpp \
|
MachineLearning/CNTK/CNTK.cpp \
|
||||||
MachineLearning/CNTK/ComputationNode.cpp \
|
|
||||||
MachineLearning/CNTK/ModelEditLanguage.cpp \
|
MachineLearning/CNTK/ModelEditLanguage.cpp \
|
||||||
MachineLearning/CNTK/NetworkDescriptionLanguage.cpp \
|
MachineLearning/CNTK/NetworkDescriptionLanguage.cpp \
|
||||||
MachineLearning/CNTK/Profiler.cpp \
|
|
||||||
MachineLearning/CNTK/ComputationNetwork.cpp \
|
|
||||||
MachineLearning/CNTK/ComputationNetworkBuilder.cpp \
|
|
||||||
MachineLearning/CNTK/SimpleNetworkBuilder.cpp \
|
MachineLearning/CNTK/SimpleNetworkBuilder.cpp \
|
||||||
MachineLearning/CNTK/SynchronousExecutionEngine.cpp \
|
MachineLearning/CNTK/SynchronousExecutionEngine.cpp \
|
||||||
MachineLearning/CNTK/tests.cpp \
|
MachineLearning/CNTK/tests.cpp \
|
||||||
|
MachineLearning/CNTKComputationNetworkLib/ComputationNode.cpp \
|
||||||
|
MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp \
|
||||||
|
MachineLearning/CNTKComputationNetworkLib/ComputationNetworkBuilder.cpp \
|
||||||
|
MachineLearning/CNTKComputationNetworkLib/NetworkBuilderFromConfig.cpp \
|
||||||
|
MachineLearning/CNTKSGDLib/Profiler.cpp \
|
||||||
|
MachineLearning/CNTKSGDLib/SGD.cpp \
|
||||||
MachineLearning/CNTKEval/CNTKEval.cpp \
|
MachineLearning/CNTKEval/CNTKEval.cpp \
|
||||||
BrainScript/BrainScriptEvaluator.cpp \
|
BrainScript/BrainScriptEvaluator.cpp \
|
||||||
BrainScript/BrainScriptParser.cpp \
|
BrainScript/BrainScriptParser.cpp \
|
||||||
|
|
|
@ -3,7 +3,10 @@
|
||||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
// </copyright>
|
// </copyright>
|
||||||
//
|
//
|
||||||
|
|
||||||
//helpful macros
|
//helpful macros
|
||||||
|
// TODO: the file's name is too general to be included from outside; MathHelpers.h?
|
||||||
|
|
||||||
//iterators
|
//iterators
|
||||||
#pragma once
|
#pragma once
|
||||||
#undef foreach_row
|
#undef foreach_row
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
#include "Basics.h"
|
#include "Basics.h"
|
||||||
#include "File.h"
|
#include "File.h"
|
||||||
#include "CommonMatrix.h"
|
#include "CommonMatrix.h"
|
||||||
|
#include <limits.h>
|
||||||
|
|
||||||
// This class is exported from the Math.dll
|
// This class is exported from the Math.dll
|
||||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||||
|
|
Загрузка…
Ссылка в новой задаче