Merge remote-tracking branch 'origin/master' into linux-gcc

Conflicts:
	Common/Include/DataReader.h
	Documentation/CNTK-TechReport/lyx/CNTKBook_CNTK_Chapter.lyx
	MachineLearning/CNTK/TrainingCriterionNodes.h
	Math/Math/Matrix.cpp
This commit is contained in:
Yu Zhang 2015-07-07 17:00:17 -07:00
Parent 2c96d0adaa 142d18058e
Commit 56b9100826
14 changed files with 716 additions and 178 deletions

View file

@@ -29,9 +29,16 @@
namespace Microsoft { namespace MSR { namespace CNTK {
const size_t randomizeAuto = ((size_t)-1)>>2; // randomize range set automatically, parameter value for Init()
const size_t randomizeNone = 0; // don't randomize, parameter value for Init()
const size_t requestDataSize = randomizeAuto; // StartMinibatchLoop default parameter, sets number of requested frames equal to the number of frames in the dataset
// randomize range set automatically, parameter value for Init()
const size_t randomizeAuto = ((size_t) -1) >> 2;
// don't randomize, parameter value for Init()
const size_t randomizeNone = 0;
// StartMinibatchLoop default parameter, sets the number of requested
// frames equal to the constant 0x3fffffffffffffff computed by ((size_t) -1) >> 2 above.
// We use this constant as a stand-in for the total number of frames in the dataset.
const size_t requestDataSize = randomizeAuto;
enum EndDataType
{
@@ -48,7 +55,7 @@ class DATAREADER_API IDataReader
{
public:
typedef std::string LabelType;
typedef unsigned LabelIdType;
typedef unsigned int LabelIdType;
unsigned m_seed;
size_t mBlgSize; /// number of utterances per minibatch
bool mDoRandomize;
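The replacement comment above pins down the value of randomizeAuto. As a quick check, a minimal standalone sketch (assuming a 64-bit size_t, where (size_t)-1 is 0xffffffffffffffff):

#include <cstdio>
#include <cstddef>

int main()
{
    // ((size_t)-1) >> 2 drops the top two bits of the all-ones pattern,
    // giving 0x3fffffffffffffff on a 64-bit platform.
    const size_t randomizeAuto = ((size_t)-1) >> 2;
    std::printf("%zx\n", randomizeAuto); // prints 3fffffffffffffff
    return 0;
}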

View file

@@ -612,6 +612,7 @@ public:
// pop out of content level
contentLevel = false;
}
if (quoteFound)
{
// skip the closing quote
@@ -660,7 +661,7 @@ public:
std::string ReadConfigFiles(const std::string& filePaths);
std::string ReadConfigFiles(const std::wstring& filePaths);
std::string ResolveIncludeStatements(const std::string& configString, std::vector<std::string>& resolvedConfigFiles);
void LoadConfigFile(const std::wstring & filePath);
void LoadConfigFile(const std::wstring& filePath);
void LoadConfigFileAndResolveVariables(const std::wstring& filePath, const ConfigParameters& config);
void LoadConfigFiles(const std::wstring& filePaths, const std::string* configStringToAppend = nullptr);
@@ -873,17 +874,17 @@ public:
}
// Insert - insert an 'name=value' string into the dictionary
void Insert(const std::string &str)
void Insert(const std::string& str)
{
ParseValue(str, 0, str.length());
}
bool Exists(const std::wstring & name) const
bool Exists(const std::wstring& name) const
{
return Exists(msra::strfun::utf8(name));
}
bool Exists(const std::string & name) const
bool Exists(const std::string& name) const
{
if (find(name) != end())
{
@@ -899,42 +900,42 @@ public:
}
// ExistsCurrent - check to see if a key exists in THIS config, don't check parent
bool ExistsCurrent(const std::string & name) const
bool ExistsCurrent(const std::string& name) const
{
return (find(name) != end());
}
// dict(name, default) for strings
ConfigValue operator()(const std::wstring & name,
const wchar_t *defaultvalue) const
ConfigValue operator()(const std::wstring& name,
const wchar_t* defaultvalue) const
{
return operator()(msra::strfun::utf8(name), defaultvalue);
}
// dict(name, default) for strings
ConfigValue operator()(const std::string & name,
const wchar_t *defaultvalue) const
ConfigValue operator()(const std::string& name,
const wchar_t* defaultvalue) const
{
return operator()(name, msra::strfun::utf8(defaultvalue).c_str());
}
// dict(name, default) for strings
ConfigValue operator()(const std::wstring & name,
const char *defaultvalue) const
ConfigValue operator()(const std::wstring& name,
const char* defaultvalue) const
{
return operator()(msra::strfun::utf8(name), defaultvalue);
}
// dict(name, default) for strings
ConfigValue operator()(const std::string & name,
const char *defaultvalue) const
ConfigValue operator()(const std::string& name,
const char* defaultvalue) const
{
ConfigValue value = Find(name, defaultvalue);
return value;
}
ConfigValue Find(const std::string & name,
const char *defaultvalue = NULL) const
ConfigValue Find(const std::string& name,
const char* defaultvalue = NULL) const
{
auto iter = find(name);
ConfigValue result;
@@ -975,10 +976,11 @@ public:
// any whitespace characters. If an opening "$" is found without a closing "$", an exception is thrown.
// configString - the string that you would like to resolve variables in.
// returns: A copy of 'configString' with all the variables resolved.
std::string ResolveVariablesInSingleLine(const std::string &configLine) const
std::string ResolveVariablesInSingleLine(const std::string& configLine) const
{
// ensure that this method was called on a single line (eg, no newline characters exist in 'configLine').
if (configLine.find_first_of("\n") != std::string::npos) {
if (configLine.find_first_of("\n") != std::string::npos)
{
throw std::logic_error(
"\"ResolveVariablesInSingleLine\" shouldn't be called with a string containing a newline character");
}
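The guard above enforces the single-line contract before any variable substitution happens. The $name$ resolution it protects can be sketched independently; here a plain std::map stands in for the ConfigParameters dictionary (an assumption for illustration, not the class's real lookup):

#include <map>
#include <stdexcept>
#include <string>

// Minimal sketch of single-line $variable$ resolution: copy characters
// through, and replace each $name$ span with its value from 'vars'.
// An opening "$" without a closing "$" is an error, mirroring the
// behavior documented above.
std::string ResolveLine(const std::string& line,
                        const std::map<std::string, std::string>& vars)
{
    std::string out;
    for (size_t i = 0; i < line.size();)
    {
        if (line[i] != '$') { out += line[i++]; continue; }
        size_t close = line.find('$', i + 1);
        if (close == std::string::npos)
            throw std::logic_error("opening \"$\" without a closing \"$\"");
        out += vars.at(line.substr(i + 1, close - i - 1)); // throws if undefined
        i = close + 1;
    }
    return out;
}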
@@ -1053,7 +1055,7 @@ public:
// we shouldn't insert newlines where they didn't already exist.
// configString - the string that you would like to resolve variables in.
// returns: A copy of 'configString' with all the variables resolved.
std::string ResolveVariables(const std::string &configString) const
std::string ResolveVariables(const std::string& configString) const
{
std::string newConfigString;
if (configString.find_first_of("\n") != std::string::npos)
@@ -1347,14 +1349,14 @@ class argvector: public std::vector<T>
RuntimeError("argvector: invalid arg value");
}
}
static void parse(const std::wstring & in, std::wstring & val)
static void parse(const std::wstring& in, std::wstring& val)
{
val = in;
}
public:
// constructor --construct empty, then assign a wstring from command-line argument
void operator=(const std::wstring & arg)
void operator=(const std::wstring& arg)
{
clear();
// separate the arguments
@@ -1387,7 +1389,7 @@ public:
}
// constructor --use this for setting default values
argvector(const std::wstring & arg)
argvector(const std::wstring& arg)
{
*this = arg;
}
@@ -1438,7 +1440,7 @@ public:
}
// we give full read access to the vector, so we can use it bounded as well
const std::vector<T> & tovector() const
const std::vector<T>& tovector() const
{
return *this;
}
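The operator= overload above splits a command-line argument into per-epoch values. A minimal sketch of the colon-separated splitting it performs (ignoring the real class's type conversion and any repetition syntax):

#include <string>
#include <vector>

// Split an argument such as L"256:1024" on ':' into per-epoch values;
// consumers of argvector read it bounded, reusing the last value for
// later epochs, which is how a short list can cover a long training run.
std::vector<std::wstring> SplitArg(const std::wstring& arg)
{
    std::vector<std::wstring> vals;
    size_t start = 0;
    while (start <= arg.size())
    {
        size_t sep = arg.find(L':', start);
        if (sep == std::wstring::npos) sep = arg.size();
        vals.push_back(arg.substr(start, sep - start));
        start = sep + 1;
    }
    return vals;
}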

View file

@@ -2049,7 +2049,7 @@ void BatchSequenceReader<ElemType>::GetLabelOutput(std::map<std::wstring,
}
if (curDevId != CPUDEVICE)
{
labels->TransferFromDeviceToDevice(CPUDEVICE, curDevId, true, false, false);
labels->TransferFromDeviceToDevice(CPUDEVICE, curDevId, false, false, false);
}
}

View file

@@ -1725,6 +1725,102 @@ numBestSearchEpoch
\end_layout
\begin_layout Standard
Used in the Adaptive Minibatch Sizing mode.
\end_layout
\begin_layout Itemize
\emph on
numMiniBatch4LRSearch
\emph default
\begin_inset Index idx
status open
\begin_layout Plain Layout
numMiniBatch4LRSearch
\end_layout
\end_inset
: the number of minibatches used to search the minibatch size when in
adaptive minibatch size mode.
Default value is 500.
It is typically set to 10-20% of the total number of minibatches in an
epoch; the setting is shared with the learning-rate search in
SearchBeforeEpoch mode.
\end_layout
\begin_layout Itemize
\emph on
autoAdjustMinibatch
\emph default
\begin_inset Index idx
status open
\begin_layout Plain Layout
autoAdjustMinibatch
\end_layout
\end_inset
: enables or disables adaptive adjustment of the minibatch size.
Default value is false.
Adaptive minibatch sizing begins once the epochs whose minibatch sizes
were explicitly specified by the user are complete. For example, if the
user specified minibatchSize=256:1024, then 256 and 1024 are used in the
first two epochs and adaptive minibatch sizing is used afterwards.
The overall schedule is sketched after this parameter list.
\end_layout
\begin_layout Itemize
\emph on
minibatchSizeTuningFrequency
\emph default
\begin_inset Index idx
status open
\begin_layout Plain Layout
minibatchSizeTuningFrequency
\end_layout
\end_inset
: the number of epochs to skip, on a periodic basis, before dynamically
adjusting the minibatch size; that is, the size is re-tuned once every
minibatchSizeTuningFrequency epochs.
Default value is 1.
\end_layout
\begin_layout Itemize
\emph on
minibatchSizeTuningMax
\emph default
\begin_inset Index idx
status open
\begin_layout Plain Layout
minibatchSizeTuningMax
\end_layout
\end_inset
: the maximum value allowed for an adaptively adjusted minibatch size.
Default value is 1048576.
\end_layout
\end_deeper
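Taken together, the four parameters above define when adaptive minibatch sizing is active and how large the minibatch may grow. A hypothetical sketch of that schedule (illustrative only, not the SGD implementation; parameter names mirror the documentation, and the search itself, rerun every minibatchSizeTuningFrequency epochs, is elided):

#include <algorithm>
#include <cstddef>
#include <vector>

// Illustrative schedule: explicitly listed sizes (e.g. 256:1024) cover
// the first epochs; afterwards, if autoAdjustMinibatch is enabled, the
// most recently tuned size is used, capped at minibatchSizeTuningMax.
size_t MinibatchSizeForEpoch(size_t epoch,
                             const std::vector<size_t>& explicitSizes,
                             bool autoAdjustMinibatch,
                             size_t minibatchSizeTuningMax,
                             size_t lastTunedSize)
{
    if (epoch < explicitSizes.size())
        return explicitSizes[epoch];      // user-specified epochs come first
    if (!autoAdjustMinibatch)
        return explicitSizes.back();      // no adaptation requested
    return std::min(lastTunedSize, minibatchSizeTuningMax);
}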
\begin_layout Subsubsection
Gradient control

File diff not shown because of its large size.

View file

@@ -978,18 +978,27 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual void EvaluateThisNode() //-sum(left_i * log(softmax_i(right)))
{
if (m_evalMode == NCEEvalMode::Softmax || Inputs(0)->FunctionValues().GetNumRows() == 1)
int positive = 0, negative = 0;
if (Inputs(0)->FunctionValues().GetNumRows() == 1)
{
for (int i = 0; i < Inputs(0)->FunctionValues().GetNumCols(); i++)
{
if (Inputs(0)->FunctionValues()(0, i) > 0)
positive++;
else if (Inputs(0)->FunctionValues()(0, i) < 0)
negative++;
}
assert(positive * negative == 0);
}
if (m_evalMode == NCEEvalMode::Softmax || (Inputs(0)->FunctionValues().GetNumRows() == 1 && positive > 0))
{
// evaluation uses softmax
m_logSoftmax.AssignProductOf(Inputs(1)->FunctionValues(), true, Inputs(2)->FunctionValues(), false);
m_logSoftmax += Inputs(3)->FunctionValues();
m_logSoftmax.InplaceLogSoftmax(false);
FunctionValues().Resize(1, 1);
FunctionValues().SetValue(0);
for (int i = 0; i < Inputs(0)->FunctionValues().GetNumCols(); i++)
FunctionValues()(0, 0) -= m_logSoftmax(i, (size_t)Inputs(0)->FunctionValues()(0, i));
FunctionValues().AssignSoftmaxSum(Inputs(0)->FunctionValues(), m_logSoftmax);
}
else if (m_evalMode == NCEEvalMode::Unnormalized)
else if (m_evalMode == NCEEvalMode::Unnormalized || (Inputs(0)->FunctionValues().GetNumRows() == 1 && negative > 0))
{
FunctionValues().AssignNceUnnormalizedEval(Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues());
}
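The counting loop added above keys off a sign convention (also noted in the CUDA kernel comments later in this commit): with a single label row, positive entries are true samples and negative entries are noise samples, and a minibatch must not mix the two. A minimal sketch of that dispatch:

#include <cassert>
#include <vector>

enum class EvalPath { Softmax, UnnormalizedNce };

// With one label row, positive values select the softmax evaluation path
// and negative values the unnormalized NCE path; mixing both kinds in a
// single minibatch is a usage error (hence the assert, as above).
EvalPath ChooseEvalPath(const std::vector<double>& labelRow)
{
    int positive = 0, negative = 0;
    for (double v : labelRow)
    {
        if (v > 0) positive++;
        else if (v < 0) negative++;
    }
    assert(positive * negative == 0);
    return positive > 0 ? EvalPath::Softmax : EvalPath::UnnormalizedNce;
}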

View file

@@ -3881,6 +3881,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
return CPUMatrix<ElemType>::MultiplyAndWeightedAdd(1.0, a, transposeA, b, transposeB, 1.0, c);
}
template<class ElemType>
void CPUMatrix<ElemType>::AssignSoftmaxSum(const CPUMatrix<ElemType>& softmax, CPUMatrix<ElemType>& c)
{
ElemType log_likelihood = 0.0;
size_t batch_size = this->GetNumCols();
#pragma omp parallel for reduction(+:log_likelihood)
for (int instance_id = 0; instance_id < (int)batch_size; instance_id++)
{
int sample = (int)(*this)(0, instance_id);
log_likelihood += softmax(instance_id, sample);
}
c(0, 0) = -log_likelihood;
}
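The loop above accumulates the log-softmax value of each instance's target class and negates the sum. A naive single-threaded reference with the same convention (one row per instance, one class index per instance):

#include <vector>

// Reference for the objective computed by AssignSoftmaxSum:
// loss = -sum_i logSoftmax[i][labels[i]], with logSoftmax stored
// row-per-instance. Useful as a check against the OpenMP version above.
double SoftmaxSumReference(const std::vector<std::vector<double>>& logSoftmax,
                           const std::vector<int>& labels)
{
    double logLikelihood = 0.0;
    for (size_t i = 0; i < labels.size(); i++)
        logLikelihood += logSoftmax[i][labels[i]];
    return -logLikelihood;
}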
template<class ElemType>
void CPUMatrix<ElemType>::AssignNCEUnnormalizedEval(const CPUMatrix<ElemType>& a,

View file

@@ -217,6 +217,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
void AssignNoiseContrastiveEstimation(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& bias,
CPUMatrix<ElemType>& tmp, CPUMatrix<ElemType>& c);
void AssignSoftmaxSum(const CPUMatrix<ElemType>& a, CPUMatrix<ElemType>& softmax);
void AssignNCEUnnormalizedEval(const CPUMatrix<ElemType>& a,
const CPUMatrix<ElemType>& b, const CPUMatrix<ElemType>& bias, CPUMatrix<ElemType>& c);

View file

@@ -1957,7 +1957,27 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
if (do_sync) CUDA_CALL(cudaEventDestroy(done));
}
template<class ElemType>
void GPUMatrix<ElemType>::AssignSoftmaxSum(const GPUMatrix<ElemType>& a, GPUMatrix<ElemType>& c)
{
UNCONST(ElemType, a, my_a);
cudaEvent_t done = nullptr;
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
int p = 512;
int width = a.GetNumRows();
while (p / 2 > width) p = p / 2;
_assignSoftmaxSum<ElemType><<<1, p>>>(
my_a.GetArray(),
width,
GetArray(),
c.GetArray()
);
if (do_sync) CUDA_CALL(cudaEventRecord(done));
if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
if (do_sync) CUDA_CALL(cudaEventDestroy(done));
}
template<class ElemType>
void GPUMatrix<ElemType>::AssignNCEUnnormalizedEval(const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, GPUMatrix<ElemType>& c)
{

View file

@@ -295,7 +295,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t sampleCount, GPUMatrix<ElemType>& tmp, GPUMatrix<ElemType>& c);
void AssignNCEDerivative(GPUMatrix<ElemType>& tmp, const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, size_t inputIndex, GPUMatrix<ElemType>& c);
void AssignNCEUnnormalizedEval(const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, GPUMatrix<ElemType>& c);
void AssignSoftmaxSum(const GPUMatrix<ElemType>& a, GPUMatrix<ElemType>& softmax);
void Print(const char* matrixName, size_t rowStart, size_t rowEnd, size_t colStart, size_t colEnd) const;
void Print(const char* matrixName = NULL) const; //print whole matrix. can be expensive

View file

@@ -2932,6 +2932,59 @@ __global__ void _computeNceOutput(
}
}
template<class ElemType>
__global__ void _assignSoftmaxSum(
const ElemType* softmax,
int sampleCount,
const ElemType* a,
ElemType* c) // run on 512 threads per block
{
// softmax is the log-softmax matrix, stored in column-major format with
// one row per instance and one column per word
// a holds, for each of the sampleCount instances, the (ElemType-encoded)
// index of its target word; in the label matrix, positive values mark true
// samples and negative values mark noise samples, as in the NCE kernels
// c is the 1x1 output that receives the objective: the negated sum of the
// per-instance log-softmax values accumulated below
__shared__ ElemType partials[512];
partials[threadIdx.x] = 0;
int total = sampleCount;
int loadPerThread = (total + blockDim.x - 1) / blockDim.x;
// find out the items this thread is responsible for
int start = loadPerThread * threadIdx.x;
int end = min(total, loadPerThread * (threadIdx.x + 1));
for (int i = start; i < end; i++)
{
int wid = (int)a[i];
partials[threadIdx.x] += softmax[IDX2C(i, wid, sampleCount)];
}
__syncthreads();
// now sum up the objective function
int nTotalThreads = blockDim.x;
while (nTotalThreads > 1)
{
int halfPoint = (nTotalThreads >> 1);
if (threadIdx.x < halfPoint)
partials[threadIdx.x] += partials[threadIdx.x + halfPoint];
__syncthreads();
nTotalThreads = (nTotalThreads >> 1);
}
if (threadIdx.x == 0)
c[0] = -partials[0];
}
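The halving loop above is the standard shared-memory tree reduction. Its CPU analogue, for reference (assuming a non-empty, power-of-two length, matching the 512-thread block used here):

#include <vector>

// CPU analogue of the in-block tree reduction in _assignSoftmaxSum:
// fold the upper half of the partials onto the lower half until a single
// sum remains. Requires a non-empty, power-of-two-sized input.
double TreeReduce(std::vector<double> partials)
{
    for (size_t n = partials.size(); n > 1; n >>= 1)
    {
        size_t half = n >> 1;
        for (size_t i = 0; i < half; i++)
            partials[i] += partials[i + half];
    }
    return partials[0];
}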
template<class ElemType>
__global__ void _assignNoiseContrastiveEstimation(
const ElemType* val,

View file

@@ -747,9 +747,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#define NUM_MATRIXTYPE_CHANGED_WARN 20
m_numTimesMatrixTypeChanged++;
if (m_numTimesMatrixTypeChanged == NUM_MATRIXTYPE_CHANGED_WARN)
fprintf(stderr, "WARNING: The same matrix with dim [%d, %d] has been transferred between different devices for %d times.\n", GetNumRows(), GetNumCols(), NUM_MATRIXTYPE_CHANGED_WARN);
{
fprintf(stderr, "WARNING: The same matrix with dim [%lu, %lu] has been transferred between different devices for %d times.\n", (unsigned long)GetNumRows(), (unsigned long)GetNumCols(), NUM_MATRIXTYPE_CHANGED_WARN);
}
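The reworked warning also fixes a format-string mismatch: GetNumRows() and GetNumCols() return size_t, which %d does not portably accept. Casting to unsigned long with %lu is the maximally portable fix; where C99/C++11 is available, %zu also works (a small sketch):

#include <cstdio>
#include <cstddef>

int main()
{
    size_t rows = 1024, cols = 2048;
    // Both lines print the same dimensions; the cast form compiles on
    // pre-C99 toolchains, while %zu is the dedicated size_t conversion.
    std::printf("dim [%lu, %lu]\n", (unsigned long)rows, (unsigned long)cols);
    std::printf("dim [%zu, %zu]\n", rows, cols);
    return 0;
}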
if (GetDeviceId()<0) //CPU
{
if (newMatrixType==MatrixType::SPARSE)
@@ -1241,14 +1243,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
if (numRows != GetNumRows() || numCols != GetNumCols())
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->Reshape(numRows, numCols),
m_GPUMatrix->Reshape(numRows, numCols),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED
);
}
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->Reshape(numRows, numCols),
m_GPUMatrix->Reshape(numRows, numCols),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED
);
}
}
template<class ElemType>
@@ -3667,6 +3669,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return *this;
}
template<class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignSoftmaxSum(const Matrix<ElemType>& a, const Matrix<ElemType>& softmax)
{
this->Resize(1, 1);
if (this->GetDeviceId() < 0)
a.m_CPUMatrix->AssignSoftmaxSum(*softmax.m_CPUMatrix, *this->m_CPUMatrix);
else
a.m_GPUMatrix->AssignSoftmaxSum(*softmax.m_GPUMatrix, *this->m_GPUMatrix);
return *this;
}
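Matrix::AssignSoftmaxSum above follows the codebase's device convention, visible earlier in this file: a negative device id means CPU, anything else GPU. A stripped-down sketch of that dispatch idiom (hypothetical helper, for illustration only):

// A negative device id selects the CPU implementation, anything else the
// GPU one. The real code calls the member functions directly.
template <class CpuFn, class GpuFn>
void DispatchOnDevice(int deviceId, CpuFn cpu, GpuFn gpu)
{
    if (deviceId < 0)
        cpu();   // e.g. m_CPUMatrix->AssignSoftmaxSum(...)
    else
        gpu();   // e.g. m_GPUMatrix->AssignSoftmaxSum(...)
}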
template<class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignNceUnnormalizedEval(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, const Matrix<ElemType>& bias)
{
@@ -4454,7 +4468,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
// Matrix<ElemType>& Matrix<ElemType>::Shift(const Matrix<ElemType>& a, size_t shift)
//Matrix<ElemType>& Matrix<ElemType>::Shift(const Matrix<ElemType>& a, size_t shift)
//[this]= (a right shift by n), padded with zeros
// shift left, shift needs to be negative value
// shift right, shift needs to be positive value

View file

@@ -150,7 +150,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType>& AssignNoiseContrastiveEstimation(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, const Matrix<ElemType>& bias, Matrix<ElemType>& tmp);
Matrix<ElemType>& AssignNCEDerivative(const Matrix<ElemType>& tmp, const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, size_t inputIndex);
Matrix<ElemType>& AssignSoftmaxSum(const Matrix<ElemType>& a, const Matrix<ElemType>& softmax);
Matrix<ElemType>& AssignNceUnnormalizedEval(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, const Matrix<ElemType>& bias);
Matrix<ElemType> Transpose(); // This method doesn't change state of Matrix. It should be a const function

View file

@@ -1070,6 +1070,10 @@ namespace Microsoft {
}
template<class ElemType>
void GPUMatrix<ElemType>::AssignSoftmaxSum(const GPUMatrix<ElemType>& a, GPUMatrix<ElemType>& c)
{
}
template<class ElemType>
void GPUMatrix<ElemType>::AssignNCEUnnormalizedEval(const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, GPUMatrix<ElemType>& c)