Improve V2 Learner API, add basic serialization.

FSAdaGrad, RMSProp: pre-allocate smoothed gradients with the expected
number of columns.
Improve configuring built-in learners: add learning rate and
momentum schedules.
    Add NDArrayView as a DictionaryValue type.
    Add tests for serialization and basic learner functionality.
Alexey Reznichenko 2016-08-18 14:33:38 +02:00
Parent 32f97fd266
Commit 9bd9308d2e
13 changed files with 1180 additions and 272 deletions

View file

@ -409,6 +409,8 @@ CNTKLIBRARY_TESTS_SRC =\
Tests/UnitTests/V2LibraryTests/TensorTests.cpp \
Tests/UnitTests/V2LibraryTests/TrainerTests.cpp \
Tests/UnitTests/V2LibraryTests/CifarResNet.cpp \
Tests/UnitTests/V2LibraryTests/SerializationTests.cpp \
Tests/UnitTests/V2LibraryTests/LearnerTests.cpp \
CNTKLIBRARY_TESTS:=$(BINDIR)/v2librarytests
CNTKLIBRARY_TESTS_OBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(patsubst %.cpp, $(OBJDIR)/%.o, $(CNTKLIBRARY_TESTS_SRC)))

View file

@ -20,10 +20,12 @@
#include <array>
#include <stdarg.h>
#include <assert.h>
#include <map>
#include <unordered_map>
#include <unordered_set>
#include <string>
#include <sstream>
#include <iosfwd>
#include <algorithm>
namespace CNTK
@ -242,7 +244,7 @@ namespace CNTK
}
///
/// Creates and returns a new shape contructed by appending the dimensions of the specified 'shape' to 'this' shape's dimensions.
/// Creates and returns a new shape constructed by appending the dimensions of the specified 'shape' to 'this' shape's dimensions.
///
NDShape AppendShape(const NDShape& shape) const
{
@ -1645,6 +1647,7 @@ namespace CNTK
NDShape,
Vector,
Dictionary,
NDArrayView,
};
static const char* TypeName(Type type)
@ -1669,6 +1672,8 @@ namespace CNTK
return "Vector";
case Type::Dictionary:
return "Dictionary";
case Type::NDArrayView:
return "NDArrayView";
default:
LogicError("Unknown DictionaryValue::Type");
}
@ -1715,8 +1720,9 @@ namespace CNTK
static_assert((std::is_same<T, NDShape>::value ||
std::is_same<T, std::wstring>::value ||
std::is_same<T, std::vector<DictionaryValue>>::value ||
std::is_same<T, Dictionary>::value),
"Unsupported ValueType");
std::is_same<T, Dictionary>::value ||
std::is_same<T, NDArrayView>::value),
"Unsupported ValueType");
AllocateDataPtr(value);
}
@ -1728,6 +1734,13 @@ namespace CNTK
*this = other;
}
DictionaryValue(DictionaryValue&& other) : m_valueType(Type::Bool)
{
// The m_valueType must have been set to a non-ptr type to prevent an attempt to interpret
// the underlying uninitialized value as a ptr and free it.
*this = std::move(other);
}
DictionaryValue& operator=(const DictionaryValue& other)
{
if (this != &other)
@ -1745,11 +1758,34 @@ namespace CNTK
AllocateDataPtr(other.GetValue<std::vector<DictionaryValue>>());
else if (other.m_valueType == Type::Dictionary)
AllocateDataPtr(other.GetValue<Dictionary>());
else if (other.m_valueType == Type::NDArrayView)
AllocateDataPtr(other.GetValue<NDArrayView>());
}
return *this;
}
DictionaryValue& operator=(DictionaryValue&& other)
{
FreeDataPtr();
m_valueType = other.m_valueType;
m_data = other.m_data;
if (other.m_valueType == Type::String ||
other.m_valueType == Type::NDShape ||
other.m_valueType == Type::Vector ||
other.m_valueType == Type::Dictionary ||
other.m_valueType == Type::NDArrayView)
{
other.m_data.m_ptr = nullptr;
}
other.m_valueType = Type::None;
return *this;
}
~DictionaryValue()
{
FreeDataPtr();
@ -1786,7 +1822,8 @@ namespace CNTK
template <typename T, typename std::enable_if<std::is_same<T, NDShape>::value ||
std::is_same<T, std::wstring>::value ||
std::is_same<T, std::vector<DictionaryValue>>::value ||
std::is_same<T, Dictionary>::value>::type* = nullptr>
std::is_same<T, Dictionary>::value ||
std::is_same<T, NDArrayView>::value>::type* = nullptr>
const T& GetValue() const
{
VerifyType<T>();
@ -1803,8 +1840,11 @@ namespace CNTK
return m_valueType;
}
friend CNTK_API Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, DictionaryValue& us);
friend CNTK_API Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const DictionaryValue& us);
CNTK_API bool operator==(const DictionaryValue& other) const;
CNTK_API bool operator!=(const DictionaryValue& other) const;
friend CNTK_API std::istream& operator>>(std::istream& stream, DictionaryValue& us);
friend CNTK_API std::ostream& operator<<(std::ostream& stream, const DictionaryValue& us);
private:
template <typename T>
@ -1816,8 +1856,9 @@ namespace CNTK
std::is_same<T, double>::value ||
std::is_same<T, std::wstring>::value ||
std::is_same<T, NDShape>::value ||
std::is_same<T, std::vector<DictionaryValue>>::value ||
std::is_same<T, Dictionary>::value),
std::is_same<T, std::vector<DictionaryValue>>::value ||
std::is_same<T, Dictionary>::value ||
std::is_same<T, NDArrayView>::value),
"Unsupported ValueType");
if (std::is_same<T, bool>::value) return Type::Bool;
@ -1828,6 +1869,7 @@ namespace CNTK
if (std::is_same<T, NDShape>::value) return Type::NDShape;
if (std::is_same<T, std::vector<DictionaryValue>>::value) return Type::Vector;
if (std::is_same<T, Dictionary>::value) return Type::Dictionary;
if (std::is_same<T, NDArrayView>::value) return Type::NDArrayView;
}
template <typename T>
@ -1853,6 +1895,8 @@ namespace CNTK
FreePtrAsType<std::vector<DictionaryValue>>();
else if (m_valueType == Type::Dictionary)
FreePtrAsType<Dictionary>();
else if (m_valueType == Type::NDArrayView)
FreePtrAsType<NDArrayView>();
}
Type m_valueType;
@ -1906,9 +1950,11 @@ namespace CNTK
return Contains(key.c_str());
}
CNTK_API bool operator==(const Dictionary& other) const;
CNTK_API bool operator!=(const Dictionary& other) const;
friend CNTK_API Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, Dictionary& us);
friend CNTK_API Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const Dictionary& us);
friend CNTK_API std::istream& operator>>(std::istream& stream, Dictionary& us);
friend CNTK_API std::ostream& operator<<(std::ostream& stream, const Dictionary& us);
private:
std::shared_ptr<std::unordered_map<std::wstring, DictionaryValue>> m_dictionaryData;
@ -1957,37 +2003,127 @@ namespace CNTK
};
///
/// A collection of key-value pairs that represents a training parameter schedule in
/// terms of the number of processed samples.
/// This class provides a number of convenience constructors to allow easy conversion
/// from a single value, a vector of values, or a list of pairs into a training schedule.
///
template <typename T>
class TrainingParameterSchedule
{
public:
///
/// Create a schedule with a constant parameter value.
///
TrainingParameterSchedule(T value)
: m_schedule({ std::make_pair(0, value) }), m_unit(1)
{}
///
/// Create a schedule where the parameter changes its value every 'unit' samples:
/// schedule[0] is used for the first 'unit' samples, schedule[1] -- for the second,
/// and so on. The last value is then used repeatedly until the end of training.
///
TrainingParameterSchedule(const std::vector<T>& schedule, size_t unit = 1)
: m_unit(unit)
{
// TODO: 0 will be used to mean "the entire sweep"
if (unit == 0)
RuntimeError("TrainingParameterSchedule::constructor : 'unit' cannot be 0.");
if (schedule.size() == 0)
RuntimeError("TrainingParameterSchedule::constructor : schedule is empty.");
size_t i = 1;
for (const auto& value : schedule)
{
m_schedule[m_unit * i++] = value;
}
}
///
/// Create a schedule using the list of key-value pairs, where the key specifies
/// the number of 'units' the parameter should maintain the corresponding value.
/// The value from the last pair is used repeatedly until the end of training.
/// For example, {{1, 0.05}, {2, 0.1}, {1, 0.005}} with unit = 100 corresponds to
/// a schedule where the value '0.05' is used for the first 100 samples, then
/// '0.1' is used for the next 200 samples, after which the value switches
/// to '0.005'.
///
TrainingParameterSchedule(const std::initializer_list<std::pair<const size_t, T>>& schedule, size_t unit = 1)
: m_unit(unit)
{
// TODO: 0 will be used to mean "the entire sweep"
if (unit == 0)
RuntimeError("TrainingParameterSchedule::constructor : 'unit' cannot be 0.");
if (schedule.size() == 0)
RuntimeError("TrainingParameterSchedule::constructor : schedule is empty.");
size_t i = 0;
for (const auto& it : schedule)
{
if (it.first == 0)
RuntimeError("TrainingParameterSchedule::constructor : unit count cannot be 0.");
i += it.first;
m_schedule[m_unit * i] = it.second;
}
}
///
/// Returns a value corresponding to the absolute sample count from the beginning of training.
///
CNTK_API const T& operator[](size_t sampleCount) const;
private:
std::map<size_t, T> m_schedule;
size_t m_unit;
};
typedef TrainingParameterSchedule<double> LearningRatesPerSample;
typedef TrainingParameterSchedule<double> MomentumsPerSample;
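For illustration, a minimal sketch of how the three constructor forms above resolve values by sample count (the assertion values follow the documented semantics; this snippet is not part of the commit):
// Constant schedule: the same value for the entire run.
LearningRatesPerSample lr1 = 0.01;
assert(lr1[0] == 0.01 && lr1[100000] == 0.01);
// Vector schedule with unit = 100: 0.05 for the first 100 samples,
// then 0.01 repeated until the end of training.
LearningRatesPerSample lr2(std::vector<double>{ 0.05, 0.01 }, 100);
assert(lr2[99] == 0.05 && lr2[100] == 0.01 && lr2[100000] == 0.01);
// Pair-list schedule with unit = 100: 0.05 for the first 100 samples,
// 0.1 for the next 200 samples, then 0.005 until the end of training.
LearningRatesPerSample lr3({ { 1, 0.05 }, { 2, 0.1 }, { 1, 0.005 } }, 100);
assert(lr3[0] == 0.05 && lr3[150] == 0.1 && lr3[300] == 0.005);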
///
/// Create an instance of the CNTK built-in SGD learner.
///
/// TODO: add additional SGD parameters here (a collection of learning rate values)
CNTK_API LearnerPtr SGDLearner(const std::unordered_set<Parameter>& parameters, double learningRatePerSample);
CNTK_API LearnerPtr SGDLearner(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates);
///
/// Create an instance of the CNTK built-in Momentum SGD learner.
///
/// TODO: add additional Momentum parameters here (a collection of momentum rate values)
CNTK_API LearnerPtr MomentumSGDLearner(const std::unordered_set<Parameter>& parameters);
CNTK_API LearnerPtr MomentumSGDLearner(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums);
///
/// Create an instance of the CNTK built-in Nesterov's accelerated SGD learner.
///
CNTK_API LearnerPtr NesterovLearner(const std::unordered_set<Parameter>& parameters);
CNTK_API LearnerPtr NesterovLearner(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums);
///
/// Create an instance of the CNTK built-in AdaGrad learner.
///
CNTK_API LearnerPtr AdaGradLearner(const std::unordered_set<Parameter>& parameters, bool needAveMultiplier = true);
CNTK_API LearnerPtr AdaGradLearner(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
bool needAveMultiplier = true);
///
/// Create an instance of the CNTK built-in FSAdaGrad (improved AdaGrad) learner.
///
CNTK_API LearnerPtr FSAdaGradLearner(const std::unordered_set<Parameter>& parameters);
CNTK_API LearnerPtr FSAdaGradLearner(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums);
///
/// Create an instance of the CNTK built-in RMSProp learner.
///
CNTK_API LearnerPtr RMSPropLearner(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
double gamma,
double inc,
double dec,
@ -1997,7 +2133,7 @@ namespace CNTK
///
/// Trainer is the top-level abstraction responsible for the orchestration of the training of a model
/// using the specified learners and training data either explicilty supplied as Value objects or from
/// using the specified learners and training data either explicitly supplied as Value objects or from
/// a MinibatchSource object.
///
class Trainer
@ -2085,7 +2221,7 @@ namespace CNTK
};
///
/// Abstraction for generating minbatches of samples for training/evaluation.
/// Abstraction for generating minibatches of samples for training/evaluation.
///
class MinibatchSource : public std::enable_shared_from_this<MinibatchSource>
{
@ -2101,7 +2237,7 @@ namespace CNTK
/// #samples or both. In case the size is specified in terms of both #sequences and #samples, the smaller of the 2 is taken. The actual
/// returned size of the minibatch is the min across all streams. Also the requested MB size fields in the maps are updated by the
/// MinibatchSource to contain the actual #sequences and #samples in the returned minibatch for the corresponding stream.
/// The return value indciates if the MinibatchSource will return any further data in subsequent calls of this function.
/// The return value indicates if the MinibatchSource will return any further data in subsequent calls of this function.
///
virtual std::unordered_map<StreamInfo, MinibatchData> GetNextMinibatch(const std::unordered_map<StreamInfo, std::pair<size_t, size_t>>& perStreamMBSizeLimits,
const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice()) = 0;

View file

@ -53,8 +53,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template <typename ElementType>
class ComputationNode;
class File;
}}}
// TODO: The following should be reconciled with the equivalent code in the CNTK implementation
@ -139,7 +137,7 @@ namespace CNTK
#define NOT_IMPLEMENTED \
{ \
fprintf(stderr, "Inside File: %s Line: %d Function: %s -> Feature Not Implemented.\n", __FILE__, __LINE__, __FUNCTION__); \
LogicError("Inside File: %s Line: %d Function: %s -> Feature Not Implemented.\n", __FILE__, __LINE__, __FUNCTION__); \
CNTK::LogicError("Inside File: %s Line: %d Function: %s -> Feature Not Implemented.\n", __FILE__, __LINE__, __FUNCTION__); \
}
#endif
}

View file

@ -8,19 +8,18 @@
#include "Utils.h"
#define UPDATE_FUNCTION \
switch (smoothedGradientValue->GetDataType()) \
switch (smoothedGradientValue->GetDataType()) \
{ \
case DataType::Float: \
Update<float>(parameter, gradientValue, smoothedGradientValue, trainingSampleCount); \
Update<float>(parameter, gradientValue, smoothedGradientValue, trainingSampleCount); \
break; \
case DataType::Double: \
Update<double>(parameter, gradientValue, smoothedGradientValue, trainingSampleCount); \
Update<double>(parameter, gradientValue, smoothedGradientValue, trainingSampleCount); \
break; \
default: \
NOT_IMPLEMENTED; \
}
using namespace Microsoft::MSR::CNTK;
using namespace std;
@ -141,7 +140,7 @@ namespace CNTK
// L1 regularizer with proximal gradient descent method
if (m_additionalOptions.l1RegularizationWeight > 0)
{
auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
// multiply by actualMBSize so that it's invariant to minibatch size since learning rate is per sample
auto weight = ElementType(learningRate * m_additionalOptions.l1RegularizationWeight * actualMBSize);
parameterValue->GetWritableMatrix<ElementType>()->InplaceSoftThreshold(weight);
@ -154,48 +153,49 @@ namespace CNTK
return arrayView->GetWritableTensorView<ElementType>();
}
LearnerBase::LearnerBase(const unordered_set<Parameter>& parameters)
LearnerBase::LearnerBase(const unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
bool allocateSmoothGradients /* = true */)
: Learner(parameters),
m_learningRatePerSample(0.0),
m_sampleCount(0)
m_learningRates(learningRates),
m_sampleCount(0),
m_minibatchCount(0)
{
const unordered_set<Parameter>& parameterSet = parameters;
for (const auto& parameter : parameterSet)
for (const auto& parameter : parameters)
{
// TODO: using the same device to allocate data for all smoothed gradients. Is this correct?
// Should the device be specified on the per-parameter basis?
NDArrayViewPtr view;
if (parameter.GetDataType() == DataType::Float)
if (!allocateSmoothGradients)
{
view = MakeSharedObject<NDArrayView>(0.0f, parameter.Shape(), parameter.Value()->Device());
continue;
}
else
{
view = MakeSharedObject<NDArrayView>(0.0, parameter.Shape(), parameter.Value()->Device());
}
NDArrayViewPtr view = AllocateNDArrayView(parameter, parameter.Shape());
m_smoothedGradientValues.insert(make_pair(parameter, view));
m_additionalOptions.learningRateMultipliers.insert(make_pair(parameter, 1.0));
}
}
void LearnerBase::ResetSmoothedGradients()
/*static*/ NDArrayViewPtr LearnerBase::AllocateNDArrayView(const Parameter& parameter, const NDShape& shape)
{
for (const auto& parameter : Parameters())
if (parameter.GetDataType() == DataType::Float)
{
const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
const auto& data = smoothedGradientValue;
switch (data->GetDataType())
{
case DataType::Float:
data->SetValue(0.0f);
break;
case DataType::Double:
data->SetValue(0.0);
break;
default:
LogicError("Unsupported DataType %s", ::CNTK::DataTypeName(data->GetDataType()));
}
return MakeSharedObject<NDArrayView>(float(0.0), shape, parameter.Value()->Device());
}
else
{
return MakeSharedObject<NDArrayView>(0.0, shape, parameter.Value()->Device());
}
}
/*static*/ NDShape LearnerBase::GetMatrixShape(const Parameter& parameter)
{
if (parameter.GetDataType() == DataType::Float)
{
auto matrix = GetMatrix<float>(parameter.Value());
return { matrix->GetNumRows(), matrix->GetNumCols() };
}
else
{
auto matrix = GetMatrix<double>(parameter.Value());
return { matrix->GetNumRows(), matrix->GetNumCols() };
}
}
@ -219,17 +219,19 @@ namespace CNTK
#endif
#if DUMPOUTPUT
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
auto momentum = ElementType(MomentumPerMB(m_momentums[m_sampleCount], trainingSampleCount));
LOGPRINTF(stderr, "learnRatePerSample=%0.8f, momentum=%0.8f, actualMBSize=%ld\n",
m_learningRatePerSample, m_momentumPerSample, trainingSampleCount);
learningRate, momentum, trainingSampleCount);
LOGPRINTF(stderr, "GradUpdateType()=%s, GradientUpdateNoiseStd()=%0.8f\n",
LearnerType().c_str(), m_GaussianNoiseInjectStd);
LearnerType().c_str(), m_additionalOptions.gaussianNoiseInjectionStdDev);
Print(gradientValue, "Gradient Update");
Print(smoothedGradientValue, "Smoothed Gradient Input");
#endif
UPDATE_FUNCTION;
#if DUMPOUTPUT
Print(parameterValue, "Parameter Update");
Print(parameter.Value(), "Parameter Update");
#endif
#ifdef _DEBUG
@ -239,6 +241,7 @@ namespace CNTK
#endif
}
m_sampleCount += trainingSampleCount;
m_minibatchCount++;
return false;
}
@ -265,9 +268,16 @@ namespace CNTK
/*virtual*/ Dictionary LearnerBase::GetCheckpointState() const /*override*/
{
NOT_IMPLEMENTED; // Until the new checkpointing is fully fleshed out, nobody should be calling this.
Dictionary checkpoint;
checkpoint[L"checkpointVersion"] = checkpointVersion;
checkpoint[L"sampleCount"] = m_sampleCount;
checkpoint[L"minibatchCount"] = m_minibatchCount;
// TODO: should we also save learning rate schedule into the checkpoint?
// If that is the case, need to be able to override this method in subclasses
// and save momentum schedule as well.
for (const auto& parameter : Parameters())
{
// TODO: parameter name is not guaranteed to be unique. Instead, all serializable objects
@ -277,31 +287,48 @@ namespace CNTK
{
LogicError("Parameter names must be unique");
}
const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
// Potentially, could store things like dimensions, element size, format, etc., but
// that seems to be redundant, since all of that is passed in the constructor.
checkpoint[parameter.Name()] = SerializeToVector(smoothedGradientValue);
const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
checkpoint[parameter.Name()] = *smoothedGradientValue;
}
return checkpoint;
}
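// For reference, the checkpoint dictionary produced above has this layout
// (derived directly from the code; the version constant is defined in Learner.h):
//   checkpoint[L"checkpointVersion"] -> size_t (currently 1)
//   checkpoint[L"sampleCount"]       -> size_t
//   checkpoint[L"minibatchCount"]    -> size_t
//   checkpoint[<parameter name>]     -> NDArrayView (the smoothed gradient)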
/*virtual*/ void LearnerBase::RestoreFromCheckpoint(const Dictionary& checkpoint) /*override*/
{
NOT_IMPLEMENTED; // Until the new checkpointing is fully fleshed out, nobody should be calling this.
m_sampleCount = checkpoint[L"sampleCount"].GetValue<size_t>();
m_minibatchCount = checkpoint[L"minibatchCount"].GetValue<size_t>();
size_t version = checkpoint[L"checkpointVersion"].GetValue<size_t>();
if (checkpointVersion != version)
{
// At the moment, we only support one version, so this should never happen.
LogicError("Unsupported checkpoint version.");
}
for (const auto& parameter : Parameters())
{
if (!checkpoint.Contains(parameter.Name()))
{
LogicError("Checkpoint does not contain state for parameter %ls", parameter.Name().c_str());
}
const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
const NDArrayView& checkpointedValue = checkpoint[parameter.Name()].GetValue<NDArrayView>();
if (smoothedGradientValue->GetDataType() != checkpointedValue.GetDataType())
{
LogicError("A value restored from a checkpoint for the smoothed gradient data type for parameter %ls does not match the expected value",
parameter.Name().c_str());
}
const DictionaryValue& state = checkpoint[parameter.Name()];
if (smoothedGradientValue->Shape() != checkpointedValue.Shape())
{
LogicError("A value restored from a checkpoint for the smoothed gradient shape for parameter %ls does not match the expected value",
parameter.Name().c_str());
}
const auto& data = smoothedGradientValue;
DeserializeFromVector(data, state.GetValue<vector<DictionaryValue>>());
smoothedGradientValue->CopyFrom(checkpointedValue);
}
}
@ -313,23 +340,25 @@ namespace CNTK
template <typename ElementType>
void LearnerSGD::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const
{
UNUSED(trainingSampleCount);
const auto& parameterValue = parameter.Value();
const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue);
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
const auto& learningRate = ElementType(ParameterDependentLearningRate(parameter));
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
auto momentum = ElementType(MomentumPerMB(m_momentums[m_sampleCount], trainingSampleCount));
// TODO: break up the NormalGrad into 3 different functions, each with its own set of parameters
// (one for vanilla SGD, the other for momentum SGD, and the third one for NAG).
smoothedGradientMatrix->NormalGrad(*gradientMatrix, *parameterMatrix,
learningRate, ElementType(m_momentumPerSample), m_useNesterovAcceleration);
learningRate, momentum, m_useNesterovAcceleration);
}
LearnerAdaGrad::LearnerAdaGrad(const unordered_set<Parameter>& parameters, bool needAveMultiplier)
: LearnerBase(parameters), m_needAveMultiplier(needAveMultiplier)
LearnerAdaGrad::LearnerAdaGrad(const unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
bool needAveMultiplier)
: LearnerBase(parameters, learningRates),
m_needAveMultiplier(needAveMultiplier)
{
}
@ -348,15 +377,23 @@ namespace CNTK
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
auto aveMultiplier = smoothedGradientMatrix->Adagrad(*gradientMatrix, m_needAveMultiplier);
Matrix<ElementType>::ScaleAndAdd(ElementType(-learningRate / aveMultiplier), *gradientMatrix, *parameterMatrix);
}
LearnerFSAdaGrad::LearnerFSAdaGrad(const unordered_set<Parameter>& parameters)
: LearnerMomentumSGD(parameters)
LearnerFSAdaGrad::LearnerFSAdaGrad(const unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums)
: LearnerMomentumSGD(parameters, learningRates, momentums, /*allocateSmoothGradients*/ false)
{
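// NOTE (assumption, not stated in this commit): FSAdagrad appears to keep two
// per-parameter state matrices packed side by side in the smoothed gradient,
// which is why twice the number of columns is allocated below.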
for (const auto& parameter : parameters)
{
auto shape = GetMatrixShape(parameter);
NDArrayViewPtr view = AllocateNDArrayView(parameter, {shape[0], 2 * shape[1]});
m_smoothedGradientValues.insert(make_pair(parameter, view));
}
}
/*virtual*/ void LearnerFSAdaGrad::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
@ -373,21 +410,33 @@ namespace CNTK
const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue);
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
//const double momentum = MomentumPerMB(m_momentumPerSample, trainingSampleCount);
auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
smoothedGradientMatrix->FSAdagrad(trainingSampleCount, *gradientMatrix, *parameterMatrix,
learningRate, ElementType(m_momentumPerSample));
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
auto momentum = ElementType(MomentumPerMB(m_momentums[m_sampleCount], trainingSampleCount));
smoothedGradientMatrix->FSAdagrad(trainingSampleCount, *gradientMatrix, *parameterMatrix, learningRate, momentum);
}
LearnerRMSProp::LearnerRMSProp(const unordered_set<Parameter>& parameters,
double gamma, double inc, double dec, double max, double min, bool needAveMultiplier)
: LearnerBase(parameters),
m_gamma(gamma), m_inc(inc), m_dec(dec), m_max(max), m_min(min),
m_needAveMultiplier(needAveMultiplier)
LearnerRMSProp::LearnerRMSProp(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates,
double gamma, double inc, double dec, double max, double min, bool needAveMultiplier)
: LearnerBase(parameters, learningRates, /*allocateSmoothGradients*/ false),
m_gamma(gamma), m_inc(inc), m_dec(dec), m_max(max), m_min(min),
m_needAveMultiplier(needAveMultiplier)
{
for (const auto& parameter : parameters)
{
// When needAveMultiplier == true, the CPU and GPU implementations of RMSProp require a different number of columns.
// TODO: verify that this is correct.
size_t factor = 3;
if (needAveMultiplier && parameter.Value()->Device().Type() == DeviceKind::GPU)
{
factor = 4;
}
auto shape = GetMatrixShape(parameter);
NDArrayViewPtr view = AllocateNDArrayView(parameter, {shape[0], factor * shape[1]});
m_smoothedGradientValues.insert(make_pair(parameter, view));
}
}
/*virtual*/ void LearnerRMSProp::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
@ -405,12 +454,12 @@ namespace CNTK
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
auto aveMultiplier = smoothedGradientMatrix->RmsProp(*gradientMatrix,
ElementType(m_gamma), ElementType(m_inc),
ElementType(m_max), ElementType(m_dec),
ElementType(m_min), m_needAveMultiplier);
ElementType(m_gamma), ElementType(m_inc),
ElementType(m_max), ElementType(m_dec),
ElementType(m_min), m_needAveMultiplier);
Matrix<ElementType>::ScaleAndAdd(ElementType(-learningRate / aveMultiplier), *gradientMatrix, *parameterMatrix);
}
@ -418,34 +467,35 @@ namespace CNTK
template shared_ptr<Matrix<float>> LearnerBase::GetWritableMatrix<float>(const NDArrayViewPtr& arrayView);
template shared_ptr<Matrix<double>> LearnerBase::GetWritableMatrix<double>(const NDArrayViewPtr& arrayView);
LearnerPtr SGDLearner(const unordered_set<Parameter>& parameters, double learningRatePerSample)
LearnerPtr SGDLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates)
{
return MakeSharedObject<LearnerSGD>(parameters, learningRatePerSample);
return MakeSharedObject<LearnerSGD>(parameters, learningRates);
}
LearnerPtr MomentumSGDLearner(const unordered_set<Parameter>& parameters)
LearnerPtr MomentumSGDLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates, const MomentumsPerSample& momentums)
{
return MakeSharedObject<LearnerMomentumSGD>(parameters);
return MakeSharedObject<LearnerMomentumSGD>(parameters, learningRates, momentums);
}
LearnerPtr NesterovLearner(const unordered_set<Parameter>& parameters)
LearnerPtr NesterovLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates, const MomentumsPerSample& momentums)
{
return MakeSharedObject<LearnerNesterov>(parameters);
return MakeSharedObject<LearnerNesterov>(parameters, learningRates, momentums);
}
LearnerPtr AdaGradLearner(const unordered_set<Parameter>& parameters, bool needAveMultiplier)
LearnerPtr AdaGradLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates, bool needAveMultiplier)
{
return MakeSharedObject<LearnerAdaGrad>(parameters, needAveMultiplier);
return MakeSharedObject<LearnerAdaGrad>(parameters, learningRates, needAveMultiplier);
}
LearnerPtr FSAdaGradLearner(const unordered_set<Parameter>& parameters)
LearnerPtr FSAdaGradLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates, const MomentumsPerSample& momentums)
{
return MakeSharedObject<LearnerFSAdaGrad>(parameters);
return MakeSharedObject<LearnerFSAdaGrad>(parameters, learningRates, momentums);
}
LearnerPtr RMSPropLearner(const unordered_set<Parameter>& parameters,
double gamma, double inc, double dec, double max, double min, bool needAveMultiplier)
LearnerPtr RMSPropLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates,
double gamma, double inc, double dec, double max, double min,
bool needAveMultiplier)
{
return MakeSharedObject<LearnerRMSProp>(parameters, gamma, inc, dec, max, min, needAveMultiplier);
return MakeSharedObject<LearnerRMSProp>(parameters, learningRates, gamma, inc, dec, max, min, needAveMultiplier);
}
}

View file

@ -9,6 +9,7 @@
namespace CNTK
{
// TODO: Move this to Trainer along with Pre-, PostProcess and ClipGradient.
// A collection of additional options that are applicable for all standard learners
// (after these options are set, they retain their value for the entire lifespan of a learner).
struct AdditionalLearningOptions
@ -18,7 +19,6 @@ namespace CNTK
double gaussianNoiseInjectionStdDev = 0.0;
bool gradientClippingWithTruncation = true;
double gradientClippingThresholdPerSample = std::numeric_limits<double>::infinity();
std::unordered_map<Parameter, double> learningRateMultipliers;
};
// An abstract base class at the root of the standard learners hierarchy
@ -33,32 +33,16 @@ namespace CNTK
virtual void RestoreFromCheckpoint(const Dictionary& checkpoint) override final;
void SetAdditionalOptions(const AdditionalLearningOptions& additionalOptions)
{
m_additionalOptions = additionalOptions;
}
// TODO: should this be called ResetMomentum?
// needed for BlockMomemtumSGD to reset SGD momentum after aggregation.
void ResetSmoothedGradients();
// TODO: move learning rate and momentum scheduling and adjustment functionality
// inside the learner and drop these setters.
void SetLearningRate(double value) { m_learningRatePerSample = value; }
protected:
LearnerBase(const std::unordered_set<Parameter>& parameters);
LearnerBase(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
bool allocateSmoothGradients = true);
virtual void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const = 0;
double ParameterDependentLearningRate(const Parameter& parameter) const
{
return m_learningRatePerSample * m_additionalOptions.learningRateMultipliers.at(parameter);
}
std::string LearnerType() const;
double m_learningRatePerSample;
LearningRatesPerSample m_learningRates;
AdditionalLearningOptions m_additionalOptions;
@ -91,6 +75,16 @@ namespace CNTK
template <typename ElementType>
void PostProcess(const Parameter& parameter, const NDArrayViewPtr& gradientValue, size_t actualMBSize) const;
// Returns an NDArrayView with the required shape, with the same data type as parameter value
// and allocated on the same device.
static NDArrayViewPtr AllocateNDArrayView(const Parameter& parameter, const NDShape& shape);
// Retrieves the shape of the matrix corresponding to the parameter value.
static NDShape GetMatrixShape(const Parameter& parameter);
size_t m_sampleCount;
size_t m_minibatchCount;
private:
// Templatized update function, it invokes preprocess and postprocess using the provided
// template parameter and also invokes virtual Update method implemented in one of the subclasses.
@ -101,18 +95,20 @@ namespace CNTK
static bool HasNan(const NDArrayViewPtr& value, const char* name);
static void Print(const NDArrayViewPtr& value, const char* msg);
size_t m_sampleCount;
static const size_t checkpointVersion = 1;
};
// Vanilla gradient descent optimization algorithm.
class LearnerSGD : public LearnerBase
{
public:
LearnerSGD(const std::unordered_set<Parameter>& parameters, double learningRatePerSample = 0)
: LearnerBase(parameters), m_momentumPerSample(0.0), m_useNesterovAcceleration(false)
{
SetLearningRate(learningRatePerSample);
}
LearnerSGD(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
bool allocateSmoothGradients = true)
: LearnerBase(parameters, learningRates, allocateSmoothGradients),
m_momentums(0.0),
m_useNesterovAcceleration(false)
{ }
protected:
@ -121,7 +117,8 @@ namespace CNTK
template <typename ElementType>
void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const;
double m_momentumPerSample;
// TODO: Move m_momentums to LearnerMomentumSGD as soon as NormalGrad is refactored.
MomentumsPerSample m_momentums;
bool m_useNesterovAcceleration;
};
@ -129,20 +126,25 @@ namespace CNTK
class LearnerMomentumSGD : public LearnerSGD
{
public:
LearnerMomentumSGD(const std::unordered_set<Parameter>& parameters)
: LearnerSGD(parameters)
{}
void SetMomentum(double value) { m_momentumPerSample = value; }
LearnerMomentumSGD(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums,
bool allocateSmoothGradients = true)
: LearnerSGD(parameters, learningRates, allocateSmoothGradients)
{
m_momentums = momentums;
}
};
// Nesterov's accelerated gradient descent.
class LearnerNesterov : public LearnerSGD
class LearnerNesterov : public LearnerMomentumSGD
{
public:
LearnerNesterov(const std::unordered_set<Parameter>& parameters)
: LearnerSGD(parameters)
LearnerNesterov(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums)
: LearnerMomentumSGD(parameters, learningRates, momentums)
{
m_useNesterovAcceleration = true;
}
@ -152,7 +154,9 @@ namespace CNTK
{
public:
LearnerAdaGrad(const std::unordered_set<Parameter>& parameters, bool needAveMultiplier);
LearnerAdaGrad(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
bool needAveMultiplier);
protected:
bool m_needAveMultiplier;
@ -167,7 +171,9 @@ namespace CNTK
{
public:
LearnerFSAdaGrad(const std::unordered_set<Parameter>& parameters);
LearnerFSAdaGrad(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums);
protected:
@ -182,7 +188,9 @@ namespace CNTK
public:
LearnerRMSProp(const std::unordered_set<Parameter>& parameters,
double gamma, double inc, double dec, double max, double min, bool needAveMultiplier);
const LearningRatesPerSample& learningRates,
double gamma, double inc, double dec, double max, double min,
bool needAveMultiplier);
protected:

View file

@ -6,20 +6,100 @@
#include "stdafx.h"
#include "CNTKLibrary.h"
#include "Utils.h"
#include "File.h"
#include <istream>
#include <ostream>
using namespace std;
namespace CNTK
{
// This wrapper redefines operator<< in terms of an unformatted (binary) write operation.
struct BinaryOStreamWrapper
{
BinaryOStreamWrapper(ostream& s) : m_stream(s) {}
template<typename T>
typename std::enable_if<std::is_pod<T>::value, BinaryOStreamWrapper&>::type
operator<<(const T& value)
{
m_stream.write(reinterpret_cast<const char*>(&value), sizeof(T));
return *this;
}
BinaryOStreamWrapper& operator<<(const wstring& str)
{
*this << str.length();
m_stream.write(reinterpret_cast<const char*>(str.c_str()), str.length() * sizeof(wchar_t));
return *this;
}
operator ostream& () { return m_stream; }
ostream& m_stream;
BinaryOStreamWrapper(const BinaryOStreamWrapper&) = delete;
BinaryOStreamWrapper(BinaryOStreamWrapper&&) = delete;
BinaryOStreamWrapper& operator=(const BinaryOStreamWrapper&) = delete;
BinaryOStreamWrapper& operator=(BinaryOStreamWrapper&&) = delete;
};
// This wrapper redefines operator>> in terms of an unformatted (binary) read operation.
struct BinaryIStreamWrapper
{
BinaryIStreamWrapper(istream& s) : m_stream(s) {}
template<typename T>
typename std::enable_if<std::is_pod<T>::value, BinaryIStreamWrapper&>::type
operator>>(T& value)
{
static_assert(sizeof(T) <= sizeof(size_t), "size_t is the largest supported type.");
m_stream.read(buf, sizeof(T));
value = *(reinterpret_cast<T*>(buf));
return *this;
}
BinaryIStreamWrapper& operator>>(wstring& str)
{
size_t length;
*this >> length;
str.resize(length);
for (size_t i = 0; i < length; ++i)
{
m_stream.read(buf, sizeof(wchar_t));
str[i] = *(reinterpret_cast<wchar_t*>(buf));
}
return *this;
}
operator istream& () const { return m_stream; }
istream& m_stream;
char buf[sizeof(size_t)];
BinaryIStreamWrapper(const BinaryIStreamWrapper&) = delete;
BinaryIStreamWrapper(BinaryIStreamWrapper&&) = delete;
BinaryIStreamWrapper& operator=(const BinaryIStreamWrapper&) = delete;
BinaryIStreamWrapper& operator=(BinaryIStreamWrapper&&) = delete;
};
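// NOTE on the wire format these wrappers imply (an observation, not a
// documented guarantee): POD values are written verbatim in host byte order,
// and wstrings are stored as a size_t length followed by raw wchar_t data.
// Since sizeof(wchar_t) differs between platforms (2 on Windows, 4 on Linux),
// serialized files are only readable back on a compatible platform.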
template <typename T>
T* CreateDataPtr(const T& value)
{
return new T(value);
}
template <>
NDArrayView* CreateDataPtr<NDArrayView>(const NDArrayView& value)
{
// TODO: replace this copy with an alias to value.
NDArrayView* viewPtr = new NDArrayView(value.GetDataType(), value.Shape(), DeviceDescriptor::CPUDevice());
viewPtr->CopyFrom(value);
return viewPtr;
}
template <typename T>
void DictionaryValue::AllocateDataPtr(const T& value)
{
static_assert(is_same<T, NDShape>::value ||
is_same<T, wstring>::value ||
is_same<T, vector<DictionaryValue>>::value ||
is_same<T, Dictionary>::value, "AllocateDataPtr called with invalid type");
m_data.m_ptr = new T(value);
is_same<T, Dictionary>::value ||
is_same<T, NDArrayView>::value,
"AllocateDataPtr called with invalid type");
m_data.m_ptr = CreateDataPtr<T>(value);
}
template <typename T>
@ -31,12 +111,163 @@ namespace CNTK
m_data.m_ptr = nullptr;
}
Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, DictionaryValue& us)
template <typename ElementType>
bool AreEqual(NDArrayView& view1, NDArrayView& view2)
{
if (view1.GetDataType() != view2.GetDataType() ||
view1.Shape() != view2.Shape())
{
return false;
}
ElementType* data1 = nullptr;
ElementType* data2 = nullptr;
if (view1.Device().Type() == DeviceKind::CPU)
{
data1 = view1.WritableDataBuffer<ElementType>();
data2 = view2.WritableDataBuffer<ElementType>();
}
else
{
NDArrayViewPtr temp1CpuDataView = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), view1.Shape(), DeviceDescriptor::CPUDevice());
temp1CpuDataView->CopyFrom(view1);
data1 = temp1CpuDataView->WritableDataBuffer<ElementType>();
NDArrayViewPtr temp2CpuDataView = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), view2.Shape(), DeviceDescriptor::CPUDevice());
temp2CpuDataView->CopyFrom(view2);
data2 = temp2CpuDataView->WritableDataBuffer<ElementType>();
}
size_t numElements = view1.Shape().TotalSize();
for (size_t i = 0; i < numElements; ++i)
{
if (data1[i] != data2[i])
{
return false;
}
}
return true;
}
bool DictionaryValue::operator==(const DictionaryValue& other) const
{
if (this == &other)
{
return true;
}
if (m_valueType != other.m_valueType)
{
return false;
}
switch (m_valueType)
{
case DictionaryValue::Type::Bool:
return (m_data.m_boolean == other.m_data.m_boolean);
case DictionaryValue::Type::SizeT:
return (m_data.m_sizeT == other.m_data.m_sizeT);
case DictionaryValue::Type::Float:
return (m_data.m_float == other.m_data.m_float);
case DictionaryValue::Type::Double:
return (m_data.m_double == other.m_data.m_double);
case DictionaryValue::Type::String:
{
wstring* strPtr1 = reinterpret_cast<wstring*>(m_data.m_ptr);
wstring* strPtr2 = reinterpret_cast<wstring*>(other.m_data.m_ptr);
return (*strPtr1 == *strPtr2);
}
case DictionaryValue::Type::NDShape:
{
NDShape* shapePtr1 = reinterpret_cast<NDShape*>(m_data.m_ptr);
NDShape* shapePtr2 = reinterpret_cast<NDShape*>(other.m_data.m_ptr);
return (*shapePtr1 == *shapePtr2);
}
case DictionaryValue::Type::Vector:
{
vector<DictionaryValue>* vectorPtr1 = reinterpret_cast<vector<DictionaryValue>*>(m_data.m_ptr);
vector<DictionaryValue>* vectorPtr2 = reinterpret_cast<vector<DictionaryValue>*>(other.m_data.m_ptr);
return (*vectorPtr1 == *vectorPtr2);
}
case DictionaryValue::Type::Dictionary:
{
Dictionary* dictPtr1 = reinterpret_cast<Dictionary*>(m_data.m_ptr);
Dictionary* dictPtr2 = reinterpret_cast<Dictionary*>(other.m_data.m_ptr);
return (*dictPtr1 == *dictPtr2);
}
case DictionaryValue::Type::NDArrayView:
{
NDArrayView* viewPtr1 = reinterpret_cast<NDArrayView*>(m_data.m_ptr);
NDArrayView* viewPtr2 = reinterpret_cast<NDArrayView*>(other.m_data.m_ptr);
switch (viewPtr1->GetDataType())
{
case DataType::Float:
return AreEqual<float>(*viewPtr1, *viewPtr2);
case DataType::Double:
return AreEqual<double>(*viewPtr1, *viewPtr2);
default:
NOT_IMPLEMENTED;
}
}
default:
NOT_IMPLEMENTED;
}
}
bool DictionaryValue::operator!=(const DictionaryValue& other) const
{
return !(*this == other);
}
BinaryOStreamWrapper& operator<<(BinaryOStreamWrapper& stream, const NDShape& us)
{
auto size = us.NumAxes();
stream << size;
for (auto i = 0; i < size; i++)
{
stream << us[i];
}
return stream;
}
template <typename T>
void Write(BinaryOStreamWrapper& stream, const NDArrayView& view)
{
assert(view.Device().Type() == DeviceKind::CPU);
auto numElements = view.Shape().TotalSize();
const T* buffer = view.DataBuffer<T>();
for (auto i = 0; i < numElements; ++i)
{
stream << buffer[i];
}
}
template <typename T>
void Read(BinaryIStreamWrapper& stream, NDArrayView& view)
{
assert(view.Device().Type() == DeviceKind::CPU);
auto numElements = view.Shape().TotalSize();
T* buffer = view.WritableDataBuffer<T>();
for (auto i = 0; i < numElements; ++i)
{
stream >> buffer[i];
}
}
istream& operator>>(istream& stdStream, DictionaryValue& us)
{
BinaryIStreamWrapper stream(stdStream);
size_t version;
stream >> version;
stream >> us.m_valueType;
unsigned int type;
stream >> type;
us.m_valueType = static_cast<DictionaryValue::Type>(type);
switch (us.ValueType())
{
@ -52,28 +283,72 @@ namespace CNTK
case DictionaryValue::Type::Double:
stream >> us.m_data.m_double;
break;
case DictionaryValue::Type::String:
{
wstring* strPtr = new wstring();
stream >> *strPtr;
us.m_data.m_ptr = strPtr;
break;
}
case DictionaryValue::Type::NDShape:
{
size_t size;
stream >> size;
vector<size_t> dims(size);
NDShape* shapePtr = new NDShape(size);
for (auto i = 0; i < size; i++)
{
stream >> dims[i];
stream >> shapePtr->operator[](i);
}
us.AllocateDataPtr(NDShape(dims));
us.m_data.m_ptr = shapePtr;
break;
}
case DictionaryValue::Type::Vector:
{
{
size_t size;
stream >> size;
vector<DictionaryValue> values(size);
vector<DictionaryValue>* vectorPtr = new vector<DictionaryValue>(size);
for (auto i = 0; i < size; i++)
{
stream >> values[i];
stream >> vectorPtr->at(i);
}
us.AllocateDataPtr(values);
us.m_data.m_ptr = vectorPtr;
break;
}
case DictionaryValue::Type::Dictionary:
{
Dictionary* dictPtr = new Dictionary();
stream >> *dictPtr;
us.m_data.m_ptr = dictPtr;
break;
}
case DictionaryValue::Type::NDArrayView:
{
unsigned int type;
stream >> type;
DataType dtype = static_cast<DataType>(type);
size_t size;
stream >> size;
NDShape shape(size);
for (auto i = 0; i < size; i++)
{
stream >> shape[i];
}
NDArrayView* viewPtr = new NDArrayView(dtype, shape, DeviceDescriptor::CPUDevice());
switch (dtype)
{
case DataType::Float:
Read<float>(stream, *viewPtr);
break;
case DataType::Double:
Read<double>(stream, *viewPtr);
break;
default:
LogicError("Unsupported DataType %s", DataTypeName(dtype));
}
us.m_data.m_ptr = viewPtr;
break;
}
default:
@ -82,11 +357,13 @@ namespace CNTK
return stream;
}
Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const DictionaryValue& us)
ostream& operator<<(ostream& stdStream, const DictionaryValue& us)
{
BinaryOStreamWrapper stream(stdStream);
stream << us.version;
stream << us.ValueType();
stream << static_cast<unsigned int>(us.ValueType());
switch (us.ValueType())
{
@ -102,15 +379,16 @@ namespace CNTK
case DictionaryValue::Type::Double:
stream << us.m_data.m_double;
break;
case DictionaryValue::Type::String:
{
wstring* stringPtr = reinterpret_cast<wstring*>(us.m_data.m_ptr);
stream << *stringPtr;
break;
}
case DictionaryValue::Type::NDShape:
{
NDShape* shapePtr = reinterpret_cast<NDShape*>(us.m_data.m_ptr);
auto size = shapePtr->NumAxes();
stream << size;
for (auto i = 0; i < size; i++)
{
stream << shapePtr->operator[](i);
}
stream << *shapePtr;
break;
}
case DictionaryValue::Type::Vector:
@ -121,7 +399,31 @@ namespace CNTK
stream << size;
for (auto i = 0; i < size; i++)
{
stream << vectorPtr->operator[](i);
stream << vectorPtr->at(i);
}
break;
}
case DictionaryValue::Type::Dictionary:
{
Dictionary* dictPtr = reinterpret_cast<Dictionary*>(us.m_data.m_ptr);
stream << *dictPtr;
break;
}
case DictionaryValue::Type::NDArrayView:
{
NDArrayView* viewPtr = reinterpret_cast<NDArrayView*>(us.m_data.m_ptr);
stream << static_cast<unsigned int>(viewPtr->GetDataType());
stream << viewPtr->Shape();
switch (viewPtr->GetDataType())
{
case DataType::Float:
Write<float>(stream, *viewPtr);
break;
case DataType::Double:
Write<double>(stream, *viewPtr);
break;
default:
LogicError("Unsupported DataType %s", DataTypeName(viewPtr->GetDataType()));
}
break;
}
@ -148,7 +450,7 @@ namespace CNTK
Dictionary& Dictionary::operator=(const Dictionary& other)
{
assert(this != &other);
m_dictionaryData.reset(new std::unordered_map<std::wstring, DictionaryValue>(*(other.m_dictionaryData)));
m_dictionaryData.reset(new unordered_map<wstring, DictionaryValue>(*(other.m_dictionaryData)));
return *this;
}
@ -183,20 +485,51 @@ namespace CNTK
return (m_dictionaryData->find(key) != m_dictionaryData->end());
}
Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const Dictionary& us)
bool Dictionary::operator==(const Dictionary& other) const
{
if (this == &other)
{
return true;
}
if (m_dictionaryData->size() != other.m_dictionaryData->size())
{
return false;
}
for (auto& kv : *m_dictionaryData)
{
auto result = other.m_dictionaryData->find(kv.first);
if (result == other.m_dictionaryData->end() || kv.second != result->second)
{
return false;
}
}
return true;
}
bool Dictionary::operator!=(const Dictionary& other) const
{
return !(*this == other);
}
ostream& operator<<(ostream& stdStream, const Dictionary& us)
{
BinaryOStreamWrapper stream(stdStream);
stream << us.version;
stream << us.m_dictionaryData->size();
for (auto it = us.m_dictionaryData->begin(); it != us.m_dictionaryData->end(); ++it)
for (auto& kv : *(us.m_dictionaryData))
{
stream << it->first;
stream << it->second;
stream << kv.first;
stream << kv.second;
}
return stream;
}
Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, Dictionary& us)
istream& operator>>(istream& stdStream, Dictionary& us)
{
BinaryIStreamWrapper stream(stdStream);
size_t version;
stream >> version;
size_t size;
@ -206,113 +539,36 @@ namespace CNTK
{
wstring key;
stream >> key;
DictionaryValue value;
stream >> value;
us.m_dictionaryData->insert(make_pair(key, value));
stream >> us[key];
}
return stream;
}
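A minimal round-trip sketch using the operators above (the file name and the entries are illustrative; assumes the scalar DictionaryValue constructors and Dictionary::operator[] declared in CNTKLibrary.h):
std::fstream stream;
stream.open("dict.bin", std::ios_base::binary | std::ios_base::out);
Dictionary original;
original[L"learningRate"] = DictionaryValue(0.005);
original[L"sampleCount"] = DictionaryValue(size_t(1000));
stream << original;   // writes version, size, then each key/value pair
stream.close();
stream.open("dict.bin", std::ios_base::binary | std::ios_base::in);
Dictionary restored;
stream >> restored;
assert(original == restored);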
// Returns the value from the first element whose key is greater than the given sample count,
// or the value from the last element if no such key exists.
template <typename T>
vector<DictionaryValue> SerializeToVector(const NDArrayViewPtr& viewPtr)
const T& TrainingParameterSchedule<T>::operator[](size_t sampleCount) const
{
if (viewPtr->IsSparse())
assert(m_schedule.size() > 0);
auto it = m_schedule.upper_bound(sampleCount);
if (it == m_schedule.end())
{
LogicError("Sparse NDArrayView cannot be serialized into a vector.");
--it;
}
auto numElements = viewPtr->Shape().TotalSize();
vector<DictionaryValue> values(numElements);
NDArrayViewPtr cpuDataViewPtr = viewPtr;
if ((viewPtr->Device().Type() != DeviceKind::CPU))
{
cpuDataViewPtr = MakeSharedObject<NDArrayView>(viewPtr->GetDataType(), viewPtr->Shape(), DeviceDescriptor::CPUDevice());
cpuDataViewPtr->CopyFrom(*viewPtr);
}
const T* buffer = cpuDataViewPtr->DataBuffer<T>();
for (auto i = 0; i < numElements; ++i)
{
T v = buffer[i];
values[i] = DictionaryValue(v);
}
return values;
return it->second;
}
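// Worked trace of the lookup above (keys as populated by the vector constructor,
// e.g. vector {0.5, 0.3} with unit = 10 yields m_schedule = { {10, 0.5}, {20, 0.3} }):
//   operator[](9)  -> upper_bound(9)  points at key 10 -> 0.5
//   operator[](10) -> upper_bound(10) points at key 20 -> 0.3
//   operator[](25) -> upper_bound(25) == end(), step back to key 20 -> 0.3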
template <typename T>
void DeserializeFromVector(const NDArrayViewPtr& viewPtr, const vector<DictionaryValue>& values)
{
if (viewPtr->IsSparse())
{
LogicError("Sparse NDArrayView cannot be deserialized from a vector.");
}
auto numElements = viewPtr->Shape().TotalSize();
if (values.size() != numElements)
{
LogicError("Number of elements (%lu) in the deserialized representation does not match the expected value (%lu)",
values.size(), numElements);
}
NDArrayViewPtr cpuDataViewPtr = viewPtr;
if ((viewPtr->Device().Type() != DeviceKind::CPU))
{
cpuDataViewPtr = MakeSharedObject<NDArrayView>(viewPtr->GetDataType(), viewPtr->Shape(), DeviceDescriptor::CPUDevice());
}
T* buffer = cpuDataViewPtr->WritableDataBuffer<T>();
for (auto i = 0; i < numElements; ++i)
{
buffer[i] = values[i].GetValue<T>();
}
if ((viewPtr->Device().Type() != DeviceKind::CPU))
{
viewPtr->CopyFrom(*cpuDataViewPtr);
}
}
// TODO: we store the type info for every element in the vector, which is extremely redundant.
// Instead, it'd be nice to introduce some sort of DictionaryValueVector.
vector<DictionaryValue> SerializeToVector(const NDArrayViewPtr& viewPtr)
{
switch (viewPtr->GetDataType())
{
case DataType::Float:
return SerializeToVector<float>(viewPtr);
case DataType::Double:
return SerializeToVector<double>(viewPtr);
default:
LogicError("Unsupported DataType %s", DataTypeName(viewPtr->GetDataType()));
}
}
void DeserializeFromVector(const NDArrayViewPtr& viewPtr, const vector<DictionaryValue>& values)
{
switch (viewPtr->GetDataType())
{
case DataType::Float:
DeserializeFromVector<float>(viewPtr, values);
break;
case DataType::Double:
DeserializeFromVector<double>(viewPtr, values);
break;
default:
LogicError("Unsupported DataType %s", DataTypeName(viewPtr->GetDataType()));
}
}
template void DictionaryValue::AllocateDataPtr<NDShape>(const NDShape& value);
template void DictionaryValue::AllocateDataPtr<vector<DictionaryValue>>(const vector<DictionaryValue>& value);
template void DictionaryValue::AllocateDataPtr<wstring>(const wstring& value);
template void DictionaryValue::AllocateDataPtr<Dictionary>(const Dictionary& value);
template void DictionaryValue::AllocateDataPtr<NDArrayView>(const NDArrayView& value);
template void DictionaryValue::FreePtrAsType<NDShape>();
template void DictionaryValue::FreePtrAsType<vector<DictionaryValue>>();
template void DictionaryValue::FreePtrAsType<wstring>();
template void DictionaryValue::FreePtrAsType<Dictionary>();
template void DictionaryValue::FreePtrAsType<NDArrayView>();
template const double& TrainingParameterSchedule<double>::operator[](size_t key) const;
}

View file

@ -167,10 +167,6 @@ namespace CNTK
return var.IsInput() && var.IsSparse();
}
std::vector<DictionaryValue> SerializeToVector(const NDArrayViewPtr& viewPtr);
void DeserializeFromVector(const NDArrayViewPtr& viewPtr, const std::vector<DictionaryValue>& values);
inline void AddIndentation(std::wstringstream& s, size_t numIndentationSpaces)
{
for (size_t i = 0; i < numIndentationSpaces; ++i)
@ -313,4 +309,9 @@ namespace CNTK
return{ paddedOutputMapCount, kernelShape };
}
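// Converts a per-sample momentum to the equivalent per-minibatch value:
// applying momentum m once per sample compounds to m^N over a minibatch of
// N samples; e.g. momentumPerSample = 0.99 with minibatchSize = 100 gives
// 0.99^100 ≈ 0.366.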
inline double MomentumPerMB(double momentumPerSample, size_t minibatchSize)
{
return std::pow(momentumPerSample, minibatchSize);
}
}

View file

@ -1,9 +1,10 @@
#pragma once
#include <exception>
#include <algorithm>
#include "CNTKLibrary.h"
#include <functional>
#include <fstream>
#include <random>
static const double relativeTolerance = 0.001f;
static const double absoluteTolerance = 0.000001f;
@ -21,6 +22,8 @@ inline void FloatingPointVectorCompare(const std::vector<ElementType>& first, co
}
}
static std::mt19937_64 rng(0);
#pragma warning(push)
#pragma warning(disable: 4996)
@ -40,6 +43,12 @@ static inline int _wunlink(const wchar_t *p)
{
return unlink(wtocharpath(p).c_str());
}
static inline FILE *_wfopen(const wchar_t *path, const wchar_t *mode)
{
return fopen(wtocharpath(path).c_str(), wtocharpath(mode).c_str());
}
#endif
template <typename ElementType>
@ -112,3 +121,30 @@ inline float PrevMinibatchTrainingLossValue(const CNTK::Trainer& trainer)
}
#pragma warning(pop)
inline CNTK::NDShape CreateShape(size_t numAxes, size_t maxDimSize)
{
CNTK::NDShape shape(numAxes);
for (size_t i = 0; i < numAxes; ++i)
{
shape[i] = (rng() % maxDimSize) + 1;
}
return shape;
}
inline void OpenStream(std::fstream& stream, const std::wstring& filename, bool readonly)
{
if (filename.empty())
throw std::runtime_error("File: filename is empty");
std::ios_base::openmode mode = std::ios_base::binary;
mode = mode | (readonly ? std::ios_base::in : std::ios_base::out);
#ifdef _MSC_VER
stream.open(filename.c_str(), mode);
#else
stream.open(wtocharpath(filename.c_str()).c_str(), mode);
#endif
stream.exceptions(std::ios_base::failbit | std::ios_base::badbit);
}

View file

@ -0,0 +1,185 @@
#include "CNTKLibrary.h"
#include "Common.h"
#include <string>
#include <random>
#include <initializer_list>
using namespace CNTK;
using namespace std;
static const size_t maxMinibatchSize = 1000;
static const size_t maxNumAxes = 5;
static const size_t maxDimSize = 10;
template <typename ElementType>
void TestUpdate(LearnerPtr& learner, NDShape& shape, size_t numMinibatches, const DeviceDescriptor& device)
{
auto seed = (unsigned long) rng();
unordered_map<Parameter, NDArrayViewPtr> gradientValues;
for (auto i = 0; i < numMinibatches; i++)
{
for (auto& parameter : learner->Parameters())
{
gradientValues[parameter] = NDArrayView::RandomUniform<ElementType>(shape, -1.0, 1.0, seed + i, device);
}
learner->Update(gradientValues, 1);
}
}
template <typename ElementType>
unordered_set<Parameter> CreateParameters(const NDShape& shape, size_t numParameters, const DeviceDescriptor& device)
{
unordered_set<Parameter> parameters;
for (int i = 0; i < numParameters; i++)
{
parameters.insert(
Parameter(NDArrayView::RandomUniform<ElementType>(shape, -1.0, 1.0, i, device),
L"parameter_" + to_wstring(i)));
}
return parameters;
}
template <typename ElementType>
void TestSGDLearner(size_t numParameters, size_t numMinibatches, const DeviceDescriptor& device)
{
NDShape shape = CreateShape(rng() % maxNumAxes + 1, maxDimSize);
auto parameters = CreateParameters<ElementType>(shape, numParameters, device);
auto learner = SGDLearner(parameters, 0.4);
TestUpdate<ElementType>(learner, shape, numMinibatches, device);
}
template <typename ElementType>
void TestMomentumSGDLearner(size_t numParameters, size_t numMinibatches, const DeviceDescriptor& device)
{
NDShape shape = CreateShape(rng() % maxNumAxes + 1, maxDimSize);
auto parameters = CreateParameters<ElementType>(shape, numParameters, device);
MomentumsPerSample momentums({ { 1, 1.0 }, { 3, 0.1 }, { 10, 0.01 } }, 2);
auto learner = MomentumSGDLearner(parameters, vector<double>{0.3, 0.2, 0.1}, momentums);
TestUpdate<ElementType>(learner, shape, numMinibatches, device);
}
template <typename ElementType>
void TestNesterovLearner(size_t numParameters, size_t numMinibatches, const DeviceDescriptor& device)
{
NDShape shape = CreateShape(rng() % maxNumAxes + 1, maxDimSize);
auto parameters = CreateParameters<ElementType>(shape, numParameters, device);
auto learner = NesterovLearner(parameters, LearningRatesPerSample({ { 1, 0.5 }, { 10, 0.25 }, { 20, 0.125 } }, 3 ), 0.2);
TestUpdate<ElementType>(learner, shape, numMinibatches, device);
}
template <typename ElementType>
void TestAdaGradLearner(size_t numParameters, size_t numMinibatches, const DeviceDescriptor& device)
{
NDShape shape = CreateShape(rng() % maxNumAxes + 1, maxDimSize);
auto parameters = CreateParameters<ElementType>(shape, numParameters, device);
auto learner = AdaGradLearner(parameters, { vector<double>{0.5, 0.4, 0.3, 0.2, 0.1}, 2 }, true);
TestUpdate<ElementType>(learner, shape, numMinibatches, device);
}
template <typename ElementType>
void TestFSAdaGradLearner(size_t numParameters, size_t numMinibatches, const DeviceDescriptor& device)
{
NDShape shape = CreateShape(rng() % maxNumAxes + 1, maxDimSize);
auto parameters = CreateParameters<ElementType>(shape, numParameters, device);
auto learner = FSAdaGradLearner(parameters, vector<double>{ 0.5 }, vector<double>{0.05});
TestUpdate<ElementType>(learner, shape, numMinibatches, device);
}
template <typename ElementType>
void TestRMSPropLearner(size_t numParameters, size_t numMinibatches, const DeviceDescriptor& device)
{
NDShape shape = CreateShape(rng() % maxNumAxes + 1, maxDimSize);
auto parameters = CreateParameters<ElementType>(shape, numParameters, device);
auto learner = RMSPropLearner(parameters, { { 3, 0.7 }, { 1, 0.2 } }, 0.01, 0.02, 0.03, 0.1, 0.001 );
TestUpdate<ElementType>(learner, shape, numMinibatches, device);
}
void TestTrainingParametersSchedule()
{
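    // A schedule is a list of {count, value} pairs; each value holds for
    // count * unit samples (unit defaults to 1), and the last value applies
    // to all remaining samples.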
LearningRatesPerSample schedule1 = 0.5;
assert(schedule1[0] == 0.5);
assert(schedule1[1] == 0.5);
assert(schedule1[100] == 0.5);
LearningRatesPerSample schedule2 = vector<double>{ 0.5 };
assert(schedule2[0] == 0.5);
assert(schedule2[10] == 0.5);
assert(schedule2[100] == 0.5);
LearningRatesPerSample schedule3 = vector<double>{ 0.5, 0.3 };
assert(schedule3[0] == 0.5);
assert(schedule3[1] == 0.3);
assert(schedule3[100] == 0.3);
LearningRatesPerSample schedule4 = { vector<double>{ 0.5 }, 10 };
assert(schedule4[0] == 0.5);
assert(schedule4[10] == 0.5);
assert(schedule4[100] == 0.5);
LearningRatesPerSample schedule5 = { vector<double>{ 0.5, 0.3, 0.2 }, 10 };
assert(schedule5[0] == 0.5);
assert(schedule5[9] == 0.5);
assert(schedule5[10] == 0.3);
assert(schedule5[19] == 0.3);
assert(schedule5[20] == 0.2);
assert(schedule5[100] == 0.2);
LearningRatesPerSample schedule6 = { {1, 0.5} };
assert(schedule6[0] == 0.5);
assert(schedule6[10] == 0.5);
assert(schedule6[100] == 0.5);
LearningRatesPerSample schedule7 = { { 1, 0.5 }, { 1, 0.3 }, {1, 0.2} };
assert(schedule7[0] == 0.5);
assert(schedule7[1] == 0.3);
assert(schedule7[2] == 0.2);
assert(schedule7[100] == 0.2);
LearningRatesPerSample schedule8( { { 1, 0.5 }, { 1, 0.3 }, { 1, 0.2 } }, 10 );
assert(schedule8[0] == 0.5);
assert(schedule8[9] == 0.5);
assert(schedule8[10] == 0.3);
assert(schedule8[19] == 0.3);
assert(schedule8[20] == 0.2);
assert(schedule8[100] == 0.2);
LearningRatesPerSample schedule9 = { { 3, 0.5 }, { 2, 0.3 }, {1, 0.2} };
assert(schedule9[0] == 0.5);
assert(schedule9[2] == 0.5);
assert(schedule9[3] == 0.3);
assert(schedule9[4] == 0.3);
assert(schedule9[5] == 0.2);
assert(schedule9[100] == 0.2);
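    // Same pairs as schedule9, but the unit size of 10 stretches each segment tenfold.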
LearningRatesPerSample schedule10( { { 3, 0.5 }, { 2, 0.3 }, { 1, 0.2 } }, 10 );
assert(schedule10[0] == 0.5);
assert(schedule10[29] == 0.5);
assert(schedule10[30] == 0.3);
assert(schedule10[49] == 0.3);
assert(schedule10[50] == 0.2);
assert(schedule10[100] == 0.2);
}
void LearnerTests()
{
TestTrainingParametersSchedule();
TestSGDLearner<double>(5, 3, DeviceDescriptor::CPUDevice());
#ifndef CPUONLY
TestMomentumSGDLearner<float>(3, 11, DeviceDescriptor::GPUDevice(0));
TestNesterovLearner<float>(1, 20, DeviceDescriptor::GPUDevice(0));
#else
TestMomentumSGDLearner<float>(3, 11, DeviceDescriptor::CPUDevice());
TestNesterovLearner<float>(1, 20, DeviceDescriptor::CPUDevice());
#endif
TestAdaGradLearner<double>(2, 10, DeviceDescriptor::CPUDevice());
TestFSAdaGradLearner<double>(10, 2, DeviceDescriptor::CPUDevice());
TestRMSPropLearner<float>(3, 3, DeviceDescriptor::CPUDevice());
}

View file

@@ -9,6 +9,8 @@ void FeedForwardTests();
void RecurrentFunctionTests();
void TrainerTests();
void TestCifarResnet();
void SerializationTests();
void LearnerTests();
int main()
{
@@ -19,6 +21,8 @@ int main()
RecurrentFunctionTests();
TrainerTests();
SerializationTests();
LearnerTests();
TestCifarResnet();

View file

@@ -0,0 +1,224 @@
#include "CNTKLibrary.h"
#include "Common.h"
#include <string>
#include <random>
#include <vector>
using namespace CNTK;
using namespace std;
using namespace Microsoft::MSR::CNTK;
static const size_t maxNestingDepth = 10;
static const size_t maxNestedDictSize = 10;
static const size_t maxNestedVectorSize = 100;
static const size_t maxNDShapeSize = 100;
static const size_t maxNumAxes = 10;
static const size_t maxDimSize = 15;
static size_t keyCounter = 0;
static uniform_real_distribution<double> double_dist = uniform_real_distribution<double>();
static uniform_real_distribution<float> float_dist = uniform_real_distribution<float>();
static std::wstring tempFilePath = L"serialization.tmp";
DictionaryValue CreateDictionaryValue(DictionaryValue::Type, size_t);
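// Draws a random type in the range Bool..NDArrayView, skipping Type::None
// (assumed to be the first enumerator).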
DictionaryValue::Type GetType()
{
    return DictionaryValue::Type((rng() % (unsigned int) DictionaryValue::Type::NDArrayView) + 1);
}
void AddKeyValuePair(Dictionary& dict, size_t depth)
{
auto type = GetType();
    // Once at the maximum nesting depth, re-roll until we get a non-container type.
    while (depth >= maxNestingDepth &&
           (type == DictionaryValue::Type::Vector ||
            type == DictionaryValue::Type::Dictionary))
{
type = GetType();
}
dict[L"key" + to_wstring(keyCounter++)] = CreateDictionaryValue(type, depth);
}
Dictionary CreateDictionary(size_t size, size_t depth = 0)
{
Dictionary dict;
    for (size_t i = 0; i < size; ++i)
{
AddKeyValuePair(dict, depth);
}
return dict;
}
template <typename ElementType>
NDArrayViewPtr CreateNDArrayView(size_t numAxes, const DeviceDescriptor& device)
{
NDShape viewShape(numAxes);
for (size_t i = 0; i < numAxes; ++i)
viewShape[i] = (rng() % maxDimSize) + 1;
return NDArrayView::RandomUniform<ElementType>(viewShape, ElementType(-4.0), ElementType(19.0), 1, device);
}
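// Randomly picks the rank, the element type (float or double) and, when GPU
// support is compiled in, the device.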
NDArrayViewPtr CreateNDArrayView()
{
auto numAxes = (rng() % maxNumAxes) + 1;
auto device = DeviceDescriptor::CPUDevice();
#ifndef CPUONLY
if (rng() % 2 == 0)
{
device = DeviceDescriptor::GPUDevice(0);
}
#endif
return (rng() % 2 == 0) ?
CreateNDArrayView<float>(numAxes, device) : CreateNDArrayView<double>(numAxes, device);
}
DictionaryValue CreateDictionaryValue(DictionaryValue::Type type, size_t depth)
{
switch (type)
{
case DictionaryValue::Type::Bool:
return DictionaryValue(!!(rng() % 2));
case DictionaryValue::Type::SizeT:
return DictionaryValue(rng());
case DictionaryValue::Type::Float:
return DictionaryValue(float_dist(rng));
case DictionaryValue::Type::Double:
return DictionaryValue(double_dist(rng));
case DictionaryValue::Type::String:
return DictionaryValue(to_wstring(rng()));
case DictionaryValue::Type::NDShape:
{
size_t size = rng() % maxNDShapeSize + 1;
NDShape shape(size);
        for (size_t i = 0; i < size; i++)
{
shape[i] = rng();
}
return DictionaryValue(shape);
}
case DictionaryValue::Type::Vector:
{
        auto elementType = GetType(); // a distinct name avoids shadowing the 'type' parameter
        size_t size = rng() % maxNestedVectorSize + 1;
        vector<DictionaryValue> elements(size); // likewise avoids shadowing std::vector
        for (size_t i = 0; i < size; i++)
        {
            elements[i] = CreateDictionaryValue(elementType, depth + 1);
        }
        return DictionaryValue(elements);
}
case DictionaryValue::Type::Dictionary:
return DictionaryValue(CreateDictionary(rng() % maxNestedDictSize + 1, depth + 1));
case DictionaryValue::Type::NDArrayView:
return DictionaryValue(*(CreateNDArrayView()));
default:
NOT_IMPLEMENTED;
}
}
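// Round-trips a randomly generated dictionary through a file stream and
// verifies the deserialized copy compares equal to the original.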
void TestDictionarySerialization(size_t dictSize)
{
    if ((_wunlink(tempFilePath.c_str()) != 0) && (errno != ENOENT))
        throw std::runtime_error("Error deleting temporary test file 'serialization.tmp'.");
Dictionary originalDict = CreateDictionary(dictSize);
{
fstream stream;
OpenStream(stream, tempFilePath, false);
stream << originalDict;
stream.flush();
}
Dictionary deserializedDict;
{
fstream stream;
OpenStream(stream, tempFilePath, true);
stream >> deserializedDict;
}
if (originalDict != deserializedDict)
throw std::runtime_error("TestDictionarySerialization: original and deserialized dictionaries are not identical.");
}
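// Checkpoints a learner mid-training, restores a second learner from the
// checkpoint, applies identical updates to both, and verifies that their
// checkpoint states still match.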
template <typename ElementType>
void TestLearnerSerialization(int numParameters, const DeviceDescriptor& device)
{
    if ((_wunlink(tempFilePath.c_str()) != 0) && (errno != ENOENT))
        throw std::runtime_error("Error deleting temporary test file 'serialization.tmp'.");
NDShape shape = CreateShape(5, maxDimSize);
unordered_set<Parameter> parameters;
unordered_map<Parameter, NDArrayViewPtr> gradientValues;
for (int i = 0; i < numParameters; i++)
{
Parameter parameter(NDArrayView::RandomUniform<ElementType>(shape, -0.5, 0.5, i, device), L"parameter_" + to_wstring(i));
parameters.insert(parameter);
gradientValues[parameter] = NDArrayView::RandomUniform<ElementType>(shape, -0.5, 0.5, numParameters + i, device);
}
auto learner1 = SGDLearner(parameters, 0.05);
learner1->Update(gradientValues, 1);
{
auto checkpoint = learner1->GetCheckpointState();
fstream stream;
OpenStream(stream, tempFilePath, false);
stream << checkpoint;
stream.flush();
}
auto learner2 = SGDLearner(parameters, 0.05);
{
Dictionary checkpoint;
fstream stream;
OpenStream(stream, tempFilePath, true);
stream >> checkpoint;
learner2->RestoreFromCheckpoint(checkpoint);
}
int i = 0;
for (auto parameter : parameters)
{
gradientValues[parameter] = NDArrayView::RandomUniform<ElementType>(shape, -0.5, 0.5, 2*numParameters + i, device);
i++;
}
learner1->Update(gradientValues, 1);
learner2->Update(gradientValues, 1);
auto checkpoint1 = learner1->GetCheckpointState();
auto checkpoint2 = learner2->GetCheckpointState();
if (checkpoint1 != checkpoint2)
throw std::runtime_error("TestLearnerSerialization: original and restored from a checkpoint learners diverge.");
}
void SerializationTests()
{
TestDictionarySerialization(4);
TestDictionarySerialization(8);
TestDictionarySerialization(16);
TestLearnerSerialization<float>(5, DeviceDescriptor::CPUDevice());
TestLearnerSerialization<double>(10, DeviceDescriptor::CPUDevice());
#ifndef CPUONLY
TestLearnerSerialization<float>(5, DeviceDescriptor::GPUDevice(0));
    TestLearnerSerialization<double>(10, DeviceDescriptor::GPUDevice(0));
#endif
}

View file

@@ -110,6 +110,8 @@
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="CifarResNet.cpp" />
<ClCompile Include="LearnerTests.cpp" />
<ClCompile Include="SerializationTests.cpp" />
<ClCompile Include="FeedForwardTests.cpp" />
<ClCompile Include="Main.cpp" />
<ClCompile Include="NDArrayViewTests.cpp" />

View file

@@ -36,6 +36,12 @@
<ClCompile Include="CifarResNet.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="SerializationTests.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="LearnerTests.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Common.h">