Improve V2 Learner API, add basic serialization.
FSAdaGrad, RMSProp: pre-allocate smoothed gradients with the expected number of columns. Improve configuration of the built-in learners: add learning rate and momentum schedules. Add NDArrayView as a DictionaryValue type. Add tests for serialization and basic learner functionality.
This commit is contained in:
Parent: 32f97fd266
Commit: 9bd9308d2e

Makefile | 2
@@ -409,6 +409,8 @@ CNTKLIBRARY_TESTS_SRC =\
 	Tests/UnitTests/V2LibraryTests/TensorTests.cpp \
 	Tests/UnitTests/V2LibraryTests/TrainerTests.cpp \
 	Tests/UnitTests/V2LibraryTests/CifarResNet.cpp \
+	Tests/UnitTests/V2LibraryTests/SerializationTests.cpp \
+	Tests/UnitTests/V2LibraryTests/LearnerTests.cpp \
 
 CNTKLIBRARY_TESTS:=$(BINDIR)/v2librarytests
 CNTKLIBRARY_TESTS_OBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(patsubst %.cpp, $(OBJDIR)/%.o, $(CNTKLIBRARY_TESTS_SRC)))
@@ -20,10 +20,12 @@
 #include <array>
 #include <stdarg.h>
 #include <assert.h>
+#include <map>
 #include <unordered_map>
 #include <unordered_set>
 #include <string>
 #include <sstream>
+#include <iosfwd>
 #include<algorithm>
 
 namespace CNTK
@@ -242,7 +244,7 @@ namespace CNTK
     }
 
     ///
-    /// Creates and returns a new shape contructed by appending the dimensions of the specified 'shape' to 'this' shape's dimensions.
+    /// Creates and returns a new shape constructed by appending the dimensions of the specified 'shape' to 'this' shape's dimensions.
     ///
     NDShape AppendShape(const NDShape& shape) const
     {
@@ -1645,6 +1647,7 @@ namespace CNTK
         NDShape,
         Vector,
         Dictionary,
+        NDArrayView,
     };
 
     static const char* TypeName(Type type)
@@ -1669,6 +1672,8 @@ namespace CNTK
             return "Vector";
         case Type::Dictionary:
             return "Dictionary";
+        case Type::NDArrayView:
+            return "NDArrayView";
         default:
             LogicError("Unknown DictionaryValue::Type");
         }
@@ -1715,8 +1720,9 @@ namespace CNTK
         static_assert((std::is_same<T, NDShape>::value ||
                        std::is_same<T, std::wstring>::value ||
                        std::is_same<T, std::vector<DictionaryValue>>::value ||
-                       std::is_same<T, Dictionary>::value),
-                      "Unsupported ValueType");
+                       std::is_same<T, Dictionary>::value ||
+                       std::is_same<T, NDArrayView>::value),
+                      "Unsupported ValueType");
 
         AllocateDataPtr(value);
     }
@@ -1728,6 +1734,13 @@ namespace CNTK
         *this = other;
     }
 
+    DictionaryValue(DictionaryValue&& other) : m_valueType(Type::Bool)
+    {
+        // The m_valueType must have been set to a non-ptr type to prevent an attempt to interpret
+        // the underlying uninitialized value as a ptr and free it.
+        *this = std::move(other);
+    }
+
     DictionaryValue& operator=(const DictionaryValue& other)
     {
         if (this != &other)
@@ -1745,11 +1758,34 @@ namespace CNTK
                 AllocateDataPtr(other.GetValue<std::vector<DictionaryValue>>());
             else if (other.m_valueType == Type::Dictionary)
                 AllocateDataPtr(other.GetValue<Dictionary>());
+            else if (other.m_valueType == Type::NDArrayView)
+                AllocateDataPtr(other.GetValue<NDArrayView>());
         }
 
         return *this;
     }
 
+    DictionaryValue& operator=(DictionaryValue&& other)
+    {
+        FreeDataPtr();
+
+        m_valueType = other.m_valueType;
+        m_data = other.m_data;
+
+        if (other.m_valueType == Type::String ||
+            other.m_valueType == Type::NDShape ||
+            other.m_valueType == Type::Vector ||
+            other.m_valueType == Type::Dictionary ||
+            other.m_valueType == Type::NDArrayView)
+        {
+            other.m_data.m_ptr = nullptr;
+        }
+
+        other.m_valueType = Type::None;
+
+        return *this;
+    }
+
     ~DictionaryValue()
     {
         FreeDataPtr();
@@ -1786,7 +1822,8 @@ namespace CNTK
     template <typename T, typename std::enable_if<std::is_same<T, NDShape>::value ||
                                                   std::is_same<T, std::wstring>::value ||
                                                   std::is_same<T, std::vector<DictionaryValue>>::value ||
-                                                  std::is_same<T, Dictionary>::value>::type* = nullptr>
+                                                  std::is_same<T, Dictionary>::value ||
+                                                  std::is_same<T, NDArrayView>::value>::type* = nullptr>
     const T& GetValue() const
     {
         VerifyType<T>();
@@ -1803,8 +1840,11 @@ namespace CNTK
         return m_valueType;
     }
 
-    friend CNTK_API Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, DictionaryValue& us);
-    friend CNTK_API Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const DictionaryValue& us);
+    CNTK_API bool operator==(const DictionaryValue& other) const;
+    CNTK_API bool operator!=(const DictionaryValue& other) const;
+
+    friend CNTK_API std::istream& operator>>(std::istream& stream, DictionaryValue& us);
+    friend CNTK_API std::ostream& operator<<(std::ostream& stream, const DictionaryValue& us);
 
 private:
     template <typename T>
@@ -1816,8 +1856,9 @@ namespace CNTK
                       std::is_same<T, double>::value ||
                       std::is_same<T, std::wstring>::value ||
                       std::is_same<T, NDShape>::value ||
-                      std::is_same<T, std::vector<DictionaryValue>>::value ||
-                      std::is_same<T, Dictionary>::value),
+                      std::is_same<T, std::vector<DictionaryValue>>::value ||
+                      std::is_same<T, Dictionary>::value ||
+                      std::is_same<T, NDArrayView>::value),
                       "Unsupported ValueType");
 
         if (std::is_same<T, bool>::value) return Type::Bool;
@@ -1828,6 +1869,7 @@ namespace CNTK
         if (std::is_same<T, NDShape>::value) return Type::NDShape;
         if (std::is_same<T, std::vector<DictionaryValue>>::value) return Type::Vector;
         if (std::is_same<T, Dictionary>::value) return Type::Dictionary;
+        if (std::is_same<T, NDArrayView>::value) return Type::NDArrayView;
     }
 
     template <typename T>
@@ -1853,6 +1895,8 @@ namespace CNTK
             FreePtrAsType<std::vector<DictionaryValue>>();
         else if (m_valueType == Type::Dictionary)
             FreePtrAsType<Dictionary>();
+        else if (m_valueType == Type::NDArrayView)
+            FreePtrAsType<NDArrayView>();
     }
 
     Type m_valueType;
@@ -1906,9 +1950,11 @@ namespace CNTK
         return Contains(key.c_str());
     }
 
+    CNTK_API bool operator==(const Dictionary& other) const;
+    CNTK_API bool operator!=(const Dictionary& other) const;
+
-    friend CNTK_API Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, Dictionary& us);
-    friend CNTK_API Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const Dictionary& us);
+    friend CNTK_API std::istream& operator>>(std::istream& stream, Dictionary& us);
+    friend CNTK_API std::ostream& operator<<(std::ostream& stream, const Dictionary& us);
 
 private:
     std::shared_ptr<std::unordered_map<std::wstring, DictionaryValue>> m_dictionaryData;
@@ -1957,37 +2003,127 @@ namespace CNTK
 
     };
 
+    ///
+    /// A collection of key-value pairs that represents a training parameter schedule in
+    /// terms of the number of processed samples.
+    /// This class provides a number of convenience constructors to allow easy conversion
+    /// from a single value, a vector of values and a list of pairs to the training schedule.
+    ///
+    template <typename T>
+    class TrainingParameterSchedule
+    {
+    public:
+        ///
+        /// Create a schedule with a constant parameter value.
+        ///
+        TrainingParameterSchedule(T value)
+            : m_schedule({ std::make_pair(0, value) }), m_unit(1)
+        {}
+
+        ///
+        /// Create a schedule where the parameter changes its value every 'unit' samples:
+        /// schedule[0] is used for the first 'unit' samples, schedule[1] -- for the second,
+        /// and so on. The last value is then used repeatedly until the end of training.
+        ///
+        TrainingParameterSchedule(const std::vector<T>& schedule, size_t unit = 1)
+            : m_unit(unit)
+        {
+            // TODO: 0 will be used to mean "the entire sweep"
+            if (unit == 0)
+                RuntimeError("TrainingParameterSchedule::constructor : 'unit' cannot be 0.");
+
+            if (schedule.size() == 0)
+                RuntimeError("TrainingParameterSchedule::constructor : schedule is empty.");
+
+            size_t i = 1;
+            for (const auto& value : schedule)
+            {
+                m_schedule[m_unit * i++] = value;
+            }
+        }
+
+        ///
+        /// Create a schedule using the list of key-value pairs, where the key specifies
+        /// the number of 'units' the parameter should maintain the corresponding value.
+        /// The value from the last pair is used repeatedly until the end of training.
+        /// For example, {{1, 0.05}, {2, 0.1}, {1, 0.005}} with unit = 100 corresponds to
+        /// a schedule where the value of '0.05' is used for the first 100 samples, then
+        /// '0.1' is used for the next 200 samples, after which the value switches
+        /// to '0.005'.
+        ///
+        TrainingParameterSchedule(const std::initializer_list<std::pair<const size_t, T>>& schedule, size_t unit = 1)
+            : m_unit(unit)
+        {
+            // TODO: 0 will be used to mean "the entire sweep"
+            if (unit == 0)
+                RuntimeError("TrainingParameterSchedule::constructor : 'unit' cannot be 0.");
+
+            if (schedule.size() == 0)
+                RuntimeError("TrainingParameterSchedule::constructor : schedule is empty.");
+
+            size_t i = 0;
+            for (const auto& it : schedule)
+            {
+                if (it.first == 0)
+                    RuntimeError("TrainingParameterSchedule::constructor : unit count cannot be 0.");
+
+                i += it.first;
+                m_schedule[m_unit * i] = it.second;
+            }
+        }
+
+        ///
+        /// Returns a value corresponding to the absolute sample count from the beginning of training.
+        ///
+        CNTK_API const T& operator[](size_t sampleCount) const;
+
+    private:
+        std::map<size_t, T> m_schedule;
+        size_t m_unit;
+    };
+
+    typedef TrainingParameterSchedule<double> LearningRatesPerSample;
+    typedef TrainingParameterSchedule<double> MomentumsPerSample;
+
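+    // Usage sketch: with unit = 100, LearningRatesPerSample({ { 1, 0.05 }, { 2, 0.1 }, { 1, 0.005 } }, 100)
+    // yields 0.05 for samples [0, 100), 0.1 for samples [100, 300), and 0.005 from sample 300 onwards;
+    // LearningRatesPerSample(0.05) keeps a constant rate for the whole run.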
     ///
     /// Create an instance of the CNTK built-in SGD learner.
     ///
-    /// TODO: add additional SGD parameters here (a collection of learning rate values)
-    CNTK_API LearnerPtr SGDLearner(const std::unordered_set<Parameter>& parameters, double learningRatePerSample);
+    CNTK_API LearnerPtr SGDLearner(const std::unordered_set<Parameter>& parameters,
+                                   const LearningRatesPerSample& learningRates);
 
     ///
     /// Create an instance of the CNTK built-in Momentum SGD learner.
     ///
-    /// TODO: add additional Momentum parameters here (a collection of momentum rate values)
-    CNTK_API LearnerPtr MomentumSGDLearner(const std::unordered_set<Parameter>& parameters);
+    CNTK_API LearnerPtr MomentumSGDLearner(const std::unordered_set<Parameter>& parameters,
+                                           const LearningRatesPerSample& learningRates,
+                                           const MomentumsPerSample& momentums);
 
     ///
     /// Create an instance of the CNTK built-in Nesterov's accelerated SGD learner.
     ///
-    CNTK_API LearnerPtr NesterovLearner(const std::unordered_set<Parameter>& parameters);
+    CNTK_API LearnerPtr NesterovLearner(const std::unordered_set<Parameter>& parameters,
+                                        const LearningRatesPerSample& learningRates,
+                                        const MomentumsPerSample& momentums);
 
     ///
    /// Create an instance of the CNTK built-in AdaGrad learner.
     ///
-    CNTK_API LearnerPtr AdaGradLearner(const std::unordered_set<Parameter>& parameters, bool needAveMultiplier = true);
+    CNTK_API LearnerPtr AdaGradLearner(const std::unordered_set<Parameter>& parameters,
+                                       const LearningRatesPerSample& learningRates,
+                                       bool needAveMultiplier = true);
 
     ///
     /// Create an instance of the CNTK built-in FSAdaGrad (improved AdaGrad) learner.
     ///
-    CNTK_API LearnerPtr FSAdaGradLearner(const std::unordered_set<Parameter>& parameters);
+    CNTK_API LearnerPtr FSAdaGradLearner(const std::unordered_set<Parameter>& parameters,
+                                         const LearningRatesPerSample& learningRates,
+                                         const MomentumsPerSample& momentums);
 
     ///
     /// Create an instance of the CNTK built-in RMSProp learner.
     ///
     CNTK_API LearnerPtr RMSPropLearner(const std::unordered_set<Parameter>& parameters,
+                                       const LearningRatesPerSample& learningRates,
                                        double gamma,
                                        double inc,
                                        double dec,
@@ -1997,7 +2133,7 @@ namespace CNTK
 
     ///
     /// Trainer is the top-level abstraction responsible for the orchestration of the training of a model
-    /// using the specified learners and training data either explicilty supplied as Value objects or from
+    /// using the specified learners and training data either explicitly supplied as Value objects or from
     /// a MinibatchSource object.
     ///
     class Trainer
@@ -2085,7 +2221,7 @@ namespace CNTK
     };
 
     ///
-    /// Abstraction for generating minbatches of samples for training/evaluation.
+    /// Abstraction for generating minibatches of samples for training/evaluation.
     ///
     class MinibatchSource : public std::enable_shared_from_this<MinibatchSource>
     {
@@ -2101,7 +2237,7 @@ namespace CNTK
     /// #samples or both. In case the size is specified in terms of both #sequences and #samples, the smaller of the 2 is taken. The actual
     /// returned size of the minibatch is the min across all streams. Also the requested MB size fields in the maps are updated by the
     /// MinibatchSource to contain the actual #sequences and #samples in the returned minibatch for the corresponding stream.
-    /// The return value indciates if the MinibatchSource will return any further data in subsequent calls of this function.
+    /// The return value indicates if the MinibatchSource will return any further data in subsequent calls of this function.
     ///
     virtual std::unordered_map<StreamInfo, MinibatchData> GetNextMinibatch(const std::unordered_map<StreamInfo, std::pair<size_t, size_t>>& perStreamMBSizeLimits,
                                                                            const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice()) = 0;
@@ -53,8 +53,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 
     template <typename ElementType>
     class ComputationNode;
-
-    class File;
 }}}
 
 // TODO: The following should be reconciled with the equivalent code in the CNTK implementation
@@ -139,7 +137,7 @@ namespace CNTK
 #define NOT_IMPLEMENTED \
     { \
         fprintf(stderr, "Inside File: %s Line: %d Function: %s -> Feature Not Implemented.\n", __FILE__, __LINE__, __FUNCTION__); \
-        LogicError("Inside File: %s Line: %d Function: %s -> Feature Not Implemented.\n", __FILE__, __LINE__, __FUNCTION__); \
+        CNTK::LogicError("Inside File: %s Line: %d Function: %s -> Feature Not Implemented.\n", __FILE__, __LINE__, __FUNCTION__); \
     }
 #endif
 }
@@ -8,19 +8,18 @@
 #include "Utils.h"
 
 #define UPDATE_FUNCTION \
-    switch (smoothedGradientValue->GetDataType()) \
+    switch (smoothedGradientValue->GetDataType()) \
     { \
     case DataType::Float: \
-        Update<float>(parameter, gradientValue, smoothedGradientValue, trainingSampleCount); \
+        Update<float>(parameter, gradientValue, smoothedGradientValue, trainingSampleCount); \
         break; \
     case DataType::Double: \
-        Update<double>(parameter, gradientValue, smoothedGradientValue, trainingSampleCount); \
+        Update<double>(parameter, gradientValue, smoothedGradientValue, trainingSampleCount); \
        break; \
     default: \
         NOT_IMPLEMENTED; \
     }
-
 
 using namespace Microsoft::MSR::CNTK;
 using namespace std;
@@ -141,7 +140,7 @@ namespace CNTK
             // L1 regularizer with proximal gradient descent method
             if (m_additionalOptions.l1RegularizationWeight > 0)
             {
-                auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
+                auto learningRate = ElementType(m_learningRates[m_sampleCount]);
                 // multiply by actualMBSize so that it's invariant to minibatch size since learning rate is per sample
                 auto weight = ElementType(learningRate * m_additionalOptions.l1RegularizationWeight * actualMBSize);
                 parameterValue->GetWritableMatrix<ElementType>()->InplaceSoftThreshold(weight);
@@ -154,48 +153,49 @@ namespace CNTK
         return arrayView->GetWritableTensorView<ElementType>();
     }
 
-    LearnerBase::LearnerBase(const unordered_set<Parameter>& parameters)
+    LearnerBase::LearnerBase(const unordered_set<Parameter>& parameters,
+                             const LearningRatesPerSample& learningRates,
+                             bool allocateSmoothGradients /* = true */)
         : Learner(parameters),
-        m_learningRatePerSample(0.0),
-        m_sampleCount(0)
+        m_learningRates(learningRates),
+        m_sampleCount(0),
+        m_minibatchCount(0)
     {
-        const unordered_set<Parameter>& parameterSet = parameters;
-        for (const auto& parameter : parameterSet)
+        for (const auto& parameter : parameters)
        {
             // TODO: using the same device to allocate data for all smoothed gradients. Is this correct?
             // Should the device be specified on the per-parameter basis?
-            NDArrayViewPtr view;
-            if (parameter.GetDataType() == DataType::Float)
+            if (!allocateSmoothGradients)
             {
-                view = MakeSharedObject<NDArrayView>(0.0f, parameter.Shape(), parameter.Value()->Device());
+                continue;
             }
-            else
-            {
-                view = MakeSharedObject<NDArrayView>(0.0, parameter.Shape(), parameter.Value()->Device());
-            }
 
+            NDArrayViewPtr view = AllocateNDArrayView(parameter, parameter.Shape());
             m_smoothedGradientValues.insert(make_pair(parameter, view));
-            m_additionalOptions.learningRateMultipliers.insert(make_pair(parameter, 1.0));
         }
     }
 
-    void LearnerBase::ResetSmoothedGradients()
+    /*static*/ NDArrayViewPtr LearnerBase::AllocateNDArrayView(const Parameter& parameter, const NDShape& shape)
     {
-        for (const auto& parameter : Parameters())
+        if (parameter.GetDataType() == DataType::Float)
         {
-            const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
-            const auto& data = smoothedGradientValue;
-            switch (data->GetDataType())
-            {
-            case DataType::Float:
-                data->SetValue(0.0f);
-                break;
-            case DataType::Double:
-                data->SetValue(0.0);
-                break;
-            default:
-                LogicError("Unsupported DataType %s", ::CNTK::DataTypeName(data->GetDataType()));
-            }
+            return MakeSharedObject<NDArrayView>(float(0.0), shape, parameter.Value()->Device());
         }
+        else
+        {
+            return MakeSharedObject<NDArrayView>(0.0, shape, parameter.Value()->Device());
+        }
     }
 
+    /*static*/ NDShape LearnerBase::GetMatrixShape(const Parameter& parameter)
+    {
+        if (parameter.GetDataType() == DataType::Float)
+        {
+            auto matrix = GetMatrix<float>(parameter.Value());
+            return { matrix->GetNumRows(), matrix->GetNumCols() };
+        }
+        else
+        {
+            auto matrix = GetMatrix<double>(parameter.Value());
+            return { matrix->GetNumRows(), matrix->GetNumCols() };
+        }
+    }
 
@@ -219,17 +219,19 @@ namespace CNTK
 #endif
 
 #if DUMPOUTPUT
+        auto learningRate = ElementType(m_learningRates[m_sampleCount]);
+        auto momentum = ElementType(MomentumPerMB(m_momentums[m_sampleCount], trainingSampleCount));
         LOGPRINTF(stderr, "learnRatePerSample=%0.8f, momentum=%0.8f, actualMBSize=%ld\n",
-                  m_learningRatePerSample, m_momentumPerSample, trainingSampleCount);
+                  learningRate, momentum, trainingSampleCount);
         LOGPRINTF(stderr, "GradUpdateType()=%s, GradientUpdateNoiseStd()=%0.8f\n",
-                  LearnerType().c_str(), m_GaussianNoiseInjectStd);
+                  LearnerType().c_str(), m_additionalOptions.gaussianNoiseInjectionStdDev);
         Print(gradientValue, "Gradient Update");
         Print(smoothedGradientValue, "Smoothed Gradient Input");
 #endif
         UPDATE_FUNCTION;
 
 #if DUMPOUTPUT
-        Print(parameterValue, "Parameter Update");
+        Print(parameter.Value(), "Parameter Update");
 #endif
 
 #ifdef _DEBUG
@@ -239,6 +241,7 @@ namespace CNTK
 #endif
         }
         m_sampleCount += trainingSampleCount;
+        m_minibatchCount++;
         return false;
     }
 
@@ -265,9 +268,16 @@ namespace CNTK
 
     /*virtual*/ Dictionary LearnerBase::GetCheckpointState() const /*override*/
     {
-        NOT_IMPLEMENTED; // Until the new checkpointing is fully fleshed out, nobody should be calling this.
+        Dictionary checkpoint;
+
+        checkpoint[L"checkpointVersion"] = checkpointVersion;
+        checkpoint[L"sampleCount"] = m_sampleCount;
+        checkpoint[L"minibatchCount"] = m_minibatchCount;
+
+        // TODO: should we also save learning rate schedule into the checkpoint?
+        // If that is the case, need to be able to override this method in subclasses
+        // and save momentum schedule as well.
+
         for (const auto& parameter : Parameters())
         {
             // TODO: parameter name is not guaranteed to be unique. Instead, all serializable objects
@@ -277,31 +287,48 @@ namespace CNTK
             {
                 LogicError("Parameter names must be unique");
             }
-            const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
 
-            // Potentially, could store things like dimensions, element size, format, etc., but
-            // that seems to be redundant, since all of that is passed in the constructor.
-            checkpoint[parameter.Name()] = SerializeToVector(smoothedGradientValue);
+            const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
+            checkpoint[parameter.Name()] = *smoothedGradientValue;
         }
         return checkpoint;
     }
 
     /*virtual*/ void LearnerBase::RestoreFromCheckpoint(const Dictionary& checkpoint) /*override*/
     {
-        NOT_IMPLEMENTED; // Until the new checkpointing is fully fleshed out, nobody should be calling this.
+        m_sampleCount = checkpoint[L"sampleCount"].GetValue<size_t>();
+        m_minibatchCount = checkpoint[L"minibatchCount"].GetValue<size_t>();
+
+        size_t version = checkpoint[L"checkpointVersion"].GetValue<size_t>();
+        if (checkpointVersion != version)
+        {
+            // At the moment, we only support one version, so this should never happen.
+            LogicError("Unsupported checkpoint version.");
+        }
+
         for (const auto& parameter : Parameters())
         {
+            if (!checkpoint.Contains(parameter.Name()))
+            {
+                LogicError("Checkpoint does not contain state for parameter %ls", parameter.Name().c_str());
+            }
+
             const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
+            const NDArrayView& checkpointedValue = checkpoint[parameter.Name()].GetValue<NDArrayView>();
+
+            if (smoothedGradientValue->GetDataType() != checkpointedValue.GetDataType())
+            {
+                LogicError("A value restored from a checkpoint for the smoothed gradient data type for parameter %ls does not match the expected value",
+                           parameter.Name().c_str());
+            }
 
-            const DictionaryValue& state = checkpoint[parameter.Name()];
+            if (smoothedGradientValue->Shape() != checkpointedValue.Shape())
+            {
+                LogicError("A value restored from a checkpoint for the smoothed gradient shape for parameter %ls does not match the expected value",
+                           parameter.Name().c_str());
+            }
 
-            const auto& data = smoothedGradientValue;
-
-            DeserializeFromVector(data, state.GetValue<vector<DictionaryValue>>());
+            smoothedGradientValue->CopyFrom(checkpointedValue);
         }
     }
 
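+    // Checkpoint round trip in brief: GetCheckpointState() captures the version stamp, the
+    // sample/minibatch counts and one NDArrayView per parameter; RestoreFromCheckpoint()
+    // validates the version, data types and shapes, then copies the smoothed gradients back.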
@@ -313,23 +340,25 @@ namespace CNTK
     template <typename ElementType>
     void LearnerSGD::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const
     {
-        UNUSED(trainingSampleCount);
-
         const auto& parameterValue = parameter.Value();
         const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue);
         const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
         const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
 
-        const auto& learningRate = ElementType(ParameterDependentLearningRate(parameter));
+        auto learningRate = ElementType(m_learningRates[m_sampleCount]);
+        auto momentum = ElementType(MomentumPerMB(m_momentums[m_sampleCount], trainingSampleCount));
 
         // TODO: break up the NormalGrad into 3 different functions, each with its own set of parameters
         // (one for vanilla SGD, the other for momentum SGD, and the third one for NAG).
         smoothedGradientMatrix->NormalGrad(*gradientMatrix, *parameterMatrix,
-                                           learningRate, ElementType(m_momentumPerSample), m_useNesterovAcceleration);
+                                           learningRate, momentum, m_useNesterovAcceleration);
     }
 
-    LearnerAdaGrad::LearnerAdaGrad(const unordered_set<Parameter>& parameters, bool needAveMultiplier)
-        : LearnerBase(parameters), m_needAveMultiplier(needAveMultiplier)
+    LearnerAdaGrad::LearnerAdaGrad(const unordered_set<Parameter>& parameters,
+                                   const LearningRatesPerSample& learningRates,
+                                   bool needAveMultiplier)
+        : LearnerBase(parameters, learningRates),
+          m_needAveMultiplier(needAveMultiplier)
     {
     }
 
@@ -348,15 +377,23 @@ namespace CNTK
         const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
         const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
 
-        auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
+        auto learningRate = ElementType(m_learningRates[m_sampleCount]);
 
         auto aveMultiplier = smoothedGradientMatrix->Adagrad(*gradientMatrix, m_needAveMultiplier);
         Matrix<ElementType>::ScaleAndAdd(ElementType(-learningRate / aveMultiplier), *gradientMatrix, *parameterMatrix);
     }
 
-    LearnerFSAdaGrad::LearnerFSAdaGrad(const unordered_set<Parameter>& parameters)
-        : LearnerMomentumSGD(parameters)
+    LearnerFSAdaGrad::LearnerFSAdaGrad(const unordered_set<Parameter>& parameters,
+                                       const LearningRatesPerSample& learningRates,
+                                       const MomentumsPerSample& momentums)
+        : LearnerMomentumSGD(parameters, learningRates, momentums, /*allocateSmoothGradients*/ false)
     {
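+        // NB: the FSAdagrad matrix kernel appears to keep two per-element state values in a
+        // single matrix, hence the view below is allocated with twice the number of columns.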
+        for (const auto& parameter : parameters)
+        {
+            auto shape = GetMatrixShape(parameter);
+            NDArrayViewPtr view = AllocateNDArrayView(parameter, { shape[0], 2 * shape[1] });
+            m_smoothedGradientValues.insert(make_pair(parameter, view));
+        }
     }
 
     /*virtual*/ void LearnerFSAdaGrad::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
@@ -373,21 +410,33 @@ namespace CNTK
         const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue);
         const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
         const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
 
-        //const double momentum = MomentumPerMB(m_momentumPerSample, trainingSampleCount);
-
-        auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
-
-        smoothedGradientMatrix->FSAdagrad(trainingSampleCount, *gradientMatrix, *parameterMatrix,
-                                          learningRate, ElementType(m_momentumPerSample));
-
+        auto learningRate = ElementType(m_learningRates[m_sampleCount]);
+        auto momentum = ElementType(MomentumPerMB(m_momentums[m_sampleCount], trainingSampleCount));
+        smoothedGradientMatrix->FSAdagrad(trainingSampleCount, *gradientMatrix, *parameterMatrix, learningRate, momentum);
     }
 
-    LearnerRMSProp::LearnerRMSProp(const unordered_set<Parameter>& parameters,
-                                   double gamma, double inc, double dec, double max, double min, bool needAveMultiplier)
-        : LearnerBase(parameters),
-        m_gamma(gamma), m_inc(inc), m_dec(dec), m_max(max), m_min(min),
-        m_needAveMultiplier(needAveMultiplier)
+    LearnerRMSProp::LearnerRMSProp(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates,
+                                   double gamma, double inc, double dec, double max, double min, bool needAveMultiplier)
+        : LearnerBase(parameters, learningRates, /*allocateSmoothGradients*/ false),
+        m_gamma(gamma), m_inc(inc), m_dec(dec), m_max(max), m_min(min),
+        m_needAveMultiplier(needAveMultiplier)
     {
+        for (const auto& parameter : parameters)
+        {
+            // When needAveMultiplier == true, CPU and GPU implementations of RMSProp require a different number of columns.
+            // TODO: verify that this is correct.
+            size_t factor = 3;
+            if (needAveMultiplier && parameter.Value()->Device().Type() == DeviceKind::GPU)
+            {
+                factor = 4;
+            }
+
+            auto shape = GetMatrixShape(parameter);
+            NDArrayViewPtr view = AllocateNDArrayView(parameter, { shape[0], factor * shape[1] });
+
+            m_smoothedGradientValues.insert(make_pair(parameter, view));
+        }
     }
 
     /*virtual*/ void LearnerRMSProp::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
@@ -405,12 +454,12 @@ namespace CNTK
         const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
         const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
 
-        auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
+        auto learningRate = ElementType(m_learningRates[m_sampleCount]);
 
         auto aveMultiplier = smoothedGradientMatrix->RmsProp(*gradientMatrix,
-            ElementType(m_gamma), ElementType(m_inc),
-            ElementType(m_max), ElementType(m_dec),
-            ElementType(m_min), m_needAveMultiplier);
+                                                             ElementType(m_gamma), ElementType(m_inc),
+                                                             ElementType(m_max), ElementType(m_dec),
+                                                             ElementType(m_min), m_needAveMultiplier);
         Matrix<ElementType>::ScaleAndAdd(ElementType(-learningRate / aveMultiplier), *gradientMatrix, *parameterMatrix);
     }
 
@@ -418,34 +467,35 @@ namespace CNTK
     template shared_ptr<Matrix<float>> LearnerBase::GetWritableMatrix<float>(const NDArrayViewPtr& arrayView);
     template shared_ptr<Matrix<double>> LearnerBase::GetWritableMatrix<double>(const NDArrayViewPtr& arrayView);
 
-    LearnerPtr SGDLearner(const unordered_set<Parameter>& parameters, double learningRatePerSample)
+    LearnerPtr SGDLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates)
     {
-        return MakeSharedObject<LearnerSGD>(parameters, learningRatePerSample);
+        return MakeSharedObject<LearnerSGD>(parameters, learningRates);
     }
 
-    LearnerPtr MomentumSGDLearner(const unordered_set<Parameter>& parameters)
+    LearnerPtr MomentumSGDLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates, const MomentumsPerSample& momentums)
     {
-        return MakeSharedObject<LearnerMomentumSGD>(parameters);
+        return MakeSharedObject<LearnerMomentumSGD>(parameters, learningRates, momentums);
     }
 
-    LearnerPtr NesterovLearner(const unordered_set<Parameter>& parameters)
+    LearnerPtr NesterovLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates, const MomentumsPerSample& momentums)
     {
-        return MakeSharedObject<LearnerNesterov>(parameters);
+        return MakeSharedObject<LearnerNesterov>(parameters, learningRates, momentums);
     }
 
-    LearnerPtr AdaGradLearner(const unordered_set<Parameter>& parameters, bool needAveMultiplier)
+    LearnerPtr AdaGradLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates, bool needAveMultiplier)
     {
-        return MakeSharedObject<LearnerAdaGrad>(parameters, needAveMultiplier);
+        return MakeSharedObject<LearnerAdaGrad>(parameters, learningRates, needAveMultiplier);
     }
 
-    LearnerPtr FSAdaGradLearner(const unordered_set<Parameter>& parameters)
+    LearnerPtr FSAdaGradLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates, const MomentumsPerSample& momentums)
    {
-        return MakeSharedObject<LearnerFSAdaGrad>(parameters);
+        return MakeSharedObject<LearnerFSAdaGrad>(parameters, learningRates, momentums);
     }
 
-    LearnerPtr RMSPropLearner(const unordered_set<Parameter>& parameters,
-                              double gamma, double inc, double dec, double max, double min, bool needAveMultiplier)
+    LearnerPtr RMSPropLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates,
+                              double gamma, double inc, double dec, double max, double min,
+                              bool needAveMultiplier)
     {
-        return MakeSharedObject<LearnerRMSProp>(parameters, gamma, inc, dec, max, min, needAveMultiplier);
+        return MakeSharedObject<LearnerRMSProp>(parameters, learningRates, gamma, inc, dec, max, min, needAveMultiplier);
     }
 }
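A minimal construction sketch against the factories above (illustrative; 'parameters' stands for a model's parameter set):

    LearningRatesPerSample learningRates({ 0.05, 0.005 }, 10000); // 0.05 for the first 10000 samples, 0.005 afterwards
    MomentumsPerSample momentums(0.9);                            // constant momentum
    LearnerPtr learner = MomentumSGDLearner(parameters, learningRates, momentums);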
@@ -9,6 +9,7 @@
 
 namespace CNTK
 {
+    // TODO: Move this to Trainer along with Pre-, PostProcess and ClipGradient.
     // A collection of additional options that are applicable for all standard learners
     // (after these options are set, they retain their value for the entire lifespan of a learner).
     struct AdditionalLearningOptions
@@ -18,7 +19,6 @@ namespace CNTK
         double gaussianNoiseInjectionStdDev = 0.0;
         bool gradientClippingWithTruncation = true;
         double gradientClippingThresholdPerSample = std::numeric_limits<double>::infinity();
-        std::unordered_map<Parameter, double> learningRateMultipliers;
     };
 
     // An abstract base class at the root of the standard learners hierarchy
@@ -33,32 +33,16 @@ namespace CNTK
 
         virtual void RestoreFromCheckpoint(const Dictionary& checkpoint) override final;
 
         void SetAdditionalOptions(const AdditionalLearningOptions& additionalOptions)
         {
             m_additionalOptions = additionalOptions;
         }
 
-        // TODO: should this be called ResetMomentum?
-        // needed for BlockMomemtumSGD to reset SGD momentum after aggregation.
-        void ResetSmoothedGradients();
-
-        // TODO: move learning rate and momentum scheduling and adjustment functionality
-        // inside the learner and drop these setters.
-        void SetLearningRate(double value) { m_learningRatePerSample = value; }
-
     protected:
-        LearnerBase(const std::unordered_set<Parameter>& parameters);
+        LearnerBase(const std::unordered_set<Parameter>& parameters,
+                    const LearningRatesPerSample& learningRates,
+                    bool allocateSmoothGradients = true);
 
         virtual void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const = 0;
 
-        double ParameterDependentLearningRate(const Parameter& parameter) const
-        {
-            return m_learningRatePerSample * m_additionalOptions.learningRateMultipliers.at(parameter);
-        }
-
         std::string LearnerType() const;
 
-        double m_learningRatePerSample;
+        LearningRatesPerSample m_learningRates;
 
         AdditionalLearningOptions m_additionalOptions;
 
@@ -91,6 +75,16 @@ namespace CNTK
         template <typename ElementType>
         void PostProcess(const Parameter& parameter, const NDArrayViewPtr& gradientValue, size_t actualMBSize) const;
 
+        // Returns an NDArrayView with the required shape, with the same data type as parameter value
+        // and allocated on the same device.
+        static NDArrayViewPtr AllocateNDArrayView(const Parameter& parameter, const NDShape& shape);
+
+        // Retrieves the shape of the matrix corresponding to the parameter value.
+        static NDShape GetMatrixShape(const Parameter& parameter);
+
+        size_t m_sampleCount;
+        size_t m_minibatchCount;
+
     private:
         // Templatized update function, it invokes preprocess and postprocess using the provided
         // template parameter and also invokes virtual Update method implemented in one of the subclasses.
@@ -101,18 +95,20 @@ namespace CNTK
         static bool HasNan(const NDArrayViewPtr& value, const char* name);
         static void Print(const NDArrayViewPtr& value, const char* msg);
 
-        size_t m_sampleCount;
+        static const size_t checkpointVersion = 1;
     };
 
     // Vanilla gradient descent optimization algorithm.
     class LearnerSGD : public LearnerBase
     {
     public:
-        LearnerSGD(const std::unordered_set<Parameter>& parameters, double learningRatePerSample = 0)
-            : LearnerBase(parameters), m_momentumPerSample(0.0), m_useNesterovAcceleration(false)
-        {
-            SetLearningRate(learningRatePerSample);
-        }
+        LearnerSGD(const std::unordered_set<Parameter>& parameters,
+                   const LearningRatesPerSample& learningRates,
+                   bool allocateSmoothGradients = true)
+            : LearnerBase(parameters, learningRates, allocateSmoothGradients),
+              m_momentums(0.0),
+              m_useNesterovAcceleration(false)
+        { }
 
     protected:
 
@@ -121,7 +117,8 @@ namespace CNTK
         template <typename ElementType>
         void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const;
 
-        double m_momentumPerSample;
+        // TODO: Move m_momentums to LearnerMomentumSGD as soon as NormalGrad is refactored.
+        MomentumsPerSample m_momentums;
         bool m_useNesterovAcceleration;
     };
 
@@ -129,20 +126,25 @@ namespace CNTK
     class LearnerMomentumSGD : public LearnerSGD
     {
     public:
-        LearnerMomentumSGD(const std::unordered_set<Parameter>& parameters)
-            : LearnerSGD(parameters)
-        {}
-
-        void SetMomentum(double value) { m_momentumPerSample = value; }
+        LearnerMomentumSGD(const std::unordered_set<Parameter>& parameters,
+                           const LearningRatesPerSample& learningRates,
+                           const MomentumsPerSample& momentums,
+                           bool allocateSmoothGradients = true)
+            : LearnerSGD(parameters, learningRates, allocateSmoothGradients)
+        {
+            m_momentums = momentums;
+        }
     };
 
     // Nesterov's accelerated SGD descent.
-    class LearnerNesterov : public LearnerSGD
+    class LearnerNesterov : public LearnerMomentumSGD
     {
     public:
 
-        LearnerNesterov(const std::unordered_set<Parameter>& parameters)
-            : LearnerSGD(parameters)
+        LearnerNesterov(const std::unordered_set<Parameter>& parameters,
+                        const LearningRatesPerSample& learningRates,
+                        const MomentumsPerSample& momentums)
+            : LearnerMomentumSGD(parameters, learningRates, momentums)
         {
             m_useNesterovAcceleration = true;
         }
@@ -152,7 +154,9 @@ namespace CNTK
     {
     public:
 
-        LearnerAdaGrad(const std::unordered_set<Parameter>& parameters, bool needAveMultiplier);
+        LearnerAdaGrad(const std::unordered_set<Parameter>& parameters,
+                       const LearningRatesPerSample& learningRates,
+                       bool needAveMultiplier);
 
     protected:
         bool m_needAveMultiplier;
@@ -167,7 +171,9 @@ namespace CNTK
     {
     public:
 
-        LearnerFSAdaGrad(const std::unordered_set<Parameter>& parameters);
+        LearnerFSAdaGrad(const std::unordered_set<Parameter>& parameters,
+                         const LearningRatesPerSample& learningRates,
+                         const MomentumsPerSample& momentums);
 
     protected:
 
@@ -182,7 +188,9 @@ namespace CNTK
     public:
 
         LearnerRMSProp(const std::unordered_set<Parameter>& parameters,
-                       double gamma, double inc, double dec, double max, double min, bool needAveMultiplier);
+                       const LearningRatesPerSample& learningRates,
+                       double gamma, double inc, double dec, double max, double min,
+                       bool needAveMultiplier);
 
     protected:
 
@@ -6,20 +6,100 @@
 #include "stdafx.h"
 #include "CNTKLibrary.h"
 #include "Utils.h"
-#include "File.h"
+#include <istream>
+#include <ostream>
 
 using namespace std;
 
 namespace CNTK
 {
+    // This wrapper redefines operator<< in terms of unformatted (binary) write operation.
+    struct BinaryOStreamWrapper
+    {
+        BinaryOStreamWrapper(ostream& s) : m_stream(s) {}
+
+        template<typename T>
+        typename std::enable_if<std::is_pod<T>::value, BinaryOStreamWrapper&>::type
+        operator<<(const T& value)
+        {
+            m_stream.write(reinterpret_cast<const char*>(&value), sizeof(T));
+            return *this;
+        }
+
+        BinaryOStreamWrapper& operator<<(const wstring& str)
+        {
+            *this << str.length();
+            m_stream.write(reinterpret_cast<const char*>(str.c_str()), str.length() * sizeof(wchar_t));
+            return *this;
+        }
+
+        operator ostream& () { return m_stream; }
+
+        ostream& m_stream;
+        BinaryOStreamWrapper(const BinaryOStreamWrapper&) = delete; BinaryOStreamWrapper(BinaryOStreamWrapper&&) = delete; BinaryOStreamWrapper& operator=(const BinaryOStreamWrapper&) = delete; BinaryOStreamWrapper& operator=(BinaryOStreamWrapper&&) = delete;
+    };
+
+    // This wrapper redefines operator>> in terms of unformatted (binary) read operation.
+    struct BinaryIStreamWrapper
+    {
+        BinaryIStreamWrapper(istream& s) : m_stream(s) {}
+
+        template<typename T>
+        typename std::enable_if<std::is_pod<T>::value, BinaryIStreamWrapper&>::type
+        operator>>(T& value)
+        {
+            static_assert(sizeof(T) <= sizeof(size_t), "size_t is the largest supported type.");
+            m_stream.read(buf, sizeof(T));
+            value = *(reinterpret_cast<T*>(buf));
+            return *this;
+        }
+
+        BinaryIStreamWrapper& operator>>(wstring& str)
+        {
+            size_t length;
+            *this >> length;
+            str.resize(length);
+            for (size_t i = 0; i < length; ++i)
+            {
+                m_stream.read(buf, sizeof(wchar_t));
+                str[i] = *(reinterpret_cast<wchar_t*>(buf));
+            }
+
+            return *this;
+        }
+
+        operator istream& () const { return m_stream; }
+
+        istream& m_stream;
+        char buf[sizeof(size_t)];
+        BinaryIStreamWrapper(const BinaryIStreamWrapper&) = delete; BinaryIStreamWrapper(BinaryIStreamWrapper&&) = delete; BinaryIStreamWrapper& operator=(const BinaryIStreamWrapper&) = delete; BinaryIStreamWrapper& operator=(BinaryIStreamWrapper&&) = delete;
+    };
+
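+    // Round-trip sketch (assuming the Dictionary operator<< / operator>> overloads defined
+    // further down in this file):
+    //     std::stringstream s;
+    //     s << dict;           // binary write via BinaryOStreamWrapper
+    //     s >> restoredDict;   // binary read via BinaryIStreamWrapper
+    //     assert(dict == restoredDict);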
+    template <typename T>
+    T* CreateDataPtr(const T& value)
+    {
+        return new T(value);
+    }
+
+    template <>
+    NDArrayView* CreateDataPtr<NDArrayView>(const NDArrayView& value)
+    {
+        // TODO: replace this copy with an alias to value.
+        NDArrayView* viewPtr = new NDArrayView(value.GetDataType(), value.Shape(), DeviceDescriptor::CPUDevice());
+        viewPtr->CopyFrom(value);
+        return viewPtr;
+    }
+
     template <typename T>
     void DictionaryValue::AllocateDataPtr(const T& value)
     {
         static_assert(is_same<T, NDShape>::value ||
                       is_same<T, wstring>::value ||
                       is_same<T, vector<DictionaryValue>>::value ||
-                      is_same<T, Dictionary>::value, "AllocateDataPtr called with invalid type");
-        m_data.m_ptr = new T(value);
+                      is_same<T, Dictionary>::value ||
+                      is_same<T, NDArrayView>::value,
+                      "AllocateDataPtr called with invalid type");
+        m_data.m_ptr = CreateDataPtr<T>(value);
     }
 
     template <typename T>
@@ -31,12 +111,163 @@ namespace CNTK
         m_data.m_ptr = nullptr;
     }
 
-    Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, DictionaryValue& us)
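+    // Compares two NDArrayViews element-wise; views that do not already reside on the CPU are
+    // first copied into temporary CPU-side buffers before their contents are compared.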
+    template <typename ElementType>
+    bool AreEqual(NDArrayView& view1, NDArrayView& view2)
+    {
+        if (view1.GetDataType() != view2.GetDataType() ||
+            view1.Shape() != view2.Shape())
+        {
+            return false;
+        }
+
+        ElementType* data1 = nullptr;
+        ElementType* data2 = nullptr;
+        NDArrayViewPtr temp1CpuDataView, temp2CpuDataView;
+        if (view1.Device().Type() == DeviceKind::CPU)
+        {
+            data1 = view1.WritableDataBuffer<ElementType>();
+            data2 = view2.WritableDataBuffer<ElementType>();
+        }
+        else
+        {
+            temp1CpuDataView = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), view1.Shape(), DeviceDescriptor::CPUDevice());
+            temp1CpuDataView->CopyFrom(view1);
+            data1 = temp1CpuDataView->WritableDataBuffer<ElementType>();
+
+            temp2CpuDataView = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), view2.Shape(), DeviceDescriptor::CPUDevice());
+            temp2CpuDataView->CopyFrom(view2);
+            data2 = temp2CpuDataView->WritableDataBuffer<ElementType>();
+        }
+
+        size_t numElements = view1.Shape().TotalSize();
+
+        for (size_t i = 0; i < numElements; ++i)
+        {
+            if (data1[i] != data2[i])
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    bool DictionaryValue::operator==(const DictionaryValue& other) const
+    {
+        if (this == &other)
+        {
+            return true;
+        }
+
+        if (m_valueType != other.m_valueType)
+        {
+            return false;
+        }
+
+        switch (m_valueType)
+        {
+        case DictionaryValue::Type::Bool:
+            return (m_data.m_boolean == other.m_data.m_boolean);
+        case DictionaryValue::Type::SizeT:
+            return (m_data.m_sizeT == other.m_data.m_sizeT);
+        case DictionaryValue::Type::Float:
+            return (m_data.m_float == other.m_data.m_float);
+        case DictionaryValue::Type::Double:
+            return (m_data.m_double == other.m_data.m_double);
+        case DictionaryValue::Type::String:
+        {
+            wstring* strPtr1 = reinterpret_cast<wstring*>(m_data.m_ptr);
+            wstring* strPtr2 = reinterpret_cast<wstring*>(other.m_data.m_ptr);
+            return (*strPtr1 == *strPtr2);
+        }
+        case DictionaryValue::Type::NDShape:
+        {
+            NDShape* shapePtr1 = reinterpret_cast<NDShape*>(m_data.m_ptr);
+            NDShape* shapePtr2 = reinterpret_cast<NDShape*>(other.m_data.m_ptr);
+            return (*shapePtr1 == *shapePtr2);
+        }
+        case DictionaryValue::Type::Vector:
+        {
+            vector<DictionaryValue>* vectorPtr1 = reinterpret_cast<vector<DictionaryValue>*>(m_data.m_ptr);
+            vector<DictionaryValue>* vectorPtr2 = reinterpret_cast<vector<DictionaryValue>*>(other.m_data.m_ptr);
+            return (*vectorPtr1 == *vectorPtr2);
+        }
+        case DictionaryValue::Type::Dictionary:
+        {
+            Dictionary* dictPtr1 = reinterpret_cast<Dictionary*>(m_data.m_ptr);
+            Dictionary* dictPtr2 = reinterpret_cast<Dictionary*>(other.m_data.m_ptr);
+            return (*dictPtr1 == *dictPtr2);
+        }
+        case DictionaryValue::Type::NDArrayView:
+        {
+            NDArrayView* viewPtr1 = reinterpret_cast<NDArrayView*>(m_data.m_ptr);
+            NDArrayView* viewPtr2 = reinterpret_cast<NDArrayView*>(other.m_data.m_ptr);
+
+            switch (viewPtr1->GetDataType())
+            {
+            case DataType::Float:
+                return AreEqual<float>(*viewPtr1, *viewPtr2);
+            case DataType::Double:
+                return AreEqual<double>(*viewPtr1, *viewPtr2);
+            default:
+                NOT_IMPLEMENTED;
+            }
+        }
+        default:
+            NOT_IMPLEMENTED;
+        }
+    }
+
+    bool DictionaryValue::operator!=(const DictionaryValue& other) const
+    {
+        return !(*this == other);
+    }
+
+    BinaryOStreamWrapper& operator<<(BinaryOStreamWrapper& stream, const NDShape& us)
+    {
+        auto size = us.NumAxes();
+        stream << size;
+        for (auto i = 0; i < size; i++)
+        {
+            stream << us[i];
+        }
+        return stream;
+    }
+
+    template <typename T>
+    void Write(BinaryOStreamWrapper& stream, const NDArrayView& view)
+    {
+        assert(view.Device().Type() == DeviceKind::CPU);
+
+        auto numElements = view.Shape().TotalSize();
+        const T* buffer = view.DataBuffer<T>();
+        for (auto i = 0; i < numElements; ++i)
+        {
+            stream << buffer[i];
+        }
+    }
+
+    template <typename T>
+    void Read(BinaryIStreamWrapper& stream, NDArrayView& view)
+    {
+        assert(view.Device().Type() == DeviceKind::CPU);
+
+        auto numElements = view.Shape().TotalSize();
+        T* buffer = view.WritableDataBuffer<T>();
+        for (auto i = 0; i < numElements; ++i)
+        {
+            stream >> buffer[i];
+        }
+    }
+
+    istream& operator>>(istream& stdStream, DictionaryValue& us)
     {
+        BinaryIStreamWrapper stream(stdStream);
+        size_t version;
+        stream >> version;
+
-        stream >> us.m_valueType;
-
+        unsigned int type;
+        stream >> type;
+        us.m_valueType = static_cast<DictionaryValue::Type>(type);
 
         switch (us.ValueType())
         {
@@ -52,28 +283,72 @@ namespace CNTK
         case DictionaryValue::Type::Double:
             stream >> us.m_data.m_double;
             break;
         case DictionaryValue::Type::String:
         {
             wstring* strPtr = new wstring();
             stream >> *strPtr;
             us.m_data.m_ptr = strPtr;
             break;
         }
         case DictionaryValue::Type::NDShape:
         {
             size_t size;
             stream >> size;
-            vector<size_t> dims(size);
+            NDShape* shapePtr = new NDShape(size);
             for (auto i = 0; i < size; i++)
             {
-                stream >> dims[i];
+                stream >> shapePtr->operator[](i);
             }
-            us.AllocateDataPtr(NDShape(dims));
+            us.m_data.m_ptr = shapePtr;
             break;
         }
         case DictionaryValue::Type::Vector:
         {
             size_t size;
             stream >> size;
-            vector<DictionaryValue> values(size);
+            vector<DictionaryValue>* vectorPtr = new vector<DictionaryValue>(size);
             for (auto i = 0; i < size; i++)
             {
-                stream >> values[i];
+                stream >> vectorPtr->at(i);
             }
-            us.AllocateDataPtr(values);
+            us.m_data.m_ptr = vectorPtr;
             break;
         }
+        case DictionaryValue::Type::Dictionary:
+        {
+            Dictionary* dictPtr = new Dictionary();
+            stream >> *dictPtr;
+            us.m_data.m_ptr = dictPtr;
+            break;
+        }
+        case DictionaryValue::Type::NDArrayView:
+        {
+            unsigned int type;
+            stream >> type;
+            DataType dtype = static_cast<DataType>(type);
+
+            size_t size;
+            stream >> size;
+            NDShape shape(size);
+            for (auto i = 0; i < size; i++)
+            {
+                stream >> shape[i];
+            }
+
+            NDArrayView* viewPtr = new NDArrayView(dtype, shape, DeviceDescriptor::CPUDevice());
+            switch (dtype)
+            {
+            case DataType::Float:
+                Read<float>(stream, *viewPtr);
+                break;
+            case DataType::Double:
+                Read<double>(stream, *viewPtr);
+                break;
+            default:
+                LogicError("Unsupported DataType %s", DataTypeName(dtype));
+            }
+
+            us.m_data.m_ptr = viewPtr;
+            break;
+        }
         default:
@@ -82,11 +357,13 @@ namespace CNTK
         return stream;
     }
 
-    Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const DictionaryValue& us)
+    ostream& operator<<(ostream& stdStream, const DictionaryValue& us)
     {
+        BinaryOStreamWrapper stream(stdStream);
+
+        stream << us.version;
+
-        stream << us.ValueType();
+        stream << static_cast<unsigned int>(us.ValueType());
 
         switch (us.ValueType())
         {
@@ -102,15 +379,16 @@ namespace CNTK
         case DictionaryValue::Type::Double:
             stream << us.m_data.m_double;
             break;
         case DictionaryValue::Type::String:
         {
             wstring* stringPtr = reinterpret_cast<wstring*>(us.m_data.m_ptr);
             stream << *stringPtr;
             break;
         }
         case DictionaryValue::Type::NDShape:
         {
             NDShape* shapePtr = reinterpret_cast<NDShape*>(us.m_data.m_ptr);
-            auto size = shapePtr->NumAxes();
-            stream << size;
-            for (auto i = 0; i < size; i++)
-            {
-                stream << shapePtr->operator[](i);
-            }
+            stream << *shapePtr;
             break;
         }
         case DictionaryValue::Type::Vector:
@@ -121,7 +399,31 @@ namespace CNTK
             stream << size;
             for (auto i = 0; i < size; i++)
             {
-                stream << vectorPtr->operator[](i);
+                stream << vectorPtr->at(i);
             }
             break;
         }
+        case DictionaryValue::Type::Dictionary:
+        {
+            Dictionary* dictPtr = reinterpret_cast<Dictionary*>(us.m_data.m_ptr);
+            stream << *dictPtr;
+            break;
+        }
+        case DictionaryValue::Type::NDArrayView:
+        {
+            NDArrayView* viewPtr = reinterpret_cast<NDArrayView*>(us.m_data.m_ptr);
+            stream << static_cast<unsigned int>(viewPtr->GetDataType());
+            stream << viewPtr->Shape();
+            switch (viewPtr->GetDataType())
+            {
+            case DataType::Float:
+                Write<float>(stream, *viewPtr);
+                break;
+            case DataType::Double:
+                Write<double>(stream, *viewPtr);
+                break;
+            default:
+                LogicError("Unsupported DataType %s", DataTypeName(viewPtr->GetDataType()));
+            }
+            break;
+        }
@@ -148,7 +450,7 @@ namespace CNTK
     Dictionary& Dictionary::operator=(const Dictionary& other)
    {
         assert(this != &other);
-        m_dictionaryData.reset(new std::unordered_map<std::wstring, DictionaryValue>(*(other.m_dictionaryData)));
+        m_dictionaryData.reset(new unordered_map<wstring, DictionaryValue>(*(other.m_dictionaryData)));
         return *this;
     }
 
@@ -183,20 +485,51 @@ namespace CNTK
         return (m_dictionaryData->find(key) != m_dictionaryData->end());
     }
 
-    Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const Dictionary& us)
+    bool Dictionary::operator==(const Dictionary& other) const
     {
+        if (this == &other)
+        {
+            return true;
+        }
+
+        if (m_dictionaryData->size() != other.m_dictionaryData->size())
+        {
+            return false;
+        }
+
+        for (auto& kv : *m_dictionaryData)
+        {
+            auto result = other.m_dictionaryData->find(kv.first);
+            if (result == other.m_dictionaryData->end() || kv.second != result->second)
+            {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    bool Dictionary::operator!=(const Dictionary& other) const
+    {
+        return !(*this == other);
+    }
+
+    ostream& operator<<(ostream& stdStream, const Dictionary& us)
+    {
+        BinaryOStreamWrapper stream(stdStream);
+        stream << us.version;
         stream << us.m_dictionaryData->size();
-        for (auto it = us.m_dictionaryData->begin(); it != us.m_dictionaryData->end(); ++it)
+        for (auto& kv : *(us.m_dictionaryData))
         {
-            stream << it->first;
-            stream << it->second;
+            stream << kv.first;
+            stream << kv.second;
         }
         return stream;
     }
 
-    Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, Dictionary& us)
+    istream& operator>>(istream& stdStream, Dictionary& us)
     {
+        BinaryIStreamWrapper stream(stdStream);
+        size_t version;
+        stream >> version;
         size_t size;
@ -206,113 +539,36 @@ namespace CNTK
|
|||
{
|
||||
wstring key;
|
||||
stream >> key;
|
||||
DictionaryValue value;
|
||||
stream >> value;
|
||||
us.m_dictionaryData->insert(make_pair(key, value));
|
||||
stream >> us[key];
|
||||
}
|
||||
return stream;
|
||||
}
|
||||
|
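The read loop now streams directly into us[key]. A sketch of the equivalent explicit form, assuming (as the usage implies, though the diff does not show it) that Dictionary::operator[] inserts a default-constructed value for a missing key and returns a reference:

// Sketch only; the helper name is illustrative and not part of the commit.
inline void ReadEntry(CNTK::BinaryIStreamWrapper& stream, CNTK::Dictionary& us)
{
    std::wstring key;
    stream >> key;
    CNTK::DictionaryValue& slot = us[key]; // default-constructs the entry if absent
    stream >> slot;                        // then overwrites it in place
}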
    // Returns the value from the first element whose key is greater than the given sample count,
    // or the last element's value if no such key exists.
    template <typename T>
    vector<DictionaryValue> SerializeToVector(const NDArrayViewPtr& viewPtr)
    const T& TrainingParameterSchedule<T>::operator[](size_t sampleCount) const
    {
        if (viewPtr->IsSparse())
        assert(m_schedule.size() > 0);
        auto it = m_schedule.upper_bound(sampleCount);
        if (it == m_schedule.end())
        {
            LogicError("Sparse NDArrayView cannot be serialized into a vector.");
            --it;
        }

        auto numElements = viewPtr->Shape().TotalSize();

        vector<DictionaryValue> values(numElements);

        NDArrayViewPtr cpuDataViewPtr = viewPtr;
        if ((viewPtr->Device().Type() != DeviceKind::CPU))
        {
            cpuDataViewPtr = MakeSharedObject<NDArrayView>(viewPtr->GetDataType(), viewPtr->Shape(), DeviceDescriptor::CPUDevice());
            cpuDataViewPtr->CopyFrom(*viewPtr);
        }

        const T* buffer = cpuDataViewPtr->DataBuffer<T>();
        for (auto i = 0; i < numElements; ++i)
        {
            T v = buffer[i];
            values[i] = DictionaryValue(v);
        }

        return values;
        return it->second;
    }
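The same lookup in isolation, as a minimal sketch assuming m_schedule is an ordered map from cumulative sample-count upper bounds to parameter values (an assumption inferred from the unit tests below; the function name is illustrative):

#include <cassert>
#include <map>

// upper_bound returns the first entry whose key exceeds the queried sample
// count; past the last key, the last value stays in effect.
double ScheduleValueAt(const std::map<size_t, double>& schedule, size_t sampleCount)
{
    assert(!schedule.empty());
    auto it = schedule.upper_bound(sampleCount);
    if (it == schedule.end())
        --it;                   // clamp to the final entry
    return it->second;
}

// E.g. with {{10, 0.5}, {20, 0.3}}: samples 0..9 map to 0.5, samples 10..19
// map to 0.3, and every sample from 20 onward also maps to 0.3.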
    template <typename T>
    void DeserializeFromVector(const NDArrayViewPtr& viewPtr, const vector<DictionaryValue>& values)
    {
        if (viewPtr->IsSparse())
        {
            LogicError("Sparse NDArrayView cannot be deserialized from a vector.");
        }

        auto numElements = viewPtr->Shape().TotalSize();

        if (values.size() != numElements)
        {
            LogicError("Number of elements (%lu) in the deserialized representation does not match the expected value (%lu)",
                       values.size(), numElements);
        }

        NDArrayViewPtr cpuDataViewPtr = viewPtr;
        if ((viewPtr->Device().Type() != DeviceKind::CPU))
        {
            cpuDataViewPtr = MakeSharedObject<NDArrayView>(viewPtr->GetDataType(), viewPtr->Shape(), DeviceDescriptor::CPUDevice());
        }

        T* buffer = cpuDataViewPtr->WritableDataBuffer<T>();
        for (auto i = 0; i < numElements; ++i)
        {
            buffer[i] = values[i].GetValue<T>();
        }

        if ((viewPtr->Device().Type() != DeviceKind::CPU))
        {
            viewPtr->CopyFrom(*cpuDataViewPtr);
        }
    }

    // TODO: we store the type info for every element in the vector, which is extremely redundant.
    // Instead, it'd be nice to introduce some sort of DictionaryValueVector.
    vector<DictionaryValue> SerializeToVector(const NDArrayViewPtr& viewPtr)
    {
        switch (viewPtr->GetDataType())
        {
        case DataType::Float:
            return SerializeToVector<float>(viewPtr);
        case DataType::Double:
            return SerializeToVector<double>(viewPtr);
        default:
            LogicError("Unsupported DataType %s", DataTypeName(viewPtr->GetDataType()));
        }
    }

    void DeserializeFromVector(const NDArrayViewPtr& viewPtr, const vector<DictionaryValue>& values)
    {
        switch (viewPtr->GetDataType())
        {
        case DataType::Float:
            DeserializeFromVector<float>(viewPtr, values);
            break;
        case DataType::Double:
            DeserializeFromVector<double>(viewPtr, values);
            break;
        default:
            LogicError("Unsupported DataType %s", DataTypeName(viewPtr->GetDataType()));
        }
    }

    template void DictionaryValue::AllocateDataPtr<NDShape>(const NDShape& value);
    template void DictionaryValue::AllocateDataPtr<vector<DictionaryValue>>(const vector<DictionaryValue>& value);
    template void DictionaryValue::AllocateDataPtr<wstring>(const wstring& value);
    template void DictionaryValue::AllocateDataPtr<Dictionary>(const Dictionary& value);
    template void DictionaryValue::AllocateDataPtr<NDArrayView>(const NDArrayView& value);

    template void DictionaryValue::FreePtrAsType<NDShape>();
    template void DictionaryValue::FreePtrAsType<vector<DictionaryValue>>();
    template void DictionaryValue::FreePtrAsType<wstring>();
    template void DictionaryValue::FreePtrAsType<Dictionary>();
    template void DictionaryValue::FreePtrAsType<NDArrayView>();

    template const double& TrainingParameterSchedule<double>::operator[](size_t key) const;
}
@ -167,10 +167,6 @@ namespace CNTK
        return var.IsInput() && var.IsSparse();
    }

    std::vector<DictionaryValue> SerializeToVector(const NDArrayViewPtr& viewPtr);

    void DeserializeFromVector(const NDArrayViewPtr& viewPtr, const std::vector<DictionaryValue>& values);

    inline void AddIndentation(std::wstringstream& s, size_t numIndentationSpaces)
    {
        for (size_t i = 0; i < numIndentationSpaces; ++i)
@ -313,4 +309,9 @@ namespace CNTK

        return{ paddedOutputMapCount, kernelShape };
    }

    inline double MomentumPerMB(double momentumPerSample, size_t minibatchSize)
    {
        return std::pow(momentumPerSample, minibatchSize);
    }
}
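A quick worked example of this conversion, since per-sample momentum compounds once per sample across a minibatch:

// Worked example for MomentumPerMB (values arbitrary): a per-sample momentum
// of 0.99 over a 256-sample minibatch gives
// MomentumPerMB(0.99, 256) == std::pow(0.99, 256), which is approximately 0.076.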
@ -1,9 +1,10 @@
#pragma once

#include <exception>
#include <algorithm>
#include "CNTKLibrary.h"
#include <functional>
#include <fstream>
#include <random>

static const double relativeTolerance = 0.001f;
static const double absoluteTolerance = 0.000001f;

@ -21,6 +22,8 @@ inline void FloatingPointVectorCompare(const std::vector<ElementType>& first, co
    }
}

static std::mt19937_64 rng(0);

#pragma warning(push)
#pragma warning(disable: 4996)

@ -40,6 +43,12 @@ static inline int _wunlink(const wchar_t *p)
{
    return unlink(wtocharpath(p).c_str());
}

static inline FILE *_wfopen(const wchar_t *path, const wchar_t *mode)
{
    return fopen(wtocharpath(path).c_str(), wtocharpath(mode).c_str());
}

#endif

template <typename ElementType>

@ -112,3 +121,30 @@ inline float PrevMinibatchTrainingLossValue(const CNTK::Trainer& trainer)
}

#pragma warning(pop)

inline CNTK::NDShape CreateShape(size_t numAxes, size_t maxDimSize)
{
    CNTK::NDShape shape(numAxes);
    for (size_t i = 0; i < numAxes; ++i)
    {
        shape[i] = (rng() % maxDimSize) + 1;
    }

    return shape;
}

inline void OpenStream(std::fstream& stream, const std::wstring& filename, bool readonly)
{
    if (filename.empty())
        throw std::runtime_error("File: filename is empty");

    std::ios_base::openmode mode = std::ios_base::binary;
    mode = mode | (readonly ? std::ios_base::in : std::ios_base::out);

#ifdef _MSC_VER
    stream.open(filename.c_str(), mode);
#else
    stream.open(wtocharpath(filename.c_str()).c_str(), mode);
#endif
    stream.exceptions(std::ios_base::failbit | std::ios_base::badbit);
}
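The serialization tests below combine these helpers into a write-then-read round trip. A minimal sketch under the same assumptions (the function name and file name here are placeholders, not part of the commit):

#include <cassert>
#include <fstream>
#include "CNTKLibrary.h"
#include "Common.h"

// Write a Dictionary to a temporary file via OpenStream, read it back,
// and check the round trip using the Dictionary::operator== added above.
inline void RoundTripDictionary(const CNTK::Dictionary& dict)
{
    std::fstream out;
    OpenStream(out, L"roundtrip.tmp", false); // open for writing
    out << dict;
    out.close();

    CNTK::Dictionary restored;
    std::fstream in;
    OpenStream(in, L"roundtrip.tmp", true);   // open read-only
    in >> restored;

    assert(dict == restored);
}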
@ -0,0 +1,185 @@
#include "CNTKLibrary.h"
#include "Common.h"
#include <string>
#include <random>
#include <initializer_list>


using namespace CNTK;
using namespace std;

static const size_t maxMinibatchSize = 1000;

static const size_t maxNumAxes = 5;
static const size_t maxDimSize = 10;

template <typename ElementType>
void TestUpdate(LearnerPtr& learner, NDShape& shape, size_t numMinibatches, const DeviceDescriptor& device)
{
    auto seed = (unsigned long) rng();
    unordered_map<Parameter, NDArrayViewPtr> gradientValues;
    for (auto i = 0; i < numMinibatches; i++)
    {
        for (auto& parameter : learner->Parameters())
        {
            gradientValues[parameter] = NDArrayView::RandomUniform<ElementType>(shape, -1.0, 1.0, seed + i, device);
        }

        learner->Update(gradientValues, 1);
    }
}

template <typename ElementType>
unordered_set<Parameter> CreateParameters(const NDShape& shape, size_t numParameters, const DeviceDescriptor& device)
{
    unordered_set<Parameter> parameters;
    for (int i = 0; i < numParameters; i++)
    {
        parameters.insert(
            Parameter(NDArrayView::RandomUniform<ElementType>(shape, -1.0, 1.0, i, device),
                      L"parameter_" + to_wstring(i)));
    }
    return parameters;
}

template <typename ElementType>
void TestSGDLearner(size_t numParameters, size_t numMinibatches, const DeviceDescriptor& device)
{
    NDShape shape = CreateShape(rng() % maxNumAxes + 1, maxDimSize);
    auto parameters = CreateParameters<ElementType>(shape, numParameters, device);
    auto learner = SGDLearner(parameters, 0.4);
    TestUpdate<ElementType>(learner, shape, numMinibatches, device);
}

template <typename ElementType>
void TestMomentumSGDLearner(size_t numParameters, size_t numMinibatches, const DeviceDescriptor& device)
{
    NDShape shape = CreateShape(rng() % maxNumAxes + 1, maxDimSize);
    auto parameters = CreateParameters<ElementType>(shape, numParameters, device);
    MomentumsPerSample momentums({ { 1, 1.0 }, { 3, 0.1 }, { 10, 0.01 } }, 2);
    auto learner = MomentumSGDLearner(parameters, vector<double>{0.3, 0.2, 0.1}, momentums);
    TestUpdate<ElementType>(learner, shape, numMinibatches, device);
}

template <typename ElementType>
void TestNesterovLearner(size_t numParameters, size_t numMinibatches, const DeviceDescriptor& device)
{
    NDShape shape = CreateShape(rng() % maxNumAxes + 1, maxDimSize);
    auto parameters = CreateParameters<ElementType>(shape, numParameters, device);
    auto learner = NesterovLearner(parameters, LearningRatesPerSample({ { 1, 0.5 }, { 10, 0.25 }, { 20, 0.125 } }, 3), 0.2);
    TestUpdate<ElementType>(learner, shape, numMinibatches, device);
}

template <typename ElementType>
void TestAdaGradLearner(size_t numParameters, size_t numMinibatches, const DeviceDescriptor& device)
{
    NDShape shape = CreateShape(rng() % maxNumAxes + 1, maxDimSize);
    auto parameters = CreateParameters<ElementType>(shape, numParameters, device);
    auto learner = AdaGradLearner(parameters, { vector<double>{0.5, 0.4, 0.3, 0.2, 0.1}, 2 }, true);
    TestUpdate<ElementType>(learner, shape, numMinibatches, device);
}

template <typename ElementType>
void TestFSAdaGradLearner(size_t numParameters, size_t numMinibatches, const DeviceDescriptor& device)
{
    NDShape shape = CreateShape(rng() % maxNumAxes + 1, maxDimSize);
    auto parameters = CreateParameters<ElementType>(shape, numParameters, device);
    auto learner = FSAdaGradLearner(parameters, vector<double>{ 0.5 }, vector<double>{0.05});
    TestUpdate<ElementType>(learner, shape, numMinibatches, device);
}

template <typename ElementType>
void TestRMSPropLearner(size_t numParameters, size_t numMinibatches, const DeviceDescriptor& device)
{
    NDShape shape = CreateShape(rng() % maxNumAxes + 1, maxDimSize);
    auto parameters = CreateParameters<ElementType>(shape, numParameters, device);
    auto learner = RMSPropLearner(parameters, { { 3, 0.7 }, { 1, 0.2 } }, 0.01, 0.02, 0.03, 0.1, 0.001);
    TestUpdate<ElementType>(learner, shape, numMinibatches, device);
}
void TestTrainingParametersSchedule()
{
    LearningRatesPerSample schedule1 = 0.5;
    assert(schedule1[0] == 0.5);
    assert(schedule1[1] == 0.5);
    assert(schedule1[100] == 0.5);

    LearningRatesPerSample schedule2 = vector<double>{ 0.5 };
    assert(schedule2[0] == 0.5);
    assert(schedule2[10] == 0.5);
    assert(schedule2[100] == 0.5);

    LearningRatesPerSample schedule3 = vector<double>{ 0.5, 0.3 };
    assert(schedule3[0] == 0.5);
    assert(schedule3[1] == 0.3);
    assert(schedule3[100] == 0.3);

    LearningRatesPerSample schedule4 = { vector<double>{ 0.5 }, 10 };
    assert(schedule4[0] == 0.5);
    assert(schedule4[10] == 0.5);
    assert(schedule4[100] == 0.5);

    LearningRatesPerSample schedule5 = { vector<double>{ 0.5, 0.3, 0.2 }, 10 };
    assert(schedule5[0] == 0.5);
    assert(schedule5[9] == 0.5);
    assert(schedule5[10] == 0.3);
    assert(schedule5[19] == 0.3);
    assert(schedule5[20] == 0.2);
    assert(schedule5[100] == 0.2);

    LearningRatesPerSample schedule6 = { {1, 0.5} };
    assert(schedule6[0] == 0.5);
    assert(schedule6[10] == 0.5);
    assert(schedule6[100] == 0.5);

    LearningRatesPerSample schedule7 = { { 1, 0.5 }, { 1, 0.3 }, {1, 0.2} };
    assert(schedule7[0] == 0.5);
    assert(schedule7[1] == 0.3);
    assert(schedule7[2] == 0.2);
    assert(schedule7[100] == 0.2);

    LearningRatesPerSample schedule8({ { 1, 0.5 }, { 1, 0.3 }, { 1, 0.2 } }, 10);
    assert(schedule8[0] == 0.5);
    assert(schedule8[9] == 0.5);
    assert(schedule8[10] == 0.3);
    assert(schedule8[19] == 0.3);
    assert(schedule8[20] == 0.2);
    assert(schedule8[100] == 0.2);

    LearningRatesPerSample schedule9 = { { 3, 0.5 }, { 2, 0.3 }, {1, 0.2} };
    assert(schedule9[0] == 0.5);
    assert(schedule9[2] == 0.5);
    assert(schedule9[3] == 0.3);
    assert(schedule9[4] == 0.3);
    assert(schedule9[5] == 0.2);
    assert(schedule9[100] == 0.2);

    LearningRatesPerSample schedule10({ { 3, 0.5 }, { 2, 0.3 }, { 1, 0.2 } }, 10);
    assert(schedule10[0] == 0.5);
    assert(schedule10[29] == 0.5);
    assert(schedule10[30] == 0.3);
    assert(schedule10[49] == 0.3);
    assert(schedule10[50] == 0.2);
    assert(schedule10[100] == 0.2);
}
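Reading the asserts as a specification (inferred from the tests rather than stated in the diff):

// A schedule { {n1, v1}, {n2, v2}, ... } with unit size u applies v1 to the
// first n1*u samples, v2 to the next n2*u samples, and the last value to every
// sample after the listed spans. schedule10, for instance, expands to the
// cumulative bounds 30 -> 0.5 and 50 -> 0.3, with 0.2 from sample 50 onward.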

void LearnerTests()
{
    TestTrainingParametersSchedule();

    TestSGDLearner<double>(5, 3, DeviceDescriptor::CPUDevice());

#ifndef CPUONLY
    TestMomentumSGDLearner<float>(3, 11, DeviceDescriptor::GPUDevice(0));
    TestNesterovLearner<float>(1, 20, DeviceDescriptor::GPUDevice(0));
#else
    TestMomentumSGDLearner<float>(3, 11, DeviceDescriptor::CPUDevice());
    TestNesterovLearner<float>(1, 20, DeviceDescriptor::CPUDevice());
#endif

    TestAdaGradLearner<double>(2, 10, DeviceDescriptor::CPUDevice());

    TestFSAdaGradLearner<double>(10, 2, DeviceDescriptor::CPUDevice());
    TestRMSPropLearner<float>(3, 3, DeviceDescriptor::CPUDevice());
}
@ -9,6 +9,8 @@ void FeedForwardTests();
void RecurrentFunctionTests();
void TrainerTests();
void TestCifarResnet();
void SerializationTests();
void LearnerTests();

int main()
{

@ -19,6 +21,8 @@ int main()
    RecurrentFunctionTests();

    TrainerTests();
    SerializationTests();
    LearnerTests();

    TestCifarResnet();
@ -0,0 +1,224 @@
#include "CNTKLibrary.h"
#include "Common.h"
#include <string>
#include <random>
#include <vector>


using namespace CNTK;
using namespace std;

using namespace Microsoft::MSR::CNTK;

static const size_t maxNestingDepth = 10;
static const size_t maxNestedDictSize = 10;
static const size_t maxNestedVectorSize = 100;
static const size_t maxNDShapeSize = 100;

static const size_t maxNumAxes = 10;
static const size_t maxDimSize = 15;


static size_t keyCounter = 0;
static uniform_real_distribution<double> double_dist = uniform_real_distribution<double>();
static uniform_real_distribution<float> float_dist = uniform_real_distribution<float>();

static std::wstring tempFilePath = L"serialization.tmp";

DictionaryValue CreateDictionaryValue(DictionaryValue::Type, size_t);

DictionaryValue::Type GetType()
{
    return DictionaryValue::Type(rng() % (unsigned int) DictionaryValue::Type::NDArrayView + 1);
}

void AddKeyValuePair(Dictionary& dict, size_t depth)
{
    auto type = GetType();
    while (depth >= maxNestingDepth &&
           (type == DictionaryValue::Type::Vector ||
            type == DictionaryValue::Type::Dictionary))
    {
        type = GetType();
    }
    dict[L"key" + to_wstring(keyCounter++)] = CreateDictionaryValue(type, depth);
}
Dictionary CreateDictionary(size_t size, size_t depth = 0)
{
    Dictionary dict;
    for (auto i = 0; i < size; ++i)
    {
        AddKeyValuePair(dict, depth);
    }

    return dict;
}

template <typename ElementType>
NDArrayViewPtr CreateNDArrayView(size_t numAxes, const DeviceDescriptor& device)
{
    NDShape viewShape(numAxes);
    for (size_t i = 0; i < numAxes; ++i)
        viewShape[i] = (rng() % maxDimSize) + 1;

    return NDArrayView::RandomUniform<ElementType>(viewShape, ElementType(-4.0), ElementType(19.0), 1, device);
}

NDArrayViewPtr CreateNDArrayView()
{
    auto numAxes = (rng() % maxNumAxes) + 1;
    auto device = DeviceDescriptor::CPUDevice();
#ifndef CPUONLY
    if (rng() % 2 == 0)
    {
        device = DeviceDescriptor::GPUDevice(0);
    }
#endif

    return (rng() % 2 == 0) ?
        CreateNDArrayView<float>(numAxes, device) : CreateNDArrayView<double>(numAxes, device);
}

DictionaryValue CreateDictionaryValue(DictionaryValue::Type type, size_t depth)
{
    switch (type)
    {
    case DictionaryValue::Type::Bool:
        return DictionaryValue(!!(rng() % 2));
    case DictionaryValue::Type::SizeT:
        return DictionaryValue(rng());
    case DictionaryValue::Type::Float:
        return DictionaryValue(float_dist(rng));
    case DictionaryValue::Type::Double:
        return DictionaryValue(double_dist(rng));
    case DictionaryValue::Type::String:
        return DictionaryValue(to_wstring(rng()));
    case DictionaryValue::Type::NDShape:
    {
        size_t size = rng() % maxNDShapeSize + 1;
        NDShape shape(size);
        for (auto i = 0; i < size; i++)
        {
            shape[i] = rng();
        }
        return DictionaryValue(shape);
    }
    case DictionaryValue::Type::Vector:
    {
        auto type = GetType();
        size_t size = rng() % maxNestedVectorSize + 1;
        vector<DictionaryValue> vector(size);
        for (auto i = 0; i < size; i++)
        {
            vector[i] = CreateDictionaryValue(type, depth + 1);
        }
        return DictionaryValue(vector);
    }
    case DictionaryValue::Type::Dictionary:
        return DictionaryValue(CreateDictionary(rng() % maxNestedDictSize + 1, depth + 1));
    case DictionaryValue::Type::NDArrayView:
        return DictionaryValue(*(CreateNDArrayView()));
    default:
        NOT_IMPLEMENTED;
    }
}
void TestDictionarySerialization(size_t dictSize)
{
    if ((_wunlink(tempFilePath.c_str()) != 0) && (errno != ENOENT))
        throw std::runtime_error("Error deleting temporary test file 'serialization.tmp'.");

    Dictionary originalDict = CreateDictionary(dictSize);

    {
        fstream stream;
        OpenStream(stream, tempFilePath, false);
        stream << originalDict;
        stream.flush();
    }

    Dictionary deserializedDict;

    {
        fstream stream;
        OpenStream(stream, tempFilePath, true);
        stream >> deserializedDict;
    }

    if (originalDict != deserializedDict)
        throw std::runtime_error("TestDictionarySerialization: original and deserialized dictionaries are not identical.");
}
template <typename ElementType>
void TestLearnerSerialization(int numParameters, const DeviceDescriptor& device)
{
    if ((_wunlink(tempFilePath.c_str()) != 0) && (errno != ENOENT))
        throw std::runtime_error("Error deleting temporary test file 'serialization.tmp'.");

    NDShape shape = CreateShape(5, maxDimSize);

    unordered_set<Parameter> parameters;
    unordered_map<Parameter, NDArrayViewPtr> gradientValues;
    for (int i = 0; i < numParameters; i++)
    {
        Parameter parameter(NDArrayView::RandomUniform<ElementType>(shape, -0.5, 0.5, i, device), L"parameter_" + to_wstring(i));
        parameters.insert(parameter);
        gradientValues[parameter] = NDArrayView::RandomUniform<ElementType>(shape, -0.5, 0.5, numParameters + i, device);
    }

    auto learner1 = SGDLearner(parameters, 0.05);

    learner1->Update(gradientValues, 1);

    {
        auto checkpoint = learner1->GetCheckpointState();
        fstream stream;
        OpenStream(stream, tempFilePath, false);
        stream << checkpoint;
        stream.flush();
    }

    auto learner2 = SGDLearner(parameters, 0.05);

    {
        Dictionary checkpoint;
        fstream stream;
        OpenStream(stream, tempFilePath, true);
        stream >> checkpoint;
        learner2->RestoreFromCheckpoint(checkpoint);
    }

    int i = 0;
    for (auto parameter : parameters)
    {
        gradientValues[parameter] = NDArrayView::RandomUniform<ElementType>(shape, -0.5, 0.5, 2 * numParameters + i, device);
        i++;
    }

    learner1->Update(gradientValues, 1);
    learner2->Update(gradientValues, 1);

    auto checkpoint1 = learner1->GetCheckpointState();
    auto checkpoint2 = learner2->GetCheckpointState();

    if (checkpoint1 != checkpoint2)
        throw std::runtime_error("TestLearnerSerialization: the original learner and the learner restored from a checkpoint diverge.");
}
void SerializationTests()
{
    TestDictionarySerialization(4);
    TestDictionarySerialization(8);
    TestDictionarySerialization(16);

    TestLearnerSerialization<float>(5, DeviceDescriptor::CPUDevice());
    TestLearnerSerialization<double>(10, DeviceDescriptor::CPUDevice());

#ifndef CPUONLY
    TestLearnerSerialization<float>(5, DeviceDescriptor::GPUDevice(0));
    TestLearnerSerialization<double>(10, DeviceDescriptor::GPUDevice(0));
#endif
}
@ -110,6 +110,8 @@
  </ItemDefinitionGroup>
  <ItemGroup>
    <ClCompile Include="CifarResNet.cpp" />
    <ClCompile Include="LearnerTests.cpp" />
    <ClCompile Include="SerializationTests.cpp" />
    <ClCompile Include="FeedForwardTests.cpp" />
    <ClCompile Include="Main.cpp" />
    <ClCompile Include="NDArrayViewTests.cpp" />

@ -36,6 +36,12 @@
    <ClCompile Include="CifarResNet.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="SerializationTests.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="LearnerTests.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="Common.h">