This commit is contained in:
Frank Seide 2017-03-02 11:46:22 -08:00
Родитель c1413f5bd2 de6371bb0d
Коммит 736d2f006c
112 изменённых файлов: 3196 добавлений и 1359 удалений

1
.gitignore поставляемый
Просмотреть файл

@ -186,6 +186,7 @@ core
# prebuild file
Source/CNTK/buildinfo.h
Source/CNTK/buildinfo.h$$
Source/CNTKv2LibraryDll/buildinfo.h
# Unit test output
Tests/UnitTests/ReaderTests/Control/**/*_Output.txt

Просмотреть файл

@ -1497,6 +1497,15 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "V2LibraryEndToEndTests", "T
{E5606ECE-48CA-4464-BB12-09D81D02B9EF} = {E5606ECE-48CA-4464-BB12-09D81D02B9EF}
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Extensibility", "Extensibility", "{3BF56127-6F0F-41CF-BFCE-31165A0A5E73}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "CPP", "CPP", "{7A27E076-296E-41A8-BA76-164071251372}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CPPExtensibilityExamples", "Examples\Extensibility\CPP\CPPExtensibilityExamples.vcxproj", "{40A8CC31-8C08-4156-AE08-E8C0FADC3509}"
ProjectSection(ProjectDependencies) = postProject
{E5606ECE-48CA-4464-BB12-09D81D02B9EF} = {E5606ECE-48CA-4464-BB12-09D81D02B9EF}
EndProjectSection
EndProject
Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "PythonExamples", "Examples\PythonExamples.pyproj", "{292FF4EE-D9DD-4BA7-85F7-6A22148D1E01}"
EndProject
Global
@ -1965,6 +1974,16 @@ Global
{743FC7AA-3884-4C96-983A-A33FD6C56227}.Release_NoOpt|x64.Build.0 = Release_NoOpt|x64
{743FC7AA-3884-4C96-983A-A33FD6C56227}.Release|x64.ActiveCfg = Release|x64
{743FC7AA-3884-4C96-983A-A33FD6C56227}.Release|x64.Build.0 = Release|x64
{40A8CC31-8C08-4156-AE08-E8C0FADC3509}.Debug_CpuOnly|x64.ActiveCfg = Debug_CpuOnly|x64
{40A8CC31-8C08-4156-AE08-E8C0FADC3509}.Debug_CpuOnly|x64.Build.0 = Debug_CpuOnly|x64
{40A8CC31-8C08-4156-AE08-E8C0FADC3509}.Debug|x64.ActiveCfg = Debug|x64
{40A8CC31-8C08-4156-AE08-E8C0FADC3509}.Debug|x64.Build.0 = Debug|x64
{40A8CC31-8C08-4156-AE08-E8C0FADC3509}.Release_CpuOnly|x64.ActiveCfg = Release_CpuOnly|x64
{40A8CC31-8C08-4156-AE08-E8C0FADC3509}.Release_CpuOnly|x64.Build.0 = Release_CpuOnly|x64
{40A8CC31-8C08-4156-AE08-E8C0FADC3509}.Release_NoOpt|x64.ActiveCfg = Release_NoOpt|x64
{40A8CC31-8C08-4156-AE08-E8C0FADC3509}.Release_NoOpt|x64.Build.0 = Release_NoOpt|x64
{40A8CC31-8C08-4156-AE08-E8C0FADC3509}.Release|x64.ActiveCfg = Release|x64
{40A8CC31-8C08-4156-AE08-E8C0FADC3509}.Release|x64.Build.0 = Release|x64
{292FF4EE-D9DD-4BA7-85F7-6A22148D1E01}.Debug_CpuOnly|x64.ActiveCfg = Debug|Any CPU
{292FF4EE-D9DD-4BA7-85F7-6A22148D1E01}.Debug|x64.ActiveCfg = Debug|Any CPU
{292FF4EE-D9DD-4BA7-85F7-6A22148D1E01}.Release_CpuOnly|x64.ActiveCfg = Release|Any CPU
@ -2176,6 +2195,9 @@ Global
{5CC403B9-2405-4FFB-A73B-DAE0DC986C76} = {CE223840-1DEE-4849-B530-F06BEE05BAA8}
{D771A06D-CC25-4582-B5CD-D2A4782BB005} = {05E45AF7-C069-4057-BC16-0A532D068CE4}
{743FC7AA-3884-4C96-983A-A33FD6C56227} = {43ED3FD0-824C-4201-BD96-B824DF959ADC}
{3BF56127-6F0F-41CF-BFCE-31165A0A5E73} = {47755F2E-D674-4175-9E38-8EA053455072}
{7A27E076-296E-41A8-BA76-164071251372} = {3BF56127-6F0F-41CF-BFCE-31165A0A5E73}
{40A8CC31-8C08-4156-AE08-E8C0FADC3509} = {7A27E076-296E-41A8-BA76-164071251372}
{292FF4EE-D9DD-4BA7-85F7-6A22148D1E01} = {47755F2E-D674-4175-9E38-8EA053455072}
EndGlobalSection
EndGlobal

Просмотреть файл

@ -0,0 +1,125 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- MSBuild project for the CPPExtensibilityExamples console app: builds Main.cpp
     against the CNTK v2 C++ API (CNTKLibrary-2.0.lib). Shared build settings come
     from the solution-level CNTK.Cpp.props import below, which defines the
     $(DebugBuild)/$(ReleaseBuild)/$(CpuOnlyBuild) switches used in the conditions. -->
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- Five x64-only configurations; the *_CpuOnly ones build without GPU support. -->
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release_NoOpt|x64">
<Configuration>Release_NoOpt</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug_CpuOnly|x64">
<Configuration>Debug_CpuOnly</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release_CpuOnly|x64">
<Configuration>Release_CpuOnly</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<ItemGroup>
<ClCompile Include="Main.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="UserMatrixMultiplicationOp.h" />
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{40A8CC31-8C08-4156-AE08-E8C0FADC3509}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>CPPExtensibilityExamples</RootNamespace>
<ProjectName>CPPExtensibilityExamples</ProjectName>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<!-- Solution-wide CNTK C++ defaults; defines DebugBuild/ReleaseBuild/CpuOnlyBuild. -->
<Import Project="$(SolutionDir)\CNTK.Cpp.props" />
<PropertyGroup Condition="$(DebugBuild)" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v140</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="$(ReleaseBuild)" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v140</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings" />
<ImportGroup Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="$(DebugBuild)">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="$(ReleaseBuild)">
<LinkIncremental>false</LinkIncremental>
<ExecutablePath>$(ExecutablePath)</ExecutablePath>
</PropertyGroup>
<!-- Settings common to all configurations: headers from the v2 library API,
     libraries from the build output directory. -->
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)Source\CNTKv2LibraryDll\API</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(OutDir);$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(DebugBuild)">
<ClCompile>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<TreatWarningAsError>true</TreatWarningAsError>
<AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
<RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">MultiThreadedDebug</RuntimeLibrary>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>CNTKLibrary-2.0.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(ReleaseBuild)">
<ClCompile>
<WarningLevel>Level4</WarningLevel>
<PrecompiledHeader>
</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<AdditionalOptions>/d2Zi+ %(AdditionalOptions)</AdditionalOptions>
<TreatWarningAsError>true</TreatWarningAsError>
<RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MultiThreaded</RuntimeLibrary>
<RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='Release_NoOpt|x64'">MultiThreaded</RuntimeLibrary>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>CNTKLibrary-2.0.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<!-- CPU-only builds add the CPUONLY define on top of the Debug/Release groups above. -->
<ItemDefinitionGroup Condition="$(CpuOnlyBuild)">
<ClCompile>
<PreprocessorDefinitions>CPUONLY;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='Debug_CpuOnly|x64'">MultiThreadedDebug</RuntimeLibrary>
<RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='Release_CpuOnly|x64'">MultiThreaded</RuntimeLibrary>
</ClCompile>
</ItemDefinitionGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

Просмотреть файл

@ -0,0 +1,27 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Visual Studio filter file: maps this project's files into the standard
     Source Files / Header Files / Resource Files groups in Solution Explorer.
     Purely cosmetic for the IDE; does not affect the build. -->
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="Main.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="UserMatrixMultiplicationOp.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
</Project>

Просмотреть файл

@ -0,0 +1,6 @@
// Entry point for the CPP extensibility examples: runs the user-defined
// matrix-multiplication op example implemented in UserMatrixMultiplicationOp.h.
#include "UserMatrixMultiplicationOp.h"

// BUG FIX: 'void main()' is non-conforming C++; the standard requires main to
// return int. Returning 0 signals success to the calling environment.
int main()
{
    UserTimesFunctionExample();
    return 0;
}

Просмотреть файл

@ -0,0 +1,187 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#include "CNTKLibrary.h"

#include <stdexcept>
using namespace CNTK;
// User-defined CNTK Function implementing a dense float matrix product
// (left * right) with a CPU reference implementation of forward and of the
// gradient w.r.t. the left operand. Demonstrates the v2 extensibility API.
class UserTimesFunction final : public Function
{
public:
    // Factory: wraps a new UserTimesFunction in a composite so it can be
    // combined with built-in CNTK functions.
    static FunctionPtr Create(const Variable& leftOperand, const Variable& rightOperand, const std::wstring& name)
    {
        return AsComposite(MakeSharedObject<UserTimesFunction>(leftOperand, rightOperand, name));
    }

    UserTimesFunction(const Variable& leftOperand, const Variable& rightOperand, const std::wstring& name)
        : Function({ leftOperand, rightOperand }, Dictionary(), name)
    {}

private:
    // Naive triple-loop float GEMM: outputMatrix += leftMatrix * rightMatrix,
    // with rightMatrix optionally read transposed. All operands must be dense;
    // values are laid out in column-major order.
    static void MatrixMultiply(const NDArrayViewPtr& leftMatrix, const NDArrayViewPtr& rightMatrix, NDArrayViewPtr& outputMatrix, bool transposeRight = false)
    {
        // First and last shape dimensions are rows and columns respectively.
        auto GetNumRowsAndCols = [](const NDShape& shape, bool transpose = false) {
            auto numRows = shape[0];
            auto numCols = shape[shape.Rank() - 1];
            if (transpose)
                std::swap(numRows, numCols);

            return std::make_pair(numRows, numCols);
        };

        size_t leftNumRows, leftNumCols;
        std::tie(leftNumRows, leftNumCols) = GetNumRowsAndCols(leftMatrix->Shape());

        size_t rightNumRows, rightNumCols;
        std::tie(rightNumRows, rightNumCols) = GetNumRowsAndCols(rightMatrix->Shape(), transposeRight);

        auto numOutRows = leftNumRows;
        auto K = leftNumCols; // shared (inner) dimension
        auto numOutCols = rightNumCols;

        assert(!leftMatrix->IsSparse() && !rightMatrix->IsSparse() && !outputMatrix->IsSparse());
        assert(K == rightNumRows);
        assert((outputMatrix->Shape()[0] == numOutRows) && (outputMatrix->Shape()[1] == numOutCols));

        // Zero the accumulator before the += loop below.
        outputMatrix->SetValue(0.0f);

        // The operands' values are in column-major layout: element (row, col)
        // lives at col * numRows + row.
        auto Offset = [](size_t rowIdx, size_t colIdx, const NDShape& matrixShape, bool transpose = false) {
            if (transpose)
                std::swap(rowIdx, colIdx);

            return (colIdx * matrixShape[0]) + rowIdx;
        };

        auto leftBuffer = leftMatrix->DataBuffer<float>();
        auto rightBuffer = rightMatrix->DataBuffer<float>();
        auto outBuffer = outputMatrix->WritableDataBuffer<float>();
        for (size_t j = 0; j < numOutCols; ++j)
            for (size_t k = 0; k < K; ++k)
                for (size_t i = 0; i < numOutRows; ++i)
                    outBuffer[Offset(i, j, outputMatrix->Shape())] += leftBuffer[Offset(i, k, leftMatrix->Shape())] * rightBuffer[Offset(k, j, rightMatrix->Shape(), transposeRight)];
    }

    // Forward pass: output = left * right. Lazily allocates the output Value
    // and saves the right operand's Value for use in Backward.
    BackPropStatePtr Forward(const std::vector<ValuePtr>& inputValues,
                             std::unordered_map<Variable, ValuePtr>& outputs,
                             const DeviceDescriptor& computeDevice,
                             const std::unordered_set<Variable>& /*outputsToRetainBackwardStateFor*/) override
    {
        auto leftOperandData = inputValues[0]->Data();
        auto rightOperandData = inputValues[1]->Data();

        // Allocate outputValue if needed
        auto& outputValue = outputs[this->Output()];
        if (outputValue == nullptr)
        {
            auto numOutRows = leftOperandData->Shape()[0];
            auto numOutCols = rightOperandData->Shape()[rightOperandData->Shape().Rank() - 1];
            outputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(DataType::Float, NDShape({ numOutRows , numOutCols }), computeDevice));
        }

        auto outputData = outputValue->Data();
        MatrixMultiply(leftOperandData, rightOperandData, outputData);

        // Let's save the right input's Value in the BackPropState to be used in the backward pass for computing gradients
        return MakeSharedObject<BackPropState>(this->shared_from_this(), computeDevice, std::unordered_map<Variable, ValuePtr>({ {Inputs()[1], inputValues[1] } }));
    }

    // Backward pass: gradient w.r.t. the left operand only, computed as
    // rootGradient * right^T. Gradient w.r.t. the right operand is unsupported.
    void Backward(const BackPropStatePtr& state,
                  const std::unordered_map<Variable, ValuePtr>& rootGradientValues,
                  std::unordered_map<Variable, ValuePtr>& backPropagatedGradientValuesForInputs) override
    {
        auto leftInputVariable = Inputs()[0];
        auto rightInputVariable = Inputs()[1];
        // BUG FIX: the exception was previously constructed but never thrown,
        // silently ignoring an unsupported gradient request.
        if (backPropagatedGradientValuesForInputs.find(rightInputVariable) != backPropagatedGradientValuesForInputs.end())
            throw std::runtime_error("UserTimesFunction does not support computing gradient wrt right operand");

        auto rightInputData = state->SavedForwardPropValues().at(rightInputVariable)->Data();

        // Allocate input gradient Value if needed
        auto& inputGradientValue = backPropagatedGradientValuesForInputs[leftInputVariable];
        if (inputGradientValue == nullptr)
            inputGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(DataType::Float, leftInputVariable.Shape(), state->Device()));

        auto rootGradientData = rootGradientValues.at(this->Output())->Data();
        auto inputGradientData = inputGradientValue->Data();
        MatrixMultiply(rootGradientData, rightInputData, inputGradientData, /*transposeRight =*/ true);
    }

    const std::wstring& OpName() const override
    {
        static const std::wstring opName = L"UserTimesOp";
        return opName;
    }

    Dictionary Serialize() const override { NOT_IMPLEMENTED; }
    size_t CurrentVersion() const override { NOT_IMPLEMENTED; }

    // Validates operand shapes and declares a single 1D output of size
    // leftOperand.Shape()[0], inheriting the right operand's dynamic axes.
    void InferOutputs(std::vector<Variable>& outputs) override
    {
        auto leftOperand = Inputs()[0];
        auto rightOperand = Inputs()[1];

        // BUG FIX: all three validation errors below were constructed but
        // never thrown, so invalid operands were silently accepted.
        if (leftOperand.Shape().Rank() != 2)
            throw std::runtime_error("Left operand must be 2D");

        if (rightOperand.Shape().Rank() != 1)
            throw std::runtime_error("Right operand must be 1D");

        if (!leftOperand.DynamicAxes().empty())
            throw std::runtime_error("Left operand must not have dynamic axes (i.e. should not be minibatch data, but be a Parameter of fixed size)");

        outputs.push_back(OutputVariable(NDShape({ leftOperand.Shape()[0] }), leftOperand.GetDataType(), rightOperand.DynamicAxes()));
    }
};
#pragma warning(push)
#pragma warning(disable: 4459)

// End-to-end check of UserTimesFunction against CNTK's built-in Times op:
// runs Forward and Backward through both implementations on the same random
// batch and verifies that the outputs and the W-gradients agree within
// tolerance. Throws std::runtime_error on mismatch.
void UserTimesFunctionExample()
{
    auto device = DeviceDescriptor::CPUDevice();
    size_t outDim = 15;
    size_t inDim = 10;
    auto W = Parameter(NDShape({ outDim, inDim }), DataType::Float, GlorotUniformInitializer(), device);
    auto x = InputVariable(NDShape({ inDim }), DataType::Float, { Axis::DefaultBatchAxis() });
    auto userDefinedTimes = UserTimesFunction::Create(W, x, L"UserDefinedTimes");

    // Random input batch in [0, 1]; root gradient of all ones.
    size_t batchSize = 3;
    std::vector<float> inputData(inDim * batchSize);
    for (size_t i = 0; i < inputData.size(); ++i)
        inputData[i] = (float)rand() / RAND_MAX;

    auto inputDataValue = Value::CreateBatch(x.Shape(), inputData, device);

    std::vector<float> rootGradientData(outDim * batchSize, 1);
    auto rootGradientValue = Value::CreateBatch(userDefinedTimes->Output().Shape(), rootGradientData, device);

    std::unordered_map<Variable, ValuePtr> outputValues = { { userDefinedTimes->Output(), nullptr } };
    auto backPropState = userDefinedTimes->Forward({ { x, inputDataValue } }, outputValues, device, { userDefinedTimes->Output() });
    std::unordered_map<Variable, ValuePtr> inputGradientValues = { { W, nullptr } };
    userDefinedTimes->Backward(backPropState, { { userDefinedTimes->Output(), rootGradientValue } }, inputGradientValues);
    auto userDefinedTimesOutputValue = outputValues[userDefinedTimes->Output()];
    auto userDefinedTimesInputGradientValue = inputGradientValues[W];

    // Compare against the CNTK built-in implementation
    auto builtInTimes = Times(W, x, L"BuiltInTimes");
    outputValues = { { builtInTimes->Output(), nullptr } };
    backPropState = builtInTimes->Forward({ { x, inputDataValue } }, outputValues, device, { builtInTimes->Output() });
    inputGradientValues = { { W, nullptr } };
    builtInTimes->Backward(backPropState, { { builtInTimes->Output(), rootGradientValue } }, inputGradientValues);
    auto builtInTimesOutputValue = outputValues[builtInTimes->Output()];
    auto builtInTimesInputGradientValue = inputGradientValues[W];

    const double relativeTolerance = 0.001f;
    const double absoluteTolerance = 0.000001f;

    // BUG FIX: both exceptions below were constructed but never thrown, so
    // mismatches went unreported; the second message also wrongly said
    // "Forward" while checking the Backward (gradient) result.
    if (!Internal::AreEqual(*userDefinedTimesOutputValue, *builtInTimesOutputValue, relativeTolerance, absoluteTolerance))
        throw std::runtime_error("UserTimesOp's Forward result does not match built-in result");

    if (!Internal::AreEqual(*userDefinedTimesInputGradientValue, *builtInTimesInputGradientValue, relativeTolerance, absoluteTolerance))
        throw std::runtime_error("UserTimesOp's Backward result does not match built-in result");
}

#pragma warning(pop)

Просмотреть файл

@ -13,6 +13,7 @@ import cntk
import _cntk_py
from cntk.utils import *
from cntk.training_session import *
from cntk.ops import *
from cntk.distributed import data_parallel_distributed_learner, Communicator
from cntk.io import ImageDeserializer, MinibatchSource, StreamDef, StreamDefs, FULL_DATA_SWEEP
@ -140,7 +141,7 @@ def create_alexnet():
}
# Create trainer
def create_trainer(network, epoch_size, num_quantization_bits):
def create_trainer(network, epoch_size, num_quantization_bits, printer):
# Set learning parameters
lr_per_mb = [0.01]*25 + [0.001]*25 + [0.0001]*25 + [0.00001]*25 + [0.000001]
lr_schedule = cntk.learning_rate_schedule(lr_per_mb, unit=cntk.learner.UnitType.minibatch, epoch_size=epoch_size)
@ -156,10 +157,10 @@ def create_trainer(network, epoch_size, num_quantization_bits):
distributed_after=0)
# Create trainer
return cntk.Trainer(network['output'], (network['ce'], network['pe']), parameter_learner)
return cntk.Trainer(network['output'], (network['ce'], network['pe']), parameter_learner, printer)
# Train and test
def train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, restore):
def train_and_test(network, trainer, train_source, test_source, minibatch_size, epoch_size, restore):
# define mapping from intput streams to network inputs
input_map = {
@ -167,23 +168,15 @@ def train_and_test(network, trainer, train_source, test_source, progress_printer
network['label']: train_source.streams.labels
}
training_session = cntk.training_session(
training_minibatch_source = train_source,
trainer = trainer,
model_inputs_to_mb_source_mapping = input_map,
mb_size_schedule = cntk.minibatch_size_schedule(minibatch_size),
progress_printer = progress_printer,
# checkpoint_frequency = epoch_size,
checkpoint_filename = os.path.join(model_path, model_name),
# save_all_checkpoints = True,
progress_frequency = epoch_size,
cv_source = test_source,
cv_mb_size_schedule = cntk.minibatch_size_schedule(minibatch_size),
# cv_frequency = epoch_size,
restore = restore)
# Train all minibatches
training_session.train()
# Train all minibatches
training_session(
trainer=trainer, mb_source = train_source,
var_to_stream = input_map,
mb_size = minibatch_size,
progress_frequency=epoch_size,
checkpoint_config = CheckpointConfig(filename=os.path.join(model_path, model_name), restore=restore),
cv_config= CrossValidationConfig(source=test_source, mb_size=minibatch_size)
).train()
# Train and evaluate the network.
def alexnet_train_and_eval(train_data, test_data, num_quantization_bits=32, minibatch_size=256, epoch_size = 1281167, max_epochs=112,
@ -199,10 +192,10 @@ def alexnet_train_and_eval(train_data, test_data, num_quantization_bits=32, mini
num_epochs=max_epochs)
network = create_alexnet()
trainer = create_trainer(network, epoch_size, num_quantization_bits)
trainer = create_trainer(network, epoch_size, num_quantization_bits, progress_printer)
train_source = create_image_mb_source(train_data, True, total_number_of_samples=max_epochs * epoch_size)
test_source = create_image_mb_source(test_data, False, total_number_of_samples=FULL_DATA_SWEEP)
train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, restore)
train_and_test(network, trainer, train_source, test_source, minibatch_size, epoch_size, restore)
if __name__=='__main__':

Просмотреть файл

@ -12,6 +12,7 @@ import numpy as np
import cntk
import _cntk_py
import cntk.io.transforms as xforms
from cntk.training_session import *
# default Paths relative to current python file.
abs_path = os.path.dirname(os.path.abspath(__file__))
@ -90,7 +91,7 @@ def create_conv_network():
# Create trainer
def create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up):
def create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up, progress_writers):
# Set learning parameters
lr_per_sample = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625]
lr_schedule = cntk.learning_rate_schedule(lr_per_sample, unit=cntk.learner.UnitType.sample, epoch_size=epoch_size)
@ -112,10 +113,10 @@ def create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_
parameter_learner = cntk.distributed.data_parallel_distributed_learner(local_learner, num_quantization_bits=num_quantization_bits, distributed_after=warm_up)
# Create trainer
return cntk.Trainer(network['output'], (network['ce'], network['pe']), parameter_learner)
return cntk.Trainer(network['output'], (network['ce'], network['pe']), parameter_learner, progress_writers)
# Train and test
def train_and_test(network, trainer, train_source, test_source, progress_writers, minibatch_size, epoch_size, restore, profiling=False):
def train_and_test(network, trainer, train_source, test_source, minibatch_size, epoch_size, restore, profiling=False):
# define mapping from intput streams to network inputs
input_map = {
@ -123,26 +124,20 @@ def train_and_test(network, trainer, train_source, test_source, progress_writers
network['label']: train_source.streams.labels
}
training_session = cntk.training_session(
training_minibatch_source = train_source,
trainer = trainer,
model_inputs_to_mb_source_mapping = input_map,
mb_size_schedule = cntk.minibatch_size_schedule(minibatch_size),
progress_printer = progress_writers,
checkpoint_frequency = epoch_size,
checkpoint_filename = os.path.join(model_path, "ConvNet_CIFAR10_DataAug"),
# save_all_checkpoints = False,
progress_frequency=epoch_size,
cv_source = test_source,
cv_mb_size_schedule=cntk.minibatch_size_schedule(minibatch_size),
# cv_frequency = epoch_size,
restore=restore)
# Train all minibatches
if profiling:
cntk.start_profiler(sync_gpu=True)
training_session.train()
training_session(
trainer=trainer, mb_source = train_source,
var_to_stream = input_map,
mb_size = minibatch_size,
progress_frequency=epoch_size,
checkpoint_config = CheckpointConfig(frequency = epoch_size,
filename = os.path.join(model_path, "ConvNet_CIFAR10_DataAug"),
restore = restore),
cv_config = CrossValidationConfig(source = test_source, mb_size=minibatch_size)
).train()
if profiling:
cntk.stop_profiler()
@ -169,10 +164,10 @@ def convnet_cifar10_dataaug(train_data, test_data, mean_data, minibatch_size=64,
rank=cntk.distributed.Communicator.rank(),
model=network['output'])
trainer = create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up)
trainer = create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up, [progress_printer, tensorboard_writer])
train_source = create_image_mb_source(train_data, mean_data, train=True, total_number_of_samples=max_epochs * epoch_size)
test_source = create_image_mb_source(test_data, mean_data, train=False, total_number_of_samples=cntk.io.FULL_DATA_SWEEP)
train_and_test(network, trainer, train_source, test_source, [progress_printer, tensorboard_writer], minibatch_size,
train_and_test(network, trainer, train_source, test_source, minibatch_size,
epoch_size, restore, profiling)

Просмотреть файл

@ -7,12 +7,13 @@
import numpy as np
import sys
import os
from cntk import Trainer, training_session, minibatch_size_schedule
from cntk import Trainer, minibatch_size_schedule
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT, FULL_DATA_SWEEP
from cntk.device import cpu, set_default_device
from cntk.learner import sgd, learning_rate_schedule, UnitType
from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error, relu, element_times, constant
from cntk.utils import ProgressPrinter
from cntk.training_session import *
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "common"))
@ -65,8 +66,6 @@ def simple_mnist():
}
lr_per_minibatch=learning_rate_schedule(0.2, UnitType.minibatch)
# Instantiate the trainer object to drive the model training
trainer = Trainer(z, (ce, pe), sgd(z.parameters, lr=lr_per_minibatch))
# Get minibatches of images to train with and perform model training
minibatch_size = 64
@ -79,16 +78,17 @@ def simple_mnist():
tag='Training',
num_epochs=num_sweeps_to_train_with)
session = training_session(
training_minibatch_source = reader_train,
trainer = trainer,
mb_size_schedule = minibatch_size_schedule(minibatch_size),
progress_printer = progress_printer,
model_inputs_to_mb_source_mapping = input_map,
progress_frequency = num_samples_per_sweep,
max_training_samples = num_samples_per_sweep * num_sweeps_to_train_with)
session.train()
# Instantiate the trainer object to drive the model training
trainer = Trainer(z, (ce, pe), sgd(z.parameters, lr=lr_per_minibatch), progress_printer)
training_session(
trainer=trainer,
mb_source = reader_train,
mb_size = minibatch_size,
var_to_stream = input_map,
max_samples = num_samples_per_sweep * num_sweeps_to_train_with,
progress_frequency=num_samples_per_sweep
).train()
# Load test data
path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt"))

Просмотреть файл

@ -18,6 +18,7 @@ from cntk.learner import momentum_sgd, learning_rate_schedule, momentum_as_time_
from _cntk_py import set_computation_network_trace_level
from cntk.device import set_default_device, gpu
from cntk.distributed import data_parallel_distributed_learner, block_momentum_distributed_learner, Communicator
from cntk.training_session import *
from resnet_models import *
@ -66,7 +67,7 @@ def create_resnet_network(network_name):
# Create trainer
def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, block_size, warm_up):
def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, block_size, warm_up, progress_printer):
if network['name'] == 'resnet20':
lr_per_mb = [1.0]*80+[0.1]*40+[0.01]
elif network['name'] == 'resnet110':
@ -94,10 +95,10 @@ def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, b
else:
learner = data_parallel_distributed_learner(local_learner, num_quantization_bits=num_quantization_bits, distributed_after=warm_up)
return Trainer(network['output'], (network['ce'], network['pe']), learner)
return Trainer(network['output'], (network['ce'], network['pe']), learner, progress_printer)
# Train and test
def train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, profiling=False):
def train_and_test(network, trainer, train_source, test_source, minibatch_size, epoch_size, profiling=False):
# define mapping from intput streams to network inputs
input_map = {
@ -105,23 +106,17 @@ def train_and_test(network, trainer, train_source, test_source, progress_printer
network['label']: train_source.streams.labels
}
training_session = cntk.training_session(
training_minibatch_source = train_source,
trainer = trainer,
mb_size_schedule = cntk.minibatch_size_schedule(minibatch_size),
progress_printer = progress_printer,
model_inputs_to_mb_source_mapping = input_map,
checkpoint_frequency = epoch_size,
checkpoint_filename="ResNet_CIFAR10_DataAug",
progress_frequency=epoch_size,
cv_source=test_source,
cv_mb_size_schedule=cntk.minibatch_size_schedule(16),
restore=False)
if profiling:
start_profiler(sync_gpu=True)
training_session.train()
training_session(
trainer=trainer, mb_source = train_source,
mb_size = minibatch_size,
var_to_stream = input_map,
checkpoint_config = CheckpointConfig(frequency=epoch_size, filename="ResNet_CIFAR10_DataAug", restore=False),
progress_frequency=epoch_size,
cv_config = CrossValidationConfig(source=test_source, mb_size=16)
).train()
if profiling:
stop_profiler()
@ -146,10 +141,10 @@ def resnet_cifar10(train_data, test_data, mean_data, network_name, epoch_size, n
num_epochs=max_epochs)
network = create_resnet_network(network_name)
trainer = create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, block_size, warm_up)
trainer = create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, block_size, warm_up, progress_printer)
train_source = create_image_mb_source(train_data, mean_data, train=True, total_number_of_samples=max_epochs * epoch_size)
test_source = create_image_mb_source(test_data, mean_data, train=False, total_number_of_samples=cntk.io.FULL_DATA_SWEEP)
train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, profiling)
train_and_test(network, trainer, train_source, test_source, minibatch_size, epoch_size, profiling)
if __name__=='__main__':

Просмотреть файл

@ -18,6 +18,7 @@ from cntk.distributed import data_parallel_distributed_learner, Communicator
from cntk.io import ImageDeserializer, MinibatchSource, StreamDef, StreamDefs, FULL_DATA_SWEEP
from cntk.layers import Placeholder, Block, Convolution2D, Activation, MaxPooling, Dense, Dropout, default_options, Sequential, For
from cntk.initializer import normal
from cntk.training_session import *
# default Paths relative to current python file.
abs_path = os.path.dirname(os.path.abspath(__file__))
@ -131,7 +132,7 @@ def create_vgg16():
}
# Create trainer
def create_trainer(network, epoch_size, num_quantization_bits):
def create_trainer(network, epoch_size, num_quantization_bits, progress_printer):
# Set learning parameters
lr_per_mb = [0.01]*20 + [0.001]*20 + [0.0001]*20 + [0.00001]*10 + [0.000001]
lr_schedule = cntk.learning_rate_schedule(lr_per_mb, unit=cntk.learner.UnitType.minibatch, epoch_size=epoch_size)
@ -147,10 +148,10 @@ def create_trainer(network, epoch_size, num_quantization_bits):
distributed_after=0)
# Create trainer
return cntk.Trainer(network['output'], (network['ce'], network['pe']), parameter_learner)
return cntk.Trainer(network['output'], (network['ce'], network['pe']), parameter_learner, progress_printer)
# Train and test
def train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, restore):
def train_and_test(network, trainer, train_source, test_source, minibatch_size, epoch_size, restore):
# define mapping from intput streams to network inputs
input_map = {
@ -158,23 +159,17 @@ def train_and_test(network, trainer, train_source, test_source, progress_printer
network['label']: train_source.streams.labels
}
training_session = cntk.training_session(
training_minibatch_source = train_source,
trainer = trainer,
model_inputs_to_mb_source_mapping = input_map,
mb_size_schedule = cntk.minibatch_size_schedule(minibatch_size),
progress_printer = progress_printer,
# checkpoint_frequency = epoch_size,
checkpoint_filename = os.path.join(model_path, model_name),
# save_all_checkpoints = True,
progress_frequency = epoch_size,
cv_source = test_source,
cv_mb_size_schedule = cntk.minibatch_size_schedule(minibatch_size),
# cv_frequency = epoch_size,
restore = restore)
mb_size_schedule = cntk.minibatch_size_schedule(minibatch_size)
# Train all minibatches
training_session.train()
training_session(
trainer=trainer, mb_source = train_source,
var_to_stream = input_map,
mb_size_schedule = mb_size_schedule,
progress_frequency=epoch_size,
checkpoint_config = CheckpointConfig(filename = os.path.join(model_path, model_name), restore=restore),
cv_config = CrossValidationConfig(source=test_source, schedule=mb_size_schedule)
).train()
# Train and evaluate the network.
def vgg16_train_and_eval(train_data, test_data, num_quantization_bits=32, minibatch_size=128, epoch_size = 1281167, max_epochs=80,
@ -190,10 +185,10 @@ def vgg16_train_and_eval(train_data, test_data, num_quantization_bits=32, miniba
num_epochs=max_epochs)
network = create_vgg16()
trainer = create_trainer(network, epoch_size, num_quantization_bits)
trainer = create_trainer(network, epoch_size, num_quantization_bits, progress_printer)
train_source = create_image_mb_source(train_data, True, total_number_of_samples=max_epochs * epoch_size)
test_source = create_image_mb_source(test_data, False, total_number_of_samples=FULL_DATA_SWEEP)
train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, restore)
train_and_test(network, trainer, train_source, test_source, minibatch_size, epoch_size, restore)
if __name__=='__main__':

Просмотреть файл

@ -18,6 +18,7 @@ from cntk.distributed import data_parallel_distributed_learner, Communicator
from cntk.io import ImageDeserializer, MinibatchSource, StreamDef, StreamDefs, FULL_DATA_SWEEP
from cntk.layers import Placeholder, Block, Convolution2D, Activation, MaxPooling, Dense, Dropout, default_options, Sequential, For
from cntk.initializer import normal
from cntk.training_session import *
# default Paths relative to current python file.
abs_path = os.path.dirname(os.path.abspath(__file__))
@ -131,7 +132,7 @@ def create_vgg19():
}
# Create trainer
def create_trainer(network, epoch_size, num_quantization_bits):
def create_trainer(network, epoch_size, num_quantization_bits, progress_printer):
# Set learning parameters
lr_per_mb = [0.01]*20 + [0.001]*20 + [0.0001]*20 + [0.00001]*10 + [0.000001]
lr_schedule = cntk.learning_rate_schedule(lr_per_mb, unit=cntk.learner.UnitType.minibatch, epoch_size=epoch_size)
@ -147,10 +148,10 @@ def create_trainer(network, epoch_size, num_quantization_bits):
distributed_after=0)
# Create trainer
return cntk.Trainer(network['output'], (network['ce'], network['pe']), parameter_learner)
return cntk.Trainer(network['output'], (network['ce'], network['pe']), parameter_learner, progress_printer)
# Train and test
def train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, restore):
def train_and_test(network, trainer, train_source, test_source, minibatch_size, epoch_size, restore):
# define mapping from intput streams to network inputs
input_map = {
@ -158,23 +159,15 @@ def train_and_test(network, trainer, train_source, test_source, progress_printer
network['label']: train_source.streams.labels
}
training_session = cntk.training_session(
training_minibatch_source = train_source,
trainer = trainer,
model_inputs_to_mb_source_mapping = input_map,
mb_size_schedule = cntk.minibatch_size_schedule(minibatch_size),
progress_printer = progress_printer,
# checkpoint_frequency = epoch_size,
checkpoint_filename = os.path.join(model_path, model_name),
# save_all_checkpoints = True,
progress_frequency = epoch_size,
cv_source = test_source,
cv_mb_size_schedule = cntk.minibatch_size_schedule(minibatch_size),
# cv_frequency = epoch_size,
restore = restore)
# Train all minibatches
training_session.train()
training_session(
trainer=trainer, mb_source = train_source,
var_to_stream = input_map,
mb_size = minibatch_size,
progress_frequency=epoch_size,
checkpoint_config = CheckpointConfig(filename = os.path.join(model_path, model_name), restore=restore),
cv_config = CrossValidationConfig(source=test_source, mb_size=minibatch_size)
).train()
# Train and evaluate the network.
def vgg19_train_and_eval(train_data, test_data, num_quantization_bits=32, minibatch_size=128, epoch_size = 1281167, max_epochs=80,
@ -190,10 +183,10 @@ def vgg19_train_and_eval(train_data, test_data, num_quantization_bits=32, miniba
num_epochs=max_epochs)
network = create_vgg19()
trainer = create_trainer(network, epoch_size, num_quantization_bits)
trainer = create_trainer(network, epoch_size, num_quantization_bits, progress_printer)
train_source = create_image_mb_source(train_data, True, total_number_of_samples=max_epochs * epoch_size)
test_source = create_image_mb_source(test_data, False, total_number_of_samples=FULL_DATA_SWEEP)
train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size, restore)
train_and_test(network, trainer, train_source, test_source, minibatch_size, epoch_size, restore)
if __name__=='__main__':

Просмотреть файл

@ -22,10 +22,10 @@ trainNetwork = {
BrainScriptNetworkBuilder = {
cMap = 1
model = inputFeatures => {
conv1 = ConvolutionalLayer {cMap, (5:5), pad = true, activation=ReLU}(inputFeatures)
conv1 = ConvolutionalLayer {cMap, (5:5), pad=true, activation=ReLU}(inputFeatures)
pool1 = MaxPoolingLayer {(4:4), stride=(4:4)}(conv1)
unpool1 = MaxUnpoolingLayer {(4:4), stride=(4:4)}(pool1, conv1)
deconv1 = DeconvLayer {1, (5:5), cMap, lowerPad=(2:2:0), upperPad=(2:2:0), bias=false}(unpool1)
deconv1 = ConvolutionTransposeLayer {1, (5:5), cMap, pad=true, bias=false}(unpool1)
}.deconv1
# inputs

Просмотреть файл

@ -40,7 +40,7 @@ def deconv_mnist(max_epochs=3):
conv1 = cntk.layers.Convolution ((5,5), cMap, pad=True, activation=cntk.ops.relu)(scaled_input)
pool1 = cntk.layers.MaxPooling ((4,4), (4,4))(conv1)
unpool1 = cntk.layers.MaxUnpooling ((4,4), (4,4))(pool1, conv1)
z = cntk.layers.Deconvolution((5,5), num_channels, cMap, lower_pad=(0,2,2), upper_pad=(0,2,2), bias=False, init=cntk.glorot_uniform(0.001))(unpool1)
z = cntk.layers.ConvolutionTranspose((5,5), num_channels, cMap, pad=True, bias=False, init=cntk.glorot_uniform(0.001))(unpool1)
# define rmse loss function (should be 'err = cntk.ops.minus(deconv1, scaled_input)')
f2 = cntk.ops.element_times(cntk.ops.constant(0.00390625), input_var)

Просмотреть файл

@ -90,7 +90,7 @@ PROTOC = $(PROTOBUF_PATH)/bin/protoc
#SSE_FLAGS =
SOURCEDIR:= Source
INCLUDEPATH:= $(addprefix $(SOURCEDIR)/, Common/Include CNTKv2LibraryDll CNTKv2LibraryDll/API CNTKv2LibraryDll/proto Math CNTK ActionsLib ComputationNetworkLib SGDLib SequenceTrainingLib CNTK/BrainScript Readers/ReaderLib PerformanceProfilerDll)
INCLUDEPATH:= $(addprefix $(SOURCEDIR)/, Common/Include CNTKv2LibraryDll CNTKv2LibraryDll/API CNTKv2LibraryDll/proto ../Examples/Extensibility/CPP Math CNTK ActionsLib ComputationNetworkLib SGDLib SequenceTrainingLib CNTK/BrainScript Readers/ReaderLib PerformanceProfilerDll)
INCLUDEPATH+=$(PROTOBUF_PATH)/include
# COMMON_FLAGS include settings that are passed both to NVCC and C++ compilers.
COMMON_FLAGS:= -DHAS_MPI=$(HAS_MPI) -D_POSIX_SOURCE -D_XOPEN_SOURCE=600 -D__USE_XOPEN2K -std=c++11
@ -326,7 +326,8 @@ READER_SRC =\
$(SOURCEDIR)/Readers/ReaderLib/FramePacker.cpp \
$(SOURCEDIR)/Readers/ReaderLib/ReaderBase.cpp \
$(SOURCEDIR)/Readers/ReaderLib/Indexer.cpp \
$(SOURCEDIR)/Readers/ReaderLib/ChunkCache.cpp \
$(SOURCEDIR)/Readers/ReaderLib/ChunkCache.cpp \
$(SOURCEDIR)/Readers/ReaderLib/ReaderUtil.cpp \
COMMON_SRC =\
$(SOURCEDIR)/Common/Config.cpp \

Просмотреть файл

@ -5,10 +5,16 @@ Effective January 25, 2017 CNTK [1-bit Stochastic Gradient Descent (1bit-SGD)](h
Give us feedback through these [channels](https://github.com/Microsoft/CNTK/wiki/Feedback-Channels).
# Latest news
***2017-02-28.* V 2.0 Beta 12 Release available at Docker Hub**
CNTK V 2.0 Beta 12 Runtime packages are now available as [Public Images at Docker Hub](https://hub.docker.com/r/microsoft/cntk/).
See more on CNTK as Docker Images in this [Wiki article](https://github.com/Microsoft/CNTK/wiki/CNTK-Docker-Containers).
***2017-02-23.* V 2.0 Beta 12 Release**
Highlights of this Release:
* New and updated features: new activation functions, support of `Argmax` and `Argmin`, improved performance of `numpy` interop, new functionality of existing operators, and more.
* [CNTK for CPU on Windows can now be installed via `pip install` on Anaconda 3](https://github.com/Microsoft/CNTK/wiki/Setup-Windows-Python). Other configurations will be enabled soon.
* [CNTK for CPU on Windows can now be installed via `pip install` on Anaconda 3](https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-your-machine). Other configurations will be enabled soon.
* HTK deserializers are now exposed in Python. All deserializers are exposed in C++.
* The memory pool implementation of CNTK has been updated with a new global optimization algorithm. Hyper memory compression has been removed.
* New features in C++ API.
* [New Eval examples for RNN models](https://github.com/Microsoft/CNTK/blob/master/Examples/Evaluation/CNTKLibraryCSEvalCPUOnlyExamples/CNTKLibraryCSEvalExamples.cs).
* New [CNTK NuGet Packages](https://github.com/Microsoft/CNTK/wiki/NuGet-Package) with CNTK V2 C++ Library.
@ -35,19 +41,6 @@ Get the Release from the [CNTK Releases page](https://github.com/Microsoft/CNTK/
CNTK V 2.0 Beta 10 Runtime packages are now available as [Public Images at Docker Hub](https://hub.docker.com/r/microsoft/cntk/).
See more on CNTK as Docker Images in this [Wiki article](https://github.com/Microsoft/CNTK/wiki/CNTK-Docker-Containers).
***2017-02-01.* V 2.0 Beta 10 Release**
* New and updated core and Python API features ([Operators with UserFunctions](https://www.cntk.ai/pythondocs/extend.html), [Tensorboard support](https://github.com/Microsoft/CNTK/wiki/Using-TensorBoard-for-Visualization), [Python API Fast R CNN](https://github.com/Microsoft/CNTK/wiki/Object-Detection-using-Fast-R-CNN)).
* Improved speed of CrossEntropyWithSoftmax and ClassificationError for sparse labels.
* New Tutorials and Examples:
* A Python version of the deconvolution layer and image auto encoder example was added ([Example **07_Deconvolution** in *Image - Getting Started*](https://github.com/Microsoft/CNTK/tree/v2.0.beta10.0/Examples/Image/GettingStarted)).
* A Python distributed training example for image classification using AlexNet was added, cf. [here](https://github.com/Microsoft/CNTK/tree/v2.0.beta10.0/Examples/Image/Classification/AlexNet/Python)
* [Basic implementation of Generative Adversarial Networks (GAN) networks](https://github.com/Microsoft/CNTK/blob/v2.0.beta10.0/Tutorials/CNTK_206_Basic_GAN.ipynb)
* [Training with Sampled Softmax](https://github.com/Microsoft/CNTK/blob/v2.0.beta10.0/Tutorials/CNTK_207_Training_with_Sampled_Softmax.ipynb)
* New [CNTK NuGet Packages](https://github.com/Microsoft/CNTK/wiki/NuGet-Package).
See more in the [Release Notes](https://github.com/Microsoft/CNTK/wiki/CNTK_2_0_beta_10_Release_Notes).
Get the Release from the [CNTK Releases page](https://github.com/Microsoft/CNTK/releases).
See [all news](https://github.com/Microsoft/CNTK/wiki/News).
# What is The Microsoft Cognitive Toolkit

Просмотреть файл

@ -43,8 +43,9 @@ class Converter(object):
def appendSample(self, sample):
if( len(sample) != self.sampleDim ):
print( "Invalid sample dimension for input {0}" ).format( self.name )
sys.exit()
raise ValueError(
"Invalid sample dimension for input {0}".format(self.name))
if( len(self.vals) == 0 ):
self.vals.append( list() )
@ -65,7 +66,7 @@ class DenseConverter(Converter):
Converter.__init__(self, name, sampleDim)
def headerBytes(self):
output = ""
output = bytearray()
# First is the matrix type. Dense is type 0
output += struct.pack( "i", 0 )
# Next is the elem type, currently float only
@ -77,11 +78,11 @@ class DenseConverter(Converter):
def toBytes(self):
output = ""
output = bytearray()
for sequence in self.vals:
if( len(sequence) != 1 ):
print( "Converter does not support dense sequences." )
sys.exit()
raise ValueError("Dense sequences currently not supported.")
for sample in sequence[0]:
output += struct.pack( "f", float(sample) )
@ -94,17 +95,18 @@ class SparseConverter(Converter):
Converter.__init__(self, name, sampleDim)
def appendSample(self, sample):
for samp in sample:
if( int(samp.split(":")[0]) >= self.sampleDim ):
print( "Invalid sample dimension for input {0}. Max {1}, given {2}" ).format( self.name, self.sampleDim, sample.split( ":" )[0] )
sys.exit()
for pair in sample:
index = int(pair.split(":")[0])
if (index >= self.sampleDim):
raise ValueError("Invalid sample dimension for input {0}. Max {1}, given {2}"
.format(self.name, self.sampleDim, index))
if( len(self.vals) == 0 ):
self.vals.append( list() )
self.vals[-1].append( sample )
def headerBytes(self):
output = ""
output = bytearray()
# First is the matrix type. Sparse is type 1
output += struct.pack( "i", 1 )
# Next is the storage type, currently sparse csc only
@ -120,7 +122,7 @@ class SparseConverter(Converter):
return output
def toBytes(self):
output = ""
output = bytearray()
values = list()
rowInd = list()
colInd = [0]
@ -139,9 +141,9 @@ class SparseConverter(Converter):
colInd.append( nnz )
output += struct.pack( "i", nnz )
output += "".join( [ struct.pack( "f", float(val) ) for val in values ] )
output += "".join( [ struct.pack( "i", int(ind) ) for ind in rowInd ] )
output += "".join( [ struct.pack( "i", int(ind) ) for ind in colInd ] )
output += b''.join( [ struct.pack( "f", float(val) ) for val in values ] )
output += b''.join( [ struct.pack( "i", int(ind) ) for ind in rowInd ] )
output += b''.join( [ struct.pack( "i", int(ind) ) for ind in colInd ] )
return output
@ -174,7 +176,7 @@ def GetConverter( inputtype, name, sampleDim ):
elif( inputtype.lower() == 'sparse' ):
converter = SparseConverter( name, sampleDim )
else:
print( 'Invalid input format {0}' ).format( inputtype )
print('Invalid input format {0}'.format( inputtype ))
sys.exit()
return converter
@ -240,7 +242,6 @@ if __name__ == '__main__':
id += 1
OutputHeader( binaryHeaderFile, converters )
numChunks = 0
with open( args.input, "r" ) as inputFile:
curSequence = list()
@ -280,7 +281,7 @@ if __name__ == '__main__':
binaryHeaderFile.close()
binaryDataFile.close()
destination = open( args.output, 'awb+' )
destination = open( args.output, 'ab+' )
shutil.copyfileobj( open( dataPath, "rb" ), destination )
destination.flush()

Просмотреть файл

@ -420,8 +420,9 @@ void NDLNodeEvaluatorImpl<ElemType>::Evaluate(NDLNode<ElemType>* node, const wst
else
{
bool transpose = node->GetOptionalParameter("transpose", "false");
auto outputShape = paramResolver("outputShape", 0);
nodePtr = builder.Convolution(NULL, NULL, kernelShape, mapCount, stride, sharing,
autoPad, lowerPad, upperPad, transpose, imageLayout, maxTempMemSizeInSamples, name);
autoPad, lowerPad, upperPad, transpose, outputShape, imageLayout, maxTempMemSizeInSamples, name);
}
}

Просмотреть файл

@ -94,34 +94,33 @@ ConvolutionalLayer {numOutputChannels, # e.g. (1) or BS.Constants.None
b = ParameterTensor(_ConcatArrays (Repeat (Length (filterShape), 1), outputChannelsShape), initValue = initBias) # [ 1 x 1 x K ]
sharing = true # TODO: support this
apply (x) = {
c = Convolution (W, x, filterShape, mapDims = numOutputChannels, stride = stride, sharing = sharing, autoPadding = pad, lowerPad = lowerPad, upperPad = upperPad, deconv = false, maxTempMemSizeInSamples = maxTempMemSizeInSamples)
c = Convolution (W, x, filterShape, mapDims = numOutputChannels, stride = stride, sharing = sharing, autoPadding = pad, lowerPad = lowerPad, upperPad = upperPad, maxTempMemSizeInSamples = maxTempMemSizeInSamples)
res = activation (if bias then c + b else c)
}.res
}.apply
# DeconvLayer -- create a deconvolution layer with optional non-linearity
DeconvLayer {numOutputChannels,
filterShape, # e.g. (3:3)
numInputChannels,
bias = true,
activation = (x=>x),
init = 'glorotUniform',
initValueScale = 0.001,
initBias = 0,
stride = 1,
sharing = true,
autoPadding = false,
lowerPad = 0,
upperPad = 0,
maxTempMemSizeInSamples = 0} =
# ConvolutionTransposeLayer -- create a convolution transpose layer with optional non-linearity
ConvolutionTransposeLayer {numOutputChannels,
filterShape, # e.g. (3:3)
numInputChannels,
bias = true,
activation = (x=>x),
init = 'glorotUniform',
initValueScale = 0.001,
initBias = 0,
stride = 1, pad = false,
lowerPad = 0, upperPad = 0,
outputShape = 0,
maxTempMemSizeInSamples = 0} =
{
outputChannelsShape = _AsArray (numOutputChannels)
kernelShape = _ConcatArrays (filterShape, outputChannelsShape)
paramShape = _ConcatArrays (kernelShape, _AsArray (numInputChannels))
W = ParameterTensor{paramShape, init=init, initValueScale=initValueScale, initOnCPUOnly=true}
b = ParameterTensor(_ConcatArrays (Repeat (Length (filterShape), 1), outputChannelsShape), initValue = initBias)
sharing = true # TODO: support this
apply (x) = {
c = Convolution(W, x, kernelShape, mapDims=numInputChannels, stride=stride, sharing=sharing, autoPadding=autoPadding, lowerPad=lowerPad, upperPad=upperPad, deconv=true, maxTempMemSizeInSamples = maxTempMemSizeInSamples)
c = ConvolutionTranspose (W, x, kernelShape, mapDims=numInputChannels, stride=stride, sharing=sharing, autoPadding=pad, lowerPad=lowerPad, upperPad=upperPad, outputShape = outputShape, maxTempMemSizeInSamples = maxTempMemSizeInSamples)
res = activation (if bias then c + b else c)
}.res
}.apply
@ -607,7 +606,8 @@ ReconcileDynamicAxis(dataInput, layoutInput, tag='') = new ComputationNode [ ope
ReconcileMBLayout = ReconcileDynamicAxis # back compat
CastAs (type, data) = ReconcileDynamicAxis (data, type) # read as CastAs<type>(data) where the cast may consist of rearranging the data w.r.t. MBLayout or broadcasting across sequence items
# ND convo & pooling/unpooling --why is autoPadding true? Normally one would want to reduce dimensions, no?
Convolution(weightNode, inputValueNode, kernelDims, mapDims = 0, stride = 1, sharing = true, autoPadding = true, lowerPad = 0, upperPad = 0, deconv=false, imageLayout='CHW', maxTempMemSizeInSamples = 0, tag='') = new ComputationNode [ operation = 'Convolution' ; inputs = _AsNodes (weightNode : inputValueNode); kernelShape = new TensorShape [ dims = kernelDims ] ; mapCount = new TensorShape [ dims = mapDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimSharing = new BoolVector [ items = sharing ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] ; transpose = deconv /*plus the function args*/ ]
Convolution(weightNode, inputValueNode, kernelDims, mapDims = 0, stride = 1, sharing = true, autoPadding = true, lowerPad = 0, upperPad = 0, imageLayout='CHW', maxTempMemSizeInSamples = 0, tag='') = new ComputationNode [ operation = 'Convolution' ; inputs = _AsNodes (weightNode : inputValueNode); kernelShape = new TensorShape [ dims = kernelDims ] ; mapCount = new TensorShape [ dims = mapDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimSharing = new BoolVector [ items = sharing ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] ; transpose = false ; dimOutputShape = new TensorShape [ dims = 0 ] /*plus the function args*/ ]
ConvolutionTranspose(weightNode, inputValueNode, kernelDims, mapDims = 0, stride = 1, sharing = true, autoPadding = true, lowerPad = 0, upperPad = 0, outputShape = 0, imageLayout='CHW', maxTempMemSizeInSamples = 0, tag='') = new ComputationNode [ operation = 'Convolution' ; inputs = _AsNodes (weightNode : inputValueNode); kernelShape = new TensorShape [ dims = kernelDims ] ; mapCount = new TensorShape [ dims = mapDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimSharing = new BoolVector [ items = sharing ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] ; transpose = true ; dimOutputShape = new TensorShape [ dims = outputShape ] /*plus the function args*/ ]
Pooling(input, poolKind/*'max'|'average'*/, kernelDims, stride=1, autoPadding = true, lowerPad = 0, upperPad = 0, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'Pooling' ; inputs = _AsNodes (input); pool = poolKind ; kernelShape = new TensorShape [ dims = kernelDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] /*plus the function args*/ ]
MaxUnpooling(unpoolInput, poolInput, kernelDims, stride=1, autoPadding = true, lowerPad = 0, upperPad = 0, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'MaxUnpooling' ; inputs = _AsNodes (unpoolInput : poolInput); kernelShape = new TensorShape [ dims = kernelDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] /*plus the function args*/ ]
# 2D pooling

Просмотреть файл

@ -359,18 +359,27 @@ namespace CNTK
///
std::wstring AsString() const
{
std::wstringstream wStrStream;
wStrStream << L"[";
for (size_t i = 0; i < Rank(); i++)
if (IsUnknown())
{
if (i != 0)
wStrStream << L" x ";
wStrStream << m_shapeDims[i];
return L"[???]";
}
else
{
std::wstringstream wStrStream;
wStrStream << L"[";
for (size_t i = 0; i < Rank(); i++)
{
if (i != 0)
wStrStream << L" x ";
wStrStream << L"]";
return wStrStream.str();
if (m_shapeDims[i] != InferredDimension)
wStrStream << m_shapeDims[i];
else
wStrStream << "?";
}
wStrStream << L"]";
return wStrStream.str();
}
}
private:
@ -991,6 +1000,7 @@ namespace std {
};
}
namespace CNTK
{
///
@ -1612,6 +1622,11 @@ namespace CNTK
///
CNTK_API bool NeedsGradient() const;
///
/// Returns a string representation for this variable.
///
CNTK_API std::wstring AsString() const;
protected:
#ifdef SWIG
public:
@ -2065,6 +2080,12 @@ namespace CNTK
return Create(sampleShape, sequences, {}, device, readOnly);
}
///
/// Create a new Value object containing a collection of variable length sequences.
///
CNTK_API static ValuePtr Create(const NDShape& sampleShape, const std::vector<NDArrayViewPtr>& sequences, const std::vector<bool>& sequenceStartFlags, const DeviceDescriptor& device, bool readOnly, bool createNewCopy);
///
/// Create a new Value object containing a collection of variable length sequences.
/// The created Value object contains a copy of the specified 'sequences' data.
@ -2442,8 +2463,6 @@ namespace CNTK
template <typename ElementType>
static void AppendSparseSequenceData(const NDArrayViewPtr& sequenceData, std::vector<SparseIndexType>& colStarts, std::vector<SparseIndexType>& rowIndices, std::vector<char>& nonZeroValues, size_t maxSequenceLength);
CNTK_API static ValuePtr Create(const NDShape& sampleShape, const std::vector<NDArrayViewPtr>& sequences, const std::vector<bool>& sequenceStartFlags, const DeviceDescriptor& device, bool readOnly, bool createNewCopy);
///
/// Copy the data stored in 'this' Value object to the buffer 'sequences' as a collection of variable length sequences.
/// The output data is in the dense format.
@ -2764,7 +2783,7 @@ namespace CNTK
///
/// Returns the root of the Function graph underlying this block Function.
/// Throws an exception of this is not a block Function
/// Throws an exception if this is not a block Function
///
CNTK_API FunctionPtr BlockRoot() const;
@ -2808,11 +2827,11 @@ namespace CNTK
///
/// Returns a set comprising of all input variables of 'this' Function's variables that are not of kind 'Parameter' or 'Constant'.
///
std::vector<Variable> Arguments() const
std::vector<Variable> Arguments(bool rowMajor = false) const
{
return FilteredInputs<Variable>([](const Variable& var) {
return (var.IsInput() || var.IsPlaceholder() || var.IsOutput());
});
}, rowMajor);
}
///
@ -2916,6 +2935,11 @@ namespace CNTK
///
CNTK_API void PrintGraph() const;
///
/// Returns a string representation of this Function
///
CNTK_API std::wstring AsString() const;
///
/// Maximum number of outputs that is currently supported.
///
@ -2982,11 +3006,11 @@ namespace CNTK
CNTK_API std::vector<Variable>& InitOutputs();
template <typename VariableType, typename FilterFunction>
std::vector<VariableType> FilteredInputs(FilterFunction&& filterFunc) const
std::vector<VariableType> FilteredInputs(FilterFunction&& filterFunc, bool rowMajor = false) const
{
std::vector<VariableType> filteredInputs;
std::unordered_set<Variable> uniqueFilteredInputs;
auto inputs = Inputs();
auto inputs = Inputs(rowMajor);
for (auto inputVar : inputs)
{
if (filterFunc(inputVar) && (uniqueFilteredInputs.find(inputVar) == uniqueFilteredInputs.end()))
@ -3370,7 +3394,18 @@ namespace CNTK
///
/// Create an instance of the CNTK built-in operation for computing the edit distance error for specified operands.
///
CNTK_API FunctionPtr EditDistanceError(const Variable& prediction, const Variable& labels, float substitutionPenalty, float deletionPenalty, float insertionPenalty, bool squashInputs, const std::vector<size_t>& samplesToIgnore, const std::wstring& name = L"");
CNTK_API FunctionPtr EditDistanceError(const Variable& prediction, const Variable& labels, float substitutionPenalty, float deletionPenalty, float insertionPenalty, bool squashInputs, const std::vector<size_t>& tokensToIgnore, const std::wstring& name = L"");
///
/// Create an instance of the CNTK built-in operation for computing the forwardbackward for specified operands.
///
CNTK_API FunctionPtr ForwardBackward(const Variable& graph, const Variable& features, size_t blankTokenId, int delayConstraint, const std::wstring& name = L"");
///
/// Create an instance of the CNTK built-in operation for computing the labels to graph for input operands.
///
CNTK_API FunctionPtr LabelsToGraph(const Variable& labels, const std::wstring& name = L"");
///
/// Create an instance of the CNTK built-in operation for computing the classification prediction error for specified operands.
@ -3495,10 +3530,23 @@ namespace CNTK
const std::vector<bool>& autoPadding = {true},
const NDShape& lowerPad = {0},
const NDShape& upperPad = {0},
bool transpose = false,
size_t maxTempMemSizeInSamples = 0,
const std::wstring& name = L"");
///
/// TODO:
///
CNTK_API FunctionPtr ConvolutionTranspose(const Variable& convolutionMap,
const Variable& operand,
const NDShape& strides = { 1 },
const std::vector<bool>& sharing = { true },
const std::vector<bool>& autoPadding = { true },
const NDShape& lowerPad = { 0 },
const NDShape& upperPad = { 0 },
const NDShape& outputShape = { 0 },
size_t maxTempMemSizeInSamples = 0,
const std::wstring& name = L"");
///
/// Create an instance of the CNTK built-in ROI pooling operation on specified tensor input operands with the specified output shape
///
@ -3621,6 +3669,11 @@ namespace CNTK
///
CNTK_API FunctionPtr PReLU(const Variable& alpha, const Variable& operand, const std::wstring& name = L"");
///
/// Create an instance of the CNTK built-in elementwise softplus operation
///
CNTK_API FunctionPtr Softplus(const Variable& operand, const std::wstring& name = L"");
///
/// Create an instance of the CNTK built-in argmax operation on specified tensor input operand along the specified axis
///
@ -4275,6 +4328,9 @@ namespace CNTK
void UpdateTestProgress(size_t numSamples, const ValuePtr& evalCriterion, const DeviceDescriptor& computeDevice);
void AddProgressWriters(const std::vector<ProgressWriterPtr>& progressWriters);
// TODO: Workaround for back compat. Should not be used and will be removed in the next version.
friend CNTK_API void ::CNTK::Internal::AddProgressWriters(const TrainerPtr&, const std::vector<ProgressWriterPtr>&);
FunctionPtr m_combinedTrainingFunction;
FunctionPtr m_model;
FunctionPtr m_lossFunction;
@ -4698,6 +4754,55 @@ namespace CNTK
///
CNTK_API QuantizedDistributedCommunicatorPtr QuantizedMPICommunicator(bool zeroThresholdFor1Bit, bool useQuantizationForSelfStripe, size_t numQuantizationBits);
///
/// Cross validation configuration
///
struct CrossValidationConfig
{
public:
/// Cross validation configuration.
/// crossValidationSource: a minibatch source that will be used for cross validation.
/// crossValidationSchedule : a minibatch size schedule for cross validation.
/// crossValidationFrequencyInSamples: frequency in samples when to perform cross validation.
///
CNTK_API CrossValidationConfig(const MinibatchSourcePtr& crossValidationSource,
const MinibatchSizeSchedule& crossValidationSchedule = MinibatchSizeSchedule(1),
size_t crossValidationFrequencyInSamples = std::numeric_limits<size_t>::max());
private:
friend class TrainingSession;
const MinibatchSourcePtr m_source;
const MinibatchSizeSchedule m_mbSize;
const size_t m_frequency;
};
///
/// Checkpoint configuration
///
struct CheckpointConfig
{
public:
///
/// Checkpoint configuration.
/// checkPointFileName: a file name where the checkpoint will be stored.
/// checkpointFrequencyInSamples: frequency in samples when to perform checkpointing.
/// restoreFromCheckpointIfExists: if flag is set, the training session will try to restore before training.
/// preserveAllCheckpoints: if flag is set, all checkpoints will be preserved.
///
CNTK_API CheckpointConfig(
const std::wstring& checkPointFileName,
size_t checkpointFrequencyInSamples = std::numeric_limits<size_t>::max(),
bool restoreFromCheckpointIfExists = true,
bool preserveAllCheckpoints = false);
private:
friend class TrainingSession;
const std::wstring m_fileName;
const bool m_restore;
const bool m_preserveAll;
const size_t m_frequency;
};
///
/// Base abstract class that represents a training session.
/// Derived classes can redefine different aspects of training, overriding base virtual methods (GetMinibatchSize, OnMinibatchStart, etc.)
@ -4709,12 +4814,31 @@ namespace CNTK
size_t frequency;
size_t currentIndex;
size_t sampleCountWhenLastCalled;
std::function<void(size_t currentIndex, const DeviceDescriptor&)> action;
std::function<bool(size_t currentIndex, const DeviceDescriptor&)> action;
};
public:
///
///
/// Constructor of the training session:
/// trainer : an instance of a trainer
/// trainingSource: minibatch source
/// minibatchSizeSchedule: mb size schedule
/// inputVarToStream: var to stream mapping
/// maxNumTrainingSamples: max number of training samples
/// progress : a training configuration
///
CNTK_API TrainingSession(
const TrainerPtr& trainer,
const MinibatchSourcePtr& trainingSource,
const MinibatchSizeSchedule& minibatchSizeSchedule,
const std::unordered_map<Variable, StreamInformation>& inputVarToStream,
size_t maxNumTrainingSamples,
size_t progressFrequency,
const CheckpointConfig& checkpointing,
const CrossValidationConfig& crossValidation);
/// !!! DEPRECATED !!!
/// Constructor of the training session:
/// trainingSource : a minibatch source that will be used for training
/// trainer : an instance of a trainer
/// modelInputsToMinibatchSourceMapping : mapping between the input node of the model and the corresponding stream
@ -4765,7 +4889,7 @@ namespace CNTK
///
virtual size_t GetMinibatchSize()
{
return m_minibatchSizeSchedule[Trainer()->TotalNumberOfSamplesSeen()];
return m_mbSize[Trainer()->TotalNumberOfSamplesSeen()];
}
///
@ -4775,8 +4899,9 @@ namespace CNTK
///
/// Optionally overridable callback that is invoked after each minibatch.
/// If return value is false, the training will be stopped.
///
CNTK_API virtual void OnMinibatchEnd() {};
CNTK_API virtual bool OnMinibatchEnd() { return true; };
///
/// Optionally overridable callback that is invoked before each checkpoint.
@ -4795,8 +4920,12 @@ namespace CNTK
///
/// Optionally overridable callback that is invoked after each cross validation.
/// If return value is false, the training will be stopped.
///
CNTK_API virtual void OnCrossValidationEnd(size_t /*validationIndex*/, double /*averageError*/, size_t /*numberOfSamples*/, size_t /*numberOfMinibatches*/) {};
CNTK_API virtual bool OnCrossValidationEnd(size_t /*validationIndex*/, double /*averageError*/, size_t /*numberOfSamples*/, size_t /*numberOfMinibatches*/)
{
return true;
}
protected:
///
@ -4804,8 +4933,6 @@ namespace CNTK
///
TrainerPtr Trainer() const { return m_trainer; }
MinibatchSourcePtr TrainingMinibatchSource() const { return m_trainingSource; }
private:
/// Disallow copy and move construction and assignment
TrainingSession(const TrainingSession&) = delete; TrainingSession& operator=(const TrainingSession&) = delete; TrainingSession& operator=(TrainingSession&&) = delete; TrainingSession(TrainingSession&&) = delete;
@ -4819,32 +4946,30 @@ namespace CNTK
void SaveCheckpoint(size_t currentIndex);
void SaveFinalCheckpoint();
void CrossValidate(size_t currentIndex, const DeviceDescriptor& computeDevice);
bool CrossValidate(size_t currentIndex, const DeviceDescriptor& computeDevice);
void ReportProgress(size_t currentIndex);
// Checkpointing
const std::wstring m_checkPointFileName;
const bool m_restoreFromCheckpointIfExists;
const bool m_saveAllCheckpoints;
// Training
MinibatchSourcePtr m_trainingSource;
TrainerPtr m_trainer;
std::unordered_map<Variable, StreamInformation> m_modelInputToMinibatchSourceStream;
size_t m_parallelAfterSamples;
size_t m_workerRank;
size_t m_numberOfWorkers;
const MinibatchSizeSchedule m_minibatchSizeSchedule;
const size_t m_maxNumberOfSamples;
// Cross validation.
MinibatchSourcePtr m_crossValidationSource;
const MinibatchSizeSchedule m_crossValidationSchedule;
std::vector<PeriodicAction> m_actions;
// Training.
TrainerPtr m_trainer;
const MinibatchSourcePtr m_source;
const MinibatchSizeSchedule m_mbSize;
const std::unordered_map<Variable, StreamInformation> m_varToStream;
const size_t m_maxNumSamples;
const size_t m_progressFrequency;
// Additional configuration.
CheckpointConfig m_checkpoint;
CrossValidationConfig m_cv;
};
///
/// !!! DEPRECATED !!!
/// Creates an instance of the training session class. Parameters match the paramters of the TrainingSession constructor.
///
CNTK_API TrainingSessionPtr CreateBasicTrainingSession(
@ -4961,6 +5086,18 @@ namespace CNTK
std::unique_ptr<Impl> m_training;
std::unique_ptr<Impl> m_test;
};
/// Creates an instance of the training session class. Parameters match the parameters of the TrainingSession constructor.
///
CNTK_API TrainingSessionPtr CreateTrainingSession(
const TrainerPtr& trainer,
const MinibatchSourcePtr& trainingSource,
const MinibatchSizeSchedule& minibatchSizeSchedule,
const std::unordered_map<Variable, StreamInformation>& inputVarToStream,
size_t maxNumTrainingSamples,
size_t progressFrequency,
const CheckpointConfig& checkpointing,
const CrossValidationConfig& crossValidation);
}

Просмотреть файл

@ -134,6 +134,7 @@ namespace CNTK
{
// Forward declarations
class Utils;
class NDShape;
class PrimitiveFunction;
class CompositeFunction;
class BlockFunction;
@ -224,9 +225,15 @@ namespace CNTK
CNTK_API FunctionPtr ReduceElements(const Variable& operand, const std::wstring& reductionOpName, const Axis& axis, const std::wstring& name = L"");
CNTK_API FunctionPtr ReconcileDynamicAxes(const Variable& operand, const Variable& axesAsOperand, const std::wstring& name = L"");
CNTK_API FunctionPtr Convolution(const Variable& convolutionMap, const Variable& operand, const NDShape& strides, const std::vector<bool>& sharing, const std::vector<bool>& autoPadding,
const NDShape& lowerPad, const NDShape& upperPad, bool transpose, const NDShape& outputShape, size_t maxTempMemSizeInSamples, const std::wstring& name = L"");
// This is meant for debugging purposes only and is very likely to be deprecated in the future.
CNTK_API void SaveAsLegacyModel(const FunctionPtr& rootFunction, const std::wstring& modelFile);
// TODO: Workaround for back compat. Should not be used and will be removed in the next version.
CNTK_API void AddProgressWriters(const TrainerPtr&, const std::vector<ProgressWriterPtr>&);
CNTK_API size_t NewUniqueId();
// Internal hooks for testing and higher-level bindings

Просмотреть файл

@ -22,6 +22,7 @@
#include "RNNNodes.h"
#include "PreComputeNodes.h"
#include "DeprecatedNodes.h"
#include "SpecialPurposeNodes.h"
using namespace Microsoft::MSR::CNTK;
@ -349,6 +350,7 @@ namespace CNTK
primitiveFunctionConfigParameters[PrimitiveFunction::AttributeNameLowerPad] = AsNDShape(convolutionNode->LowerPad());
primitiveFunctionConfigParameters[PrimitiveFunction::AttributeNameUpperPad] = AsNDShape(convolutionNode->UpperPad());
primitiveFunctionConfigParameters[PrimitiveFunction::AttributeNameTranspose] = convolutionNode->Transpose();
primitiveFunctionConfigParameters[PrimitiveFunction::AttributeNameOutputShape] = AsNDShape(convolutionNode->OutputShape());
primitiveFunctionConfigParameters[PrimitiveFunction::AttributeNameMaxTempMemSizeInSamples] = convolutionNode->MaxTempMemSizeInSamples();
opType = PrimitiveOpType::Convolution;
@ -456,6 +458,14 @@ namespace CNTK
opType = PrimitiveOpType::EditDistanceError;
}
else if (node->OperationName() == OperationNameOf(ForwardBackwardNode))
{
auto edNode = node->As<ForwardBackwardNode<ElementType>>();
primitiveFunctionConfigParameters[PrimitiveFunction::AttributeNameDelayConstraint] = edNode->DelayConstraint();
primitiveFunctionConfigParameters[PrimitiveFunction::AttributeNameBlankTokenId] = edNode->BlankTokenId();
opType = PrimitiveOpType::ForwardBackward;
}
else if ((node->OperationName() == OperationNameOf(MeanNode)) || (node->OperationName() == OperationNameOf(InvStdDevNode)))
{
auto precomputeNode = node->As<MeanInvStdDevNodeBase<ElementType>>();
@ -475,7 +485,9 @@ namespace CNTK
return PerDimMeanVarianceNormalize(inputVars[0], meanValue, invStdDevValue, name);
}
else
LogicError("Unsupported ComputationNode with OperationName='%S' found when loading legacy CNTK model", node->OperationName().c_str());
InvalidArgument("Unsupported ComputationNode with OperationName='%S' found when loading legacy CNTK model.\n"
"This is likely a deprecated operation; loading Brainscript/NDL models that contain deprecated operations, is not supported in Python/C++ API.\n"
"Please refer to CNTK documentation and edit/modify your Brainscript model/script to replace the deprecated operation with a supported operation.\n" , node->OperationName().c_str());
if (node->Is<RngUser>())
{

Просмотреть файл

@ -539,6 +539,9 @@ namespace CNTK
case PrimitiveOpType::Sqrt:
computationNodePtr = New<SqrtNode<ElementType>>(network->GetDeviceId(), internalNodeName);
break;
case PrimitiveOpType::ELU:
computationNodePtr = New<ExponentialLinearUnitNode<ElementType>>(network->GetDeviceId(), internalNodeName);
break;
case PrimitiveOpType::Floor:
computationNodePtr = New<FloorNode<ElementType>>(network->GetDeviceId(), internalNodeName);
break;
@ -690,8 +693,15 @@ namespace CNTK
auto sharing = AsVector<bool>(functionConfig[PrimitiveFunction::AttributeNameSharing].Value<std::vector<DictionaryValue>>());
auto autoPadding = AsVector<bool>(functionConfig[PrimitiveFunction::AttributeNameAutoPadding].Value<std::vector<DictionaryValue>>());
auto transpose = functionConfig[PrimitiveFunction::AttributeNameTranspose].Value<bool>();
NDShape outputShape = NDShape::Unknown;
if (functionConfig.Contains(PrimitiveFunction::AttributeNameOutputShape))
outputShape = functionConfig[PrimitiveFunction::AttributeNameOutputShape].Value<NDShape>();
auto maxTempMemSizeInSamples = functionConfig[PrimitiveFunction::AttributeNameMaxTempMemSizeInSamples].Value<size_t>();
computationNodePtr = New<ConvolutionNode<ElementType>>(network->GetDeviceId(), internalNodeName, AsTensorShape(kernelShape), AsTensorShape(outputMapCount), AsTensorShape(strides), sharing, autoPadding, AsTensorShape(lowerPad), AsTensorShape(upperPad), transpose, ImageLayoutKind::CHW, maxTempMemSizeInSamples);
computationNodePtr = New<ConvolutionNode<ElementType>>(network->GetDeviceId(), internalNodeName,
AsTensorShape(kernelShape), AsTensorShape(outputMapCount), AsTensorShape(strides),
sharing, autoPadding, AsTensorShape(lowerPad), AsTensorShape(upperPad), transpose,
outputShape.IsUnknown()? TensorShape(0) : AsTensorShape(outputShape),
ImageLayoutKind::CHW, maxTempMemSizeInSamples);
break;
}
case PrimitiveOpType::CosDistance:
@ -719,6 +729,13 @@ namespace CNTK
computationNodePtr = New<EditDistanceErrorNode<ElementType>>(network->GetDeviceId(), internalNodeName, subPen, delPen, insPen, squashInputs, tokensToIgnore);
break;
}
case PrimitiveOpType::ForwardBackward:
{
auto delayContraint = functionConfig[PrimitiveFunction::AttributeNameDelayConstraint].Value<int>();
auto blankTokenId = functionConfig[PrimitiveFunction::AttributeNameBlankTokenId].Value<size_t>();
computationNodePtr = New<ForwardBackwardNode<ElementType>>(network->GetDeviceId(), internalNodeName, blankTokenId, delayContraint);
break;
}
case PrimitiveOpType::LambdaRank:
computationNodePtr = New<LambdaRankNode<ElementType>>(network->GetDeviceId(), internalNodeName);
break;
@ -847,22 +864,34 @@ namespace CNTK
}
else
{
computationNodePtr = New<UserDefinedV2FunctionNode<ElementType>>(network->GetDeviceId(), internalNodeName, function->shared_from_this());
// For user defined functions, we only attach unique inputs in the internal computation network since, the UDF
// backward implementations directly compute aggregate gradient values for unique inputs
std::vector<ComputationNodeBasePtr> uniqueInputNodesBasePtrs;
for (auto inputNodeBasePtr : inputNodesBasePtrs)
auto outputs = function->RawOutputs();
if (variable == outputs[0])
{
if (std::find(uniqueInputNodesBasePtrs.begin(), uniqueInputNodesBasePtrs.end(), inputNodeBasePtr) == uniqueInputNodesBasePtrs.end())
uniqueInputNodesBasePtrs.push_back(inputNodeBasePtr);
}
computationNodePtr = New<UserDefinedV2FunctionNode<ElementType>>(network->GetDeviceId(), internalNodeName, function->shared_from_this());
inputNodesBasePtrs = uniqueInputNodesBasePtrs;
// For user defined functions, we only attach unique inputs in the internal computation network since, the UDF
// backward implementations directly compute aggregate gradient values for unique inputs
std::vector<ComputationNodeBasePtr> uniqueInputNodesBasePtrs;
for (auto inputNodeBasePtr : inputNodesBasePtrs)
{
if (std::find(uniqueInputNodesBasePtrs.begin(), uniqueInputNodesBasePtrs.end(), inputNodeBasePtr) == uniqueInputNodesBasePtrs.end())
uniqueInputNodesBasePtrs.push_back(inputNodeBasePtr);
}
inputNodesBasePtrs = uniqueInputNodesBasePtrs;
}
else
{
size_t i = 1;
while (outputs[i] != variable) i++;
assert(i < outputs.size());
computationNodePtr = New<SelectUserDefinedV2FunctionOutputNode<ElementType>>(network->GetDeviceId(), CNTKInternalNodeNameFromUidAndName(variable.Uid(), variable.Name()), i);
inputNodesBasePtrs = { variableToNodeMap[outputs[0]] };
}
}
network->AddNodeToNetAndAttachInputs(computationNodePtr, inputNodesBasePtrs);
return computationNodePtr;
}
@ -1012,10 +1041,9 @@ namespace CNTK
};
PreorderTraverseFunctions(rootFunction, PatchBlockArgumentsMapping);
std::function<bool(const Variable&)> IsVariableRoot;
IsVariableRoot = [this, &IsVariableRoot](const Variable& outputVar) {
std::function<bool(const Variable&)> IsVariableRoot = [this, &IsVariableRoot](const Variable& outputVar) {
auto mappingVariable = GetMappingVariable(outputVar);
return (m_isVariableRootMap[outputVar] && ((mappingVariable == outputVar) || IsVariableRoot(mappingVariable)));
return (m_isVariableRootMap[outputVar] && !IsFirstOutputOfMultiOutputUDF(mappingVariable) && ((mappingVariable == outputVar) || IsVariableRoot(mappingVariable)));
};
// If any of the function or requested outputs is not a root node, we need to explicitly

Просмотреть файл

@ -748,6 +748,24 @@ namespace CNTK
});
}
std::wstring Function::AsString() const
{
wstringstream wss;
bool first = true;
if (IsComposite())
wss << "Composite(" << RootFunction()->OpName() << "): ";
else
wss << OpName() <<": ";
bool reverse = Internal::IsReversingTensorShapesInErrorMessagesEnabled();
for (auto arg : Arguments(reverse))
wss << (first ? (first = false, "") : ", ") << arg.AsString();
wss << " -> ";
first = true;
for (auto out : Outputs())
wss << (first ? (first = false, "") : ", ") << out.AsString();
return wss.str();
}
FunctionPtr UnaryOp(PrimitiveOpType op, const Variable& operand, Dictionary&& opConfig, const std::wstring& name)
{
std::vector<Variable> operands = { operand };
@ -785,14 +803,14 @@ namespace CNTK
}
FunctionPtr Exp(const Variable& operand, const std::wstring& name)
{
{
return UnaryOp(PrimitiveOpType::Exp, operand, Dictionary(), name);
}
FunctionPtr Log(const Variable& operand, const std::wstring& name)
{
return UnaryOp(PrimitiveOpType::Log, operand, Dictionary(), name);
}
}
FunctionPtr Square(const Variable& operand, const std::wstring& name)
{
@ -1090,6 +1108,20 @@ namespace CNTK
return BinaryOp(PrimitiveOpType::EditDistanceError, prediction, labels, std::move(additionalProperties), name);
}
FunctionPtr ForwardBackward(const Variable& graph, const Variable& features, size_t blankTokenId, int delayConstraint, const std::wstring& name)
{
auto additionalProperties = Dictionary();
additionalProperties[PrimitiveFunction::AttributeNameBlankTokenId] = blankTokenId;
additionalProperties[PrimitiveFunction::AttributeNameDelayConstraint] = delayConstraint;
return BinaryOp(PrimitiveOpType::ForwardBackward, graph, features, std::move(additionalProperties), name);
}
FunctionPtr LabelsToGraph(const Variable& labels, const std::wstring& name)
{
return UnaryOp(PrimitiveOpType::LabelsToGraph, labels, Dictionary(), name);
}
FunctionPtr PastValue(const Variable& operand, const Variable& initialState, size_t offset, const std::wstring& name)
{
auto additionalProperties = Dictionary();
@ -1155,26 +1187,44 @@ namespace CNTK
const std::vector<bool>& autoPadding,
const NDShape& lowerPad,
const NDShape& upperPad,
bool transpose,
size_t maxTempMemSizeInSamples,
const std::wstring& name)
{
// Currently we require that the Convolution function's operand have a dynamic axis since otherwise
// the internal implementation incorrectly infers the batch axis dimension by picking up the first axis as
// the sample shape and considering the rest to be part of the batch axis
if (operand.DynamicAxes().empty())
LogicError("Convolution currently requires the main operand to have dynamic axes");
return Internal::Convolution(convolutionMap,
operand,
strides,
sharing,
autoPadding,
lowerPad,
upperPad,
false,
{0},
maxTempMemSizeInSamples,
name);
}
auto additionalProperties = Dictionary();
additionalProperties[PrimitiveFunction::AttributeNameStrides] = strides;
additionalProperties[PrimitiveFunction::AttributeNameSharing] = AsDictionaryValueVector(sharing);
additionalProperties[PrimitiveFunction::AttributeNameAutoPadding] = AsDictionaryValueVector(autoPadding);
additionalProperties[PrimitiveFunction::AttributeNameLowerPad] = lowerPad;
additionalProperties[PrimitiveFunction::AttributeNameUpperPad] = upperPad;
additionalProperties[PrimitiveFunction::AttributeNameTranspose] = transpose;
additionalProperties[PrimitiveFunction::AttributeNameMaxTempMemSizeInSamples] = maxTempMemSizeInSamples;
return BinaryOp(PrimitiveOpType::Convolution, convolutionMap, operand, std::move(additionalProperties), name);
FunctionPtr ConvolutionTranspose(const Variable& convolutionMap,
const Variable& operand,
const NDShape& strides,
const std::vector<bool>& sharing,
const std::vector<bool>& autoPadding,
const NDShape& lowerPad,
const NDShape& upperPad,
const NDShape& outputShape,
size_t maxTempMemSizeInSamples,
const std::wstring& name)
{
return Internal::Convolution(convolutionMap,
operand,
strides,
sharing,
autoPadding,
lowerPad,
upperPad,
true,
outputShape,
maxTempMemSizeInSamples,
name);
}
FunctionPtr ROIPooling(const Variable& convolutionMap, const Variable& rois, const NDShape& roiOutputShape, const std::wstring& name/* = L""*/)
@ -1312,13 +1362,7 @@ namespace CNTK
FunctionPtr ELU(const Variable& operand, const std::wstring& name)
{
auto operandPlaceholder = PlaceholderVariable();
auto lessThanZero = Less(operandPlaceholder, Constant::Scalar(operand.GetDataType(), 0.0));
auto result = ElementSelect(lessThanZero,
Minus(Exp(operandPlaceholder), Constant::Scalar(operand.GetDataType(), 1.0)),
operandPlaceholder);
return AsBlock(std::move(result), { { operandPlaceholder, operand } }, L"ELU", name);
return UnaryOp(PrimitiveOpType::ELU, operand, Dictionary(), name);
}
FunctionPtr LeakyReLU(const Variable& operand, const std::wstring& name)
@ -1343,6 +1387,14 @@ namespace CNTK
return AsBlock(std::move(result), { { operandPlaceholder, operand } }, L"PReLU", name);
}
FunctionPtr Softplus(const Variable& operand, const std::wstring& name)
{
auto operandPlaceholder = PlaceholderVariable();
auto result = LogAddExp(operandPlaceholder, Constant::Scalar(operand.GetDataType(), 0.0));
return AsBlock(std::move(result), { { operandPlaceholder, operand } }, L"Softplus", name);
}
FunctionPtr Argmax(const Variable& operand, const Axis& axis, const std::wstring& name)
{
return Internal::ReduceElements(operand, PrimitiveFunction::InternalArgmaxReductionOpName, axis, name);
@ -1607,5 +1659,36 @@ namespace CNTK
// E.g. used for seq2seq.
return BinaryOp(PrimitiveOpType::ReconcileDynamicAxis, operand, axesAsOperand, Dictionary(), name);
}
FunctionPtr Convolution(const Variable& convolutionMap,
const Variable& operand,
const NDShape& strides,
const std::vector<bool>& sharing,
const std::vector<bool>& autoPadding,
const NDShape& lowerPad,
const NDShape& upperPad,
bool transpose,
const NDShape& outputShape,
size_t maxTempMemSizeInSamples,
const std::wstring& name)
{
// Currently we require that the Convolution function's operand have a dynamic axis since otherwise
// the internal implementation incorrectly infers the batch axis dimension by picking up the first axis as
// the sample shape and considering the rest to be part of the batch axis
if (operand.DynamicAxes().empty())
LogicError("Convolution currently requires the main operand to have dynamic axes");
auto additionalProperties = Dictionary();
additionalProperties[PrimitiveFunction::AttributeNameStrides] = strides;
additionalProperties[PrimitiveFunction::AttributeNameSharing] = AsDictionaryValueVector(sharing);
additionalProperties[PrimitiveFunction::AttributeNameAutoPadding] = AsDictionaryValueVector(autoPadding);
additionalProperties[PrimitiveFunction::AttributeNameLowerPad] = lowerPad;
additionalProperties[PrimitiveFunction::AttributeNameUpperPad] = upperPad;
additionalProperties[PrimitiveFunction::AttributeNameTranspose] = transpose;
additionalProperties[PrimitiveFunction::AttributeNameOutputShape] = outputShape;
additionalProperties[PrimitiveFunction::AttributeNameMaxTempMemSizeInSamples] = maxTempMemSizeInSamples;
return BinaryOp(PrimitiveOpType::Convolution, convolutionMap, operand, std::move(additionalProperties), name);
}
}
}

24
Source/CNTKv2LibraryDll/PrimitiveFunction.cpp Executable file → Normal file
Просмотреть файл

@ -54,6 +54,7 @@ namespace CNTK
/*static*/ const std::wstring PrimitiveFunction::AttributeNameLowerPad = L"lowerPad";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameUpperPad = L"upperPad";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameTranspose = L"transpose";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameOutputShape = L"outputShape";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameMaxTempMemSizeInSamples = L"maxTempMemSizeInSamples";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameROIOutputShape = L"roiOutputShape";
/*static*/ const std::wstring PrimitiveFunction::AttributeNamePoolingType = L"poolingType";
@ -81,6 +82,8 @@ namespace CNTK
/*static*/ const std::wstring PrimitiveFunction::AttributeNameInsertionPenalty = L"InsertionPenalty";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameSquashInputs = L"SquashInputs";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameTokensToIgnore = L"TokensToIgnore";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameDelayConstraint = L"DelayConstraint";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameBlankTokenId = L"BlankTokenId";
/*static*/ DataType PrimitiveFunction::GetOutputDataType(PrimitiveOpType op, std::vector<Variable>& inputs, bool inferDimensions)
{
@ -306,7 +309,9 @@ namespace CNTK
case PrimitiveOpType::Sin:
case PrimitiveOpType::Cos:
case PrimitiveOpType::Pass:
case PrimitiveOpType::LabelsToGraph:
case PrimitiveOpType::StopGradient:
case PrimitiveOpType::ELU:
assert(m_inputs.size() == 1);
outputShape = UnaryElementwiseOpOutputShape(m_inputs[0].Shape());
break;
@ -518,6 +523,9 @@ namespace CNTK
auto& strides = m_attributes[PrimitiveFunction::AttributeNameStrides].Value<NDShape>();
auto& lowerPad = m_attributes[PrimitiveFunction::AttributeNameLowerPad].Value<NDShape>();
auto& upperPad = m_attributes[PrimitiveFunction::AttributeNameUpperPad].Value<NDShape>();
NDShape tmpShape = NDShape::Unknown;
if (m_attributes.Contains(PrimitiveFunction::AttributeNameOutputShape))
tmpShape = m_attributes[PrimitiveFunction::AttributeNameOutputShape].Value<NDShape>();
auto sharing = AsVector<bool>(m_attributes[PrimitiveFunction::AttributeNameSharing].Value<std::vector<DictionaryValue>>());
auto autoPadding = AsVector<bool>(m_attributes[PrimitiveFunction::AttributeNameAutoPadding].Value<std::vector<DictionaryValue>>());
bool transpose = m_attributes[PrimitiveFunction::AttributeNameTranspose].Value<bool>();
@ -527,7 +535,20 @@ namespace CNTK
NDShape outputMapCount, kernelShape;
std::tie(outputMapCount, kernelShape) = GetConvolutionOutputMapCountAndKernelShape(m_inputs[0].Shape(), m_inputs[1].Shape());
auto originalKernelShape = kernelShape;
outputShape = ConvolutionOpOutputShape(m_op, m_inputs[1].Shape(), kernelShape, outputMapCount, strides, sharing, autoPadding, lowerPad, upperPad, transpose, true);
auto inputShape = m_inputs[1].Shape();
if (!transpose || tmpShape.IsUnknown() || tmpShape[0] == 0)
{
outputShape = ConvolutionOpOutputShape(m_op, inputShape, kernelShape, outputMapCount, strides, sharing, autoPadding, lowerPad, upperPad, transpose, true);
}
else
{
NDShape inferredInputShape = ConvolutionOpOutputShape(m_op, tmpShape, kernelShape, outputMapCount, strides, sharing, autoPadding, lowerPad, upperPad, false, true);
if (inferredInputShape != inputShape)
RuntimeError("The shape of the convolution transpose operand %ls is different from the result of convoluting the specified output argument using the provided options %ls", inputShape.AsString().c_str(), inferredInputShape.AsString().c_str());
outputShape = tmpShape;
}
if (originalKernelShape != kernelShape)
{
for (size_t i2 = 0; i2 < kernelShape.Rank(); ++i2)
@ -540,6 +561,7 @@ namespace CNTK
}
case PrimitiveOpType::CosDistance:
case PrimitiveOpType::EditDistanceError:
case PrimitiveOpType::ForwardBackward:
case PrimitiveOpType::Logistic:
case PrimitiveOpType::SquaredError:
case PrimitiveOpType::CrossEntropyWithSoftmax:

22
Source/CNTKv2LibraryDll/PrimitiveFunction.h Executable file → Normal file
Просмотреть файл

@ -68,6 +68,8 @@ namespace CNTK
{PrimitiveOpType::CrossEntropyWithSoftmax, L"CrossEntropyWithSoftmax"},
{PrimitiveOpType::ClassificationError, L"ClassificationError"},
{PrimitiveOpType::EditDistanceError, L"EditDistanceError" },
{PrimitiveOpType::ForwardBackward, L"ForwardBackward" },
{PrimitiveOpType::LabelsToGraph, L"LabelsToGraph" },
{PrimitiveOpType::PastValue, L"PastValue"},
{PrimitiveOpType::FutureValue, L"FutureValue"},
{PrimitiveOpType::ReduceElements, L"ReduceElements"},
@ -87,12 +89,13 @@ namespace CNTK
{PrimitiveOpType::Sin, L"Sin"},
{PrimitiveOpType::Cos, L"Cos"},
{PrimitiveOpType::Pass, L"Pass"},
{ PrimitiveOpType::Block, L"Block" },
{ PrimitiveOpType::Unpooling, L"Unpooling" },
{ PrimitiveOpType::LambdaRank, L"LambdaRank" },
{ PrimitiveOpType::NDCG, L"NDCG" },
{ PrimitiveOpType::NoOp, L"NoOp" },
{ PrimitiveOpType::StopGradient, L"StopGradient" }
{PrimitiveOpType::Block, L"Block" },
{PrimitiveOpType::Unpooling, L"Unpooling" },
{PrimitiveOpType::LambdaRank, L"LambdaRank" },
{PrimitiveOpType::NDCG, L"NDCG" },
{PrimitiveOpType::NoOp, L"NoOp" },
{PrimitiveOpType::StopGradient, L"StopGradient" },
{PrimitiveOpType::ELU, L"ELU" },
};
inline const std::wstring& PrimitiveOpTypeName(PrimitiveOpType opType)
@ -210,6 +213,7 @@ namespace CNTK
static const std::wstring AttributeNameLowerPad;
static const std::wstring AttributeNameUpperPad;
static const std::wstring AttributeNameTranspose;
static const std::wstring AttributeNameOutputShape;
static const std::wstring AttributeNameMaxTempMemSizeInSamples;
static const std::wstring AttributeNameROIOutputShape;
static const std::wstring AttributeNamePoolingType;
@ -237,6 +241,8 @@ namespace CNTK
static const std::wstring AttributeNameInsertionPenalty;
static const std::wstring AttributeNameSquashInputs;
static const std::wstring AttributeNameTokensToIgnore;
static const std::wstring AttributeNameDelayConstraint;
static const std::wstring AttributeNameBlankTokenId;
protected:
PrimitiveFunction(PrimitiveOpType op, const std::vector<Variable>& inputs, Dictionary&& functionConfig, const std::wstring& functionName, const std::wstring& uid)
@ -731,6 +737,8 @@ namespace CNTK
// version 2: changed in 7af3a7c0e46cb12f873f1289400a9c5d86746662. TODO(n17s): add description.
// version 3: changed in df0ab4e58186738931968e806b61bc80d7b6e20e. TODO(pkrannen): add description.
// version 4: added extra parameter (#6) for the running mean sample count in BatchNormalization.
static const size_t s_serializationVersion = 7;
// Version 6: Add argmax and argmin to ReduceElement.
// Version 8: Add ELU node.
static const size_t s_serializationVersion = 8;
};
}

2
Source/CNTKv2LibraryDll/PrimitiveOpType.h Executable file → Normal file
Просмотреть файл

@ -74,6 +74,8 @@ namespace CNTK
NoOp = 62,
LabelsToGraph = 63,
StopGradient = 64,
ELU = 65,
ForwardBackward = 66,
// New op types should only be appended to the end of this list
UnknownOP
// and UnknownOP should always be last.

Просмотреть файл

@ -12,6 +12,15 @@
namespace CNTK
{
namespace Internal
{
// TODO: Workaround for back compat. Should not be used and will be removed in the next version.
CNTK_API void AddProgressWriters(const TrainerPtr& t, const std::vector<ProgressWriterPtr>& w)
{
t->AddProgressWriters(w);
}
}
using namespace std;
const static std::wstring s_trainingMinibatchSource = L"TrainingMinibatchSource";
@ -22,6 +31,36 @@ namespace CNTK
find_if(s.begin(), s.end(), [](wchar_t c) { return !isdigit(c); }) == s.end();
}
CheckpointConfig::CheckpointConfig(
const std::wstring& checkPointFileName,
size_t checkpointFrequencyInSamples,
bool restoreFromCheckpointIfExists,
bool preserveAllCheckpoints) :
m_preserveAll(preserveAllCheckpoints),
m_restore(restoreFromCheckpointIfExists),
m_fileName(checkPointFileName),
m_frequency(checkpointFrequencyInSamples)
{
if (m_fileName.empty())
{
if (checkpointFrequencyInSamples != 0 && checkpointFrequencyInSamples != std::numeric_limits<size_t>::max())
InvalidArgument("Checkpoint file name is not allowed to be empty if checkpoint frequency is non zero.");
if (preserveAllCheckpoints)
InvalidArgument("Checkpoint file name is not allowed to be empty if 'preserve all checkpoints' is specified.");
checkpointFrequencyInSamples = 0;
}
}
CrossValidationConfig::CrossValidationConfig(
const MinibatchSourcePtr& crossValidationSource,
const MinibatchSizeSchedule& crossValidationSchedule,
size_t crossValidationFrequencyInSamples):
m_source(crossValidationSource),
m_mbSize(crossValidationSchedule),
m_frequency(crossValidationFrequencyInSamples)
{
}
TrainingSessionPtr CreateBasicTrainingSession(
const MinibatchSourcePtr& trainingSource,
const TrainerPtr& trainer,
@ -38,6 +77,10 @@ namespace CNTK
size_t progressFrequency,
const std::vector<ProgressWriterPtr>& progressWriters)
{
fprintf(stderr, "WARNING:CreateBasicTrainingSession is deprecated and will be removed in the next beta (13)."
"Instructions for updating:"
"Please switch to CreateTrainingSession function and then call SetCheckpointing/SetCrossValidation/SetPrintingProgress as needed.");
return MakeSharedObject<TrainingSession>(trainingSource,
trainer,
modelInputToMinibatchSourceStream,
@ -54,6 +97,24 @@ namespace CNTK
progressWriters);
}
TrainingSessionPtr CreateTrainingSession(
const TrainerPtr& trainer,
const MinibatchSourcePtr& trainingSource,
const MinibatchSizeSchedule& minibatchSizeSchedule,
const std::unordered_map<Variable, StreamInformation>& inputVarToStream,
size_t maxNumTrainingSamples,
size_t progressFrequency,
const CheckpointConfig& checkpointing,
const CrossValidationConfig& crossValidation)
{
return MakeSharedObject<TrainingSession>(trainer,
trainingSource,
minibatchSizeSchedule,
inputVarToStream,
maxNumTrainingSamples,
progressFrequency, checkpointing, crossValidation);
}
TrainingSession::TrainingSession(
const MinibatchSourcePtr& trainingSource,
const TrainerPtr& trainer,
@ -68,49 +129,56 @@ namespace CNTK
bool saveAllCheckpoints,
size_t maxNumberOfSamples,
size_t progressFrequencyInSamples,
const std::vector<ProgressWriterPtr>& progressWriters) :
m_trainingSource(trainingSource),
const std::vector<ProgressWriterPtr>& progressWriters)
: TrainingSession(
trainer, trainingSource, schedule, modelInputToMinibatchSourceStream, maxNumberOfSamples, progressFrequencyInSamples,
CheckpointConfig(checkPointFileName, checkpointFrequencyInSamples, restoreFromCheckpointIfExists, saveAllCheckpoints),
CrossValidationConfig(crossValidationSource, crossValidationSchedule, crossValidationFrequencyInSamples))
{
if (progressFrequencyInSamples)
{
trainer->AddProgressWriters(progressWriters);
}
}
TrainingSession::TrainingSession(
const TrainerPtr& trainer,
const MinibatchSourcePtr& trainingSource,
const MinibatchSizeSchedule& minibatchSizeSchedule,
const std::unordered_map<Variable, StreamInformation>& inputVarToStream,
size_t maxNumTrainingSamples,
size_t progressFrequency,
const CheckpointConfig& checkpointing,
const CrossValidationConfig& crossValidation) :
m_trainer(trainer),
m_modelInputToMinibatchSourceStream(modelInputToMinibatchSourceStream),
m_checkPointFileName(checkPointFileName),
m_source(trainingSource),
m_mbSize(minibatchSizeSchedule),
m_varToStream(inputVarToStream),
m_maxNumSamples(maxNumTrainingSamples),
m_progressFrequency(progressFrequency),
m_checkpoint(checkpointing),
m_cv(crossValidation),
m_parallelAfterSamples(0),
m_workerRank(0),
m_numberOfWorkers(1),
m_minibatchSizeSchedule(schedule),
m_maxNumberOfSamples(maxNumberOfSamples),
m_restoreFromCheckpointIfExists(restoreFromCheckpointIfExists),
m_saveAllCheckpoints(saveAllCheckpoints),
m_crossValidationSource(crossValidationSource),
m_crossValidationSchedule(crossValidationSchedule)
m_numberOfWorkers(1)
{
if (!trainingSource)
InvalidArgument("Training minibatch source is not allowed to be null.");
if (!trainer)
if (!m_trainer)
InvalidArgument("Trainer is not allowed to be null.");
if(modelInputToMinibatchSourceStream.empty())
InvalidArgument("Input mapping is not allowed to be empty.");
if (m_checkPointFileName.empty())
{
if(checkpointFrequencyInSamples != 0 && checkpointFrequencyInSamples != std::numeric_limits<size_t>::max())
InvalidArgument("Checkpoint file name is not allowed to be empty if checkpoint frequency is non zero.");
if(saveAllCheckpoints)
InvalidArgument("Checkpoint file name is not allowed to be empty if 'save all checkpoints' is specified.");
checkpointFrequencyInSamples = 0;
}
if (!m_source)
InvalidArgument("Training source must not be null.");
if (!m_crossValidationSource)
{
if(crossValidationFrequencyInSamples != 0 && crossValidationFrequencyInSamples != std::numeric_limits<size_t>::max())
InvalidArgument("Cross validation minibatch source is not allowed to be empty.");
crossValidationFrequencyInSamples = 0;
}
if (m_maxNumSamples == 0)
InvalidArgument("maxNumTrainingSamples must not be zero.");
if (m_varToStream.empty())
InvalidArgument("inputVarToStream mapping must not be empty.");
// Let's calculate the warm up period the distributed learners may need.
// We will take the maximum warm up period required.
auto learners = trainer->ParameterLearners();
auto learners = m_trainer->ParameterLearners();
m_parallelAfterSamples = 0;
for (const auto& l: learners)
for (const auto& l : learners)
{
auto distributed = std::dynamic_pointer_cast<DistributedLearner>(l);
if (distributed)
@ -122,47 +190,49 @@ namespace CNTK
}
// Fill-in required actions.
if (checkpointFrequencyInSamples != 0)
m_actions.push_back({ checkpointFrequencyInSamples, 0, 0,
if (m_checkpoint.m_frequency != 0)
m_actions.push_back({ m_checkpoint.m_frequency, 0, 0,
[this](size_t currentIndex, const DeviceDescriptor&)
{
SaveCheckpoint(currentIndex);
// enable profiler after the first checkpoint
// This has effect only if the profiler is globally enabled by StartProfiler()
Microsoft::MSR::CNTK::ProfilerEnable(true);
return true;
} });
if(crossValidationFrequencyInSamples != 0)
m_actions.push_back({ crossValidationFrequencyInSamples, 0, 0,
[this](size_t currentIndex, const DeviceDescriptor& d) { CrossValidate(currentIndex, d); } });
if (m_cv.m_frequency != 0)
m_actions.push_back({ m_cv.m_frequency , 0, 0,
[this](size_t currentIndex, const DeviceDescriptor& d) { return CrossValidate(currentIndex, d); } });
if (progressFrequencyInSamples != 0)
m_actions.push_back({ progressFrequencyInSamples, 0, 0,
[this](size_t currentIndex, const DeviceDescriptor&) { ReportProgress(currentIndex); } });
m_trainer->AddProgressWriters(progressWriters);
if (m_progressFrequency != 0)
{
m_actions.push_back({ m_progressFrequency, 0, 0,
[this](size_t currentIndex, const DeviceDescriptor&) { ReportProgress(currentIndex); return true; } });
}
}
void TrainingSession::Train(const DeviceDescriptor& computeDevice)
{
std::unordered_map<Variable, ValuePtr> minibatch;
bool shouldTrain = m_maxNumberOfSamples > 0;
bool shouldTrain = m_maxNumSamples > 0;
// Let's try to restore if required.
size_t restoredNumberOfSamples = 0;
if (m_restoreFromCheckpointIfExists && !m_checkPointFileName.empty())
if (m_checkpoint.m_restore && !m_checkpoint.m_fileName.empty())
{
RestoreFromCheckpoint();
restoredNumberOfSamples = m_trainer->TotalNumberOfSamplesSeen();
}
// Main train loop.
bool earlyExit = false;
while (shouldTrain)
{
// Get next minibatch.
size_t samplesLeft = m_maxNumberOfSamples > m_trainer->TotalNumberOfSamplesSeen()
? m_maxNumberOfSamples - m_trainer->TotalNumberOfSamplesSeen()
: 0;
size_t samplesLeft = earlyExit || m_maxNumSamples <= Trainer()->TotalNumberOfSamplesSeen()
? 0
: m_maxNumSamples - Trainer()->TotalNumberOfSamplesSeen();
// Note that in case of distributed training we don't want to stop if the local minibatch
// is empty - it is possible that the other workers are still processing their minibatches.
@ -170,32 +240,34 @@ namespace CNTK
// Train on the minibatch.
OnMinibatchStart();
shouldTrain = m_trainer->TrainMinibatch(minibatch, computeDevice);
OnMinibatchEnd();
shouldTrain = Trainer()->TrainMinibatch(minibatch, computeDevice);
earlyExit |= !OnMinibatchEnd(); // If the callback wants to have early exit - we stop training.
auto profMisc = Microsoft::MSR::CNTK::ScopeProfile(Microsoft::MSR::CNTK::profilerEvtMainPost);
// Peform actions if required.
size_t totalNumberOfSamples = m_trainer->TotalNumberOfSamplesSeen();
size_t totalNumberOfSamples = Trainer()->TotalNumberOfSamplesSeen();
for (auto& action : m_actions)
{
size_t index = totalNumberOfSamples / action.frequency;
if (index != action.currentIndex)
{
action.action(action.currentIndex, computeDevice);
// If any action wants to have early exit - we stop training.
earlyExit |= !action.action(action.currentIndex, computeDevice);
action.currentIndex = index;
action.sampleCountWhenLastCalled = totalNumberOfSamples;
}
}
}
if (restoredNumberOfSamples != m_trainer->TotalNumberOfSamplesSeen())
if (restoredNumberOfSamples != Trainer()->TotalNumberOfSamplesSeen())
{
// Let's do all actions on the last probably a partial data at the end.
for (auto& action: m_actions)
{
if (m_trainer->TotalNumberOfSamplesSeen() % action.frequency != 0 &&
m_trainer->TotalNumberOfSamplesSeen() != action.sampleCountWhenLastCalled)
if (Trainer()->TotalNumberOfSamplesSeen() % action.frequency != 0 &&
Trainer()->TotalNumberOfSamplesSeen() != action.sampleCountWhenLastCalled)
action.action(action.currentIndex, computeDevice);
}
}
@ -203,38 +275,48 @@ namespace CNTK
// In case of incremental - save final checkpoint.
// This is required only when we keep all existing checkpoints, otherwise
// The checkpoint was already saved with the proper name.
if (m_saveAllCheckpoints && !fexists(m_checkPointFileName))
if (m_checkpoint.m_frequency &&
m_checkpoint.m_preserveAll &&
!fexists(m_checkpoint.m_fileName))
SaveFinalCheckpoint();
}
// TODO: Possibly expose a limiting counter on the number of samples for validation.
void TrainingSession::CrossValidate(size_t currentIndex, const DeviceDescriptor& computeDevice)
bool TrainingSession::CrossValidate(size_t currentIndex, const DeviceDescriptor& computeDevice)
{
std::unordered_map<Variable, ValuePtr> minibatch;
double accumulatedError = 0;
double error;
size_t totalNumberOfSamples = 0;
size_t numberOfMinibatches = 0;
auto checkpoint = m_crossValidationSource->GetCheckpointState();
size_t sampleCount = 0;
while(GetCrossValidationMinibatch(minibatch, m_crossValidationSchedule[sampleCount], computeDevice), !minibatch.empty())
if (m_cv.m_source) // Running cross validation
{
// TODO: it may be slow to rely on TestMinibatch to return error each time, since it may require transfer
// of error from the GPU each time.
error = m_trainer->TestMinibatch(minibatch, computeDevice, sampleCount);
accumulatedError += error * sampleCount;
totalNumberOfSamples += sampleCount;
numberOfMinibatches++;
std::unordered_map<Variable, ValuePtr> minibatch;
double accumulatedError = 0;
double error = 0;
size_t totalNumberOfSamples = 0;
size_t numberOfMinibatches = 0;
auto checkpoint = m_cv.m_source->GetCheckpointState();
size_t sampleCount = 0;
while (GetCrossValidationMinibatch(minibatch, m_cv.m_mbSize[sampleCount], computeDevice), !minibatch.empty())
{
// TODO: it may be slow to rely on TestMinibatch to return error each time, since it may require transfer
// of error from the GPU each time.
error = m_trainer->TestMinibatch(minibatch, computeDevice, sampleCount);
accumulatedError += error * sampleCount;
totalNumberOfSamples += sampleCount;
numberOfMinibatches++;
}
m_cv.m_source->RestoreFromCheckpoint(checkpoint);
Trainer()->SummarizeTestProgress();
return OnCrossValidationEnd(currentIndex, accumulatedError / totalNumberOfSamples, totalNumberOfSamples, numberOfMinibatches);
}
else // Only invoking the callback.
{
return OnCrossValidationEnd(currentIndex, 0, 0, 0);
}
m_crossValidationSource->RestoreFromCheckpoint(checkpoint);
m_trainer->SummarizeTestProgress();
OnCrossValidationEnd(currentIndex, accumulatedError / totalNumberOfSamples, totalNumberOfSamples, numberOfMinibatches);
}
inline void TrainingSession::ReportProgress(size_t /*currentIndex*/)
{
m_trainer->SummarizeTrainingProgress();
Trainer()->SummarizeTrainingProgress();
}
void TrainingSession::GetTrainingMinibatch(std::unordered_map<Variable, ValuePtr>& minibatch, size_t maxMbSize, const DeviceDescriptor& computeDevice)
@ -242,7 +324,7 @@ namespace CNTK
size_t workerRank = m_workerRank, numberOfWorkers = m_numberOfWorkers;
// Check if we are operating in distributed mode.
if (m_parallelAfterSamples > m_trainer->TotalNumberOfSamplesSeen())
if (m_parallelAfterSamples > Trainer()->TotalNumberOfSamplesSeen())
{
numberOfWorkers = 1;
workerRank = 0;
@ -250,13 +332,13 @@ namespace CNTK
size_t mbSize = GetMinibatchSize();
mbSize = std::min(mbSize, maxMbSize);
GetNextMinibatch(m_trainingSource, minibatch, mbSize, workerRank, numberOfWorkers, computeDevice);
GetNextMinibatch(m_source, minibatch, mbSize, workerRank, numberOfWorkers, computeDevice);
}
void TrainingSession::GetCrossValidationMinibatch(std::unordered_map<Variable, ValuePtr>& minibatch, size_t maxMbSize, const DeviceDescriptor& computeDevice)
{
// TODO: Support distributed cross-validation, when TestMinibatch supports it.
GetNextMinibatch(m_crossValidationSource, minibatch, maxMbSize, 0, 1, computeDevice);
GetNextMinibatch(m_cv.m_source, minibatch, maxMbSize, 0, 1, computeDevice);
}
void TrainingSession::GetNextMinibatch(const MinibatchSourcePtr& source, std::unordered_map<Variable, ValuePtr>& minibatch, size_t mbSize, size_t workerRank, size_t numberOfWorkers, const DeviceDescriptor& computeDevice)
@ -271,34 +353,34 @@ namespace CNTK
if (minibatchData.empty())
return;
for (auto v : m_modelInputToMinibatchSourceStream)
for (auto v : m_varToStream)
minibatch.insert({ v.first, minibatchData[v.second].data });
}
void TrainingSession::RestoreFromCheckpoint(const std::wstring& checkpointFileName)
{
Dictionary externalState = m_trainer->RestoreFromCheckpoint(checkpointFileName);
m_trainingSource->RestoreFromCheckpoint(externalState[s_trainingMinibatchSource].Value<Dictionary>());
Dictionary externalState = Trainer()->RestoreFromCheckpoint(checkpointFileName);
m_source->RestoreFromCheckpoint(externalState[s_trainingMinibatchSource].Value<Dictionary>());
}
void TrainingSession::SaveCheckpoint(size_t currentIndex)
{
OnCheckpointStart(currentIndex);
Dictionary externalState;
externalState[s_trainingMinibatchSource] = m_trainingSource->GetCheckpointState();
externalState[s_trainingMinibatchSource] = m_source->GetCheckpointState();
wstring checkpointFile = m_checkPointFileName;
if (m_saveAllCheckpoints)
wstring checkpointFile = m_checkpoint.m_fileName;
if (m_checkpoint.m_preserveAll)
checkpointFile += std::to_wstring(currentIndex);
m_trainer->SaveCheckpoint(checkpointFile, externalState);
Trainer()->SaveCheckpoint(checkpointFile, externalState);
OnCheckpointEnd(currentIndex);
}
void TrainingSession::SaveFinalCheckpoint()
{
Dictionary externalState;
externalState[s_trainingMinibatchSource] = m_trainingSource->GetCheckpointState();
m_trainer->SaveCheckpoint(m_checkPointFileName, externalState);
externalState[s_trainingMinibatchSource] = m_source->GetCheckpointState();
Trainer()->SaveCheckpoint(m_checkpoint.m_fileName, externalState);
}
// Restores from a m_checkPointFileName file.
@ -308,29 +390,30 @@ namespace CNTK
// Where N is some positive integer.
void TrainingSession::RestoreFromCheckpoint()
{
assert(!m_checkPointFileName.empty());
assert(!m_checkpoint.m_fileName.empty());
auto checkpoint = m_checkpoint.m_fileName;
// Make sure the intermediate directories exist, so no need for further checks.
msra::files::make_intermediate_dirs(m_checkPointFileName);
msra::files::make_intermediate_dirs(checkpoint);
size_t pos = m_checkPointFileName.find_last_of(L"\\/");
size_t pos = checkpoint.find_last_of(L"\\/");
wstring parent;
wstring fileName;
if (pos == wstring::npos)
{
parent = L"..";
fileName = m_checkPointFileName;
fileName = checkpoint;
}
else
{
parent = m_checkPointFileName.substr(0, pos);
fileName = m_checkPointFileName.substr(pos);
parent = checkpoint.substr(0, pos);
fileName = checkpoint.substr(pos);
}
std::wstring restoreFile;
if (fexists(m_checkPointFileName))
if (fexists(checkpoint))
{
restoreFile = m_checkPointFileName;
restoreFile = checkpoint;
}
else
{
@ -375,7 +458,7 @@ namespace CNTK
this->RestoreFromCheckpoint(restoreFile);
// Recalculate actions indicies.
size_t totalNumberOfSamples = m_trainer->TotalNumberOfSamplesSeen();
size_t totalNumberOfSamples = Trainer()->TotalNumberOfSamplesSeen();
for (auto& action : m_actions)
{
action.currentIndex = totalNumberOfSamples / action.frequency;

Просмотреть файл

@ -454,6 +454,18 @@ namespace CNTK
#endif
}
bool IsFirstOutputOfMultiOutputUDF(const Variable& var)
{
if (!var.IsOutput())
return false;
auto owner = var.Owner();
if (dynamic_cast<PrimitiveFunction*>(owner.get()))
return false;
return (var == owner->Outputs()[0]) && (owner->Outputs().size() > 1);
}
std::vector<Axis> DynamicAxesFromInternalDynamicAxisName(const std::wstring& internalDynamicAxisName)
{
std::vector<Axis> inputVarDynamicAxes;
@ -513,7 +525,8 @@ namespace CNTK
if (var.GetDataType() != value->GetDataType())
LogicError("The Variable's DataType %s does not match the corresponding Value's DataType %s", DataTypeName(var.GetDataType()), DataTypeName(value->GetDataType()));
bool isPackedValue = (dynamic_cast<PackedValue*>(value.get()) != nullptr);
auto packedValue = dynamic_cast<PackedValue*>(value.get());
bool isPackedValue = (packedValue != nullptr) && packedValue->IsPacked();
// TODO: Is supplying dense data for an Input variable tagged as sparse, a fatal error even for packed value objects?
if (!isPackedValue)
@ -571,7 +584,7 @@ namespace CNTK
LogicError("The specified ElementType %s does not match the DataType %s", typeid(ElementType).name(), DataTypeName(value->GetDataType()));
auto packedValue = dynamic_cast<PackedValue*>(value.get());
if (packedValue)
if (packedValue && packedValue->IsPacked())
return packedValue->PackedData<ElementType>();
auto varShape = var.Shape();
@ -953,4 +966,34 @@ namespace CNTK
Data()->SetValue(0.0);
}
}
std::wstring DynamicAxesAsString(std::vector<Axis> da, bool rowMajor)
{
if (da.size() == 0)
return L"[]";
std::wstringstream wss;
wss << "[";
if (da == Axis::UnknownDynamicAxes())
wss << "???";
else
{
if (rowMajor)
std::reverse(da.begin(), da.end());
bool first = true;
for (auto d : da)
{
wss << (first ? "" : ", ");
if (d == Axis::DefaultBatchAxis())
wss << "#";
else if (d == Axis::DefaultDynamicAxis())
wss << "*";
else
wss << d.Name();
first = false;
}
}
wss << "]";
return wss.str();
}
}

Просмотреть файл

@ -141,9 +141,9 @@ namespace CNTK
inline std::wstring AsStringForErrorReporting(const NDShape& shape)
{
bool invertShape = Internal::IsReversingTensorShapesInErrorMessagesEnabled();
bool reverseShape = Internal::IsReversingTensorShapesInErrorMessagesEnabled();
auto displayShape = shape;
if (invertShape)
if (reverseShape)
{
for (size_t i = 0, j = shape.Rank() - 1; i < shape.Rank(); ++i, --j)
displayShape[i] = shape[j];
@ -526,6 +526,8 @@ namespace CNTK
InvalidArgument("The specified axis index (%d) exceeds the static #axes (%d) of the corresponding operand", (int)axis.StaticAxisIndex(), (int)operandShape.Rank());
}
bool IsFirstOutputOfMultiOutputUDF(const Variable& var);
std::vector<Axis> DynamicAxesFromInternalDynamicAxisName(const std::wstring& internalDynamicAxisName);
// Construct the dynamic axis name to be used internally for the CNTK InputNodes
@ -625,4 +627,7 @@ namespace CNTK
size_t m_numUpdates;
};
std::wstring DynamicAxesAsString(std::vector<Axis> da, bool rowMajor = false);
}

Просмотреть файл

@ -33,6 +33,8 @@ namespace CNTK
m_unpackedShape = m_unpackedShape.AppendShape({ packedDataLayout->GetNumTimeSteps(), packedDataLayout->GetNumSequences() });
}
bool IsPacked() const { return m_isPacked; }
void Unpack() const;
const NDShape& Shape() const override { return m_unpackedShape; }

Просмотреть файл

@ -183,6 +183,22 @@ namespace CNTK
}
}
std::wstring Variable::AsString() const
{
std::wstringstream wss;
wss << VariableKindName(Kind()) << "('";
if (Name() != L"")
wss << Name();
else
wss << Uid();
bool reverse = Internal::IsReversingTensorShapesInErrorMessagesEnabled();
if (reverse)
wss << "', " << DynamicAxesAsString(DynamicAxes(), reverse) << ", " << AsStringForErrorReporting(Shape()) << ")";
else
wss << "', " << AsStringForErrorReporting(Shape()) << ", " << DynamicAxesAsString(DynamicAxes(), reverse) << ")";
return wss.str();
}
static const std::wstring InitializerTypeAttributeName = L"initializerType";
static const std::wstring OutputRankAttributeName = L"outputRank";
static const std::wstring FilterRankAttributeName = L"filterRank";

Просмотреть файл

@ -0,0 +1,13 @@
// Constants.h -- the constants used by CNTK
//
#pragma once
#ifndef _CONSTANTS_H_
#define _CONSTANTS_H_
// Constants used in aggregation
const size_t DEFAULT_PACK_THRESHOLD_SIZE_IN_KB = 32;
const size_t DEFAULT_PACK_THRESHOLD_SIZE_IN_BYTES = DEFAULT_PACK_THRESHOLD_SIZE_IN_KB * 1024;
#endif

Просмотреть файл

@ -267,12 +267,12 @@ template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateConvolutionNode(const std::wstring& nodeName, const TensorShape& kernelShape, const TensorShape& mapCount,
const TensorShape& strideShape, const std::vector<bool>& sharing,
const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples)
bool transpose, const TensorShape& outputShape, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples)
{
return net.AddNodeToNetWithElemType(New<ConvolutionNode<ElemType>>(net.GetDeviceId(), nodeName,
kernelShape, mapCount, strideShape,
sharing, autoPadding, lowerPad, upperPad,
transpose, imageLayout, maxTempMemSizeInSamples));
transpose, outputShape, imageLayout, maxTempMemSizeInSamples));
}
template <class ElemType>
@ -344,13 +344,13 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Convo
const TensorShape& kernelShape, const TensorShape& mapCount,
const TensorShape& strideShape, const std::vector<bool>& sharing,
const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples,
bool transpose, const TensorShape& outputShape, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples,
const std::wstring nodeName)
{
return net.AddNodeToNetAndAttachInputs(New<ConvolutionNode<ElemType>>(net.GetDeviceId(), nodeName,
kernelShape, mapCount, strideShape,
sharing, autoPadding, lowerPad, upperPad,
transpose, imageLayout, maxTempMemSizeInSamples),
transpose, outputShape, imageLayout, maxTempMemSizeInSamples),
{ weight, inputValues });
}
@ -502,9 +502,9 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Seque
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::ForwardBackward(const ComputationNodePtr label, const ComputationNodePtr prediction, int blankTokenId, int delayConstraint, const std::wstring nodeName)
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::ForwardBackward(const ComputationNodePtr graph, const ComputationNodePtr features, int blankTokenId, int delayConstraint, const std::wstring nodeName)
{
return net.AddNodeToNetAndAttachInputs(New<ForwardBackwardNode<ElemType>>(net.GetDeviceId(), nodeName, blankTokenId, delayConstraint), { label, prediction });
return net.AddNodeToNetAndAttachInputs(New<ForwardBackwardNode<ElemType>>(net.GetDeviceId(), nodeName, blankTokenId, delayConstraint), { graph, features });
}
template <class ElemType>

Просмотреть файл

@ -54,7 +54,7 @@ public:
ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const TensorShape& sampleLayout, const wstring& dynamicAxisName = L"");
ComputationNodePtr CreateConvolutionNode(const std::wstring& nodeName, const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& strideShape,
const std::vector<bool>& sharing, const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples);
bool transpose, const TensorShape& outputShape, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples);
ComputationNodePtr CreateConvolutionNode(const std::wstring& nodeName, const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels,
const size_t horizontalSubsample, const size_t verticalSubsample,
ImageLayoutKind imageLayoutKind, const bool zeroPadding = false, const size_t maxTempMemSizeInSamples = 0);
@ -84,7 +84,7 @@ public:
const ComputationNodePtr inputValues,
const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& strideShape,
const std::vector<bool>& sharing, const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples,
bool transpose, const TensorShape& outputShape, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples,
const std::wstring nodeName = L"");
ComputationNodePtr Pooling(const ComputationNodePtr inputValues,
PoolKind poolKind, const TensorShape& kernelShape, const TensorShape& strideShape,
@ -126,7 +126,7 @@ public:
ComputationNodePtr CosDistance(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
ComputationNodePtr CrossEntropy(const ComputationNodePtr label, const ComputationNodePtr prediction, const std::wstring nodeName = L"");
ComputationNodePtr CrossEntropyWithSoftmax(const ComputationNodePtr label, const ComputationNodePtr prediction, const std::wstring nodeName = L"");
ComputationNodePtr ForwardBackward(const ComputationNodePtr label, const ComputationNodePtr prediction, int blankTokenId, int delayConstraint, const std::wstring nodeName = L"");
ComputationNodePtr ForwardBackward(const ComputationNodePtr graph, const ComputationNodePtr features, int blankTokenId, int delayConstraint, const std::wstring nodeName = L"");
ComputationNodePtr DiagTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
ComputationNodePtr Diagonal(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr Dropout(const ComputationNodePtr a, const std::wstring nodeName = L"");

Просмотреть файл

@ -105,7 +105,7 @@ template<class ElemType>
}
std::shared_ptr<Matrix<ElemType>> unpackedData;
if ((maxNumTimeSteps == 1) || (numSequences == 1))
if ((maxNumTimeSteps == 1) || (numSequences == 1) || (batchMajor && (layout->GetNumParallelSequences() == layout->GetNumSequences())))
unpackedData = std::make_shared<Matrix<ElemType>>(packedData.AsReference());
else
{

Просмотреть файл

@ -49,7 +49,8 @@
#define CNTK_MODEL_VERSION_17 17 // use 8 bytes for rng seeds on both platforms
#define CNTK_MODEL_VERSION_18 18 // reserving 18 for dilated convolution, write out one more TensorShape
#define CNTK_MODEL_VERSION_19 19 // batch norm: flag whether running mean count is 0
#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_19
#define CNTK_MODEL_VERSION_20 20 // adding output shape to convolution node
#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_20
// helper mode for debugging
@ -1417,13 +1418,13 @@ public:
// for debugging, set the gaps to NaN instead (to track whether it bubbles up somewhere)
void InvalidateMissingValueColumns(const FrameRange& fr) override final
{
// fprintf(stderr, "invalidating %ls %ls m_value column range %d\n", NodeName().c_str(), OperationName().c_str(), (int)fr.timeIdxInSeq);
MaskMissingColumnsTo(*m_value, m_pMBLayout, fr, Matrix<ElemType>::MakeNan(__LINE__));
if (m_value->GetMatrixType() != SPARSE) // Sparse matrices can only be masked with 0s
MaskMissingColumnsTo(*m_value, m_pMBLayout, fr, Matrix<ElemType>::MakeNan(__LINE__));
}
void InvalidateMissingGradientColumns(const FrameRange& fr) override final
{
// fprintf(stderr, "invalidating %ls %ls m_gradient column range %d\n", NodeName().c_str(), OperationName().c_str(), (int)fr.timeIdxInSeq);
MaskMissingColumnsTo(*m_gradient, m_pMBLayout, fr, Matrix<ElemType>::MakeNan(__LINE__));
if (m_gradient->GetMatrixType() != SPARSE) // Sparse matrices can only be masked with 0s
MaskMissingColumnsTo(*m_gradient, m_pMBLayout, fr, Matrix<ElemType>::MakeNan(__LINE__));
}
static TensorView<ElemType> Unpack(const TensorShape& sampleShape,

Просмотреть файл

@ -53,14 +53,14 @@ class ConvolutionNodeBase : public ComputationNode<ElemType>
public:
ConvolutionNodeBase(DEVICEID_TYPE deviceId, const wstring& name)
: Base(deviceId, name), m_poolKind(PoolKind::None), m_transpose(false), m_maxTempMemSizeInSamples(0)
: Base(deviceId, name), m_poolKind(PoolKind::None), m_transpose(false), m_outputShape(TensorShape(0)), m_maxTempMemSizeInSamples(0)
{
}
ConvolutionNodeBase(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& strideShape,
const std::vector<bool>& sharing, const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
PoolKind poolKind, bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples)
PoolKind poolKind, bool transpose, const TensorShape& outputShape, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples)
: Base(deviceId, name), m_kernelShape(kernelShape), m_mapCount(mapCount), m_stride(strideShape), m_sharing(sharing),
m_autoPad(autoPadding), m_lowerPad(lowerPad), m_upperPad(upperPad), m_poolKind(poolKind), m_transpose(transpose),
m_autoPad(autoPadding), m_lowerPad(lowerPad), m_upperPad(upperPad), m_poolKind(poolKind), m_transpose(transpose), m_outputShape(outputShape),
m_imageLayout(imageLayout), m_maxTempMemSizeInSamples(maxTempMemSizeInSamples)
{
}
@ -81,6 +81,7 @@ public:
fstream << (int32_t)m_imageLayout;
fstream << m_maxTempMemSizeInSamples;
fstream << m_transpose;
m_outputShape.Save(fstream);
}
void Load(File& fstream, size_t modelVersion) override
@ -109,6 +110,10 @@ public:
{
fstream >> m_transpose;
}
if (modelVersion >= CNTK_MODEL_VERSION_20)
{
m_outputShape.Load(fstream);
}
}
void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -126,6 +131,7 @@ public:
node->m_upperPad = m_upperPad;
node->m_poolKind = m_poolKind;
node->m_transpose = m_transpose;
node->m_outputShape = m_outputShape;
node->m_imageLayout = m_imageLayout;
node->m_maxTempMemSizeInSamples = m_maxTempMemSizeInSamples;
}
@ -148,6 +154,7 @@ public:
TensorShape LowerPad() const { return m_lowerPad; }
TensorShape UpperPad() const { return m_upperPad; }
bool Transpose() const { return m_transpose; }
TensorShape OutputShape() const { return m_outputShape; }
size_t MaxTempMemSizeInSamples() const { return m_maxTempMemSizeInSamples; }
PoolKind PoolingKind() const { return m_poolKind; }
@ -216,7 +223,8 @@ protected:
TensorShape m_lowerPad;
TensorShape m_upperPad;
PoolKind m_poolKind;
bool m_transpose; // means de-convolution ...I think
bool m_transpose;
TensorShape m_outputShape;
ImageLayoutKind m_imageLayout;
size_t m_maxTempMemSizeInSamples;
@ -238,6 +246,7 @@ protected: \
using Base::m_upperPad; \
using Base::m_poolKind; \
using Base::m_transpose; \
using Base::m_outputShape; \
using Base::m_imageLayout; \
using Base::m_maxTempMemSizeInSamples; \
using Base::m_tempMatrixForward; \
@ -262,8 +271,8 @@ public:
}
ConvolutionNode(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& strideShape,
const std::vector<bool>& sharing, const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples)
: Base(deviceId, name, kernelShape, mapCount, strideShape, sharing, autoPadding, lowerPad, upperPad, PoolKind::None, transpose, imageLayout, maxTempMemSizeInSamples),
bool transpose, const TensorShape &outputShape, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples)
: Base(deviceId, name, kernelShape, mapCount, strideShape, sharing, autoPadding, lowerPad, upperPad, PoolKind::None, transpose, outputShape, imageLayout, maxTempMemSizeInSamples),
m_convolution2D(false)
{
}
@ -273,14 +282,14 @@ public:
: ConvolutionNode(deviceId, name, TensorShape(kernelWidth, kernelHeight, 1), TensorShape(1, 1, outputChannels),
TensorShape(horizontalSubsample, verticalSubsample, 1), vector<bool>{true},
vector<bool>{zeroPadding}, TensorShape(0), TensorShape(0),
false, imageLayout, maxTempMemSizeInSamples)
false, TensorShape(0), imageLayout, maxTempMemSizeInSamples)
{
m_convolution2D = true;
}
ConvolutionNode(const ScriptableObjects::IConfigRecordPtr configp)
: ConvolutionNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"kernelShape"), configp->Get(L"mapCount"), configp->Get(L"strideShape"),
configp->Get(L"dimSharing"), configp->Get(L"dimPadding"), configp->Get(L"dimPadLower"), configp->Get(L"dimPadUpper"),
configp->Get(L"transpose"), ImageLayoutKindFrom(configp->Get(L"imageLayout")), configp->Get(L"maxTempMemSizeInSamples"))
configp->Get(L"transpose"), configp->Get(L"dimOutputShape"), ImageLayoutKindFrom(configp->Get(L"imageLayout")), configp->Get(L"maxTempMemSizeInSamples"))
{
AttachInputsFromConfig(configp, GetExpectedNumInputs());
}
@ -443,13 +452,41 @@ public:
{
outputShape = ConvolveGeometry::ComputeOutputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
m_sharing, m_autoPad, m_lowerPad, m_upperPad);
if (m_outputShape.GetRank() > 0 && m_outputShape != TensorShape(0)) // user have explicitly set m_outputShape, we check if it's the same as outputShape
{
if (m_outputShape != outputShape)
{
InvalidArgument("%ls %ls the shape of the specified convolution output %ls is different from "
"the result of convoluting the input argument using the provided options %ls. It is recommonded "
"that the output shape is not specified for convolution.", NodeName().c_str(), OperationName().c_str(),
static_cast<std::wstring>(m_outputShape).c_str(),
static_cast<std::wstring>(outputShape).c_str());
}
}
}
else
{
// In case of transpose (deconvolution), node input (inputShape) is really the output of the convolution
// and node output (outDims) is convolution input. ConvolveGeometry does not care about deconvolutions (it does not have to).
outputShape = ConvolveGeometry::ComputeInputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
m_sharing, m_autoPad, m_lowerPad, m_upperPad);
if (m_outputShape.GetRank() <= 0 || m_outputShape == TensorShape(0))
{
// In case of convolution transpose (deconvolution), node input (inputShape) is really the output of the convolution
// and node output (outDims) is convolution input. ConvolveGeometry does not care about deconvolutions (it does not have to).
outputShape = ConvolveGeometry::ComputeInputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
m_sharing, m_autoPad, m_lowerPad, m_upperPad);
}
else
{
// in case the user specifies the output shape, we make sure the input shape can be the result of
// convolution from the specified output shape
auto inferredShape = ConvolveGeometry::ComputeOutputShape(m_outputShape, m_kernelShape, m_mapCount, m_stride, m_sharing, m_autoPad, m_lowerPad, m_upperPad);
if (inputShape != inferredShape)
InvalidArgument("%ls %ls the shape of the convolution transpose operand %ls is different from "
"the result of convoluting the specified output argument using "
"the provided options %ls", NodeName().c_str(), OperationName().c_str(),
static_cast<std::wstring>(inputShape).c_str(),
static_cast<std::wstring>(inferredShape).c_str());
outputShape = m_outputShape;
}
}
if (m_imageLayout == ImageLayoutKind::CHW)
@ -760,7 +797,7 @@ public:
PoolingNode(DEVICEID_TYPE deviceId, const wstring& name, PoolKind pool, const TensorShape& kernelShape, const TensorShape& strideShape,
const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
ImageLayoutKind imageLayout)
: Base(deviceId, name, kernelShape, TensorShape(1), strideShape, vector<bool>{true}, autoPadding, lowerPad, upperPad, pool, false, imageLayout, 0)
: Base(deviceId, name, kernelShape, TensorShape(1), strideShape, vector<bool>{true}, autoPadding, lowerPad, upperPad, pool, false, TensorShape(0), imageLayout, 0)
{
}
PoolingNode(const ScriptableObjects::IConfigRecordPtr configp)
@ -882,7 +919,7 @@ public:
MaxUnpoolingNode(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& kernelShape, const TensorShape& strideShape,
const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
ImageLayoutKind imageLayout)
: Base(deviceId, name, kernelShape, TensorShape(1), strideShape, vector<bool>{true}, autoPadding, lowerPad, upperPad, PoolKind::Max, true, imageLayout, 0)
: Base(deviceId, name, kernelShape, TensorShape(1), strideShape, vector<bool>{true}, autoPadding, lowerPad, upperPad, PoolKind::Max, true, TensorShape(0), imageLayout, 0)
{
}
MaxUnpoolingNode(const ScriptableObjects::IConfigRecordPtr configp)

Просмотреть файл

@ -6,6 +6,7 @@
#include "Basics.h"
#include "ComputationNode.h"
#include "Constants.h"
#include "Matrix.h"
#include "TensorView.h"
#include <unordered_set>
@ -1451,7 +1452,8 @@ void AggregateAccumulatorValuesAndUpdateEvaluation(
shared_ptr<ComputationNetwork> net,
set<shared_ptr<ComputationNodeBase>> evalNodesWhichAccumulateResult,
shared_ptr<DistGradHeader> gradHeader,
shared_ptr<MPIWrapper> mpi);
shared_ptr<MPIWrapper> mpi,
size_t packThresholdSizeInBytes = (size_t)DEFAULT_PACK_THRESHOLD_SIZE_IN_BYTES);
// -----------------------------------------------------------------------
// EpochAccumulatorNode calculates mean values of all samples used in forward pass.
@ -1502,7 +1504,8 @@ protected:
shared_ptr<ComputationNetwork> net,
set<shared_ptr<ComputationNodeBase>> evalNodesWhichAccumulateResult,
shared_ptr<DistGradHeader> gradHeader,
shared_ptr<MPIWrapper> mpi);
shared_ptr<MPIWrapper> mpi,
size_t packThresholdSize);
void Reset();

Просмотреть файл

@ -118,6 +118,7 @@ public:
// Negate (input)
// Sqrt (input)
// Reciprocal (input)
// ExponentialLinearUnitDerivative (input)
// These are all implemented by single-opcode functions and can thus be declared by a macro.
// -----------------------------------------------------------------------
@ -141,21 +142,22 @@ public:
} \
}
// Name Forward and Backward opcodes Gradient optype
DeclareUnaryElementWiseWithOpCodeNode(Abs, Abs, ElementwiseProductWithAbsDerivative, binaryWithInputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Cosine, Cosine, ElementwiseProductWithCosDerivative, binaryWithInputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Exp, Exp, ElementwiseProduct, binaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Floor, Floor, None, noGradient);
DeclareUnaryElementWiseWithOpCodeNode(Log, Log, ElementwiseProductWithLogDerivativeFromOutput, binaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Negate, Negate, Negate, unaryGradient);
DeclareUnaryElementWiseWithOpCodeNode(Pass, Copy, Copy, unaryGradient);
DeclareUnaryElementWiseWithOpCodeNode(LabelsToGraph, Copy, Copy, unaryGradient);
DeclareUnaryElementWiseWithOpCodeNode(Reciprocal, Reciprocal, ElementwiseProductWithReciprocalDerivative, binaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(RectifiedLinear, LinearRectifier, ElementwiseProductWithLinearRectifierDerivativeFromOutput, binaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Sigmoid, Sigmoid, ElementwiseProductWithSigmoidDerivativeFromOutput, binaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Sin, Sin, ElementwiseProductWithSinDerivative, binaryWithInputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Sqrt, Sqrt, ElementwiseProductWithSqrtDerivative, binaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Tanh, Tanh, ElementwiseProductWithTanhDerivativeFromOutput, binaryWithOutputGradient);
// Name Forward and Backward opcodes Gradient optype
DeclareUnaryElementWiseWithOpCodeNode(Abs, Abs, ElementwiseProductWithAbsDerivative, binaryWithInputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Cosine, Cosine, ElementwiseProductWithCosDerivative, binaryWithInputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Exp, Exp, ElementwiseProduct, binaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Floor, Floor, None, noGradient);
DeclareUnaryElementWiseWithOpCodeNode(Log, Log, ElementwiseProductWithLogDerivativeFromOutput, binaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Negate, Negate, Negate, unaryGradient);
DeclareUnaryElementWiseWithOpCodeNode(Pass, Copy, Copy, unaryGradient);
DeclareUnaryElementWiseWithOpCodeNode(LabelsToGraph, Copy, Copy, unaryGradient);
DeclareUnaryElementWiseWithOpCodeNode(Reciprocal, Reciprocal, ElementwiseProductWithReciprocalDerivative, binaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(RectifiedLinear, LinearRectifier, ElementwiseProductWithLinearRectifierDerivativeFromOutput, binaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Sigmoid, Sigmoid, ElementwiseProductWithSigmoidDerivativeFromOutput, binaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Sin, Sin, ElementwiseProductWithSinDerivative, binaryWithInputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Sqrt, Sqrt, ElementwiseProductWithSqrtDerivative, binaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Tanh, Tanh, ElementwiseProductWithTanhDerivativeFromOutput, binaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(ExponentialLinearUnit, ExponentialLinearUnit, ElementwiseProductWithExponentialLinearUnitDerivativeFromOutput, binaryWithOutputGradient);
#pragma pop_macro("DeclareUnaryElementWiseWithOpCodeNode")

Просмотреть файл

@ -767,10 +767,10 @@ template class DummyCriterionNode<double>;
// ForwardBackwardNode (graph, prediction, delayConstraint)
// CTC training criterion, primarily based on the paper "Connectionist Temporal Classification: Labelling Unsegmented
// Sequence Data with Recurrent Neural Networks", ftp://ftp.idsia.ch/pub/juergen/icml2006.pdf
//
// blankTokenId (input): id of the blank token. If specified as SIZE_MAX, will be replaced with (numberOfLabels - 1)
// delayConstraint -- label output delay constraint introduced during training that allows to have shorter delay during inference.
// This using the original time information to enforce that CTC tokens only get aligned within a time margin.
// Setting this parameter smaller will result in shorted delay between label output during decoding, yet may hurt accuracy.
// Setting this parameter smaller will result in shorter delay between label output during decoding, yet may hurt accuracy.
// delayConstraint=-1 means no constraint
// -----------------------------------------------------------------------
@ -785,7 +785,7 @@ class ForwardBackwardNode : public ComputationNodeNonLooping<ElemType>, public
}
public:
DeclareConstructorFromConfigWithNumInputs(ForwardBackwardNode);
ForwardBackwardNode(DEVICEID_TYPE deviceId, const wstring & name, int blankTokenId=INT_MIN, int delayConstraint=-1) :
ForwardBackwardNode(DEVICEID_TYPE deviceId, const wstring & name, size_t blankTokenId=SIZE_MAX, int delayConstraint=-1) :
Base(deviceId, name), m_blankTokenId(blankTokenId), m_delayConstraint(delayConstraint)
{
}
@ -936,6 +936,9 @@ public:
m_maxValues->Resize(1, cols);
}
int DelayConstraint() { return m_delayConstraint; }
size_t BlankTokenId() { return m_blankTokenId; }
protected:
virtual bool NodeDoesItsOwnCustomizedMissingColumnsMasking() { return true; }
shared_ptr<Matrix<ElemType>> m_logSoftmaxOfRight;
@ -945,7 +948,7 @@ protected:
shared_ptr<Matrix<ElemType>> m_maxValues;
msra::lattices::GammaCalculation<ElemType> m_GammaCal;
int m_blankTokenId;
size_t m_blankTokenId;
int m_delayConstraint;
};

Просмотреть файл

@ -12,6 +12,9 @@
namespace Microsoft { namespace MSR { namespace CNTK {
template <typename ElemType>
class SelectUserDefinedV2FunctionOutputNode;
// -----------------------------------------------------------------------
// UserDefinedV2Function
// Proxy ComputationNode type for a V2 user-defined custom Function, instances
@ -25,6 +28,8 @@ class UserDefinedV2FunctionNode final : public ComputationNodeNonLooping<ElemTyp
{
typedef ComputationNodeNonLooping<ElemType> Base; UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName() { return L"UserDefinedV2Function"; }
friend class SelectUserDefinedV2FunctionOutputNode<ElemType>;
public:
UserDefinedV2FunctionNode(DEVICEID_TYPE deviceId, const wstring& name, const ::CNTK::FunctionPtr& externalFunction = nullptr)
@ -32,10 +37,18 @@ public:
{
if (!m_externalFunction)
LogicError("UserDefinedV2FunctionNode ctor should never be called with externalFunction == nullptr");
m_numOutputs = m_externalFunction->Outputs().size();
m_values.resize(m_numOutputs);
m_gradients.resize(m_numOutputs);
m_MBLayouts.resize(m_numOutputs);
m_outputHasNewMBLayout.resize(m_numOutputs);
}
virtual void ForwardPropNonLooping() override
{
m_values[0] = m_value;
// Get the arguments of the external function
auto arguments = m_externalFunction->Arguments();
std::unordered_map<::CNTK::Variable, ::CNTK::ValuePtr> argumentValues;
@ -53,40 +66,50 @@ public:
}
assert(j == arguments.size());
auto outputs = m_externalFunction->Outputs();
// TODO: Instead of passing null for output values, we should have the forward call directly produce the outputs in the output Value() of this node
std::unordered_map<::CNTK::Variable, ::CNTK::ValuePtr> outputValue = { { m_externalFunction->Output(), nullptr } };
std::unordered_map<::CNTK::Variable, ::CNTK::ValuePtr> outputValues;
for (auto output : outputs)
outputValues.insert({output, nullptr});
std::unordered_set<::CNTK::Variable> outputsToRetainBackwardStateFor;
if (Environment().IsTraining())
outputsToRetainBackwardStateFor.insert(m_externalFunction->Output());
outputsToRetainBackwardStateFor.insert(outputs.begin(), outputs.end());
auto computeDevice = ::CNTK::AsDeviceDescriptor(InputRef(0).Value().GetDeviceId());
m_currentBackpropStatePtr = m_externalFunction->Forward(argumentValues, outputValue, computeDevice, outputsToRetainBackwardStateFor);
m_currentBackpropStatePtr = m_externalFunction->Forward(argumentValues, outputValues, computeDevice, outputsToRetainBackwardStateFor);
// Copy the computed output to Value() of this node
// TODO: We currently assume that the external Function does not generate a new MBLayout
auto outputMatrixAndLayout = ::CNTK::Utils::GetCNTKImplMatrixAndMBLayoutFromValueObject<ElemType>(outputValue.begin()->first, outputValue.begin()->second);
Value().AssignValuesOf(*outputMatrixAndLayout.first);
if ((GetMBLayout() != nullptr) && (outputMatrixAndLayout.second == nullptr))
LogicError("The UserDefinedFunction node has a non-null output MBLayout but none found from the (%S) user Function::Forward output Value", m_externalFunction->Name().c_str());
else if ((GetMBLayout() == nullptr) && (outputMatrixAndLayout.second != nullptr))
LogicError("The UserDefinedFunction node does not have an output MBLayout but the (%S) user Function::Forward output Value have a non-null layout", m_externalFunction->Name().c_str());
else if ((GetMBLayout() == nullptr) && (outputMatrixAndLayout.second == nullptr))
;
else
// Copy the computed output
for (size_t i = 0; i < outputs.size(); ++i)
{
if (m_hasNewOutputMBLayout)
GetMBLayout()->CopyFrom(outputMatrixAndLayout.second);
auto output = outputs[i];
auto outputMatrixAndLayout = ::CNTK::Utils::GetCNTKImplMatrixAndMBLayoutFromValueObject<ElemType>(output, outputValues[output]);
m_values[i]->SetValue(*outputMatrixAndLayout.first);
if ((m_MBLayouts[i] != nullptr) && (outputMatrixAndLayout.second == nullptr))
LogicError("The UserDefinedFunction node has a non-null output MBLayout but none found from the (%S) user Function::Forward output Value", m_externalFunction->Name().c_str());
else if ((m_MBLayouts[i] == nullptr) && (outputMatrixAndLayout.second != nullptr))
LogicError("The UserDefinedFunction node does not have an output MBLayout but the (%S) user Function::Forward output Value have a non-null layout", m_externalFunction->Name().c_str());
else if ((m_MBLayouts[i] == nullptr) && (outputMatrixAndLayout.second == nullptr))
;
else
{
if (*GetMBLayout() != *outputMatrixAndLayout.second)
LogicError("The MBLayout of the output computed by the external function (%S) does not match the expected MBLayout", m_externalFunction->Name().c_str());
if (m_outputHasNewMBLayout[i])
m_MBLayouts[i]->CopyFrom(outputMatrixAndLayout.second);
else
{
if (*m_MBLayouts[i] != *outputMatrixAndLayout.second)
LogicError("The MBLayout of the output computed by the external function (%S) does not match the expected MBLayout", m_externalFunction->Name().c_str());
}
}
}
}
virtual void BackpropToNonLooping(size_t inputIndex) override
{
m_gradients[0] = m_gradient;
std::vector<::CNTK::Variable> externalFunctionUniqueInputs;
auto externalFunctionInputs = m_externalFunction->Inputs();
for (auto input : externalFunctionInputs)
@ -97,10 +120,21 @@ public:
auto input = externalFunctionUniqueInputs[inputIndex];
auto gradientValue = ::CNTK::Utils::GetValueObjectFromCNTKImplMatrixAndMBLayout(m_externalFunction->Output(), Gradient(), GetMBLayout());
std::unordered_map<::CNTK::Variable, ::CNTK::ValuePtr> outputGradientValue = { { m_externalFunction->Output(), gradientValue } };
std::unordered_map<::CNTK::Variable, ::CNTK::ValuePtr> outputGradientValues;
auto outputs = m_externalFunction->Outputs();
for (size_t i = 0; i < outputs.size(); ++i)
{
auto output = outputs[i];
// TODO: We unpack the same output gradients each time this method is called for a different input.
// We should be able to cache the unpacked values during backpropagation of gradients to the first
// input, and reuse them for subsequence inputs.
auto gradientValue = ::CNTK::Utils::GetValueObjectFromCNTKImplMatrixAndMBLayout(output, *m_gradients[i], m_MBLayouts[i]);
outputGradientValues.insert({ output, gradientValue });
}
std::unordered_map<::CNTK::Variable, ::CNTK::ValuePtr> inputGradientValue = { { input, nullptr } };
m_externalFunction->Backward(m_currentBackpropStatePtr, outputGradientValue, inputGradientValue);
m_externalFunction->Backward(m_currentBackpropStatePtr, outputGradientValues, inputGradientValue);
// Accumulate the computed input gradient value into the existing input gradient value
// TODO: We should directly pass the actual input gradient tensor to the Backward method
@ -116,71 +150,160 @@ public:
{
Base::Validate(isFinalValidationPass);
// The external Function can only have a single output
auto numOutputs = m_externalFunction->Outputs().size();
if (numOutputs != 1)
InvalidArgument("Found user defined function (%S) with %lu outputs. User defined functions must have exactly one output", this->GetName().c_str(), (unsigned long)numOutputs);
auto output = m_externalFunction->Output();
if (output.GetDataType() != ::CNTK::AsDataType<ElemType>())
auto outputs = m_externalFunction->Outputs();
for (size_t i = 0; i < outputs.size(); ++i)
{
LogicError("The DataType (%s) of the external user defined Function's output does not match the internal ComputationNode's ElemType (%s)",
DataTypeName(output.GetDataType()),
DataTypeName(::CNTK::AsDataType<ElemType>()));
}
auto output = outputs[i];
auto outputNDShape = output.Shape();
if (outputNDShape.IsUnknown() || outputNDShape.HasInferredDimension())
LogicError("The output shape of an external user defined Function should be fully determined by the time CNTK engine validation executes");
auto outputDynamicAxes = output.DynamicAxes();
if (outputDynamicAxes.empty())
{
m_hasNewOutputMBLayout = true;
m_pMBLayout = nullptr;
}
else
{
auto argumentVariables = m_externalFunction->Arguments();
size_t j = 0;
auto numInputs = GetNumInputs();
for (size_t i = 0; i < numInputs; ++i)
if (output.GetDataType() != ::CNTK::AsDataType<ElemType>())
{
auto& input = InputRef(i);
if (input.template Is<LearnableParameter<ElemType>>())
continue;
auto argumentVar = argumentVariables[j];
if (argumentVar.DynamicAxes() == outputDynamicAxes)
{
m_pMBLayout = input.GetMBLayout();
break;
}
j++;
LogicError("The DataType (%s) of the external user defined Function's output does not match the internal ComputationNode's ElemType (%s)",
DataTypeName(output.GetDataType()),
DataTypeName(::CNTK::AsDataType<ElemType>()));
}
if (!m_pMBLayout)
auto outputNDShape = output.Shape();
if (outputNDShape.IsUnknown() || outputNDShape.HasInferredDimension())
LogicError("The output shape of an external user defined Function should be fully determined by the time CNTK engine validation executes");
auto outputDynamicAxes = output.DynamicAxes();
if (outputDynamicAxes.empty())
{
m_pMBLayout = make_shared<MBLayout>(); // this generates a new layout
m_pMBLayout->SetUniqueAxisName(InternalDynamicAxisNameFromDynamicAxes(output.DynamicAxes()));
m_hasNewOutputMBLayout = true;
m_outputHasNewMBLayout[i] = true;
m_MBLayouts[i] = nullptr;
}
else
m_hasNewOutputMBLayout = false;
{
auto argumentVariables = m_externalFunction->Arguments();
size_t j = 0;
auto numInputs = GetNumInputs();
for (size_t k = 0; k < numInputs; ++k)
{
auto& input = InputRef(k);
if (input.template Is<LearnableParameter<ElemType>>())
continue;
auto argumentVar = argumentVariables[j];
if (argumentVar.DynamicAxes() == outputDynamicAxes)
{
m_MBLayouts[i] = input.GetMBLayout();
break;
}
j++;
}
if (!m_MBLayouts[i])
{
m_MBLayouts[i] = make_shared<MBLayout>(); // this generates a new layout
m_MBLayouts[i]->SetUniqueAxisName(InternalDynamicAxisNameFromDynamicAxes(output.DynamicAxes()));
m_outputHasNewMBLayout[i] = true;
}
else
m_outputHasNewMBLayout[i] = false;
}
if (i == 0)
{
m_pMBLayout = m_MBLayouts[i];
SetDims(::CNTK::AsTensorShape(outputNDShape), HasMBLayout());
}
}
auto outputTensorShape = ::CNTK::AsTensorShape(outputNDShape);
SetDims(outputTensorShape, HasMBLayout());
}
void RequestMatricesBeforeForwardProp(MatrixPool& matrixPool) override
{
Base::RequestMatricesBeforeForwardProp(matrixPool);
for (size_t i = 1 ; i < m_numOutputs; ++i)
RequestMatrixFromPool(m_values[i], matrixPool);
}
void RequestMatricesBeforeBackprop(MatrixPool& matrixPool) override
{
Base::RequestMatricesBeforeBackprop(matrixPool);
for (size_t i = 1; i < m_numOutputs; ++i)
RequestMatrixFromPool(m_gradients[i], matrixPool);
}
void ReleaseMatricesAfterBackprop(MatrixPool& matrixPool) override
{
Base::ReleaseMatricesAfterBackprop(matrixPool);
for (size_t i = 1; i < m_numOutputs; ++i)
ReleaseMatrixToPool(m_values[i], matrixPool);
for (size_t i = 1; i < m_numOutputs; ++i)
ReleaseMatrixToPool(m_gradients[i], matrixPool);
}
private:
::CNTK::FunctionPtr m_externalFunction;
bool m_hasNewOutputMBLayout;
::CNTK::BackPropStatePtr m_currentBackpropStatePtr;
size_t m_numOutputs;
std::vector<std::shared_ptr<Matrix<ElemType>>> m_values;
std::vector<std::shared_ptr<Matrix<ElemType>>> m_gradients;
std::vector<std::shared_ptr<MBLayout>> m_MBLayouts;
std::vector<bool> m_outputHasNewMBLayout;
};
template class UserDefinedV2FunctionNode<float>;
template class UserDefinedV2FunctionNode<double>;
// -----------------------------------------------------------------------
// SelectUserDefinedV2FunctionOutputNode(userDefinedV2FunctionNode, outputIndex)
// ComputationNode for selecting one of the multiple outputs of UserDefinedV2FunctionNode
// This is needed since the CNTK computation engin natively does not support
// nodes with multiple outputs and hence, we need a separate node to multiplex
// the additional outputs.
// -----------------------------------------------------------------------
// TODO: We currently only support external nodes that cannot be part of CNTK recurrent loops
template <class ElemType>
class SelectUserDefinedV2FunctionOutputNode final : public ComputationNodeNonLooping<ElemType>, public NumInputs<1>
{
typedef ComputationNodeNonLooping<ElemType> Base; UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName() { return L"SelectUserDefinedV2FunctionOutput"; }
public:
SelectUserDefinedV2FunctionOutputNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputIndex = 0)
: Base(deviceId, name), m_outputIndex(outputIndex)
{}
virtual void ForwardPropNonLooping() override
{
// TODO: We should avoid this copy but that requires carefully managing the
// lifetimes of the Value objects since to be able to directly use the
// input Value as its output, we have to make sure that the input's Value
// is not reused until all dependents of this node are finished.
auto inputNode = Input(0)->template As<UserDefinedV2FunctionNode<ElemType>>();
Value().AssignValuesOf(*inputNode->m_values[m_outputIndex]);
}
virtual void BackpropToNonLooping(size_t inputIndex) override
{
// TODO: We should avoid this copy but that requires carefully managing the
// lifetimes of the Gradient objects since to be able to directly use the
// Gradient as input's gradient, we have to make sure that the Gradient
// is not reused until all the inputs are finished backpropagating to their inputs.
auto inputNode = Input(0)->template As<UserDefinedV2FunctionNode<ElemType>>();
inputNode->m_gradients[m_outputIndex]->SetValue(Gradient());
}
virtual void Validate(bool isFinalValidationPass) override
{
Base::Validate(isFinalValidationPass);
auto inputNode = Input(0)->template As<UserDefinedV2FunctionNode<ElemType>>();
m_pMBLayout = inputNode->m_MBLayouts[m_outputIndex];
auto outputNDShape = inputNode->m_externalFunction->Outputs()[m_outputIndex].Shape();
SetDims(::CNTK::AsTensorShape(outputNDShape), HasMBLayout());
}
private:
size_t m_outputIndex;
};
template class SelectUserDefinedV2FunctionOutputNode<float>;
template class SelectUserDefinedV2FunctionOutputNode<double>;
}}}

Просмотреть файл

@ -31,6 +31,7 @@ using namespace std;
using namespace System;
using namespace System::Collections::Generic;
using namespace System::Collections;
using namespace System::Runtime::InteropServices;
namespace Microsoft { namespace MSR { namespace CNTK { namespace Extensibility { namespace Managed {
@ -291,19 +292,18 @@ public:
throw gcnew ObjectDisposedException("Object has been disposed.");
}
// Hold all buffers that should be pinned during native operations
List<GCHandle>^ pinnedGCHandleList = gcnew List<GCHandle>;
try
{
Native::ValueRefs<ElemType> stdInputs;
Native::ValueRefs<ElemType> stdOutputs;
// Hold gc objects in the stack, while performing native actions
vector<gcroot<cli::array<ElemType>^>> pinBuffers;
vector<gcroot<cli::array<int>^>> pinIndices;
// Map the managed space into the native space, results will be written directly into the managed memory space
// https://msdn.microsoft.com/en-us/library/1dz8byfh.aspx
TransferVectorsToValueBuffers(inputs, stdInputs, pinBuffers, pinIndices, StorageType::Sparse);
TransferVectorsToValueBuffers(outputs, stdOutputs, pinBuffers, pinIndices, StorageType::Dense);
TransferVectorsToValueBuffers(inputs, stdInputs, pinnedGCHandleList, StorageType::Sparse);
TransferVectorsToValueBuffers(outputs, stdOutputs, pinnedGCHandleList, StorageType::Dense);
try
{
@ -324,6 +324,13 @@ public:
{
throw;
}
finally
{
for each (auto h in pinnedGCHandleList)
{
h.Free();
}
}
}
~ModelEvaluationExtended()
@ -431,37 +438,31 @@ private:
}
}
void PinBuffer(cli::array<ElemType>^ itemBuffer, vector<gcroot<cli::array<ElemType>^>>& pinBuffers, Native::ValueBuffer<ElemType, Native::VectorRef>* vb, StorageType storageType, int bufferSize)
void PinBuffer(cli::array<ElemType>^ itemBuffer, List<GCHandle>^ pinnedGCHandleList, Native::ValueBuffer<ElemType, Native::VectorRef>* vb, StorageType storageType, int bufferSize)
{
// gcroot object manages the pointer so that it always corresponds to the correct managed location (even after gc relocation)
gcroot<cli::array<ElemType>^> pBuf(itemBuffer);
pin_ptr<ElemType> pp = &(pBuf[0]);
pinBuffers.push_back(pBuf);
GCHandle h = GCHandle::Alloc(itemBuffer, GCHandleType::Pinned);
pinnedGCHandleList->Add(h);
ElemType* pp = reinterpret_cast<ElemType *>(h.AddrOfPinnedObject().ToPointer());
vb->m_buffer.InitFrom(pp, bufferSize, storageType == StorageType::Sparse ? bufferSize : 0);
pp = nullptr;
}
void PinIndices(cli::array<int>^ itemBuffer, vector<gcroot<cli::array<int>^>>& pinBuffers, Native::ValueBuffer<ElemType, Native::VectorRef>* vb, StorageType storageType, int bufferSize)
void PinIndices(cli::array<int>^ itemBuffer, List<GCHandle>^ pinnedGCHandleList, Native::ValueBuffer<ElemType, Native::VectorRef>* vb, StorageType storageType, int bufferSize)
{
// gcroot object manages the pointer so that it always corresponds to the correct managed location (even after gc relocation)
gcroot<cli::array<int>^> pBuf(itemBuffer);
pin_ptr<int> pp = &(pBuf[0]);
pinBuffers.push_back(pBuf);
GCHandle h = GCHandle::Alloc(itemBuffer, GCHandleType::Pinned);
pinnedGCHandleList->Add(h);
int* pp = reinterpret_cast<int *>(h.AddrOfPinnedObject().ToPointer());
vb->m_indices.InitFrom(pp, bufferSize, storageType == StorageType::Sparse ? bufferSize : 0);
pp = nullptr;
}
void PinColIndices(cli::array<int>^ itemBuffer, vector<gcroot<cli::array<int>^>>& pinBuffers, Native::ValueBuffer<ElemType, Native::VectorRef>* vb, StorageType storageType, int bufferSize)
void PinColIndices(cli::array<int>^ itemBuffer, List<GCHandle>^ pinnedGCHandleList, Native::ValueBuffer<ElemType, Native::VectorRef>* vb, StorageType storageType, int bufferSize)
{
// gcroot object manages the pointer so that it always corresponds to the correct managed location (even after gc relocation)
gcroot<cli::array<int>^> pBuf(itemBuffer);
pin_ptr<int> pp = &(pBuf[0]);
pinBuffers.push_back(pBuf);
GCHandle h = GCHandle::Alloc(itemBuffer, GCHandleType::Pinned);
pinnedGCHandleList->Add(h);
int* pp = reinterpret_cast<int *>(h.AddrOfPinnedObject().ToPointer());
vb->m_colIndices.InitFrom(pp, bufferSize, storageType == StorageType::Sparse ? bufferSize : 0);
pp = nullptr;
}
void TransferVectorsToValueBuffers(cli::array<ValueBuffer<ElemType>^>^ list, Native::ValueRefs<ElemType>& valueRefs, vector<gcroot<cli::array<ElemType>^>>& pinBuffers, vector<gcroot<cli::array<int>^>>& pinIndices, StorageType storageType)
void TransferVectorsToValueBuffers(cli::array<ValueBuffer<ElemType>^>^ list, Native::ValueRefs<ElemType>& valueRefs, List<GCHandle>^ pinnedGCHandleList, StorageType storageType)
{
for each (auto item in list)
{
@ -476,16 +477,16 @@ private:
throw gcnew CNTKRuntimeException("Invalid buffer (empty) for argument into ForwardPass", String::Empty);
}
PinBuffer(item->Buffer, pinBuffers, &vb, storageType, bufferSize);
PinBuffer(item->Buffer, pinnedGCHandleList, &vb, storageType, bufferSize);
if (item->Indices != nullptr)
{
PinIndices(item->Indices, pinIndices, &vb, storageType, bufferSize);
PinIndices(item->Indices, pinnedGCHandleList, &vb, storageType, bufferSize);
}
if (item->ColIndices != nullptr)
{
PinColIndices(item->ColIndices, pinIndices, &vb, storageType, numElements);
PinColIndices(item->ColIndices, pinnedGCHandleList, &vb, storageType, numElements);
}
valueRefs.push_back(vb);

Просмотреть файл

@ -5888,6 +5888,7 @@ void CPUMatrix<ElemType>::RCRFBackwardCompute(const CPUMatrix<ElemType>& alpha,
// t (input): time stamp to process
// maxPhoneNum (input): the max number of phones between utterances
// totalPhoneNum (input): the total number of phones of all utterances
// blankTokenId (input): id of the CTC blank token
// delayConstraint -- label output delay constraint introduced during training that allows to have shorter delay during inference.
// Alpha and Beta scores outside of the delay boundary are set to zero.
// Setting this parameter smaller will result in shorted delay between label output during decoding.
@ -5907,6 +5908,7 @@ void _assignAlphaScore(
const size_t t,
const size_t maxPhoneNum, // Maximum length of utterance in this MB
const size_t totalPhoneNum, // Total number of phones
const size_t blankTokenId,
const int delayConstraint)
{
for (size_t uttId = 0;uttId < uttNum;uttId++) {
@ -5958,7 +5960,7 @@ void _assignAlphaScore(
{
size_t labelid_2 = labelid - 2;
// if current label is not blank and not equal prev non-blank label
if ((size_t)(phoneSeq[labelid]) != totalPhoneNum - 1 && phoneId != (size_t)(phoneSeq[labelid_2]))
if ((size_t)(phoneSeq[labelid]) != blankTokenId && phoneId != (size_t)(phoneSeq[labelid_2]))
{
x = LogAdd(x, alphaScore[alphaId_2]);
}
@ -5980,13 +5982,13 @@ void _assignAlphaScore(
{
size_t labelid_r = labelid + 2;
size_t phoneBoundId_r = (size_t)(phoneBound[labelid_r]);
if (phoneId == totalPhoneNum - 1)
if (phoneId == blankTokenId)
{
// only constraint right side
if (t > phoneBoundId_r + delayConstraint - 1)
alphaScore[alphaId] = LZERO;
}
else if (phoneId != totalPhoneNum - 1)
else if (phoneId != blankTokenId)
{
if (t > phoneBoundId_r + delayConstraint)
alphaScore[alphaId] = LZERO;
@ -6016,6 +6018,7 @@ void _assignBetaScore(
const long t,
const size_t maxPhoneNum,
const size_t totalPhoneNum,
const size_t blankTokenId,
const int delayConstraint)
{
for (size_t uttId = 0;uttId < uttNum;uttId++) {
@ -6055,7 +6058,7 @@ void _assignBetaScore(
ElemType ascore;
if (phoneSeqId < phoneNum - 3)
{
if (phoneSeq[labelid] != totalPhoneNum - 1 && phoneId != phoneSeq[labelid_2])
if (phoneSeq[labelid] != blankTokenId && phoneId != phoneSeq[labelid_2])
{
x = LogAdd(x, betaScore[betaid_2]);
}
@ -6076,12 +6079,12 @@ void _assignBetaScore(
if (delayConstraint != -1)
{
size_t phoneBoundId_r = (size_t)(phoneBound[labelid_2]);
if (phoneId == totalPhoneNum - 1)
if (phoneId == blankTokenId)
{
if (t > phoneBoundId_r + delayConstraint - 1)
betaScore[betaid] = LZERO;
}
else if (phoneId != totalPhoneNum - 1)
else if (phoneId != blankTokenId)
{
if (t > phoneBoundId_r + delayConstraint)
betaScore[betaid] = LZERO;
@ -6171,7 +6174,7 @@ template<class ElemType>
CPUMatrix<ElemType>& CPUMatrix<ElemType>::AssignCTCScore(
const CPUMatrix<ElemType>& prob, CPUMatrix<ElemType>& alpha, CPUMatrix<ElemType>& beta,
const CPUMatrix<ElemType>& phoneSeq, const CPUMatrix<ElemType>& phoneBoundary, ElemType &totalScore, const std::vector<size_t>& uttToChanInd, const std::vector<size_t> & uttBeginFrame, const std::vector<size_t> & uttFrameNum,
const std::vector<size_t> & uttPhoneNum, const size_t numParallelSequences, const size_t maxFrameNum, const int delayConstraint, const bool isColWise)
const std::vector<size_t> & uttPhoneNum, const size_t numParallelSequences, const size_t maxFrameNum, const size_t blankTokenId, const int delayConstraint, const bool isColWise)
{
// Column wise representation of sequences in input matrices (each column is one sequence/utterance)
if (isColWise)
@ -6186,13 +6189,13 @@ CPUMatrix<ElemType>& CPUMatrix<ElemType>::AssignCTCScore(
for (size_t t = 0; t < maxFrameNum; t++)
{
_assignAlphaScore(prob.Data(), alpha.Data(), phoneSeq.Data(), phoneBoundary.Data(), uttToChanInd,
uttFrameNum, uttBeginFrame, uttPhoneNum, numParallelSequences, uttNum, t, maxPhoneNum, totalPhoneNum, delayConstraint);
uttFrameNum, uttBeginFrame, uttPhoneNum, numParallelSequences, uttNum, t, maxPhoneNum, totalPhoneNum, blankTokenId, delayConstraint);
}
for (LONG64 t = maxFrameNum - 1; t >= 0; t--)
{
_assignBetaScore(prob.Data(), beta.Data(), phoneSeq.Data(), phoneBoundary.Data(), uttToChanInd,
uttFrameNum, uttBeginFrame, uttPhoneNum, numParallelSequences, uttNum, t, maxPhoneNum, totalPhoneNum, delayConstraint);
uttFrameNum, uttBeginFrame, uttPhoneNum, numParallelSequences, uttNum, t, maxPhoneNum, totalPhoneNum, blankTokenId, delayConstraint);
}
std::vector<ElemType> scores(uttNum);

Просмотреть файл

@ -231,7 +231,7 @@ public:
// sequence training
CPUMatrix<ElemType>& DropFrame(const CPUMatrix<ElemType>& label, const CPUMatrix<ElemType>& gamma, const ElemType& threshhold);
CPUMatrix<ElemType>& AssignSequenceError(const ElemType hsmoothingWeight, const CPUMatrix<ElemType>& label, const CPUMatrix<ElemType>& dnnoutput, const CPUMatrix<ElemType>& gamma, ElemType alpha);
CPUMatrix<ElemType>& AssignCTCScore(const CPUMatrix<ElemType>& prob, CPUMatrix<ElemType>& alpha, CPUMatrix<ElemType>& beta, const CPUMatrix<ElemType>& phoneSeq, const CPUMatrix<ElemType>& phoneBoundary, ElemType &totalScore, const vector<size_t>& uttMap, const vector<size_t> & uttBeginFrame, const vector<size_t> & uttFrameNum, const vector<size_t> & uttPhoneNum, const size_t samplesInRecurrentStep, const size_t maxFrameNum, const int delayConstraint, const bool isColWise);
CPUMatrix<ElemType>& AssignCTCScore(const CPUMatrix<ElemType>& prob, CPUMatrix<ElemType>& alpha, CPUMatrix<ElemType>& beta, const CPUMatrix<ElemType>& phoneSeq, const CPUMatrix<ElemType>& phoneBoundary, ElemType &totalScore, const vector<size_t>& uttMap, const vector<size_t> & uttBeginFrame, const vector<size_t> & uttFrameNum, const vector<size_t> & uttPhoneNum, const size_t samplesInRecurrentStep, const size_t maxFrameNum, const size_t blankTokenId, const int delayConstraint, const bool isColWise);
CPUMatrix<ElemType>& InplaceSqrt();
CPUMatrix<ElemType>& AssignSqrtOf(const CPUMatrix<ElemType>& a);

Просмотреть файл

@ -85,9 +85,9 @@ enum ElementWiseOperator
// unary (or binary with constant parameter)
opCopy,
opNegate, opNot, opAbs, opFloor, opReciprocal,
opSigmoid, opTanh, opSqr, opSqrt, opExp, opLog, opLinearRectifier, opCosine, opSin,
opSigmoid, opTanh, opSqr, opSqrt, opExp, opLog, opLinearRectifier, opCosine, opSin, opExponentialLinearUnit,
// unary ops for use by Matrix class only (there is no TensorView implementation)
opSigmoidDerivative, opLinearRectifierDerivative, opNegativeSine,
opSigmoidDerivative, opLinearRectifierDerivative, opNegativeSine, opExponentialLinearUnitDerivative,
// binary
opCopyIf, opCopyIfNot, opSum, opDifference, opElementwiseProduct, opElementwiseQuotient, opLogSum,
opMax, opMin, opArgmax, opArgmin,
@ -98,6 +98,7 @@ enum ElementWiseOperator
opElementwiseProductWithCosDerivative, opElementwiseProductWithSinDerivative,
opElementwiseProductWithAbsDerivative, opElementwiseProductWithSqrtDerivative,
opElementwiseProductWithReciprocalDerivative, opSqrOfDifference,
opElementwiseProductWithExponentialLinearUnitDerivativeFromOutput,
// binary ops for indexing
// opIndex,
// ternary
@ -114,53 +115,55 @@ enum ElementWiseOperator
#define ForAllNullaryOps(Macro) \
Macro(ConstOne);
#define ForAllUnaryOps(Macro) \
Macro(Copy); \
Macro(Negate); \
Macro(Not); \
Macro(Abs); \
Macro(Floor); \
Macro(Reciprocal); \
Macro(Sigmoid); \
Macro(Tanh); \
Macro(Sqr); \
Macro(Sqrt); \
Macro(Exp); \
Macro(Log); \
Macro(LinearRectifier); \
Macro(Cosine); \
Macro(Sin);
#define ForAllUnaryOps(Macro) \
Macro(Copy); \
Macro(Negate); \
Macro(Not); \
Macro(Abs); \
Macro(Floor); \
Macro(Reciprocal); \
Macro(Sigmoid); \
Macro(Tanh); \
Macro(Sqr); \
Macro(Sqrt); \
Macro(Exp); \
Macro(Log); \
Macro(LinearRectifier); \
Macro(Cosine); \
Macro(Sin); \
Macro(ExponentialLinearUnit);
#define ForAllBinaryOps(Macro) \
Macro(CopyIf); \
Macro(CopyIfNot); \
Macro(Sum); \
Macro(Difference); \
Macro(ElementwiseProduct); \
Macro(ElementwiseQuotient); \
Macro(LogSum); \
Macro(Max); \
Macro(Min); \
Macro(Equal); \
Macro(NotEqual); \
Macro(Greater); \
Macro(Less); \
Macro(GreaterEqual); \
Macro(LessEqual); \
Macro(And); \
Macro(Or); \
Macro(Xor); \
Macro(MaskNegative); \
Macro(ElementwiseProductWithSigmoidDerivativeFromOutput); \
Macro(ElementwiseProductWithTanhDerivativeFromOutput); \
Macro(ElementwiseProductWithLinearRectifierDerivativeFromOutput); \
Macro(ElementwiseProductWithLogDerivativeFromOutput); \
Macro(ElementwiseProductWithCosDerivative); \
Macro(ElementwiseProductWithSinDerivative); \
Macro(ElementwiseProductWithAbsDerivative); \
Macro(ElementwiseProductWithReciprocalDerivative); \
Macro(ElementwiseProductWithSqrtDerivative); \
Macro(SqrOfDifference); \
#define ForAllBinaryOps(Macro) \
Macro(CopyIf); \
Macro(CopyIfNot); \
Macro(Sum); \
Macro(Difference); \
Macro(ElementwiseProduct); \
Macro(ElementwiseQuotient); \
Macro(LogSum); \
Macro(Max); \
Macro(Min); \
Macro(Equal); \
Macro(NotEqual); \
Macro(Greater); \
Macro(Less); \
Macro(GreaterEqual); \
Macro(LessEqual); \
Macro(And); \
Macro(Or); \
Macro(Xor); \
Macro(MaskNegative); \
Macro(ElementwiseProductWithSigmoidDerivativeFromOutput); \
Macro(ElementwiseProductWithTanhDerivativeFromOutput); \
Macro(ElementwiseProductWithLinearRectifierDerivativeFromOutput); \
Macro(ElementwiseProductWithLogDerivativeFromOutput); \
Macro(ElementwiseProductWithCosDerivative); \
Macro(ElementwiseProductWithSinDerivative); \
Macro(ElementwiseProductWithAbsDerivative); \
Macro(ElementwiseProductWithReciprocalDerivative); \
Macro(ElementwiseProductWithSqrtDerivative); \
Macro(SqrOfDifference); \
Macro(ElementwiseProductWithExponentialLinearUnitDerivativeFromOutput);
//Macro(Index);
#define ForAllTernaryOps(Macro) \

Просмотреть файл

@ -4299,7 +4299,10 @@ GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignCTCScore(const GPUMatrix<ElemTyp
const std::vector<size_t> & uttFrameNum,
const std::vector<size_t> & uttPhoneNum,
const size_t numParallelSequences,
const size_t maxFrameNum, const int delayConstraint, const bool isColWise)
const size_t maxFrameNum,
const size_t blankTokenId,
const int delayConstraint,
const bool isColWise)
{
if (isColWise)
{
@ -4340,13 +4343,13 @@ GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignCTCScore(const GPUMatrix<ElemTyp
for (long t = 0; t < maxFrameNum; t++)
{
_assignAlphaScore << <block_tail, thread_tail, 0, t_stream >> >(prob.Data(), alpha.Data(), phoneSeq.Data(), phoneBoundary.Data(), gpuUttToChanInd,
gpuFrameNum, gpuBeginFrame, gpuPhoneNum, numParallelSequences, uttNum, t, maxPhoneNum, totalPhoneNum, delayConstraint);
gpuFrameNum, gpuBeginFrame, gpuPhoneNum, numParallelSequences, uttNum, t, maxPhoneNum, totalPhoneNum, blankTokenId, delayConstraint);
}
for (long t = maxFrameNum - 1; t >= 0; t--)
{
_assignBetaScore << <block_tail, thread_tail, 0, t_stream >> >(prob.Data(), beta.Data(), phoneSeq.Data(), phoneBoundary.Data(), gpuUttToChanInd,
gpuFrameNum, gpuBeginFrame, gpuPhoneNum, numParallelSequences, uttNum, t, maxPhoneNum, totalPhoneNum, delayConstraint);
gpuFrameNum, gpuBeginFrame, gpuPhoneNum, numParallelSequences, uttNum, t, maxPhoneNum, totalPhoneNum, blankTokenId, delayConstraint);
}
_assignTotalScore << <uttNum, 1, 0, t_stream >> > (beta.Data(), gpuScores, uttNum, gpuUttToChanInd, gpuBeginFrame, numParallelSequences, maxPhoneNum);

Просмотреть файл

@ -351,7 +351,7 @@ public:
GPUMatrix<ElemType>& AssignCTCScore(const GPUMatrix<ElemType>& prob, GPUMatrix<ElemType>& alpha, GPUMatrix<ElemType>& beta,
const GPUMatrix<ElemType> phoneSeq, const GPUMatrix<ElemType> phoneBoundary, ElemType &totalScore, const vector<size_t>& uttMap, const vector<size_t> & uttBeginFrame, const vector<size_t> & uttFrameNum,
const vector<size_t> & uttPhoneNum, const size_t samplesInRecurrentStep, const size_t maxFrameNum, const int delayConstraint, const bool isColWise);
const vector<size_t> & uttPhoneNum, const size_t samplesInRecurrentStep, const size_t maxFrameNum, const size_t blankTokenId, const int delayConstraint, const bool isColWise);
GPUMatrix<ElemType>& InplaceSqrt();
GPUMatrix<ElemType>& AssignSqrtOf(const GPUMatrix<ElemType>& a);

Просмотреть файл

@ -5208,6 +5208,7 @@ __global__ void _adam4BlockSparseCol(CUDA_LONG size,
// t (input): time stamp to process
// maxPhoneNum (input): the max number of phones between utterances
// totalPhoneNum (input): the total number of phones of all utterances
// blankTokenId (input): id of the CTC blank token
// delayConstraint -- label output delay constraint introduced during training that allows to have shorter delay during inference.
// Alpha and Beta scores outside of the delay boundary are set to zero.
// Setting this parameter smaller will result in shorted delay between label output during decoding.
@ -5227,6 +5228,7 @@ __global__ void _assignAlphaScore(
const size_t t,
const size_t maxPhoneNum, // Maximum length of utterance in this MB
const size_t totalPhoneNum, // Total number of phones
const size_t blankTokenId,
const int delayConstraint)
{
LONG64 uttId = blockDim.x * blockIdx.x + threadIdx.x;
@ -5277,7 +5279,7 @@ __global__ void _assignAlphaScore(
if (phoneSeqId > 2)
{
// if current label is not blank and not equal prev non-blank label
if ((LONG64)(phoneSeq[labelid]) != totalPhoneNum - 1 && phoneId != (LONG64)(phoneSeq[labelid_2]))
if ((LONG64)(phoneSeq[labelid]) != blankTokenId && phoneId != (LONG64)(phoneSeq[labelid_2]))
{
x = logaddk(x, alphaScore[alphaId_2]);
}
@ -5299,13 +5301,13 @@ __global__ void _assignAlphaScore(
{
LONG64 labelid_r = labelid + 2;
LONG64 phoneBoundId_r = (LONG64)(phoneBound[labelid_r]);
if (phoneId == totalPhoneNum - 1)
if (phoneId == blankTokenId)
{
// only constraint right side
if (t > phoneBoundId_r + delayConstraint - 1)
alphaScore[alphaId] = LZERO;
}
else if (phoneId != totalPhoneNum - 1)
else if (phoneId != blankTokenId)
{
if (t > phoneBoundId_r + delayConstraint)
alphaScore[alphaId] = LZERO;
@ -5332,6 +5334,7 @@ __global__ void _assignBetaScore(
const size_t t,
const size_t maxPhoneNum,
const size_t totalPhoneNum,
const size_t blankTokenId,
const int delayConstraint)
{
LONG64 uttId = blockDim.x * blockIdx.x + threadIdx.x;
@ -5368,7 +5371,7 @@ __global__ void _assignBetaScore(
ElemType ascore;
if (phoneSeqId < phoneNum - 3)
{
if (phoneSeq[labelid] != totalPhoneNum - 1 && phoneId != phoneSeq[labelid_2])
if (phoneSeq[labelid] != blankTokenId && phoneId != phoneSeq[labelid_2])
{
x = logaddk(x, betaScore[betaid_2]);
}
@ -5389,12 +5392,12 @@ __global__ void _assignBetaScore(
if (delayConstraint != -1)
{
LONG64 phoneBoundId_r = (LONG64)(phoneBound[labelid_2]);
if (phoneId == totalPhoneNum - 1)
if (phoneId == blankTokenId)
{
if (t > phoneBoundId_r + delayConstraint - 1)
betaScore[betaid] = LZERO;
}
else if (phoneId != totalPhoneNum - 1)
else if (phoneId != blankTokenId)
{
if (t > phoneBoundId_r + delayConstraint)
betaScore[betaid] = LZERO;

Просмотреть файл

@ -5732,6 +5732,7 @@ Matrix<ElemType>& Matrix<ElemType>::AssignSequenceError(const ElemType hsmoothin
// uttPhoneNum (input): the phone number of each utterance. The size of this vector = the number of all utterances in this minibatch
// numParallelSequences (input): num of parallel sequences
// mbsize (input): the maximum channel frame number
// blankTokenId (input): id of the CTC blank token
// delayConstraint -- label output delay constraint introduced during training that allows to have shorter delay during inference. This using the original time information to enforce that CTC tokens only get aligned within a time margin.
// Setting this parameter smaller will result in shorted delay between label output during decoding, yet may hurt accuracy.
// delayConstraint=-1 means no constraint
@ -5739,7 +5740,7 @@ template<class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignCTCScore(const Matrix<ElemType>& prob, Matrix<ElemType>& alpha, Matrix<ElemType>& beta,
const Matrix<ElemType>& phoneSeq, const Matrix<ElemType>& phoneBound, ElemType &totalScore, const std::vector<size_t> & uttToChanInd,
const std::vector<size_t> & uttBeginFrame, const std::vector<size_t> & uttFrameNum, const std::vector<size_t> & uttPhoneNum,
const size_t numParallelSequences, const size_t mbsize, const int delayConstraint, const bool isColWise)
const size_t numParallelSequences, const size_t mbsize, const size_t blankTokenId, const int delayConstraint, const bool isColWise)
{
DecideAndMoveToRightDevice(prob, *this);
alpha.Resize(phoneSeq.GetNumRows(), prob.GetNumCols());
@ -5754,9 +5755,9 @@ Matrix<ElemType>& Matrix<ElemType>::AssignCTCScore(const Matrix<ElemType>& prob,
DISPATCH_MATRIX_ON_FLAG(&prob,
this,
this->m_CPUMatrix->AssignCTCScore(*prob.m_CPUMatrix, *alpha.m_CPUMatrix, *beta.m_CPUMatrix, *phoneSeq.m_CPUMatrix, *phoneBound.m_CPUMatrix, totalScore,
uttToChanInd, uttBeginFrame, uttFrameNum, uttPhoneNum, numParallelSequences, mbsize, delayConstraint, isColWise),
uttToChanInd, uttBeginFrame, uttFrameNum, uttPhoneNum, numParallelSequences, mbsize, blankTokenId, delayConstraint, isColWise),
this->m_GPUMatrix->AssignCTCScore(*prob.m_GPUMatrix, *alpha.m_GPUMatrix, *beta.m_GPUMatrix, *phoneSeq.m_GPUMatrix, *phoneBound.m_GPUMatrix, totalScore,
uttToChanInd, uttBeginFrame, uttFrameNum, uttPhoneNum, numParallelSequences, mbsize, delayConstraint, isColWise),
uttToChanInd, uttBeginFrame, uttFrameNum, uttPhoneNum, numParallelSequences, mbsize, blankTokenId, delayConstraint, isColWise),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED
);

Просмотреть файл

@ -380,7 +380,7 @@ public:
Matrix<ElemType>& AssignCTCScore(const Matrix<ElemType>& prob, Matrix<ElemType>& alpha, Matrix<ElemType>& beta, const Matrix<ElemType>& phoneSeq, const Matrix<ElemType>& phoneBound, ElemType &totalScore,
const vector<size_t> & extraUttMap, const vector<size_t> & uttBeginFrame, const vector<size_t> & uttFrameNum, const vector<size_t> & uttPhoneNum, const size_t samplesInRecurrentStep,
const size_t mbSize, const int delayConstraint, const bool isColWise);
const size_t mbSize, const size_t blankTokenId, const int delayConstraint, const bool isColWise);
Matrix<ElemType>& InplaceSqrt();
Matrix<ElemType>& AssignSqrtOf(const Matrix<ElemType>& a);

Просмотреть файл

@ -93,6 +93,23 @@ void NcclComm::AllReduceImpl(void* buffer, size_t count, DataType dtype)
RuntimeError("NcclComm ncclAllReduce failed: %s", ncclGetErrorString(res));
}
void NcclComm::BroadcastImpl(void* buffer, size_t count, MPI_Datatype dtype, int root)
{
ncclResult_t res;
if (dtype == MPI_CHAR)
{
res = ncclBcast(buffer, count, ncclChar, root, m_ncclComm, m_stream);
}
else
{
RuntimeError("NcclComm Broadcast supports Char type only");
}
if (res != ncclSuccess)
{
RuntimeError("NcclComm ncclBcast failed: %s", ncclGetErrorString(res));
}
}
void NcclComm::Sync()
{
cudaStreamSynchronize(m_stream) || "NcclComm: cudaStreamSynchronize failed";

Просмотреть файл

@ -23,6 +23,7 @@ class NcclComm
private:
enum class DataType : int {FLOAT, DOUBLE};
void AllReduceImpl(void* buffer, size_t count, DataType dtype);
void BroadcastImpl(void* buffer, size_t count, MPI_Datatype dtype, int root);
cudaStream_t m_stream;
ncclComm_t m_ncclComm;
#endif
@ -53,6 +54,20 @@ public:
RuntimeError("NcclComm: CNTK was built without NCCL support.");
#endif
}
#pragma warning( push )
#pragma warning ( disable : 4100 ) // Disable warning 4100 in Broadcast function
void Broadcast(void* buffer, size_t count, MPI_Datatype dtype, int root)
{
#ifdef USE_NCCL
BroadcastImpl(buffer, count, dtype, root);
#else
RuntimeError("NcclComm: CNTK was built without NCCL support.");
#endif
}
};
#pragma warning( pop )
}}}

Просмотреть файл

@ -1395,7 +1395,7 @@ GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignSequenceError(const ElemType hsm
template <class ElemType>
GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignCTCScore(const GPUMatrix<ElemType>& prob, GPUMatrix<ElemType>& alpha, GPUMatrix<ElemType>& beta,
const GPUMatrix<ElemType> phoneSeq, const GPUMatrix<ElemType> phoneBound, ElemType &totalScore, const std::vector<size_t>& uttMap, const std::vector<size_t> & uttBeginFrame, const std::vector<size_t> & uttFrameNum,
const std::vector<size_t> & uttPhoneNum, const size_t samplesInRecurrentStep, const size_t maxFrameNum, const int delayConstraint, const bool isColWise)
const std::vector<size_t> & uttPhoneNum, const size_t samplesInRecurrentStep, const size_t maxFrameNum, const size_t blankTokenId, const int delayConstraint, const bool isColWise)
{
return *this;
}

Просмотреть файл

@ -48,6 +48,7 @@ OverloadUnaryMathFns(fabs);
OverloadUnaryMathFns(cos);
OverloadUnaryMathFns(sin);
OverloadUnaryMathFns(floor);
OverloadUnaryMathFns(log1p);
#pragma pop_macro("OverloadUnaryMathFns")
@ -97,6 +98,12 @@ DECL ElemType LinearRectifierDerivative(ElemType z)
return z > 0 ? (ElemType) 1 : 0;
}
template <class ElemType>
DECL ElemType ExponentialLinearUnitDerivative(ElemType z)
{
return z >= 0 ? (ElemType)1 : exp_(z);
}
template <class ElemType>
DECL ElemType Sgn(ElemType z)
{
@ -141,21 +148,9 @@ template <typename ElemType>
DECL ElemType LogAdd(ElemType x, ElemType y)
{
if (x < y)
{
ElemType temp = x;
x = y;
y = temp;
}
ElemType diff = y - x;
if (diff < (ElemType) MINLOGEXP)
{
return (x < (ElemType) LSMALL) ? (ElemType) LZERO : x;
}
else
{
ElemType z = exp_(diff);
return x + log_((ElemType) 1.0 + z);
}
std::swap(x, y);
return x + log1p_(exp_(y - x));
}
// IndexElement reindexes a tensor along one dimension.
@ -206,6 +201,7 @@ DefUnaryOp(LinearRectifier, a > 0 ? a : 0);
DefUnaryOp(Cosine, cos_(a));
DefUnaryOp(Sin, sin_(a));
DefUnaryOp(Reciprocal, a == 0 ? 0 : 1 / a);
DefUnaryOp(ExponentialLinearUnit, a >= 0 ? a : (exp_(a)-1));
#pragma pop_macro("DefUnaryOp")
#pragma push_macro("DefBinaryOp")
@ -245,6 +241,7 @@ DefBinaryOp(ElementwiseProductWithAbsDerivative, a * Sgn(b)); // note: b = input
DefBinaryOp(ElementwiseProductWithReciprocalDerivative, a * -Sqr(b)); // b = output
DefBinaryOp(ElementwiseProductWithSqrtDerivative, a / (2 * b)); // b = output; d/dx sqrt(x) = 1/(2 * sqrt(x)) --> note this is the same as ElementwiseQuotient w a constant; if more show up like this we should add more template params
DefBinaryOp(SqrOfDifference, Sqr(a - b));
DefBinaryOp(ElementwiseProductWithExponentialLinearUnitDerivativeFromOutput, b >= 0 ? a : a*(1+b)); // b = output;
//DefBinaryOp(Index, IndexElement(a, b, i)); // note: this one uses the third argument
#pragma pop_macro("DefBinaryOp")

Просмотреть файл

@ -10,6 +10,8 @@
#include "BinaryConfigHelper.h"
#include "DataReader.h"
#include "StringUtil.h"
#include "ReaderConstants.h"
#include "ReaderUtil.h"
using std::string;
using std::wstring;
@ -49,23 +51,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_filepath = msra::strfun::utf16(config(L"file"));
m_keepDataInMemory = config(L"keepDataInMemory", false);
// EvalActions inserts randomize = "none" into the reader config in DoWriteOutoput. We would like this to be true/false,
// but we can't for this reason. So we will assume false unless we specifically get "true"
m_randomize = false;
wstring randomizeString = config(L"randomize", L"false");
if (!_wcsicmp(randomizeString.c_str(), L"true")) // TODO: don't support case-insensitive option strings in the new reader
m_randomize = true;
if (m_randomize)
m_randomizationWindow = GetRandomizationWindowFromConfig(config);
m_sampleBasedRandomizationWindow = config(L"sampleBasedRandomizationWindow", false);
if (!m_sampleBasedRandomizationWindow && m_randomizationWindow == randomizeAuto)
{
if (config.Exists(L"randomizationWindow"))
m_randomizationWindow = config(L"randomizationWindow");
else
m_randomizationWindow = randomizeAuto;
// The size of the chunk for the binary reader is specified in terms of the number of sequences
// per chunk and is fixed at the time when the data is serialized into the binary format.
// As a result, the on-disk size of a chunk can be arbitrary, and 32MB number used here is
// merely a heuristic.
m_randomizationWindow = g_4GB / g_32MB; // 128 chunks.
}
else
m_randomizationWindow = randomizeNone;
m_traceLevel = config(L"traceLevel", 1);
}

Просмотреть файл

@ -26,10 +26,10 @@ public:
// Get full path to the input file.
const wstring& GetFilePath() const { return m_filepath; }
size_t GetRandomize() const { return m_randomize; }
size_t GetRandomizationWindow() const { return m_randomizationWindow; }
bool UseSampleBasedRandomizationWindow() const { return m_sampleBasedRandomizationWindow; }
unsigned int GetTraceLevel() const { return m_traceLevel; }
bool ShouldKeepDataInMemory() const { return m_keepDataInMemory; }
@ -40,7 +40,9 @@ private:
std::wstring m_filepath;
std::map<std::wstring, std::wstring> m_streams;
size_t m_randomizationWindow;
bool m_randomize;
// Specifies how to interpret randomization window, if true randomization window == number of samples, else
// randomization window = number of chunks (default).
bool m_sampleBasedRandomizationWindow;
unsigned int m_traceLevel;
bool m_keepDataInMemory; // if true the whole dataset is kept in memory
};

Просмотреть файл

@ -35,9 +35,9 @@ CNTKBinaryReader::CNTKBinaryReader(const ConfigParameters& config)
log += " | keeping data in memory";
}
if (configHelper.GetRandomize())
size_t window = configHelper.GetRandomizationWindow();
if (window > 0)
{
size_t window = configHelper.GetRandomizationWindow();
// Verbosity is a general config parameter, not specific to the binary format reader.
log += " | randomizing with window: " + (int)window;
int verbosity = config(L"verbosity", 0);
@ -46,8 +46,9 @@ CNTKBinaryReader::CNTKBinaryReader(const ConfigParameters& config)
window, /* randomizationRangeInSamples */
m_deserializer, /* deserializer */
true, /* shouldPrefetch */
false /* multithreadedGetNextSequences */
);
false, /* multithreadedGetNextSequences */
0, /*maxNumberOfInvalidSequences */
configHelper.UseSampleBasedRandomizationWindow() /*sampleBasedRandomizationWindow */);
}
else
{

Просмотреть файл

@ -11,6 +11,7 @@
#include "DataReader.h"
#include "StringUtil.h"
#include "ReaderConstants.h"
#include "ReaderUtil.h"
using std::string;
using std::wstring;
@ -117,35 +118,6 @@ TextConfigHelper::TextConfigHelper(const ConfigParameters& config)
}
m_filepath = msra::strfun::utf16(config(L"file"));
wstring randomizeString = config(L"randomize", wstring());
if (!_wcsicmp(randomizeString.c_str(), L"none")) // TODO: don't support case-insensitive option strings in the new reader
{
// "none" is only accepted to be backwards-compatible (DoWriteOutput() in EvalActions.cpp
// inserts this magic constant into the reader config to prevent it from shuffling the input).
// In user-defined configurations, 'randomize' should be a boolean.
m_randomizationWindow = randomizeNone;
}
else
{
bool randomize = config(L"randomize", true);
if (!randomize)
{
m_randomizationWindow = randomizeNone;
}
else if (config.Exists(L"randomizationWindow"))
{
m_randomizationWindow = config(L"randomizationWindow");
}
else
{
m_randomizationWindow = randomizeAuto;
}
}
m_sampleBasedRandomizationWindow = config(L"sampleBasedRandomizationWindow", false);
m_skipSequenceIds = config(L"skipSequenceIds", false);
m_maxErrors = config(L"maxErrors", 0);
m_traceLevel = config(L"traceLevel", 1);
@ -153,6 +125,8 @@ TextConfigHelper::TextConfigHelper(const ConfigParameters& config)
m_keepDataInMemory = config(L"keepDataInMemory", false);
m_frameMode = config(L"frameMode", false);
m_randomizationWindow = GetRandomizationWindowFromConfig(config);
m_sampleBasedRandomizationWindow = config(L"sampleBasedRandomizationWindow", false);
if (!m_sampleBasedRandomizationWindow && m_randomizationWindow == randomizeAuto)
{
m_randomizationWindow = g_4GB / m_chunkSizeBytes; // ~ 4 GB (on disk) worth of chunks

Просмотреть файл

@ -87,6 +87,7 @@
<ClCompile Include="FramePacker.cpp" />
<ClCompile Include="ReaderBase.cpp" />
<ClCompile Include="ReaderShim.cpp" />
<ClCompile Include="ReaderUtil.cpp" />
<ClCompile Include="SequencePacker.cpp" />
<ClCompile Include="SequenceRandomizer.cpp" />
<ClCompile Include="TruncatedBpttPacker.cpp" />

Просмотреть файл

@ -135,6 +135,9 @@
<ClCompile Include="Indexer.cpp">
<Filter>Utils</Filter>
</ClCompile>
<ClCompile Include="ReaderUtil.cpp">
<Filter>Utils</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<Filter Include="Interfaces">

Просмотреть файл

@ -0,0 +1,37 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#include "Config.h"
#include "DataReader.h"
namespace Microsoft { namespace MSR { namespace CNTK {
size_t GetRandomizationWindowFromConfig(const ConfigParameters& config)
{
wstring randomizeString = config(L"randomize", wstring());
if (!_wcsicmp(randomizeString.c_str(), L"none")) // TODO: don't support case-insensitive option strings in the new reader
{
// "none" is only accepted to be backwards-compatible (DoWriteOutput() in EvalActions.cpp
// inserts this magic constant into the reader config to prevent it from shuffling the input).
// In user-defined configurations, 'randomize' should be a boolean.
return randomizeNone;
}
bool randomize = config(L"randomize", true);
if (!randomize)
{
return randomizeNone;
}
if (config.Exists(L"randomizationWindow"))
{
return config(L"randomizationWindow");
}
return randomizeAuto;
}
}}}

Просмотреть файл

@ -10,6 +10,10 @@
namespace Microsoft { namespace MSR { namespace CNTK {
class ConfigParameters;
size_t GetRandomizationWindowFromConfig(const ConfigParameters& config);
// Returns the size of the type.
inline size_t GetSizeByType(ElementType type)
{

Просмотреть файл

@ -26,7 +26,8 @@ void AggregateAccumulatorValuesAndUpdateEvaluation(
std::shared_ptr<ComputationNetwork> net,
std::set<std::shared_ptr<ComputationNodeBase>> evalNodesWhichAccumulateResult,
std::shared_ptr<DistGradHeader> gradHeader,
std::shared_ptr<MPIWrapper> mpi)
std::shared_ptr<MPIWrapper> mpi,
size_t packThresholdSizeInBytes)
{
// Accumulator stores mean value and number of samples. Aggregation performs simple summation of values,
// so we transfer sum instead of mean, and calculate mean after aggregation is finished.
@ -58,7 +59,8 @@ void AggregateAccumulatorValuesAndUpdateEvaluation(
mpi,
false /*useAsyncAggregation*/,
net->GetDeviceId(),
0 /*syncStatsTrace*/);
0 /*syncStatsTrace*/,
packThresholdSizeInBytes);
// Prepare header.
const size_t c_evalNodes = 1;
@ -127,10 +129,11 @@ void AggregateAccumulatorValuesAndUpdateEpochEvaluation(
std::vector<EpochCriterion>& epochEvalErrors,
const std::vector<ComputationNodeBasePtr>& evaluationNodes,
CriterionAccumulator<ElemType> localEpochEvalErrors,
std::function<bool(ComputationNodeBasePtr)> containsAccumulatedResult)
std::function<bool(ComputationNodeBasePtr)> containsAccumulatedResult,
size_t packThresholdSizeInBytes = DEFAULT_PACK_THRESHOLD_SIZE_IN_BYTES)
{
// Each node contains accumulated values for part of the data set, we have to aggregate accumulated values.
AggregateAccumulatorValuesAndUpdateEvaluation<ElemType>(net, evalNodesWhichAccumulateResult, gradHeader, mpi);
AggregateAccumulatorValuesAndUpdateEvaluation<ElemType>(net, evalNodesWhichAccumulateResult, gradHeader, mpi, packThresholdSizeInBytes);
// After values of accumulators have been aggregated accross nodes, we have to update evaluation results for
// evaluation nodes that accumulate results.

Просмотреть файл

@ -1511,7 +1511,7 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
// and recalculate evaluation errors based on accumulators.
AggregateAccumulatorValuesAndUpdateEpochEvaluation<ElemType>(
net, evaluationNodesWhichAccumulateResult, m_gradHeader, m_mpi, epochEvalErrors, evaluationNodes,
localEpochEvalErrors, ContainsAccumulatedResult);
localEpochEvalErrors, ContainsAccumulatedResult, m_packThresholdSizeInBytes);
}
return totalEpochSamples;
@ -2111,7 +2111,7 @@ void SGD<ElemType>::InitDistGradAgg(int numEvalNodes, int numGradientBits, int d
if (Globals::UseV2Aggregator()) // Currently used to check V2 against baselines.
m_distGradAgg = std::make_shared<V2SimpleDistGradAggregator<ElemType>>(m_mpi, m_bufferedAsyncGradientAggregation, deviceId, m_syncStatsTrace, ::CNTK::MPICommunicator());
else
m_distGradAgg = std::make_shared<SimpleDistGradAggregator<ElemType>>(m_mpi, m_bufferedAsyncGradientAggregation, deviceId, m_syncStatsTrace);
m_distGradAgg = std::make_shared<SimpleDistGradAggregator<ElemType>>(m_mpi, m_bufferedAsyncGradientAggregation, deviceId, m_syncStatsTrace, m_packThresholdSizeInBytes);
}
m_gradHeader.reset(DistGradHeader::Create(numEvalNodes), [](DistGradHeader* ptr) { DistGradHeader::Destroy(ptr); });
@ -2701,6 +2701,8 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType)
m_maxSamplesInRAM = configSGD(L"maxSamplesInRAM", (size_t) SIZE_MAX);
m_numSubminiBatches = configSGD(L"numSubminibatches", (size_t) 1);
m_packThresholdSizeInBytes = configSGD(L"packThresholdSizeInKB", DEFAULT_PACK_THRESHOLD_SIZE_IN_KB) * 1024;
if (configAALR.Exists(L"numMiniBatch4LRSearch"))
{
LOGPRINTF(stderr, "WARNING: 'numMiniBatch4LRSearch' is deprecated, please remove it and use 'numSamples4Search' instead.\n");

Просмотреть файл

@ -200,6 +200,9 @@ protected:
intargvector m_numSamples4Search;
size_t m_numBestSearchEpoch;
// Threshold size in bytes for single gradient to do packing
size_t m_packThresholdSizeInBytes;
LearningRateSearchAlgorithm m_autoLearnRateSearchType;
AdaptationRegType m_adaptationRegType;

Просмотреть файл

@ -6,6 +6,7 @@
#pragma once
#include "Constants.h"
#include "IDistGradAggregator.h"
#include "CUDAPageLockedMemAllocator.h"
#include "NcclComm.h"
@ -22,8 +23,9 @@ class SimpleDistGradAggregator : public IDistGradAggregator<ElemType>
UsingIDistGradAggregatorMembers;
public:
SimpleDistGradAggregator(const MPIWrapperPtr& mpi, bool useAsyncAggregation, int deviceId, int syncStatsTrace)
: IDistGradAggregator<ElemType>(mpi), m_useAsyncAggregation(useAsyncAggregation), m_initialized(false), m_bufferedGradHeader(nullptr), m_syncStatsTrace(syncStatsTrace), m_iterationCount(0), m_nccl(deviceId, mpi)
SimpleDistGradAggregator(const MPIWrapperPtr& mpi, bool useAsyncAggregation, int deviceId, int syncStatsTrace, size_t packThresholdSizeInBytes = DEFAULT_PACK_THRESHOLD_SIZE_IN_BYTES)
: IDistGradAggregator<ElemType>(mpi), m_useAsyncAggregation(useAsyncAggregation), m_initialized(false), m_bufferedGradHeader(nullptr), m_syncStatsTrace(syncStatsTrace),
m_iterationCount(0), m_nccl(deviceId, mpi), m_packThresholdSizeInBytes(packThresholdSizeInBytes)
{}
~SimpleDistGradAggregator()
@ -144,25 +146,65 @@ private:
m_initialized = true;
int deviceId = gradients[0]->GetDeviceId();
if (!m_nccl.IsSupported() && deviceId != CPUDEVICE)
if (!m_nccl.IsSupported() && (deviceId != CPUDEVICE))
m_allocator.reset(new CUDAPageLockedMemAllocator(deviceId));
size_t packedGradientsSizeInElements = 0;
for (size_t i = 0; i < gradients.size(); i++)
{
if (!m_useAsyncAggregation && sizeof(ElemType) * gradients[i]->GetNumElements() <= m_packThresholdSizeInBytes)
{
packedGradientsSizeInElements += gradients[i]->GetNumElements();
m_packedGradientsIndex.push_back(i);
}
else
{
m_gradientIndexToAggregate.push_back(i);
}
// Make sure none of the gradient matrixes are sparse - we currently do not support aggregation of sparse gradient matrices
if (gradients[i]->GetMatrixType() != DENSE)
RuntimeError("Gradient aggregation for sparse gradient matrices is currently unsupported!");
if (!m_nccl.IsSupported() && deviceId != CPUDEVICE)
{
m_gpuDataTransferers.push_back(std::make_unique<GPUDataTransferer>(deviceId, m_useAsyncAggregation));
m_intermediateCPUBuffers.push_back(AllocateIntermediateBuffer(deviceId, gradients[i]->GetNumElements()));
}
if (m_useAsyncAggregation)
m_bufferedGradients[gradients[i]].reset(new Matrix<ElemType>(gradients[i]->GetNumRows(), gradients[i]->GetNumCols(), deviceId));
}
// Packing matrices into continous buffer if not doing async aggregation
m_aggregationBuffer.reset();
if (packedGradientsSizeInElements > 0)
{
m_aggregationBuffer.reset(new (std::nothrow) Matrix<ElemType>(1, packedGradientsSizeInElements, deviceId));
}
// If no extra continous buffer allocated or using async aggregation
if (m_aggregationBuffer == nullptr)
{
m_gradientIndexToAggregate.clear();
m_packedGradientsIndex.clear();
packedGradientsSizeInElements = 0;
// Reuse "@param m_gradientIndexToAggregate" for following code, if no continous buffer allocated
for (size_t i = 0; i < gradients.size(); i++)
{
m_gradientIndexToAggregate.push_back(i);
}
}
else
{
// First element is reserved for continous buffer
m_gradientIndexToAggregate.insert(m_gradientIndexToAggregate.begin(), 1, (size_t)-1);
}
// If running on GPU and NCCL not supported, initialize GPU and CPU data transfer
if (!m_nccl.IsSupported() && (deviceId != CPUDEVICE))
{
for (size_t i : m_gradientIndexToAggregate)
{
m_gpuDataTransferers.push_back(std::make_unique<GPUDataTransferer>(deviceId, m_useAsyncAggregation));
m_intermediateCPUBuffers.push_back(AllocateIntermediateBuffer(deviceId,
(i == -1) ? packedGradientsSizeInElements : gradients[i]->GetNumElements()));
}
}
if (m_useAsyncAggregation)
{
m_bufferedGradHeader = DistGradHeader::Create(numEvalNodes);
@ -223,11 +265,33 @@ private:
}
}
// Initiate transfer of the gradient matrices to the CPU if needed
if (!m_nccl.IsSupported() && deviceId >= 0)
// Copy all gradient data into a single contiguous buffer, if additional continous buffer allocated
size_t offset = 0;
for (size_t i : m_packedGradientsIndex)
{
for (size_t i = 0; i < numGradMatrices; ++i)
m_gpuDataTransferers[i]->CopyGPUToCPUAsync(gradients[i]->Data(), gradients[i]->GetNumElements(), m_intermediateCPUBuffers[i].get());
m_aggregationBuffer->ColumnSlice(offset, gradients[i]->GetNumElements()).AssignValuesOf(gradients[i]->Reshaped(1, gradients[i]->GetNumElements()));
offset += gradients[i]->GetNumElements();
}
// Initiate transfer of the bufferred data to the CPU if needed
if (!m_nccl.IsSupported() && deviceId != CPUDEVICE)
{
size_t gpuDataTransfersIdx = 0;
Matrix<ElemType>* gpuCopyBuffer = m_aggregationBuffer.get();
for (size_t i : m_gradientIndexToAggregate)
{
if (i != -1)
{
gpuCopyBuffer = gradients[i];
}
else
{
// i == -1, first element is for packed gradients, which should not be with AsyncAggregation
assert(m_useAsyncAggregation == false);
}
m_gpuDataTransferers[gpuDataTransfersIdx]->CopyGPUToCPUAsync(gpuCopyBuffer->Data(), gpuCopyBuffer->GetNumElements(), m_intermediateCPUBuffers[gpuDataTransfersIdx].get());
gpuDataTransfersIdx++;
}
}
// Initiate receive of the header on the main node
@ -248,26 +312,35 @@ private:
m_mpi->Isend(headerCPU, headerCPU->Size(), MPI_CHAR, m_mpi->MainNodeRank(), numGradMatrices, &sendHeaderRequest) || MpiFail("MPI_Isend");
// Perform async allreduce on the gradient data
std::vector<MPI_Request> allReduceRequests(numGradMatrices);
std::vector<MPI_Request> allReduceRequests;
if (!m_nccl.IsSupported())
{
for (size_t i = 0; i < numGradMatrices; ++i)
size_t allReduceIndex = 0;
ElemType* reductionBuffer;
for (size_t i : m_gradientIndexToAggregate)
{
ElemType* reductionBuffer = gradients[i]->Data();
if (deviceId >= 0)
allReduceRequests.push_back(MPI_Request());
reductionBuffer = (i == -1)? m_aggregationBuffer->Data() : gradients[i]->Data();
if (deviceId != CPUDEVICE)
{
m_gpuDataTransferers[i]->WaitForCopyGPUToCPUAsync();
reductionBuffer = m_intermediateCPUBuffers[i].get();
m_gpuDataTransferers[allReduceIndex]->WaitForCopyGPUToCPUAsync();
reductionBuffer = m_intermediateCPUBuffers[allReduceIndex].get();
}
// On Windows this async MPI_Iallreduce call requires MS MPI v7 or higher to be installed
m_mpi->Iallreduce(MPI_IN_PLACE, reductionBuffer, gradients[i]->GetNumElements(),
MPIWrapper::GetDataType(reductionBuffer), MPI_SUM,
&allReduceRequests[i]) || MpiFail("MPI_Iallreduce");
m_mpi->Iallreduce(MPI_IN_PLACE, reductionBuffer, (i == -1) ? m_aggregationBuffer->GetNumElements() : gradients[i]->GetNumElements(),
MPIWrapper::GetDataType(reductionBuffer), MPI_SUM, &allReduceRequests.back()) || MpiFail("MPI_Iallreduce");
allReduceIndex++;
}
}
}
else
m_nccl.AllReduce(gradients);
{
std::vector<Matrix<ElemType>*> ncclReduceGradients;
for (size_t i : m_gradientIndexToAggregate)
{
ncclReduceGradients.push_back((i == -1) ? m_aggregationBuffer.get() : gradients[i]);
}
m_nccl.AllReduce(ncclReduceGradients);
}
// On the main node wait for the headers to arrive and aggregate
if (m_mpi->IsMainNode())
@ -290,52 +363,48 @@ private:
assert(numNodesHeadersReceivedFrom == (NumProc() - 1));
}
// Initiate receive of the aggregate header
MPI_Request recvAggHeaderRequest;
if (!m_mpi->IsMainNode())
m_mpi->Irecv(headerCPU, headerCPU->Size(), MPI_CHAR, m_mpi->MainNodeRank(), numGradMatrices + 1 + numGradMatrices, &recvAggHeaderRequest) || MpiFail("MPI_Irecv");
// Broadcast the aggregated header to all nodes
m_mpi->Bcast(headerCPU, headerCPU->Size(), MPI_CHAR, m_mpi->MainNodeRank());
// Intiate send of the aggregate header from main node
std::vector<MPI_Request> sendAggHeaderRequests(NumProc() - 1);
if (m_mpi->IsMainNode())
{
for (size_t j = 0; j < NumProc() - 1; ++j)
{
int dest = (j >= MyRank()) ? (j + 1) : j;
// TODO: Should we use MPI_Bcast instead for better performance
m_mpi->Isend(headerCPU, headerCPU->Size(), MPI_CHAR, dest, numGradMatrices + 1 + numGradMatrices, &(sendAggHeaderRequests[j])) || MpiFail("MPI_Isend");
}
}
// Wait for the allreduce operations to finish and initiate transfer back to the GPU if needed
if (!m_nccl.IsSupported())
{
for (size_t i = 0; i < numGradMatrices; ++i)
{
m_mpi->Wait(&allReduceRequests[i], MPI_STATUSES_IGNORE) || MpiFail("MPI_Wait");
if (deviceId >= 0)
m_gpuDataTransferers[i]->CopyCPUToGPUAsync(m_intermediateCPUBuffers[i].get(), gradients[i]->GetNumElements(), gradients[i]->Data());
}
}
// Wait to receive aggregate header
if (!m_mpi->IsMainNode())
m_mpi->Wait(&recvAggHeaderRequest, MPI_STATUSES_IGNORE) || MpiFail("MPI_Wait");
// Wait for all the transfers to finish
if (m_nccl.IsSupported())
m_nccl.Sync();
else if (deviceId >= 0)
{
for (size_t i = 0; i < numGradMatrices; ++i)
m_gpuDataTransferers[i]->WaitForCopyCPUToGPUAsync();
m_nccl.Sync();
}
else
{
// Wait for the allreduce operations to finish and initiate transfer back to the GPU if needed
size_t gpuDataTransfersIdx = 0; // Index of allReduceRequest for each un-packed gradient
for (size_t i : m_gradientIndexToAggregate)
{
m_mpi->Wait(&allReduceRequests[gpuDataTransfersIdx], MPI_STATUSES_IGNORE) || MpiFail("MPI_Wait");
if (deviceId != CPUDEVICE)
{
m_gpuDataTransferers[gpuDataTransfersIdx]->CopyCPUToGPUAsync(m_intermediateCPUBuffers[gpuDataTransfersIdx].get(),
(i == -1) ? m_aggregationBuffer->GetNumElements() : gradients[i]->GetNumElements(),
(i == -1) ? m_aggregationBuffer->Data() : gradients[i]->Data());
}
gpuDataTransfersIdx++;
}
// Wait for copy data from CPU to GPU, if not running on CPU and not NCCL enabled
if (deviceId != CPUDEVICE)
{
for (size_t i = 0; i < m_gradientIndexToAggregate.size(); i++)
m_gpuDataTransferers[i]->WaitForCopyCPUToGPUAsync();
}
}
// Copy data back to the packed gradients from the continous buffer
offset = 0;
for (size_t i : m_packedGradientsIndex)
{
gradients[i]->AssignValuesOf(m_aggregationBuffer->ColumnSlice(offset, gradients[i]->GetNumElements()).Reshaped(gradients[i]->GetNumRows(), gradients[i]->GetNumCols()));
offset += gradients[i]->GetNumElements();
}
// Wait for completion of the async send requests
if (!m_mpi->IsMainNode())
m_mpi->Wait(&sendHeaderRequest, MPI_STATUSES_IGNORE) || MpiFail("MPI_Wait");
else
m_mpi->Waitall(sendAggHeaderRequests.size(), sendAggHeaderRequests.data(), MPI_STATUSES_IGNORE) || MpiFail("MPI_Waitall");
if (showSyncPerfStats)
{
@ -347,8 +416,8 @@ private:
private:
std::unique_ptr<CUDAPageLockedMemAllocator> m_allocator;
std::vector<std::shared_ptr<ElemType>> m_intermediateCPUBuffers;
std::vector<std::shared_ptr<ElemType>> m_intermediateCPUBuffers;
std::vector<std::unique_ptr<GPUDataTransferer>> m_gpuDataTransferers;
std::vector<DistGradHeader*> m_recvHeaders;
@ -363,6 +432,13 @@ private:
std::unordered_map<Matrix<ElemType>*, std::unique_ptr<Matrix<ElemType>>> m_bufferedGradients;
DistGradHeader* m_bufferedGradHeader;
// Packing small gradients (size not larger than threshold size) into a continous buffer to reduce MPI calls.
// Threshold size to pack a gradient into the continous buffer, default 32KB (tunable by define "packThresholdSizeInKB=[value]")
const size_t m_packThresholdSizeInBytes;
std::unique_ptr<Matrix<ElemType>> m_aggregationBuffer;
std::vector<size_t> m_packedGradientsIndex;
std::vector<size_t> m_gradientIndexToAggregate;
int m_syncStatsTrace;
// Only used for controlling frequency of measuring/showing gradient aggregation perf stats

Просмотреть файл

@ -258,7 +258,7 @@ public:
// maxValues (input): values of max elements in label input vectors
// labels (input): 1-hot vector with frame-level phone labels
// CTCPosterior (output): CTC posterior
// blankTokenId (input): id of the blank token
// blankTokenId (input): id of the blank token. If specified as SIZE_MAX, will be replaced with (numberOfLabels - 1)
// delayConstraint -- label output delay constraint introduced during training that allows to have shorter delay during inference. This using the original time information to enforce that CTC tokens only get aligned within a time margin.
// Setting this parameter smaller will result in shorted delay between label output during decoding, yet may hurt accuracy.
// delayConstraint=-1 means no constraint
@ -285,7 +285,7 @@ public:
std::vector<size_t> phoneBound;
ElemType finalScore = 0;
if (blankTokenId == INT_MIN)
if (blankTokenId == SIZE_MAX)
blankTokenId = numRows - 1;
size_t mbsize = numCols / numParallelSequences;
@ -374,7 +374,7 @@ public:
Microsoft::MSR::CNTK::Matrix<ElemType> alpha(m_deviceid);
Microsoft::MSR::CNTK::Matrix<ElemType> beta(m_deviceid);
CTCPosterior.AssignCTCScore(prob, alpha, beta, matrixPhoneSeqs, matrixPhoneBounds, finalScore, uttToChanInd, uttBeginFrame,
uttFrameNum, uttPhoneNum, numParallelSequences, mbsize, delayConstraint, /*isColWise=*/true );
uttFrameNum, uttPhoneNum, numParallelSequences, mbsize, blankTokenId, delayConstraint, /*isColWise=*/true );
Microsoft::MSR::CNTK::Matrix<ElemType> rowSum(m_deviceid);
rowSum.Resize(1, numCols);

Просмотреть файл

@ -209,6 +209,15 @@ void TrainSequenceToSequenceTranslator(const DeviceDescriptor& device, bool useS
break;
trainer->TrainMinibatch({ { rawInput, minibatchData[rawInputStreamInfo] }, { rawLabels, minibatchData[rawLabelsStreamInfo] } }, device);
// Some basic sanity tests on the training loss and evaluation error values
auto IsNegativeOrNan = [](double value) {
return (value < 0) || std::isnan(value);
};
if (IsNegativeOrNan(trainer->PreviousMinibatchLossAverage()) || IsNegativeOrNan(trainer->PreviousMinibatchEvaluationAverage()))
ReportFailure("SequenceToSequence: Invalid (-ve or nan) loss or evaluation metric encountered in training of the SequenceToSequence model.");
PrintTrainingProgress(trainer, i, outputFrequencyInMinibatches);
if ((i + 1) == numMinibatchesToCheckpointAfter)
@ -232,9 +241,12 @@ void TrainSequenceToSequenceTranslator()
{
fprintf(stderr, "\nTrainSequenceToSequenceTranslator..\n");
// TODO: Also test with sparse input variables in the graph
TrainSequenceToSequenceTranslator(DeviceDescriptor::CPUDevice(), false, true, false, false, true, true);
TrainSequenceToSequenceTranslator(DeviceDescriptor::CPUDevice(), true, false, false, false, true, true);
if (IsGPUAvailable())
{
TrainSequenceToSequenceTranslator(DeviceDescriptor::GPUDevice(0), false, false, true, true, false, false);
TrainSequenceToSequenceTranslator(DeviceDescriptor::GPUDevice(0), true, true, true, true, false, false);
}
}

Просмотреть файл

@ -12,7 +12,7 @@ DOC_DIR=$TEST_ROOT_DIR/../../bindings/python/doc
pushd $DOC_DIR || exit $?
echo Current dir: $PWD
py.test --deviceid $TEST_DEVICE
py.test --verbose --deviceid $TEST_DEVICE
if [ "$?" -eq "0" ]; then
echo "__COMPLETED__"

Просмотреть файл

@ -7,7 +7,7 @@ python -c "import numpy; print('NumPy: %s'%numpy.version.full_version)"
python -c "import scipy; print('SciPy: %s'%scipy.version.full_version)"
python -c "import pytest; print('PyTest: %s'%pytest.__version__)"
py.test --deviceid $TEST_DEVICE --is1bitsgd $TEST_1BIT_SGD
py.test --verbose --deviceid $TEST_DEVICE --is1bitsgd $TEST_1BIT_SGD
if [ "$?" -eq "0" ]; then
echo "__COMPLETED__"

Просмотреть файл

@ -10,7 +10,7 @@ python -c "import pytest; print('PyTest: %s'%pytest.__version__)"
# TODO why doesn't "py.test --pyargs cntk" work?
MODULE_DIR="$(python -c "import cntk, os, sys; sys.stdout.write(os.path.dirname(os.path.abspath(cntk.__file__)))")"
[ $? -eq 0 ] || exit $?
py.test "$MODULE_DIR" --deviceid $TEST_DEVICE --is1bitsgd $TEST_1BIT_SGD --doctest-modules
py.test "$MODULE_DIR" --verbose --deviceid $TEST_DEVICE --is1bitsgd $TEST_1BIT_SGD --doctest-modules
if [ "$?" -eq "0" ]; then
echo "__COMPLETED__"

Просмотреть файл

@ -7,7 +7,7 @@ python -c "import numpy; print('NumPy: %s'%numpy.version.full_version)"
python -c "import scipy; print('SciPy: %s'%scipy.version.full_version)"
python -c "import pytest; print('PyTest: %s'%pytest.__version__)"
py.test --deviceid $TEST_DEVICE --is1bitsgd $TEST_1BIT_SGD
py.test --verbose --deviceid $TEST_DEVICE --is1bitsgd $TEST_1BIT_SGD
if [ "$?" -eq "0" ]; then
echo "__COMPLETED__"

Просмотреть файл

@ -471,67 +471,67 @@ Here are the ones that don't share memory:
02/23/2017 05:17:41: Precomputing --> Completed.
02/23/2017 05:17:41: Starting Epoch 1: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 07:06:38: Starting Epoch 1: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/23/2017 05:17:41: Starting minibatch loop.
02/23/2017 05:17:42: Finished Epoch[ 1 of 10]: [Training] cr = 4.15554279 * 368; Err = 2.48529418 * 368; totalSamplesSeen = 368; learningRatePerSample = 0.0049999999; epochTime=0.465629s
02/23/2017 05:17:42: SGD: Saving checkpoint model '/tmp/cntk-test-20170223051714.228082/Speech_LSTM_CTC@release_cpu/models/simple.dnn.1'
02/18/2017 07:06:38: Starting minibatch loop.
02/18/2017 07:06:39: Finished Epoch[ 1 of 10]: [Training] cr = 4.16293501 * 368; Err = 2.52941181 * 368; totalSamplesSeen = 368; learningRatePerSample = 0.0049999999; epochTime=1.00518s
02/18/2017 07:06:39: SGD: Saving checkpoint model '/tmp/cntk-test-20170218070416.834755/Speech_LSTM_CTC@debug_gpu/models/simple.dnn.1'
02/23/2017 05:17:42: Starting Epoch 2: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 07:06:39: Starting Epoch 2: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/23/2017 05:17:42: Starting minibatch loop.
02/23/2017 05:17:42: Finished Epoch[ 2 of 10]: [Training] cr = 3.68123707 * 438; Err = 1.00000000 * 438; totalSamplesSeen = 806; learningRatePerSample = 0.0049999999; epochTime=0.604311s
02/23/2017 05:17:43: SGD: Saving checkpoint model '/tmp/cntk-test-20170223051714.228082/Speech_LSTM_CTC@release_cpu/models/simple.dnn.2'
02/18/2017 07:06:39: Starting minibatch loop.
02/18/2017 07:06:41: Finished Epoch[ 2 of 10]: [Training] cr = 3.68804012 * 438; Err = 1.00000000 * 438; totalSamplesSeen = 806; learningRatePerSample = 0.0049999999; epochTime=1.21725s
02/18/2017 07:06:41: SGD: Saving checkpoint model '/tmp/cntk-test-20170218070416.834755/Speech_LSTM_CTC@debug_gpu/models/simple.dnn.2'
02/23/2017 05:17:43: Starting Epoch 3: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 07:06:41: Starting Epoch 3: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/23/2017 05:17:43: Starting minibatch loop.
02/23/2017 05:17:43: Finished Epoch[ 3 of 10]: [Training] cr = 0.00000000 * 0; Err = 0.00000000 * 0; totalSamplesSeen = 806; learningRatePerSample = 0.0049999999; epochTime=0.000737422s
02/23/2017 05:17:43: SGD: Saving checkpoint model '/tmp/cntk-test-20170223051714.228082/Speech_LSTM_CTC@release_cpu/models/simple.dnn.3'
02/18/2017 07:06:41: Starting minibatch loop.
02/18/2017 07:06:41: Finished Epoch[ 3 of 10]: [Training] cr = 0.00000000 * 0; Err = 0.00000000 * 0; totalSamplesSeen = 806; learningRatePerSample = 0.0049999999; epochTime=0.00115141s
02/18/2017 07:06:41: SGD: Saving checkpoint model '/tmp/cntk-test-20170218070416.834755/Speech_LSTM_CTC@debug_gpu/models/simple.dnn.3'
02/23/2017 05:17:43: Starting Epoch 4: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 07:06:41: Starting Epoch 4: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/23/2017 05:17:43: Starting minibatch loop.
02/23/2017 05:17:43: Finished Epoch[ 4 of 10]: [Training] cr = 2.09130743 * 368; Err = 1.00000000 * 368; totalSamplesSeen = 1174; learningRatePerSample = 0.0049999999; epochTime=0.442559s
02/23/2017 05:17:43: SGD: Saving checkpoint model '/tmp/cntk-test-20170223051714.228082/Speech_LSTM_CTC@release_cpu/models/simple.dnn.4'
02/18/2017 07:06:41: Starting minibatch loop.
02/18/2017 07:06:42: Finished Epoch[ 4 of 10]: [Training] cr = 2.14839206 * 368; Err = 1.00000000 * 368; totalSamplesSeen = 1174; learningRatePerSample = 0.0049999999; epochTime=0.99298s
02/18/2017 07:06:42: SGD: Saving checkpoint model '/tmp/cntk-test-20170218070416.834755/Speech_LSTM_CTC@debug_gpu/models/simple.dnn.4'
02/23/2017 05:17:43: Starting Epoch 5: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 07:06:42: Starting Epoch 5: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/23/2017 05:17:43: Starting minibatch loop.
02/23/2017 05:17:44: Finished Epoch[ 5 of 10]: [Training] cr = 464.95003780 * 248; Err = 1.00000000 * 248; totalSamplesSeen = 1422; learningRatePerSample = 0.0049999999; epochTime=0.28969s
02/23/2017 05:17:44: SGD: Saving checkpoint model '/tmp/cntk-test-20170223051714.228082/Speech_LSTM_CTC@release_cpu/models/simple.dnn.5'
02/18/2017 07:06:42: Starting minibatch loop.
02/18/2017 07:06:43: Finished Epoch[ 5 of 10]: [Training] cr = 383.37273185 * 248; Err = 1.00000000 * 248; totalSamplesSeen = 1422; learningRatePerSample = 0.0049999999; epochTime=0.675824s
02/18/2017 07:06:43: SGD: Saving checkpoint model '/tmp/cntk-test-20170218070416.834755/Speech_LSTM_CTC@debug_gpu/models/simple.dnn.5'
02/23/2017 05:17:44: Starting Epoch 6: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 07:06:43: Starting Epoch 6: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/23/2017 05:17:44: Starting minibatch loop.
02/23/2017 05:17:44: Finished Epoch[ 6 of 10]: [Training] cr = 1.84468669 * 248; Err = 1.00000000 * 248; totalSamplesSeen = 1670; learningRatePerSample = 0.0049999999; epochTime=0.283613s
02/23/2017 05:17:44: SGD: Saving checkpoint model '/tmp/cntk-test-20170223051714.228082/Speech_LSTM_CTC@release_cpu/models/simple.dnn.6'
02/18/2017 07:06:43: Starting minibatch loop.
02/18/2017 07:06:43: Finished Epoch[ 6 of 10]: [Training] cr = 1.82054593 * 248; Err = 1.00000000 * 248; totalSamplesSeen = 1670; learningRatePerSample = 0.0049999999; epochTime=0.662799s
02/18/2017 07:06:44: SGD: Saving checkpoint model '/tmp/cntk-test-20170218070416.834755/Speech_LSTM_CTC@debug_gpu/models/simple.dnn.6'
02/23/2017 05:17:44: Starting Epoch 7: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 07:06:44: Starting Epoch 7: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/23/2017 05:17:44: Starting minibatch loop.
02/23/2017 05:17:44: Finished Epoch[ 7 of 10]: [Training] cr = 1.71730664 * 358; Err = 1.00000000 * 358; totalSamplesSeen = 2028; learningRatePerSample = 0.0049999999; epochTime=0.423181s
02/23/2017 05:17:45: SGD: Saving checkpoint model '/tmp/cntk-test-20170223051714.228082/Speech_LSTM_CTC@release_cpu/models/simple.dnn.7'
02/18/2017 07:06:44: Starting minibatch loop.
02/18/2017 07:06:45: Finished Epoch[ 7 of 10]: [Training] cr = 1.70413907 * 358; Err = 1.00000000 * 358; totalSamplesSeen = 2028; learningRatePerSample = 0.0049999999; epochTime=0.973855s
02/18/2017 07:06:45: SGD: Saving checkpoint model '/tmp/cntk-test-20170218070416.834755/Speech_LSTM_CTC@debug_gpu/models/simple.dnn.7'
02/23/2017 05:17:45: Starting Epoch 8: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 07:06:45: Starting Epoch 8: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/23/2017 05:17:45: Starting minibatch loop.
02/23/2017 05:17:45: Finished Epoch[ 8 of 10]: [Training] cr = 0.00000000 * 0; Err = 0.00000000 * 0; totalSamplesSeen = 2028; learningRatePerSample = 0.0049999999; epochTime=0.000582587s
02/23/2017 05:17:45: SGD: Saving checkpoint model '/tmp/cntk-test-20170223051714.228082/Speech_LSTM_CTC@release_cpu/models/simple.dnn.8'
02/18/2017 07:06:45: Starting minibatch loop.
02/18/2017 07:06:45: Finished Epoch[ 8 of 10]: [Training] cr = 0.00000000 * 0; Err = 0.00000000 * 0; totalSamplesSeen = 2028; learningRatePerSample = 0.0049999999; epochTime=0.00162166s
02/18/2017 07:06:45: SGD: Saving checkpoint model '/tmp/cntk-test-20170218070416.834755/Speech_LSTM_CTC@debug_gpu/models/simple.dnn.8'
02/23/2017 05:17:45: Starting Epoch 9: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 07:06:45: Starting Epoch 9: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/23/2017 05:17:45: Starting minibatch loop.
02/23/2017 05:17:45: Finished Epoch[ 9 of 10]: [Training] cr = 1.20227814 * 308; Err = 1.00000000 * 308; totalSamplesSeen = 2336; learningRatePerSample = 0.0049999999; epochTime=0.358506s
02/23/2017 05:17:45: SGD: Saving checkpoint model '/tmp/cntk-test-20170223051714.228082/Speech_LSTM_CTC@release_cpu/models/simple.dnn.9'
02/18/2017 07:06:45: Starting minibatch loop.
02/18/2017 07:06:46: Finished Epoch[ 9 of 10]: [Training] cr = 1.19612240 * 308; Err = 1.00000000 * 308; totalSamplesSeen = 2336; learningRatePerSample = 0.0049999999; epochTime=0.835254s
02/18/2017 07:06:46: SGD: Saving checkpoint model '/tmp/cntk-test-20170218070416.834755/Speech_LSTM_CTC@debug_gpu/models/simple.dnn.9'
02/23/2017 05:17:45: Starting Epoch 10: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 07:06:46: Starting Epoch 10: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/23/2017 05:17:45: Starting minibatch loop.
02/23/2017 05:17:46: Finished Epoch[10 of 10]: [Training] cr = 1.33477291 * 608; Err = 1.00000000 * 608; totalSamplesSeen = 2944; learningRatePerSample = 0.0049999999; epochTime=0.912103s
02/23/2017 05:17:46: SGD: Saving checkpoint model '/tmp/cntk-test-20170223051714.228082/Speech_LSTM_CTC@release_cpu/models/simple.dnn'
02/18/2017 07:06:46: Starting minibatch loop.
02/18/2017 07:06:48: Finished Epoch[10 of 10]: [Training] cr = 1.33511935 * 608; Err = 1.00000000 * 608; totalSamplesSeen = 2944; learningRatePerSample = 0.0049999999; epochTime=1.71111s
02/18/2017 07:06:48: SGD: Saving checkpoint model '/tmp/cntk-test-20170218070416.834755/Speech_LSTM_CTC@debug_gpu/models/simple.dnn'
02/23/2017 05:17:46: Action "train" complete.
02/18/2017 07:06:48: Action "train" complete.
02/23/2017 05:17:46: __COMPLETED__
02/18/2017 07:06:48: __COMPLETED__
=== Deleting last epoch data

Просмотреть файл

@ -342,13 +342,13 @@ Memory Sharing: Out of 200 matrices, 71 are shared as 27, and 129 are not shared
02/18/2017 07:06:38: Starting Epoch 1: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 07:06:38: Starting minibatch loop.
02/18/2017 07:06:39: Finished Epoch[ 1 of 10]: [Training] cr = 4.15554279 * 368; Err = 2.48529418 * 368; totalSamplesSeen = 368; learningRatePerSample = 0.0049999999; epochTime=1.00518s
02/18/2017 07:06:39: Finished Epoch[ 1 of 10]: [Training] cr = 4.16293501 * 368; Err = 2.52941181 * 368; totalSamplesSeen = 368; learningRatePerSample = 0.0049999999; epochTime=1.00518s
02/18/2017 07:06:39: SGD: Saving checkpoint model '/tmp/cntk-test-20170218070416.834755/Speech_LSTM_CTC@debug_gpu/models/simple.dnn.1'
02/18/2017 07:06:39: Starting Epoch 2: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 07:06:39: Starting minibatch loop.
02/18/2017 07:06:41: Finished Epoch[ 2 of 10]: [Training] cr = 3.68123763 * 438; Err = 1.00000000 * 438; totalSamplesSeen = 806; learningRatePerSample = 0.0049999999; epochTime=1.21725s
02/18/2017 07:06:41: Finished Epoch[ 2 of 10]: [Training] cr = 3.68804068 * 438; Err = 1.00000000 * 438; totalSamplesSeen = 806; learningRatePerSample = 0.0049999999; epochTime=1.21725s
02/18/2017 07:06:41: SGD: Saving checkpoint model '/tmp/cntk-test-20170218070416.834755/Speech_LSTM_CTC@debug_gpu/models/simple.dnn.2'
02/18/2017 07:06:41: Starting Epoch 3: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
@ -360,25 +360,25 @@ Memory Sharing: Out of 200 matrices, 71 are shared as 27, and 129 are not shared
02/18/2017 07:06:41: Starting Epoch 4: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 07:06:41: Starting minibatch loop.
02/18/2017 07:06:42: Finished Epoch[ 4 of 10]: [Training] cr = 2.09130760 * 368; Err = 1.00000000 * 368; totalSamplesSeen = 1174; learningRatePerSample = 0.0049999999; epochTime=0.99298s
02/18/2017 07:06:42: Finished Epoch[ 4 of 10]: [Training] cr = 2.14839604 * 368; Err = 1.00000000 * 368; totalSamplesSeen = 1174; learningRatePerSample = 0.0049999999; epochTime=0.99298s
02/18/2017 07:06:42: SGD: Saving checkpoint model '/tmp/cntk-test-20170218070416.834755/Speech_LSTM_CTC@debug_gpu/models/simple.dnn.4'
02/18/2017 07:06:42: Starting Epoch 5: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 07:06:42: Starting minibatch loop.
02/18/2017 07:06:43: Finished Epoch[ 5 of 10]: [Training] cr = 464.94988029 * 248; Err = 1.00000000 * 248; totalSamplesSeen = 1422; learningRatePerSample = 0.0049999999; epochTime=0.675824s
02/18/2017 07:06:43: Finished Epoch[ 5 of 10]: [Training] cr = 383.36677797 * 248; Err = 1.00000000 * 248; totalSamplesSeen = 1422; learningRatePerSample = 0.0049999999; epochTime=0.675824s
02/18/2017 07:06:43: SGD: Saving checkpoint model '/tmp/cntk-test-20170218070416.834755/Speech_LSTM_CTC@debug_gpu/models/simple.dnn.5'
02/18/2017 07:06:43: Starting Epoch 6: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 07:06:43: Starting minibatch loop.
02/18/2017 07:06:43: Finished Epoch[ 6 of 10]: [Training] cr = 1.84473739 * 248; Err = 1.00000000 * 248; totalSamplesSeen = 1670; learningRatePerSample = 0.0049999999; epochTime=0.662799s
02/18/2017 07:06:43: Finished Epoch[ 6 of 10]: [Training] cr = 1.82060106 * 248; Err = 1.00000000 * 248; totalSamplesSeen = 1670; learningRatePerSample = 0.0049999999; epochTime=0.662799s
02/18/2017 07:06:44: SGD: Saving checkpoint model '/tmp/cntk-test-20170218070416.834755/Speech_LSTM_CTC@debug_gpu/models/simple.dnn.6'
02/18/2017 07:06:44: Starting Epoch 7: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 07:06:44: Starting minibatch loop.
02/18/2017 07:06:45: Finished Epoch[ 7 of 10]: [Training] cr = 1.71734363 * 358; Err = 1.00000000 * 358; totalSamplesSeen = 2028; learningRatePerSample = 0.0049999999; epochTime=0.973855s
02/18/2017 07:06:45: Finished Epoch[ 7 of 10]: [Training] cr = 1.70418050 * 358; Err = 1.00000000 * 358; totalSamplesSeen = 2028; learningRatePerSample = 0.0049999999; epochTime=0.973855s
02/18/2017 07:06:45: SGD: Saving checkpoint model '/tmp/cntk-test-20170218070416.834755/Speech_LSTM_CTC@debug_gpu/models/simple.dnn.7'
02/18/2017 07:06:45: Starting Epoch 8: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
@ -390,13 +390,13 @@ Memory Sharing: Out of 200 matrices, 71 are shared as 27, and 129 are not shared
02/18/2017 07:06:45: Starting Epoch 9: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 07:06:45: Starting minibatch loop.
02/18/2017 07:06:46: Finished Epoch[ 9 of 10]: [Training] cr = 1.20229756 * 308; Err = 1.00000000 * 308; totalSamplesSeen = 2336; learningRatePerSample = 0.0049999999; epochTime=0.835254s
02/18/2017 07:06:46: Finished Epoch[ 9 of 10]: [Training] cr = 1.19614708 * 308; Err = 1.00000000 * 308; totalSamplesSeen = 2336; learningRatePerSample = 0.0049999999; epochTime=0.835254s
02/18/2017 07:06:46: SGD: Saving checkpoint model '/tmp/cntk-test-20170218070416.834755/Speech_LSTM_CTC@debug_gpu/models/simple.dnn.9'
02/18/2017 07:06:46: Starting Epoch 10: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 07:06:46: Starting minibatch loop.
02/18/2017 07:06:48: Finished Epoch[10 of 10]: [Training] cr = 1.33476970 * 608; Err = 1.00000000 * 608; totalSamplesSeen = 2944; learningRatePerSample = 0.0049999999; epochTime=1.71111s
02/18/2017 07:06:48: Finished Epoch[10 of 10]: [Training] cr = 1.33512417 * 608; Err = 1.00000000 * 608; totalSamplesSeen = 2944; learningRatePerSample = 0.0049999999; epochTime=1.71111s
02/18/2017 07:06:48: SGD: Saving checkpoint model '/tmp/cntk-test-20170218070416.834755/Speech_LSTM_CTC@debug_gpu/models/simple.dnn'
02/18/2017 07:06:48: Action "train" complete.

Просмотреть файл

@ -467,13 +467,13 @@ Here are the ones that don't share memory:
02/22/2017 21:20:37: Starting Epoch 1: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/22/2017 21:20:37: Starting minibatch loop.
02/22/2017 21:20:38: Finished Epoch[ 1 of 10]: [Training] cr = 4.15554279 * 368; Err = 2.48529418 * 368; totalSamplesSeen = 368; learningRatePerSample = 0.0049999999; epochTime=0.926568s
02/22/2017 21:20:38: Finished Epoch[ 1 of 10]: [Training] cr = 4.16293534 * 368; Err = 2.52941181 * 368; totalSamplesSeen = 368; learningRatePerSample = 0.0049999999; epochTime=0.926568s
02/22/2017 21:20:38: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20170222211959.232036\Speech_LSTM_CTC@release_cpu/models/simple.dnn.1'
02/22/2017 21:20:38: Starting Epoch 2: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/22/2017 21:20:38: Starting minibatch loop.
02/22/2017 21:20:40: Finished Epoch[ 2 of 10]: [Training] cr = 3.68123707 * 438; Err = 1.00000000 * 438; totalSamplesSeen = 806; learningRatePerSample = 0.0049999999; epochTime=1.07618s
02/22/2017 21:20:40: Finished Epoch[ 2 of 10]: [Training] cr = 3.68804096 * 438; Err = 1.00000000 * 438; totalSamplesSeen = 806; learningRatePerSample = 0.0049999999; epochTime=1.07618s
02/22/2017 21:20:40: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20170222211959.232036\Speech_LSTM_CTC@release_cpu/models/simple.dnn.2'
02/22/2017 21:20:40: Starting Epoch 3: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
@ -485,25 +485,25 @@ Here are the ones that don't share memory:
02/22/2017 21:20:40: Starting Epoch 4: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/22/2017 21:20:40: Starting minibatch loop.
02/22/2017 21:20:41: Finished Epoch[ 4 of 10]: [Training] cr = 2.09131158 * 368; Err = 1.00000000 * 368; totalSamplesSeen = 1174; learningRatePerSample = 0.0049999999; epochTime=0.749923s
02/22/2017 21:20:41: Finished Epoch[ 4 of 10]: [Training] cr = 2.14839720 * 368; Err = 1.00000000 * 368; totalSamplesSeen = 1174; learningRatePerSample = 0.0049999999; epochTime=0.749923s
02/22/2017 21:20:41: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20170222211959.232036\Speech_LSTM_CTC@release_cpu/models/simple.dnn.4'
02/22/2017 21:20:41: Starting Epoch 5: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/22/2017 21:20:41: Starting minibatch loop.
02/22/2017 21:20:41: Finished Epoch[ 5 of 10]: [Training] cr = 464.94304435 * 248; Err = 1.00000000 * 248; totalSamplesSeen = 1422; learningRatePerSample = 0.0049999999; epochTime=0.369816s
02/22/2017 21:20:41: Finished Epoch[ 5 of 10]: [Training] cr = 383.36570691 * 248; Err = 1.00000000 * 248; totalSamplesSeen = 1422; learningRatePerSample = 0.0049999999; epochTime=0.369816s
02/22/2017 21:20:43: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20170222211959.232036\Speech_LSTM_CTC@release_cpu/models/simple.dnn.5'
02/22/2017 21:20:43: Starting Epoch 6: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/22/2017 21:20:43: Starting minibatch loop.
02/22/2017 21:20:43: Finished Epoch[ 6 of 10]: [Training] cr = 1.84468571 * 248; Err = 1.00000000 * 248; totalSamplesSeen = 1670; learningRatePerSample = 0.0049999999; epochTime=0.626039s
02/22/2017 21:20:43: Finished Epoch[ 6 of 10]: [Training] cr = 1.82054569 * 248; Err = 1.00000000 * 248; totalSamplesSeen = 1670; learningRatePerSample = 0.0049999999; epochTime=0.626039s
02/22/2017 21:20:43: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20170222211959.232036\Speech_LSTM_CTC@release_cpu/models/simple.dnn.6'
02/22/2017 21:20:44: Starting Epoch 7: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/22/2017 21:20:44: Starting minibatch loop.
02/22/2017 21:20:44: Finished Epoch[ 7 of 10]: [Training] cr = 1.71730425 * 358; Err = 1.00000000 * 358; totalSamplesSeen = 2028; learningRatePerSample = 0.0049999999; epochTime=0.607594s
02/22/2017 21:20:44: Finished Epoch[ 7 of 10]: [Training] cr = 1.70413839 * 358; Err = 1.00000000 * 358; totalSamplesSeen = 2028; learningRatePerSample = 0.0049999999; epochTime=0.607594s
02/22/2017 21:20:44: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20170222211959.232036\Speech_LSTM_CTC@release_cpu/models/simple.dnn.7'
02/22/2017 21:20:44: Starting Epoch 8: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
@ -515,13 +515,13 @@ Here are the ones that don't share memory:
02/22/2017 21:20:45: Starting Epoch 9: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/22/2017 21:20:45: Starting minibatch loop.
02/22/2017 21:20:46: Finished Epoch[ 9 of 10]: [Training] cr = 1.20227734 * 308; Err = 1.00000000 * 308; totalSamplesSeen = 2336; learningRatePerSample = 0.0049999999; epochTime=0.754015s
02/22/2017 21:20:46: Finished Epoch[ 9 of 10]: [Training] cr = 1.19612221 * 308; Err = 1.00000000 * 308; totalSamplesSeen = 2336; learningRatePerSample = 0.0049999999; epochTime=0.754015s
02/22/2017 21:20:46: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20170222211959.232036\Speech_LSTM_CTC@release_cpu/models/simple.dnn.9'
02/22/2017 21:20:46: Starting Epoch 10: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/22/2017 21:20:46: Starting minibatch loop.
02/22/2017 21:20:47: Finished Epoch[10 of 10]: [Training] cr = 1.33477251 * 608; Err = 1.00000000 * 608; totalSamplesSeen = 2944; learningRatePerSample = 0.0049999999; epochTime=1.0302s
02/22/2017 21:20:47: Finished Epoch[10 of 10]: [Training] cr = 1.33511985 * 608; Err = 1.00000000 * 608; totalSamplesSeen = 2944; learningRatePerSample = 0.0049999999; epochTime=1.0302s
02/22/2017 21:20:47: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20170222211959.232036\Speech_LSTM_CTC@release_cpu/models/simple.dnn'
02/22/2017 21:20:48: Action "train" complete.

Просмотреть файл

@ -340,13 +340,13 @@ Memory Sharing: Out of 200 matrices, 71 are shared as 27, and 129 are not shared
02/18/2017 09:28:11: Starting Epoch 1: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 09:28:11: Starting minibatch loop.
02/18/2017 09:28:13: Finished Epoch[ 1 of 10]: [Training] cr = 4.15554279 * 368; Err = 2.48529418 * 368; totalSamplesSeen = 368; learningRatePerSample = 0.0049999999; epochTime=1.03913s
02/18/2017 09:28:13: Finished Epoch[ 1 of 10]: [Training] cr = 4.16293501 * 368; Err = 2.52941181 * 368; totalSamplesSeen = 368; learningRatePerSample = 0.0049999999; epochTime=1.03913s
02/18/2017 09:28:13: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20170218092651.300722\Speech_LSTM_CTC@release_gpu/models/simple.dnn.1'
02/18/2017 09:28:13: Starting Epoch 2: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 09:28:13: Starting minibatch loop.
02/18/2017 09:28:14: Finished Epoch[ 2 of 10]: [Training] cr = 3.68123735 * 438; Err = 1.00000000 * 438; totalSamplesSeen = 806; learningRatePerSample = 0.0049999999; epochTime=1.33313s
02/18/2017 09:28:14: Finished Epoch[ 2 of 10]: [Training] cr = 3.68804068 * 438; Err = 1.00000000 * 438; totalSamplesSeen = 806; learningRatePerSample = 0.0049999999; epochTime=1.33313s
02/18/2017 09:28:14: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20170218092651.300722\Speech_LSTM_CTC@release_gpu/models/simple.dnn.2'
02/18/2017 09:28:14: Starting Epoch 3: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
@ -358,25 +358,25 @@ Memory Sharing: Out of 200 matrices, 71 are shared as 27, and 129 are not shared
02/18/2017 09:28:14: Starting Epoch 4: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 09:28:14: Starting minibatch loop.
02/18/2017 09:28:15: Finished Epoch[ 4 of 10]: [Training] cr = 2.09130859 * 368; Err = 1.00000000 * 368; totalSamplesSeen = 1174; learningRatePerSample = 0.0049999999; epochTime=1.03006s
02/18/2017 09:28:15: Finished Epoch[ 4 of 10]: [Training] cr = 2.14839438 * 368; Err = 1.00000000 * 368; totalSamplesSeen = 1174; learningRatePerSample = 0.0049999999; epochTime=1.03006s
02/18/2017 09:28:16: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20170218092651.300722\Speech_LSTM_CTC@release_gpu/models/simple.dnn.4'
02/18/2017 09:28:16: Starting Epoch 5: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 09:28:16: Starting minibatch loop.
02/18/2017 09:28:16: Finished Epoch[ 5 of 10]: [Training] cr = 464.94795867 * 248; Err = 1.00000000 * 248; totalSamplesSeen = 1422; learningRatePerSample = 0.0049999999; epochTime=0.691575s
02/18/2017 09:28:16: Finished Epoch[ 5 of 10]: [Training] cr = 383.36923513 * 248; Err = 1.00000000 * 248; totalSamplesSeen = 1422; learningRatePerSample = 0.0049999999; epochTime=0.691575s
02/18/2017 09:28:16: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20170218092651.300722\Speech_LSTM_CTC@release_gpu/models/simple.dnn.5'
02/18/2017 09:28:17: Starting Epoch 6: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 09:28:17: Starting minibatch loop.
02/18/2017 09:28:17: Finished Epoch[ 6 of 10]: [Training] cr = 1.84468029 * 248; Err = 1.00000000 * 248; totalSamplesSeen = 1670; learningRatePerSample = 0.0049999999; epochTime=0.722621s
02/18/2017 09:28:17: Finished Epoch[ 6 of 10]: [Training] cr = 1.82059245 * 248; Err = 1.00000000 * 248; totalSamplesSeen = 1670; learningRatePerSample = 0.0049999999; epochTime=0.722621s
02/18/2017 09:28:17: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20170218092651.300722\Speech_LSTM_CTC@release_gpu/models/simple.dnn.6'
02/18/2017 09:28:18: Starting Epoch 7: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 09:28:18: Starting minibatch loop.
02/18/2017 09:28:19: Finished Epoch[ 7 of 10]: [Training] cr = 1.71730442 * 358; Err = 1.00000000 * 358; totalSamplesSeen = 2028; learningRatePerSample = 0.0049999999; epochTime=1.0906s
02/18/2017 09:28:19: Finished Epoch[ 7 of 10]: [Training] cr = 1.70416839 * 358; Err = 1.00000000 * 358; totalSamplesSeen = 2028; learningRatePerSample = 0.0049999999; epochTime=1.0906s
02/18/2017 09:28:19: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20170218092651.300722\Speech_LSTM_CTC@release_gpu/models/simple.dnn.7'
02/18/2017 09:28:19: Starting Epoch 8: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
@ -388,13 +388,13 @@ Memory Sharing: Out of 200 matrices, 71 are shared as 27, and 129 are not shared
02/18/2017 09:28:19: Starting Epoch 9: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 09:28:19: Starting minibatch loop.
02/18/2017 09:28:20: Finished Epoch[ 9 of 10]: [Training] cr = 1.20227705 * 308; Err = 1.00000000 * 308; totalSamplesSeen = 2336; learningRatePerSample = 0.0049999999; epochTime=0.945437s
02/18/2017 09:28:20: Finished Epoch[ 9 of 10]: [Training] cr = 1.19614064 * 308; Err = 1.00000000 * 308; totalSamplesSeen = 2336; learningRatePerSample = 0.0049999999; epochTime=0.945437s
02/18/2017 09:28:20: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20170218092651.300722\Speech_LSTM_CTC@release_gpu/models/simple.dnn.9'
02/18/2017 09:28:20: Starting Epoch 10: learning rate per sample = 0.005000 effective momentum = 0.900000 momentum as time constant = 189.8 samples
02/18/2017 09:28:20: Starting minibatch loop.
02/18/2017 09:28:22: Finished Epoch[10 of 10]: [Training] cr = 1.33477301 * 608; Err = 1.00000000 * 608; totalSamplesSeen = 2944; learningRatePerSample = 0.0049999999; epochTime=1.86033s
02/18/2017 09:28:22: Finished Epoch[10 of 10]: [Training] cr = 1.33511915 * 608; Err = 1.00000000 * 608; totalSamplesSeen = 2944; learningRatePerSample = 0.0049999999; epochTime=1.86033s
02/18/2017 09:28:22: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20170218092651.300722\Speech_LSTM_CTC@release_gpu/models/simple.dnn'
02/18/2017 09:28:22: Action "train" complete.

Просмотреть файл

@ -48,7 +48,7 @@ speechTrain = [
input = [
labels = [
alias = "l"
dim = 132
dim = 133
format = "sparse"
]
]
@ -98,7 +98,7 @@ speechTrain = [
// define basic I/O
baseFeatDim = 33
featDim = 11 * baseFeatDim
labelDim = 132
labelDim = 133
// hidden dimensions
cellDim = 1024

Просмотреть файл

@ -498,7 +498,7 @@ Test module "V2LibraryTests" has passed with:
Test case "TensorSuite/TensorPlusRightOperandWithoutAxes" has passed
Test suite "UserDefinedFunctionSuite" has passed with:
4 test cases out of 4 passed
5 test cases out of 5 passed
Test case "UserDefinedFunctionSuite/DuplicateVariablesInCPU" has passed
@ -508,6 +508,8 @@ Test module "V2LibraryTests" has passed with:
Test case "UserDefinedFunctionSuite/TimesAndPlusInGPU" has passed
Test case "UserDefinedFunctionSuite/UserTimesFunctionExample" has passed
Test suite "ValueSuite" has passed with:
30 test cases out of 30 passed
332 assertions out of 332 passed

Просмотреть файл

@ -46,22 +46,20 @@ fi
pushd $TestDataDir
# Note: Run the device selection tests first since later tests may interfere with
# device selection by freezing default device
if [ "$OS" == "Windows_NT" ]; then
TEST_BINARY=V2LibraryTests.exe
else
TEST_BINARY=v2librarytests
fi
# Note: Run the device selection suite first since later tests may interfere with
# device selection by freezing default device
$TEST_BIN_DIR/$TEST_BINARY --report_level=detailed --run_test=DeviceSelectionSuite
ExitCode1=$?
ExitCode=$?
$TEST_BIN_DIR/$TEST_BINARY --report_level=detailed --run_test=!DeviceSelectionSuite
ExitCode2=$?
(( ExitCode1 != 0)) && ExitCode=$ExitCode1
(( ExitCode2 != 0)) && ExitCode=$ExitCode2
(( ExitCode1 == ExitCode2)) && ExitCode=$ExitCode1
(( ExitCode2 > ExitCode )) && ExitCode=$ExitCode2
# Delete the test data
popd

Просмотреть файл

@ -60,7 +60,7 @@ BOOST_AUTO_TEST_CASE(CheckModelVersion)
// This is a watch guard to make sure that any change in the model version will be detected.
// If you change the CNTK model version, please do not silently adapt this test.
// Instead, please do notify the CNTK release team (AlexeyO, Wolfgang, Zhou, Mark) to prepare required steps for the next release.
BOOST_REQUIRE_MESSAGE(CURRENT_CNTK_MODEL_VERSION == 19, "The model version has been changed. Before making changes in this test, please first notify the CNTK release team to prepare required steps in the next release. Thanks!\n");
BOOST_REQUIRE_MESSAGE(CURRENT_CNTK_MODEL_VERSION == 20, "The model version has been changed. Before making changes in this test, please first notify the CNTK release team to prepare required steps in the next release. Thanks!\n");
}
BOOST_AUTO_TEST_CASE(EvalConstantPlusTest)

Просмотреть файл

@ -326,7 +326,9 @@ void CheckEnumValuesNotModified() {
static_cast<size_t>(PrimitiveOpType::EditDistanceError) == 61 &&
static_cast<size_t>(PrimitiveOpType::NoOp) == 62 &&
static_cast<size_t>(PrimitiveOpType::LabelsToGraph) == 63 &&
static_cast<size_t>(PrimitiveOpType::StopGradient) == 64,
static_cast<size_t>(PrimitiveOpType::StopGradient) == 64 &&
static_cast<size_t>(PrimitiveOpType::ELU) == 65 &&
static_cast<size_t>(PrimitiveOpType::ForwardBackward) == 66,
"PrimitiveOpType enum value was modified.");
}

Просмотреть файл

@ -6,6 +6,7 @@
#include "CNTKLibrary.h"
#include <functional>
#include "Common.h"
#include "UserMatrixMultiplicationOp.h"
using namespace CNTK;
// TODO: Need to further cleanup/simplify definition of user defined functions
@ -391,6 +392,11 @@ BOOST_AUTO_TEST_CASE(TimesAndPlusInGPU)
}
}
BOOST_AUTO_TEST_CASE(UserTimesFunctionExample)
{
UserTimesFunctionExample();
}
BOOST_AUTO_TEST_SUITE_END()
}}

Просмотреть файл

@ -61,7 +61,7 @@
<SDLCheck>true</SDLCheck>
<UseFullPaths>true</UseFullPaths>
<OpenMPSupport>true</OpenMPSupport>
<AdditionalIncludeDirectories>$(SolutionDir)Source\CNTKv2LibraryDll\API;$(SolutionDir)Source\CNTKv2LibraryDll\;$(BOOST_INCLUDE_PATH)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(SolutionDir)Examples\Extensibility\CPP;$(SolutionDir)Source\CNTKv2LibraryDll\API;$(SolutionDir)Source\CNTKv2LibraryDll\;$(BOOST_INCLUDE_PATH)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>

Просмотреть файл

@ -42,9 +42,6 @@
<ClCompile Include="DeviceSelectionTests.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="Common.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="MinibatchSourceTest.cpp">
<Filter>Source Files</Filter>
</ClCompile>
@ -63,6 +60,9 @@
<ClCompile Include="LoadLegacyModelTests.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\EndToEndTests\CNTKv2Library\Common\Common.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="stdafx.h">
@ -74,7 +74,7 @@
<ClInclude Include="Common.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="Image.h">
<ClInclude Include="..\..\EndToEndTests\CNTKv2Library\Common\Image.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>

Просмотреть файл

@ -33,12 +33,6 @@ If (-not $buildConfig) {Throw "buildConfig" + $usage}
If (-not $targetConfig) {Throw "targetConfig" + $usage}
If (-not $sharePath) {Throw "sharePath" + $usage}
# Set Verbose mode
If ($verbose)
{
$VerbosePreference = "continue"
}
Write-Verbose "Making binary drops..."
# If not a Release build quit
@ -84,10 +78,7 @@ Remove-Item $baseDropPath\cntk\*.lib -Exclude EvalDll.lib, CNTKLibrary-2.0.lib
Remove-Item $baseDropPath\cntk\*.exp
Remove-Item $baseDropPath\cntk\*.metagen
# Remove specific items
If (Test-Path $baseDropPath\cntk\CommandEval.exe)
{
Remove-Item $baseDropPath\cntk\CommandEval.exe
}
Remove-Item $baseDropPath\cntk\CommandEval.exe -Force -ErrorAction SilentlyContinue
Remove-Item $baseDropPath\cntk\Microsoft.VisualStudio.QualityTools.UnitTestFramework.*
# Make Include folder
@ -112,14 +103,8 @@ Copy-Item Tutorials -Recurse -Destination $baseDropPath\Tutorials
Write-Verbose "Copying Scripts ..."
Copy-Item Scripts -Recurse -Destination $baseDropPath\Scripts
# Remove some files if they exist
If (Test-Path $baseDropPath\Scripts\pytest.ini)
{
Remove-Item $baseDropPath\Scripts\pytest.ini
}
If (Test-Path $baseDropPath\Scripts\install\linux)
{
Remove-Item -Recurse $baseDropPath\Scripts\install\linux
}
Remove-Item $baseDropPath\Scripts\pytest.ini -Force -ErrorAction SilentlyContinue
Remove-Item -Recurse $baseDropPath\Scripts\install\linux -Force -ErrorAction SilentlyContinue
# Copy all items from the share
# For whatever reason Copy-Item in the line below does not work
@ -138,19 +123,24 @@ If ($LastExitCode -gt 7)
Write-Verbose "Making ZIP and cleaning up..."
# Make ZIP file
# Switched to use 7zip because of the backslash separator issue in .NET compressor
# (fixed in 4.6.1, which is not a standard component of build machines
# see https://msdn.microsoft.com/en-us/library/mt712573(v=vs.110).aspx?f=255&MSPPError=-2147217396 )
$workSpace = $PWD.Path
$source = Join-Path $PWD.Path -ChildPath $basePath
$destination = Join-Path $PWD.Path -ChildPath $outputPath
Add-Type -assembly "system.io.compression.filesystem"
[io.compression.zipfile]::CreateFromDirectory($source, $destination)
Set-Location -Path $source
7za a -bd $destination .
If ($LastExitCode -ne 0)
{
throw "7za returned exit code $LastExitCode"
}
Set-Location -Path $workSpace
# Log the file hash
Get-FileHash -Algorithm SHA256 -Path $destination, *.whl
# Remove ZIP sources
If (Test-Path $basePath)
{
Remove-Item $basePath -Recurse
}
Remove-Item -Recurse $basePath -Force -ErrorAction SilentlyContinue
# Return zero exit code code from here (N.B.: can be non-zero from robocopy above)
exit 0

Просмотреть файл

@ -170,7 +170,7 @@
"**Note** if we have less than 8 datapoints for a day we skip over the day assuming something is missing in the raw data. If we get more than 14 data points in a day we truncate the readings.\n",
"\n",
"## Training / Testing / Validation data preparation\n",
"The raw data is sorted by time and we should randomize it before splitting into training, validation and test datasets but this would make it impractical to visualize results in the tutorial. Hence, we split the dataset in the following manner: pick in sequence, 8 values for training, 1 for validation and 1 for test until there is no more data. This will spread training, validation and test datasets across the full timeline while preserving time order.\n"
"We start by reading the csv file for use with CNTK. The raw data is sorted by time and we should randomize it before splitting into training, validation and test datasets but this would make it impractical to visualize results in the tutorial. Hence, we split the dataset in the following manner: pick in sequence, 8 values for training, 1 for validation and 1 for test until there is no more data. This will spread training, validation and test datasets across the full timeline while preserving time order.\n"
]
},
{

Просмотреть файл

@ -63,6 +63,7 @@
%ignore_function CNTK::PlaceholderVariable;
%ignore_function CNTK::InputVariable;
%ignore_function CNTK::OutputVariable;
%ignore_function CNTK::Internal::AddProgressWriters;
%ignore_class CNTK::Variable::CompositeFunction;
%ignore_class CNTK::Variable::Trainer;
@ -237,6 +238,7 @@
%ignore_class CNTK::TrainingSession;
%ignore_function CNTK::CreateBasicTrainingSession;
%ignore_function CNTK::CreateTrainingSession;
%ignore_function CNTK::CreateDataParallelDistributedTrainer;
%ignore_function CNTK::CreateQuantizedDataParallelDistributedTrainer;

Просмотреть файл

@ -151,9 +151,25 @@ class Axis(cntk_py.Axis):
@typemap
def end_static_axis():
'''
Creates an Axis object representing the end (one past last) static axis.
DEPRECATED.
Creates an Axis object representing a new leading static axis.
Returns:
:class:`Axis`: axis object representing the end (one past last) static axis.
:class:`Axis`: axis object representing a new leading static axis.
'''
import warnings
warnings.warn('This will be removed in future versions. Please use '
'Axis.new_leading_axis() instead.', DeprecationWarning)
return cntk_py.Axis.end_static_axis()
@staticmethod
@typemap
def new_leading_axis():
'''
Creates an Axis object representing a new leading static axis.
Returns:
:class:`Axis`: axis object representing a new leading static axis.
'''
return cntk_py.Axis.end_static_axis()

Просмотреть файл

@ -16,6 +16,7 @@
%implicitconv CNTK::Variable;
%rename(_forward) CNTK::Function::Forward;
%rename(_add_progress_writers) CNTK::Internal::AddProgressWriters;
%rename(_backward) CNTK::Function::Backward;
%rename(_infer_outputs) CNTK::Function::InferOutputs;
%rename(_update) CNTK::Learner::Update;
@ -150,6 +151,7 @@
%ignore CNTK::Internal::IsRenamingFunctionsAllowed;
%ignore CNTK::Internal::IsAutomaticUnpackingOfPackedValuesDisabled;
%ignore CNTK::Internal::GetComputationNetworkTraceLevel;
%ignore CNTK::Internal::Convolution;
%ignore CNTK::Function::Function(const std::vector<Variable>& inputs, Dictionary&& functionConfig, const std::wstring& name = L"", const std::wstring& uid = Internal::GenerateUid(L"UserDefinedFunction"));
@ -1422,20 +1424,23 @@ std::unordered_map<CNTK::StreamInformation, std::pair<CNTK::NDArrayViewPtr, CNTK
//
%extend CNTK::NDArrayView {
NDArrayView(PyObject* pyobj, const CNTK::DeviceDescriptor& device, bool readOnly)
NDArrayView(PyObject* numpyArrayObject, const CNTK::DeviceDescriptor& device, bool readOnly, bool borrow)
{
if (!PyArray_Check((PyArrayObject*)pyobj))
if (!PyArray_Check((PyArrayObject*)numpyArrayObject))
{
// Note that in contrast to numpy.i's implementation we demand NumPy arrays
// and do not accept arbitrary sequences, which would needed to be copied around.
throw std::logic_error("NumPy array expected");
}
PyArrayObject* array = (PyArrayObject*)pyobj;
// Borrowing the memory is only allowed on CPU for now
borrow &= device == DeviceDescriptor::CPUDevice();
int rank = PyArray_NDIM(array);
npy_intp* np_shape = PyArray_SHAPE(array);
PyArrayObject* array = (PyArrayObject*)numpyArrayObject;
int rank = PyArray_NDIM(array);
npy_intp* np_shape = PyArray_SHAPE(array);
std::vector<size_t> shape(rank);
npy_intp num_elements = 1;
@ -1451,15 +1456,29 @@ std::unordered_map<CNTK::StreamInformation, std::pair<CNTK::NDArrayViewPtr, CNTK
NDArrayView* view;
if (typecode == NPY_FLOAT)
{
NDArrayView tmp(NDShape(shape), (float*)PyArray_DATA(array), num_elements, DeviceDescriptor::CPUDevice(), readOnly);
view = new NDArrayView(DataType::Float, tmp.Shape(), device);
view->CopyFrom(tmp);
if (borrow)
{
view = new NDArrayView(NDShape(shape), (float*)PyArray_DATA(array), num_elements, DeviceDescriptor::CPUDevice(), readOnly);
}
else
{
NDArrayView tmp(NDShape(shape), (float*)PyArray_DATA(array), num_elements, DeviceDescriptor::CPUDevice(), readOnly);
view = new NDArrayView(DataType::Float, tmp.Shape(), device);
view->CopyFrom(tmp);
}
}
else if (typecode == NPY_DOUBLE)
{
NDArrayView tmp(NDShape(shape), (double*)PyArray_DATA(array), num_elements, DeviceDescriptor::CPUDevice(), readOnly);
view = new NDArrayView(DataType::Double, tmp.Shape(), device);
view->CopyFrom(tmp);
if (borrow)
{
view = new NDArrayView(NDShape(shape), (double*)PyArray_DATA(array), num_elements, DeviceDescriptor::CPUDevice(), readOnly);
}
else
{
NDArrayView tmp(NDShape(shape), (double*)PyArray_DATA(array), num_elements, DeviceDescriptor::CPUDevice(), readOnly);
view = new NDArrayView(DataType::Double, tmp.Shape(), device);
view->CopyFrom(tmp);
}
}
else
{
@ -1469,7 +1488,8 @@ std::unordered_map<CNTK::StreamInformation, std::pair<CNTK::NDArrayViewPtr, CNTK
return view;
}
NDArrayView(const CNTK::NDShape& shape, PyObject* pyData, PyObject* pyColStarts, PyObject* pyRowIndices, const CNTK::DeviceDescriptor& device, bool readOnly)
NDArrayView(const CNTK::NDShape& shape, PyObject* pyData, PyObject* pyColStarts, PyObject* pyRowIndices, const CNTK::DeviceDescriptor& device, bool readOnly, bool borrow)
{
//
// pyData, pyColStarts, and pyRowIndices are fed by
@ -1491,33 +1511,58 @@ std::unordered_map<CNTK::StreamInformation, std::pair<CNTK::NDArrayViewPtr, CNTK
throw std::logic_error("index pointers must be a NumPy array");
}
// Borrowing the memory is only allowed on CPU for now
borrow &= device == DeviceDescriptor::CPUDevice();
PyArrayObject* data = (PyArrayObject*)pyData;
PyArrayObject* indices = (PyArrayObject*)pyColStarts;
PyArrayObject* indptr = (PyArrayObject*)pyRowIndices;
int typecode = PyArray_TYPE(data);
size_t numNonZeroValues = PyArray_SIZE(data);
NDArrayView* view;
if (typecode == NPY_FLOAT)
{
NDArrayView tmp(shape,
(CNTK::SparseIndexType*)PyArray_DATA(indices),
(CNTK::SparseIndexType*)PyArray_DATA(indptr),
(float*)PyArray_DATA(data), numNonZeroValues,
DeviceDescriptor::CPUDevice(), readOnly);
view = new NDArrayView(DataType::Float, StorageFormat::SparseCSC, tmp.Shape(), device);
view->CopyFrom(tmp);
if (borrow)
{
view = new NDArrayView(shape,
(CNTK::SparseIndexType*)PyArray_DATA(indices),
(CNTK::SparseIndexType*)PyArray_DATA(indptr),
(float*)PyArray_DATA(data), numNonZeroValues,
DeviceDescriptor::CPUDevice(), readOnly);
}
else
{
NDArrayView tmp(shape,
(CNTK::SparseIndexType*)PyArray_DATA(indices),
(CNTK::SparseIndexType*)PyArray_DATA(indptr),
(float*)PyArray_DATA(data), numNonZeroValues,
DeviceDescriptor::CPUDevice(), readOnly);
view = new NDArrayView(DataType::Float, StorageFormat::SparseCSC, tmp.Shape(), device);
view->CopyFrom(tmp);
}
}
else if (typecode == NPY_DOUBLE)
{
NDArrayView tmp(shape,
(CNTK::SparseIndexType*)PyArray_DATA(indices),
(CNTK::SparseIndexType*)PyArray_DATA(indptr),
(double*)PyArray_DATA(data), numNonZeroValues,
DeviceDescriptor::CPUDevice(), readOnly);
view = new NDArrayView(DataType::Double, StorageFormat::SparseCSC, tmp.Shape(), device);
view->CopyFrom(tmp);
if (borrow)
{
view = new NDArrayView(shape,
(CNTK::SparseIndexType*)PyArray_DATA(indices),
(CNTK::SparseIndexType*)PyArray_DATA(indptr),
(double*)PyArray_DATA(data), numNonZeroValues,
DeviceDescriptor::CPUDevice(), readOnly);
}
else
{
NDArrayView tmp(shape,
(CNTK::SparseIndexType*)PyArray_DATA(indices),
(CNTK::SparseIndexType*)PyArray_DATA(indptr),
(double*)PyArray_DATA(data), numNonZeroValues,
DeviceDescriptor::CPUDevice(), readOnly);
view = new NDArrayView(DataType::Double, StorageFormat::SparseCSC, tmp.Shape(), device);
view->CopyFrom(tmp);
}
}
else
{
@ -1607,6 +1652,7 @@ namespace CNTK {
Py_END_ALLOW_THREADS;
}
//
// Setting up hash calculation so that __hash__ on Swig objects
// are redirected to the std::hash computation of the C++ API
@ -1619,12 +1665,32 @@ namespace CNTK {
}
%enddef
//
// Setting __str__ and __repr__ methods for frequently used Swig objects
//
%define %py_repr_for(TYPE)
%extend CNTK::TYPE {
const std::wstring __str__() {
return self->AsString();
}
const std::wstring __repr__() {
return self->AsString();
}
}
%enddef
%define %py_eq_for(DATA_TYPE, EQ)
%pythoncode %{
DATA_TYPE.__eq__ = lambda a,b: (a is not None and b is not None and EQ(a,b)) or (a is None and b is None)
%}
%enddef
%py_repr_for(Variable)
%py_repr_for(Parameter)
%py_repr_for(Constant)
%py_repr_for(Function)
%py_eq_for(Variable, Variable_eq)
%py_hash_for(Variable)

Просмотреть файл

@ -8,26 +8,29 @@ import numpy as np
from scipy import sparse
from . import cntk_py
from .device import use_default_device, cpu
from .device import use_default_device, cpu, DeviceKind
from .utils.swig_helper import typemap
def _is_c_contiguous(data):
while isinstance(data, list):
data = data[0]
return data.flags.c_contiguous
class NDArrayView(cntk_py.NDArrayView):
'''
Creates an empty dense internal data representation of a :class:`~cntk.core.Value` object.
Creates an empty dense internal data representation of a
:class:`~cntk.core.Value` object.
To create an NDArrayView from a NumPy array, use :meth:`from_dense`.
To create an NDArrayView from a sparse array, use :meth:`from_csr`.
Args:
shape (tuple): shape of the data
data_type (np.float32, np.float64): data type of the data
device (:class:`~cntk.device.DeviceDescriptor`): device this value should be put
on
device (:class:`~cntk.device.DeviceDescriptor`): device this value
should be put on
'''
def __init__(self, shape, data_type, device=None):
@ -37,39 +40,44 @@ class NDArrayView(cntk_py.NDArrayView):
if device is None:
device = use_default_device()
super(NDArrayView, self).__init__(data_type, cntk_py.StorageFormat_Dense, shape,
device)
device)
@staticmethod
@typemap
def from_dense(np_array, device=None, read_only=False):
def from_dense(np_array, device=None, read_only=False, borrow=False):
'''
Create a :class:`NDArrayView` instance from a NumPy array.
Args:
np_array (numpy.ndarray): NumPy array
device (:class:`~cntk.device.DeviceDescriptor`): device this value should be put
on
read_only (bool): whether the data can be modified or not
device (:class:`~cntk.device.DeviceDescriptor`): device this value
should be put on
borrow (bool, default False): whether nd_arrary memory can be
borrowed internally to speed up the data creation
read_only (bool, optional): whether the data can be modified or
not (default False)
Returns:
:class:`NDArrayView` instance
'''
if not isinstance(np_array, np.ndarray):
raise TypeError('data must be of type numpy.ndarray'
' and not %s'%type(np_array))
' and not %s' % type(np_array))
if not _is_c_contiguous(np_array):
warnings.warn('data is not C contiguous; rearrange your data/computation to avoid costly data conversions', RuntimeWarning)
warnings.warn('data is not C contiguous; rearrange your '
'data/computation to avoid costly data conversions',
RuntimeWarning)
np_array = np.ascontiguousarray(np_array)
if device is None:
device = use_default_device()
return cntk_py.NDArrayView(np_array, device, read_only)
return cntk_py.NDArrayView(np_array, device, read_only, borrow)
@staticmethod
@typemap
def from_csr(csr_array, device=None, read_only=False):
def from_csr(csr_array, device=None, read_only=False, borrow=False):
'''
Create a :class:`NDArrayView` instance from a SciPy sparse array in CSR
format.
@ -77,35 +85,42 @@ class NDArrayView(cntk_py.NDArrayView):
Args:
csr_array (scipy.sparse.csr.csr_matrix): SciPy sparse matrix in CSR
format
device (:class:`~cntk.device.DeviceDescriptor`): device this value should be put
on
read_only (bool): whether the data can be modified or not
device (:class:`~cntk.device.DeviceDescriptor`): device this value
should be put on
read_only (bool, optional): whether the data can be modified or
not (default False)
borrow (bool, default False): whether nd_arrary memory can be
borrowed internally to speed up the data creation
Returns:
:class:`NDArrayView` instance
'''
if not sparse.isspmatrix_csr(csr_array):
raise TypeError("only CSR is supported as of now. Please "
"convert your data using 'tocsr()'")
"convert your data using 'tocsr()'")
if device is None:
device = use_default_device()
return cntk_py.NDArrayView(csr_array.shape, csr_array.data,
csr_array.indptr, csr_array.indices, device, read_only)
csr_array.indptr, csr_array.indices, device,
read_only, borrow)
@staticmethod
@typemap
def from_data(data, device=None, read_only=False):
def from_data(data, device=None, read_only=False, borrow=False):
'''
Create a :class:`NDArrayView` instance from a NumPy or SciPy sparse array in CSR
format.
Create a :class:`NDArrayView` instance from a NumPy or SciPy sparse
array in CSR format.
Args:
data (numpy.ndarray or scipy.sparse.csr.csr_matrix): data
device (:class:`~cntk.device.DeviceDescriptor`): device this value should be put
on
read_only (bool): whether the data can be modified or not
device (:class:`~cntk.device.DeviceDescriptor`): device this value
should be put on
read_only (bool, optional): whether the data can be modified or
not (default False)
borrow (bool, default False): whether nd_array memory can be
borrowed internally to speed up the data creation
Returns:
:class:`NDArrayView` instance
@ -117,17 +132,17 @@ class NDArrayView(cntk_py.NDArrayView):
data = np.asarray(data)
if isinstance(data, np.ndarray):
ndav = NDArrayView.from_dense(data, device)
ndav = NDArrayView.from_dense(data, device, borrow=borrow)
elif sparse.issparse(data):
ndav = NDArrayView.from_csr(data, device)
ndav = NDArrayView.from_csr(data, device, borrow=borrow)
else:
raise TypeError('data type "%s" is not supported. Please '
'provide the data as a Python list of NumPy arrays '
'or Scipy CSR matrices.'%type(data))
'provide the data as a Python list of NumPy '
'arrays or Scipy CSR matrices.' % type(data))
return ndav
class Value(cntk_py.Value):
'''
Internal representation of minibatch data.
@ -147,9 +162,10 @@ class Value(cntk_py.Value):
Booleans that tell whether a sequence is a new sequence (`True`) or a
continuation of the sequence in the same slot of the previous
minibatch (`False`)
device (:class:`~cntk.device.DeviceDescriptor`): device this value should be put
on
device (:class:`~cntk.device.DeviceDescriptor`): device this value
should be put on
'''
def __init__(self, shape=None, dtype=None, batch=None, seq_starts=None, device=None):
if device is None:
device = use_default_device()
@ -178,12 +194,13 @@ class Value(cntk_py.Value):
sample = np.asarray(sample, dtype=var.dtype)
except ValueError:
s = sample
while isinstance(s, list) and len(s)>0:
while isinstance(s, list) and len(s) > 0:
s = s[0]
if sparse.issparse(s):
raise ValueError('if you provide sparse data, every '
'sequence has to be encoded as one '
'csr_matrix instance. Your sequence was: \'%s\''%str(sample))
'sequence has to be encoded as one '
'csr_matrix instance. Your sequence '
'was: \'%s\'' % str(sample))
else:
raise
@ -203,10 +220,10 @@ class Value(cntk_py.Value):
'supported, you gave %s' % sample.dtype)
if convert_to_var_dtype:
warnings.warn('your data is of type "%s", but your input'
'expects "%s". Please convert your data '
'beforehand to speed up training.' %
(sample.dtype, str(var.dtype)))
warnings.warn('your data is of type "%s", but your input '
'variable (uid "%s") expects "%s". Please convert '
'your data beforehand to speed up training.' %
(sample.dtype, var.uid, str(var.dtype)))
sample = sample.astype(var.dtype)
return sample
@ -238,24 +255,32 @@ class Value(cntk_py.Value):
:class:`~cntk.core.Value` object.
'''
if not isinstance(var, cntk_py.Variable):
raise TypeError('Variable expected, but got "%s"'%type(var))
cpu_dev = cpu()
raise TypeError('Variable expected, but got "%s"' % type(var))
if not var.dynamic_axes:
# No dynamic axes -> no batch
# No dynamic axes -> we can pass everything in one go
data = Value._as_best_data_type(var, data)
ndav = NDArrayView.from_data(data, device)
# Since the core API's Value does not copy single NDArrayViews,
# we cannot borrow the memory here.
ndav = NDArrayView.from_data(data, device=cpu(), borrow=False)
return cntk_py.Value(ndav)
elif len(var.dynamic_axes) <= 1 and isinstance(data, list):
warnings.warn('you provided the minibatch data as a list, but '
'your corresponding input variable (uid "%s") has '
'only one dynamic axis (batch axis). To speed up '
'graph execution, please convert the data '
'beforehand into one NumPy array.' % var.uid)
if isinstance(data, np.ndarray):
# The outermost axis has to be a Python list. If the user passes a
# full minibatch as one NumPy array, we have to convert it.
if data.dtype == object:
raise ValueError('dtype object is not supported. If this is a batch '
'of sequences, you need to pass them as a pure-Python list '
'of NumPy arrays')
raise ValueError('dtype object is not supported. If this is a '
'batch of sequences, you need to pass them as a '
'pure-Python list of NumPy arrays')
if seq_starts:
data = list(np.atleast_1d(data))
@ -267,25 +292,30 @@ class Value(cntk_py.Value):
if not isinstance(data, list):
raise ValueError('batch has to be a list of NumPy arrays or '
'SciPy CSR matrices')
list_of_ndavs = []
'SciPy CSR matrices')
# NDArrayViews are all created on CPU. The Value object later then will
# move it to the requested device.
for sample in data:
sample = Value._as_best_data_type(var, sample)
ndav = NDArrayView.from_data(sample, cpu_dev)
list_of_ndavs.append(ndav)
# As Value will later create copies anyways, we do not create copies in
# NDArrayView itself. Because of that, we need to keep around the
# instances _as_best_data_type() until we have passed them to
# Value_create() where it will be copied further.
data = [Value._as_best_data_type(var, sample) for sample in data]
borrow = device.type() == DeviceKind.CPU
list_of_ndavs = [NDArrayView.from_data(sample, device=cpu(),
borrow=borrow)
for sample in data]
from .utils import sanitize_shape
return cntk_py.Value_create(
sanitize_shape(var.shape), list_of_ndavs,
seq_starts or [],
device or use_default_device(),
read_only)
value = cntk_py.Value_create(
sanitize_shape(var.shape),
list_of_ndavs,
seq_starts or [],
device or use_default_device(),
read_only,
True) # always create a copy in Value
return value
@property
def shape(self):
@ -315,13 +345,13 @@ class Value(cntk_py.Value):
'''
return np.asarray(super(Value, self).mask())
def __len__(self):
'''
Number of samples in this value object.
'''
return self.shape[0]
def user_function(user_func):
'''
Wraps the passed Function to create a composite representing the

Просмотреть файл

@ -4,8 +4,20 @@
# for full license information.
# ==============================================================================
from enum import Enum, unique
from . import cntk_py
@unique
class DeviceKind(Enum):
'''
Describes different device kinds like CPU or GPU.
'''
CPU = cntk_py.DeviceKind_CPU
GPU = cntk_py.DeviceKind_GPU
class DeviceDescriptor(cntk_py.DeviceDescriptor):
'''
Describes a device by a unique id and its type. If the device corresponds to a GPU its type is 1,
@ -30,6 +42,7 @@ class DeviceDescriptor(cntk_py.DeviceDescriptor):
'''
return super(DeviceDescriptor, self).type()
def all_devices():
'''
Returns a device descriptor list with all the available devices
@ -39,6 +52,7 @@ def all_devices():
'''
return cntk_py.DeviceDescriptor.all_devices()
def best():
'''
Returns a device descriptor with the best configuration.
@ -48,6 +62,7 @@ def best():
'''
return cntk_py.DeviceDescriptor.best_device()
def cpu():
'''
Returns CPU device descriptor
@ -57,6 +72,7 @@ def cpu():
'''
return cntk_py.DeviceDescriptor.cpu_device()
def default():
'''
Returns default device
@ -66,6 +82,7 @@ def default():
'''
return cntk_py.DeviceDescriptor.default_device()
def gpu(device_id):
'''
Returns GPU device
@ -75,6 +92,7 @@ def gpu(device_id):
'''
return cntk_py.DeviceDescriptor.gpu_device(device_id)
def use_default_device():
'''
Use default device
@ -84,6 +102,7 @@ def use_default_device():
'''
return cntk_py.DeviceDescriptor.use_default_device()
def set_default_device(new_default_device):
'''
Set new device descriptor as default

Просмотреть файл

@ -234,6 +234,26 @@ class MinibatchSource(cntk_py.MinibatchSource):
'''
return super(MinibatchSource, self).is_distributed()
@property
def current_position(self):
'''
Gets current position in the minibatch source.
Returns:
Minibatch position :class:`~cntk.cntk_py.Dictionary` on the global timeline.
'''
return self.get_checkpoint_state()
@current_position.setter
def current_position(self, position):
'''
Sets current position in the minibatch source.
Args:
position (:class:`~cntk.cntk_py.Dictionary`): position returned from :func:`~get_current_position`.
'''
self.restore_from_checkpoint(position)
def _py_dict_to_cntk_dict(py_dict):
'''
Converts a Python dictionary into a CNTK Dictionary whose values are CNTK DictionaryValue instances.
@ -455,25 +475,25 @@ def StreamDef(field=None, shape=None, is_sparse=False, transforms=None, context=
Args:
field (str): this is the name of the stream:
* for CTFDeserializer the name is inside the CTF file
* for ImageDeserializer the acceptable names are `image` or `label`
* for HTKFeatureDeserializer and HTKMLFDeserializer only the default
* for HTKFeatureDeserializer and HTKMLFDeserializer only the default
value of None is acceptable
shape (int, tuple): dimensions of this stream. HTKFeatureDeserializer,
shape (int, tuple): dimensions of this stream. HTKFeatureDeserializer,
HTKMLFDeserializer, and CTFDeserializer read data
as flat arrays. If you need different shapes you can
:func:`~cntk.ops.reshape` it later.
is_sparse (bool): whether the provided data is sparse.
`False` by default, unless mlf is provided.
transforms (list): list of transforms to be applied by the Deserializer.
transforms (list): list of transforms to be applied by the Deserializer.
Currently only ImageDeserializer supports transforms.
context (tuple): left and right context to consider when reading in HTK
context (tuple): left and right context to consider when reading in HTK
data. Only supported by HTKFeatureDeserializer.
scp (str, list): scp files for HTK data
mlf (str, list): mlf files for HTK data
broadcast (bool): whether the features in this stream should be
broadcast (bool): whether the features in this stream should be
broadcast to the whole sequence (useful in e.g. ivectors with HTK)
'''
config = dict(stream_alias=field, is_sparse=is_sparse)
@ -592,5 +612,3 @@ def sequence_to_cntk_text_format(seq_idx, alias_tensor_map):
lines.append('%i\t|' % seq_idx + ' |'.join(line))
return '\n'.join(lines)

Просмотреть файл

@ -12,7 +12,7 @@ import numpy as np
from ..ops.functions import Function
from ..ops.variables import Variable
from ..ops import parameter, input_variable, placeholder_variable, combine
from ..ops import times, element_times, convolution, pooling, unpooling, batch_normalization, dropout, splice, reshape, sequence, softmax, tanh, reduce_sum, reduce_mean, sqrt
from ..ops import times, element_times, convolution, convolution_transpose, pooling, unpooling, batch_normalization, dropout, splice, reshape, sequence, softmax, tanh, reduce_sum, reduce_mean, sqrt
from ..utils import Record, _as_tuple
from .blocks import *
from .blocks import _initializer_for, _get_initial_state_or_default, _INFERRED # helpers
@ -443,7 +443,6 @@ def Convolution1D(rf_shape, # shape of receptive field, e.g. (3)
init=default_override_or(glorot_uniform()),
pad=default_override_or(False),
strides=1,
sharing=True, # (must be True currently)
bias=default_override_or(True),
init_bias=default_override_or(0),
reduction_rank=1, # (0 means input has no depth dimension, e.g. audio signal or B&W image)
@ -460,7 +459,7 @@ def Convolution1D(rf_shape, # shape of receptive field, e.g. (3)
init_bias = get_default_override(Convolution1D, init_bias=init_bias)
if len(_as_tuple(rf_shape)) != 1:
raise ValueError('Convolution1D: rf_shape must be a scalar')
return Convolution(rf_shape, num_filters=num_filters, activation=activation, init=init, pad=pad, strides=strides, sharing=sharing, bias=bias, init_bias=init_bias, reduction_rank=reduction_rank, op_name='Convolution1D', name=name)
return Convolution(rf_shape, num_filters=num_filters, activation=activation, init=init, pad=pad, strides=strides, sharing=True, bias=bias, init_bias=init_bias, reduction_rank=reduction_rank, op_name='Convolution1D', name=name)
def Convolution2D(rf_shape, # shape of receptive field, e.g. (3,3). Must be a 2-element tuple.
@ -469,7 +468,6 @@ def Convolution2D(rf_shape, # shape of receptive field, e.g. (3,3). Must
init=default_override_or(glorot_uniform()),
pad=default_override_or(False),
strides=1,
sharing=True, # (must be True currently)
bias=default_override_or(True),
init_bias=default_override_or(0),
reduction_rank=1, # (0 means input has no depth dimension, e.g. audio signal or B&W image)
@ -486,7 +484,7 @@ def Convolution2D(rf_shape, # shape of receptive field, e.g. (3,3). Must
init_bias = get_default_override(Convolution2D, init_bias=init_bias)
if len(rf_shape) != 2:
raise ValueError('Convolution2D: rf_shape must be a 2D tuple, e.g. (3,3)')
return Convolution(rf_shape, num_filters=num_filters, activation=activation, init=init, pad=pad, strides=strides, sharing=sharing, bias=bias, init_bias=init_bias, reduction_rank=reduction_rank, op_name='Convolution2D', name=name)
return Convolution(rf_shape, num_filters=num_filters, activation=activation, init=init, pad=pad, strides=strides, sharing=True, bias=bias, init_bias=init_bias, reduction_rank=reduction_rank, op_name='Convolution2D', name=name)
def Convolution3D(rf_shape, # shape of receptive field, e.g. (3,3,3). Must be a 3-element tuple.
@ -495,7 +493,6 @@ def Convolution3D(rf_shape, # shape of receptive field, e.g. (3,3,3). Mu
init=default_override_or(glorot_uniform()),
pad=default_override_or(False),
strides=1,
sharing=True, # (must be True currently)
bias=default_override_or(True),
init_bias=default_override_or(0),
reduction_rank=1, # (0 means input has no depth dimension, e.g. audio signal or B&W image)
@ -512,42 +509,41 @@ def Convolution3D(rf_shape, # shape of receptive field, e.g. (3,3,3). Mu
init_bias = get_default_override(Convolution3D, init_bias=init_bias)
if len(rf_shape) != 3:
raise ValueError('Convolution3D: rf_shape must be a 3D tuple, e.g. (3,3,3)')
return Convolution(rf_shape, num_filters=num_filters, activation=activation, init=init, pad=pad, strides=strides, sharing=sharing, bias=bias, init_bias=init_bias, reduction_rank=reduction_rank, op_name='Convolution3D', name=name)
return Convolution(rf_shape, num_filters=num_filters, activation=activation, init=init, pad=pad, strides=strides, sharing=True, bias=bias, init_bias=init_bias, reduction_rank=reduction_rank, op_name='Convolution3D', name=name)
# Deconvolution -- create a deconvolution layer with optional non-linearity
# ConvolutionTranspose -- create a deconvolution layer with optional non-linearity
# TODO: need to merge with above. Can it simply be transpose=True?
def Deconvolution(rf_shape, # shape of receptive field, e.g. (3,3)
num_filters,
num_input_filters,
activation=default_override_or(identity),
init=default_override_or(glorot_uniform()),
pad=default_override_or(False),
strides=1,
sharing=True, # (must be True currently)
lower_pad=(0,),
upper_pad=(0,),
bias=default_override_or(True),
init_bias=default_override_or(0),
reduction_rank=1, # (must be 1 currently)
max_temp_mem_size_in_samples=0,
name=''):
def ConvolutionTranspose(rf_shape, # shape of receptive field, e.g. (3,3)
num_filters,
num_input_filters,
activation=default_override_or(identity),
init=default_override_or(glorot_uniform()),
pad=default_override_or(False),
strides=1,
sharing=True, # (must be True currently)
bias=default_override_or(True),
init_bias=default_override_or(0),
output_shape=(0,),
reduction_rank=1, # (must be 1 currently)
max_temp_mem_size_in_samples=0,
name=''):
'''
Layer factory function to create a deconvolution layer.
'''
#UntestedBranchError("Deconvolution not tested after merge to new Layers lib") # it's actually tested by a end-to-end test
#UntestedBranchError("ConvolutionTranspose not tested after merge to new Layers lib") # it's actually tested by a end-to-end test
activation = get_default_override(Deconvolution, activation=activation)
init = get_default_override(Deconvolution, init=init)
pad = get_default_override(Deconvolution, pad=pad)
bias = get_default_override(Deconvolution, bias=bias)
init_bias = get_default_override(Deconvolution, init_bias=init_bias)
activation = get_default_override(ConvolutionTranspose, activation=activation)
init = get_default_override(ConvolutionTranspose, init=init)
pad = get_default_override(ConvolutionTranspose, pad=pad)
bias = get_default_override(ConvolutionTranspose, bias=bias)
init_bias = get_default_override(ConvolutionTranspose, init_bias=init_bias)
if reduction_rank != 1:
NotImplementedError("Deconvolution: reduction_rank other than 1 currently not supported")
NotImplementedError("ConvolutionTranspose: reduction_rank other than 1 currently not supported")
if not sharing:
NotImplementedError("Deconvolution: sharing option currently must be True")
NotImplementedError("ConvolutionTranspose: sharing option currently must be True")
output_channels_shape = _as_tuple(num_filters)
input_channels_shape = _as_tuple(num_input_filters)
kernel_shape = output_channels_shape + rf_shape
@ -559,22 +555,65 @@ def Deconvolution(rf_shape, # shape of receptive field, e.g. (3,3)
b = Parameter(output_channels_shape + (1,) * len(rf_shape), init=init_bias, name='b') if bias else None
# expression
@BlockFunction('Deconvolution', name)
def deconvolve(x):
r = convolution (W, x,
strides=_as_tuple(strides),
sharing=_as_tuple(sharing),
auto_padding=_as_tuple(pad),
lower_pad=lower_pad,
upper_pad=upper_pad,
transpose=True,
max_temp_mem_size_in_samples=max_temp_mem_size_in_samples)
@BlockFunction('ConvolutionTranspose', name)
def convolve_transposed(x):
r = convolution_transpose (W, x,
strides=_as_tuple(strides),
sharing=_as_tuple(sharing),
auto_padding=_as_tuple(pad),
output_shape=output_shape,
max_temp_mem_size_in_samples=max_temp_mem_size_in_samples)
if bias:
r = r + b
if activation is not None:
r = activation(r)
return r
return deconvolve
return convolve_transposed
# ConvolutionTranspose1D -- create a 1D convolution transpose layer with optional non-linearity
def ConvolutionTranspose1D(filter_shape, # a scalar, e.g., 3
num_filters=None,
activation=activation_default_or_None,
init=init_default_or_glorot_uniform,
pad=pad_default_or_False,
strides=1,
bias=bias_default_or_True,
init_bias=init_bias_default_or_0,
output_shape=None,
name=''):
if len(filter_shape) != 1:
raise ValueError('ConvolutionTranspose1D: filter_shape must be a scalar')
return ConvolutionTranspose(filter_shape, num_filters, activation, init, pad, strides, True, bias, init_bias, output_shape, name=name)
# ConvolutionTranspose2D -- create a 2D convolution transpose layer with optional non-linearity
def ConvolutionTranspose2D(filter_shape, # a 2D tuple, e.g., (3,3)
num_filters=None,
activation=activation_default_or_None,
init=init_default_or_glorot_uniform,
pad=pad_default_or_False,
strides=1,
output_shape=None,
bias=bias_default_or_True,
init_bias=init_bias_default_or_0,
name=''):
if len(filter_shape) != 2:
raise ValueError('ConvolutionTranspose2D: filter_shape must be a 2D tuple, e.g. (3,3)')
return ConvolutionTranspose(filter_shape, num_filters, activation, init, pad, strides, True, bias, init_bias, output_shape, name=name)
# ConvolutionTranspose3D -- create a 3D convolution transpose layer with optional non-linearity
def ConvolutionTranspose3D(filter_shape, # a 3D tuple, e.g., (3,3,3)
num_filters=None,
activation=activation_default_or_None,
init=init_default_or_glorot_uniform,
pad=pad_default_or_False,
strides=1,
output_shape=None,
bias=bias_default_or_True,
init_bias=init_bias_default_or_0,
name=''):
if len(filter_shape) != 3:
raise ValueError('ConvolutionTranspose3D: filter_shape must be a 3D tuple, e.g. (3,3,3)')
return ConvolutionTranspose(filter_shape, num_filters, activation, init, pad, strides, True, bias, init_bias, output_shape, name=name)
# TODO: add sequential mode like Convolution()
from cntk.cntk_py import PoolingType_Max, PoolingType_Average, NDShape

Просмотреть файл

@ -219,8 +219,8 @@ def cross_entropy_with_softmax(output_vector, target_vector, axis=-1, name=''):
target_vector: usually it is one-hot vector where the hot bit
corresponds to the label index. But it can be any probability
distribution over the labels.
axis (int or :class:`~cntk.axis.Axis`): axis along which the cross
entropy will be computed.
axis (int or :class:`~cntk.axis.Axis`, optional): if given, cross entropy will be computed
along this axis
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
@ -404,7 +404,7 @@ def classification_error(output_vector, target_vector, axis=-1, topN=1, name='')
return classification_error(output_vector, target_vector, topN, axis, name)
@typemap
def edit_distance_error(input_a, input_b, subPen=0, delPen=0, insPen=0, squashInputs=False, samplesToIgnore=[], name=''):
def edit_distance_error(input_a, input_b, subPen=0, delPen=0, insPen=0, squashInputs=False, tokensToIgnore=[], name=''):
'''
Edit distance error evaluation node with the option of specifying penalty of substitution, deletion and insertion, as well as squashing the input sequences and ignoring certain samples.
Using the classic DP algorithm as described in https://en.wikipedia.org/wiki/Edit_distance, adjusted to take into account the penalties.
@ -415,7 +415,7 @@ def edit_distance_error(input_a, input_b, subPen=0, delPen=0, insPen=0, squashIn
3 0 3 2
will be represented as the vector of labels (indices) as [1, 0, 0, 1], on which edit distance will be actually evaluated.
The node allows to squash sequences of repeating labels and ignore certain labels. For example, if squashInputs is true and samplesToIgnore contains label '-' then
The node allows to squash sequences of repeating labels and ignore certain labels. For example, if squashInputs is true and tokensToIgnore contains label '-' then
given first input sequence as s1="1-12-" and second as s2="-11--122" the edit distance will be computed against s1' = "112" and s2' = "112".
The returned error is computed as: EditDistance(s1,s2) * length(s1') / length(s1)
@ -435,9 +435,9 @@ def edit_distance_error(input_a, input_b, subPen=0, delPen=0, insPen=0, squashIn
input_a: first input sequence
input_b: second input sequence
subPen, delPen, insPen: substitution, deletion and insertion penalties
squashInputs: whether to merge sequences of identical samples (in both input sequences). If true and samplesToIgnore contains label '-' then
squashInputs: whether to merge sequences of identical samples (in both input sequences). If true and tokensToIgnore contains label '-' then
given first input sequence as s1="a-ab-" and second as s2="-aa--abb" the edit distance will be computed against s1' = "aab" and s2' = "aab".
samplesToIgnore: list of samples to ignore during edit distance evaluation (in both sequences)
tokensToIgnore: list of samples to ignore during edit distance evaluation (in both sequences)
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
@ -446,8 +446,51 @@ def edit_distance_error(input_a, input_b, subPen=0, delPen=0, insPen=0, squashIn
dtype = get_data_type(input_a, input_b)
input_a = sanitize_input(input_a, dtype)
input_b = sanitize_input(input_b, dtype)
return edit_distance_error(input_a, input_b, subPen, delPen, insPen, squashInputs, samplesToIgnore, name)
return edit_distance_error(input_a, input_b, subPen, delPen, insPen, squashInputs, tokensToIgnore, name)
@typemap
def labels_to_graph(labels, name=''):
'''
Conversion node from labels to graph. Typically used as an input to ForwardBackward node.
This node's objective is to transform input labels into a graph representing exact forward-backward criterion.
Example:
num_classes = 2
labels = cntk.input_variable((num_classes))
graph = cntk.labels_to_graph(labels)
Args:
labels: input training labels
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import labels_to_graph
dtype = get_data_type(labels)
labels = sanitize_input(labels, dtype)
return labels_to_graph(labels, name)
@typemap
def forward_backward(graph, features, blankTokenId, delayConstraint=-1, name=''):
'''
Criterion node for training methods that rely on forward-backward Viterbi-like passes, e.g. Connectionist Temporal Classification (CTC) training
The node takes as the input the graph of labels, produced by the labels_to_graph operation that determines the exact forward/backward procedure.
Example:
graph = cntk.labels_to_graph(labels)
networkOut = model(features)
fb = C.forward_backward(graph, networkOut, 132)
Args:
graph: labels graph
features: network output
blankTokenId: id of the CTC blank label
delayConstraint: label output delay constraint introduced during training that allows to have shorter delay during inference. This is using the original time information to enforce that CTC tokens only get aligned within a time margin. Setting this parameter smaller will result in shorter delay between label output during decoding, yet may hurt accuracy. delayConstraint=-1 means no constraint
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import forward_backward
dtype = get_data_type(features, graph)
features = sanitize_input(features, dtype)
graph = sanitize_input(graph, dtype)
return forward_backward(graph, features, blankTokenId, delayConstraint, name)
##########################################################################
# convolution ops
@ -455,7 +498,7 @@ def edit_distance_error(input_a, input_b, subPen=0, delPen=0, insPen=0, squashIn
@typemap
def convolution(convolution_map, operand, strides=(1,), sharing=[True],
auto_padding=[True], lower_pad=(0,), upper_pad=(0,), transpose=False,
auto_padding=[True], lower_pad=(0,), upper_pad=(0,),
max_temp_mem_size_in_samples=0, name=''):
'''
Computes the convolution of ``convolution_map`` (typically a tensor of learnable parameters) with
@ -501,7 +544,7 @@ def convolution(convolution_map, operand, strides=(1,), sharing=[True],
the input dimension. The last value that lines up with the number of input channels must be false.
lower_pad: precise lower padding for each input dimension.
upper_pad : precise upper padding for each input dimension.
transpose (bool): set to true for deconvolution.
output_shape: user expected output shape after convolution transpose.
max_temp_mem_size_in_samples (int): maximum amount of auxiliary memory (in samples) that should be reserved to perform convolution
operations. Some convolution engines (e.g. cuDNN and GEMM-based engines) can benefit from using workspace as it may improve
performance. However, sometimes this may lead to higher memory utilization. Default is 0 which means the same as the input
@ -516,8 +559,74 @@ def convolution(convolution_map, operand, strides=(1,), sharing=[True],
lower_pad = sanitize_shape(lower_pad)
upper_pad = sanitize_shape(upper_pad)
return convolution(convolution_map, operand, strides, sharing, auto_padding,
lower_pad, upper_pad, transpose,
max_temp_mem_size_in_samples, name)
lower_pad, upper_pad, max_temp_mem_size_in_samples, name)
@typemap
def convolution_transpose(convolution_map, operand, strides=(1,), sharing=[True],
auto_padding=[True], lower_pad=(0,), upper_pad=(0,), output_shape=(0,),
max_temp_mem_size_in_samples=0, name=''):
'''
Computes the transposed convolution of ``convolution_map`` (typically a tensor of learnable parameters) with
``operand`` (commonly an image or output of a previous convolution/pooling operation).
This is also known as ``fractionally strided convolutional layers``, or, ``deconvolution``.
This operation is used in image and language processing applications. It supports arbitrary
dimensions, strides, sharing, and padding.
This function operates on input tensors with dimensions :math:`[C \\times M_1 \\times M_2 \\times \\ldots \\times M_n]`. This can be understood as a rank-n
object, where each entry consists of a :math:`C`-dimensional vector. For example, an RGB image would have dimensions
:math:`[3 \\times W \\times H]`, i.e. a :math:`[W \\times H]`-sized structure, where each entry (pixel) consists of a 3-tuple.
`convolution_transpose` convolves the input ``operand`` with a :math:`n+2` rank tensor of (typically learnable) filters called
``convolution_map`` of shape :math:`[O \\times I \\times m_1 \\times m_2 \\times \\ldots \\times m_n ]` (typically :math:`m_i \\ll M_i`).
The first dimension, :math:`O`, is the number of convolution filters (i.e. the number of
channels in the output). The second dimension, :math:`I`, must match the number of channels in the input.
The last n dimensions are the spatial extent of the filter. I.e. for each output position, a vector of
dimension :math:`O` is computed. Hence, the total number of filter parameters is :math:`O \\times I \\times m_1 \\times m_2 \\times \\ldots \\times m_n`
Example:
>>> img = np.reshape(np.arange(9.0, dtype = np.float32), (1, 3, 3))
>>> x = C.input_variable(img.shape)
>>> filter = np.reshape(np.array([2, -1, -1, 2], dtype = np.float32), (1, 2, 2))
>>> kernel = C.constant(value = filter)
>>> np.round(C.convolution_transpose(kernel, x, auto_padding = [False]).eval({x: [img]}),5)
array([[[[[ 0., 2., 3., -2.],
[ 6., 4., 6., -1.],
[ 9., 10., 12., 2.],
[ -6., 5., 6., 16.]]]]], dtype=float32)
Args:
convolution_map: convolution filter weights, stored as a tensor of dimensions :math:`[O \\times I \\times m_1 \\times m_2 \\times \\ldots \\times m_n]`,
where :math:`[m_1 \\times m_2 \\times \\ldots \\times m_n]` must be the kernel dimensions (spatial extent of the filter).
operand: convolution input. A tensor with dimensions :math:`[I \\times M_1 \\times M_2 \\times \\ldots \\times M_n]`.
strides (tuple, optional): stride dimensions. If strides[i] > 1 then only pixel positions that are multiples of strides[i] are computed.
For example, a stride of 2 will lead to a halving of that dimension. The first stride dimension that lines up with the number
of input channels can be set to any non-zero value.
sharing (bool): sharing flags for each input dimension
auto_padding (bool): flags for each input dimension whether it should be padded automatically (that is,
symmetrically) or not padded at all. Padding means that the convolution kernel is applied to all pixel positions, where all
pixels outside the area are assumed zero ("padded with zeroes"). Without padding, the kernels are only shifted over
positions where all inputs to the kernel still fall inside the area. In this case, the output dimension will be less than
the input dimension. The last value that lines up with the number of input channels must be false.
lower_pad: precise lower padding for each input dimension.
upper_pad : precise upper padding for each input dimension.
max_temp_mem_size_in_samples (int): maximum amount of auxiliary memory (in samples) that should be reserved to perform convolution
operations. Some convolution engines (e.g. cuDNN and GEMM-based engines) can benefit from using workspace as it may improve
performance. However, sometimes this may lead to higher memory utilization. Default is 0 which means the same as the input
samples.
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import convolution_transpose
operand = sanitize_input(operand)
strides = sanitize_shape(strides)
lower_pad = sanitize_shape(lower_pad)
upper_pad = sanitize_shape(upper_pad)
output_shape = sanitize_shape(output_shape)
return convolution_transpose(convolution_map, operand, strides, sharing, auto_padding,
lower_pad, upper_pad, output_shape,
max_temp_mem_size_in_samples, name)
@typemap
@ -1461,6 +1570,44 @@ def param_relu(alpha, x, name=''):
x = sanitize_input(x)
return pre_lu(alpha, x, name)
@typemap
def softplus(x, steepness=1, name=''):
    r'''
    Softplus operation. Computes the element-wise softplus of ``x``:

    :math:`\textrm{softplus}(x) = {\log(1+\exp(x))}`

    The optional ``steepness`` makes the knee sharper (``steepness>1``) or
    softer by evaluating ``softplus(x * steepness) / steepness``; for very
    large steepness this approaches a linear rectifier.
    The output tensor has the same shape as ``x``.

    Example:
        >>> C.softplus([[-1, -0.5, 0, 1, 2]]).eval()
        array([[ 0.313262, 0.474077, 0.693147, 1.313262, 2.126928]], dtype=float32)

        >>> C.softplus([[-1, -0.5, 0, 1, 2]], steepness=4).eval()
        array([[ 0.004537, 0.031732, 0.173287, 1.004537, 2.000084]], dtype=float32)

    Args:
        x (`numpy.array` or :class:`~cntk.ops.functions.Function`): any :class:`~cntk.ops.functions.Function` that outputs a tensor.
        steepness (float, optional): optional steepness factor
        name (`str`, default to ''): the name of the Function instance in the network
    Returns:
        cntk.ops.functions.Function:
        An instance of :class:`~cntk.ops.functions.Function`
    '''
    from cntk.cntk_py import softplus
    x = sanitize_input(x)
    if steepness != 1:
        # Wrap the scaled computation in a block so the graph shows a single
        # 'softplus' node instead of the scale/softplus/divide subgraph.
        ph = placeholder_variable()
        scaled = softplus(steepness * ph) / steepness
        return as_block(scaled, [(ph, x)], 'softplus', name)
    # Fast path: plain softplus needs no scaling wrapper.
    return softplus(x, name)
@typemap
def sigmoid(x, name=''):
'''
@ -1485,44 +1632,6 @@ def sigmoid(x, name=''):
return sigmoid(x, name)
@typemap
def softplus(x, steepness=1, name=''):
    r'''
    Softplus operation. Computes the element-wise softplus
    of ``x``:

    ``softplus(x) = log(1 + exp(x))``

    The optional ``steepness`` makes the knee sharper (``steepness>1``) or
    softer by evaluating ``softplus(x * steepness) / steepness``; for very
    large steepness this approaches a linear rectifier.
    The output tensor has the same shape as ``x``.

    Example:
        >>> C.softplus([[-1, -0.5, 0, 1, 2]]).eval()
        array([[ 0.313262, 0.474077, 0.693147, 1.313262, 2.126928]], dtype=float32)

        >>> C.softplus([[-1, -0.5, 0, 1, 2]], steepness=4).eval()
        array([[ 0.004537, 0.031732, 0.173287, 1.004537, 2.000084]], dtype=float32)

    Args:
        x: numpy array or any :class:`~cntk.ops.functions.Function` that outputs a tensor
        steepness (float, optional): optional steepness factor
        name (str, optional): the name of the Function instance in the network
    Returns:
        :class:`~cntk.ops.functions.Function`
    '''
    def _stable_softplus(v):
        # log_add_exp(0, v) == log(exp(0) + exp(v)) == log(1 + exp(v)),
        # but avoids overflowing exp(v) for large v.
        return log_add_exp(0, v)

    ph = placeholder_variable()
    if steepness == 1:
        body = _stable_softplus(ph)
    else:
        body = _stable_softplus(steepness * ph) / steepness
    x = sanitize_input(x)
    return as_block(body, [(ph, x)], 'softplus', name)
@typemap
def tanh(x, name=''):
'''
@ -2046,7 +2155,7 @@ def reshape(x, shape, begin_axis=None, end_axis=None, name=''):
begin_axis = Axis(0)
if end_axis is None:
end_axis = Axis.end_static_axis()
end_axis = Axis.new_leading_axis()
# Pass begin_axis as the end_axis and vice versa to account for
# the automatic shape reversal across the python SWIG boundary
@ -2057,10 +2166,10 @@ def reshape(x, shape, begin_axis=None, end_axis=None, name=''):
if not axis.is_static_axis:
return axis
if (axis == Axis.end_static_axis()):
if (axis == Axis.new_leading_axis()):
return Axis(0)
elif (axis == Axis(0)):
return Axis.end_static_axis()
return Axis.new_leading_axis()
else:
return Axis(-axis.static_axis_index())

View file

@ -1235,12 +1235,16 @@ class UserFunction(Function):
map_if_possible(variables)
if len(variables)>1:
self.backward(state, root_gradients, variables)
else:
if len(root_gradients) == 1:
for rg in root_gradients.values():
break
result = self.backward(state, rg)
root_gradients = rg
possible_wrt = [input for input in self.inputs if input.needs_gradient]
if len(possible_wrt) > 1:
self.backward(state, root_gradients, variables)
else:
result = self.backward(state, root_gradients)
for k in variables:
variables[k] = result

View file

@ -282,12 +282,12 @@ EDIT_DISTANCE_ERROR_TEST_CASES = [
([[1, 3], [2, 0]], [[2, 0], [2, 0]], 0, 1, 1, True, [1], 2.0),
]
@pytest.mark.parametrize("left_input, right_input, subPen, delPen, insPen, squashInputs, samplesToIgnore, result", EDIT_DISTANCE_ERROR_TEST_CASES)
def test_edit_distance_error(left_input, right_input, subPen, delPen, insPen, squashInputs, samplesToIgnore, result, device_id, precision):
@pytest.mark.parametrize("left_input, right_input, subPen, delPen, insPen, squashInputs, tokensToIgnore, result", EDIT_DISTANCE_ERROR_TEST_CASES)
def test_edit_distance_error(left_input, right_input, subPen, delPen, insPen, squashInputs, tokensToIgnore, result, device_id, precision):
i1 = input_variable(shape=(2,))
i2 = input_variable(shape=(2,))
arguments = {i1 : left_input, i2 : right_input}
a = edit_distance_error(i1, i2, subPen, delPen, insPen, squashInputs, samplesToIgnore)
a = edit_distance_error(i1, i2, subPen, delPen, insPen, squashInputs, tokensToIgnore)
assert np.allclose(result, a.eval(arguments))
def test_sequence_grad_as_numpy_false(device_id, precision):

View file

@ -17,6 +17,8 @@ from .. import constant, parameter, input_variable, placeholder_variable, times,
from ... import InferredDimension
from .ops_test_utils import compare_lists_of_np_arrays, AA
from cntk.io import MinibatchSource, CTFDeserializer, StreamDefs, StreamDef
def test_variable_forwarding():
op = constant(value=2, shape=(3,4)) + 1
assert op.shape == (3,4)
@ -31,7 +33,7 @@ def test_eval_by_node_name():
assert res.eval({'i': [[3]]}) == [6]
assert res.eval({u'i': [[3]]}) == [6]
def test_replace_placeholders():
p = placeholder_variable(shape=(1,))
i = input_variable(shape=(1,),
@ -219,19 +221,19 @@ def test_clone_with_function_in_substitution_map():
t = times(x, w)
b = parameter((proj_dim))
t_plus_b = t + b
p = placeholder_variable()
just_b = t_plus_b.clone('clone', {t : p})
t_plus_b_clone = just_b.clone('share', {p : t})
def test_clone_with_slice():
def test_clone_with_slice():
i1 = input_variable((2,2), name='i1')
i2 = input_variable((2,2), name='i2')
x = splice(i1, i2, axis=0)
W = constant(1, (4,1), name='W')
x = splice(i1, i2, axis=0)
W = constant(1, (4,1), name='W')
y = convolution(W, x)
assert(y.shape == (4,2))
assert(y.shape == (4,2))
from ..functions import CloneMethod
x1 = input_variable((2,1), name='x1')
x2 = input_variable((2,1), name='x2')
@ -267,7 +269,7 @@ def test_input_order():
t = times(x, w)
t_plus_b = plus(t, b, name=func_name)
def compare_var_names(vars, names):
def compare_var_names(vars, names):
num_vars = len(vars)
for i in range(num_vars):
if (vars[i].name != names[i]):
@ -290,8 +292,8 @@ def test_combine_duplicated_inputs():
t_plus_b = plus(t, b, name=func_name)
duplicated_t_plus_b = combine([t_plus_b, t_plus_b])
def compare_var_names(vars, names):
def compare_var_names(vars, names):
num_vars = len(vars)
for i in range(num_vars):
if (vars[i].name != names[i]):
@ -300,7 +302,7 @@ def test_combine_duplicated_inputs():
return True
assert compare_var_names(duplicated_t_plus_b.outputs, [func_name, func_name])
def test_extra_arguments_in_eval():
x1 = input_variable((1,), name='x1')
@ -310,4 +312,35 @@ def test_extra_arguments_in_eval():
result = x1_plus_1.eval({x1 : np.asarray([[1]]), x2 : np.asarray([[1]])})
assert np.allclose(result, [[[2]]])
def test_MinibatchData_and_Value_as_input(tmpdir):
mbdata = r'''0 |S0 100'''
tmpfile = str(tmpdir/'mbtest.txt')
with open(tmpfile, 'w') as f:
f.write(mbdata)
defs = StreamDefs(f1 = StreamDef(field='S0', shape=1))
mb_source = MinibatchSource(CTFDeserializer(tmpfile, defs),
randomize=False)
f1_si = mb_source.stream_info('f1')
mb = mb_source.next_minibatch(1)
f1 = input_variable(shape=(1,),
needs_gradient=True,
name='f')
res = f1 * 2
assert res.eval({f1: mb[f1_si]}) == [[200]]
# Test MinibatchData
assert res.eval(mb[f1_si]) == [[200]]
# Test Value
assert res.eval(mb[f1_si].data) == [[200]]
# Test NumPy (converted back from MinibatchData)
assert res.eval(mb[f1_si].value) == [[200]]
# Test Value
assert res.eval(mb[f1_si].data) == [[200]]

View file

@ -360,3 +360,82 @@ def test_op_roipooling(input_map, input_rois, expected_fwd, expected_bkwd, devic
unittest_helper(input_op,
forward_input, exp_fwd_value, expected_backward,
device_id=device_id, precision=precision)
CONVOLUTION_TRANSPOSE_DATA = [
    ([1, 1, 1, 3, 3], # input_size
     [1, 2, 2], # convolution size
     [[[[ 0, 0, 1, 2],
        [ 0, 5, 11, 11],
        [ 6, 23, 29, 23],
        [ 12, 32, 37, 24]]]]) # result
]

# Convolution transpose without an explicitly requested output shape:
# the output dimensions are inferred from the input and kernel sizes.
@pytest.mark.parametrize("input_size, conv_size, result", CONVOLUTION_TRANSPOSE_DATA)
def test_convolution_transpose(input_size, conv_size, result, device_id, precision):
    dt = PRECISION_TO_TYPE[precision]
    dev = cntk_device(device_id)

    # Input operand is the ramp 0,1,2,... reshaped to input_size.
    input_operand = np.arange(np.prod(input_size), dtype=dt).reshape(input_size)
    a = I(shape=input_operand.shape[2:],
          dtype=sanitize_dtype_cntk(precision),
          needs_gradient=False,
          name='a')

    # The convolution kernel is a ramp of the same kind, reshaped to conv_size.
    kernel = np.arange(np.prod(conv_size), dtype=dt).reshape(conv_size)
    conv_map = constant(value=kernel, device=dev)

    from cntk import convolution_transpose
    input_op = convolution_transpose(conv_map, a, auto_padding=[False])

    expected_forward = AA([result])
    unittest_helper(input_op, {a: input_operand}, expected_forward,
                    None, device_id=device_id, precision=precision)
CONVOLUTION_TRANSPOSE_OUTPUT_DATA = [
    ([1, 1, 1, 3, 3], # input_size
     [1, 3, 3], # convolution size
     [[[[ 0, 3, 4, 11, 8, 10],
        [ 3, 12, 11, 28, 19, 26],
        [ 12, 27, 16, 35, 20, 25],
        [ 27, 60, 35, 76, 43, 56],
        [ 24, 51, 28, 59, 32, 40]]]]) # result
]

# Convolution transpose with an explicitly requested output shape
# (strided, auto-padded) instead of letting CNTK infer it.
@pytest.mark.parametrize("input_size, conv_size, result", CONVOLUTION_TRANSPOSE_OUTPUT_DATA)
def test_convolution_transpose_with_output(input_size, conv_size, result, device_id, precision):
    dt = PRECISION_TO_TYPE[precision]
    dev = cntk_device(device_id)

    # Input operand is the ramp 0,1,2,... reshaped to input_size.
    input_operand = np.arange(np.prod(input_size), dtype=dt).reshape(input_size)
    a = I(shape=input_operand.shape[2:],
          dtype=sanitize_dtype_cntk(precision),
          needs_gradient=False,
          name='a')

    # The convolution kernel is a ramp of the same kind, reshaped to conv_size.
    kernel = np.arange(np.prod(conv_size), dtype=dt).reshape(conv_size)
    conv_map = constant(value=kernel, device=dev)

    from cntk import convolution_transpose
    input_op = convolution_transpose(conv_map, a, auto_padding=[True],
                                     strides=2, output_shape=(1, 5, 6))

    expected_forward = AA([result])
    unittest_helper(input_op, {a: input_operand}, expected_forward,
                    None, device_id=device_id, precision=precision)

View file

@ -301,11 +301,8 @@ def test_op_elu(operand, device_id, precision):
from cntk import elu
#BUGBUG: There is a bug in ElementSelect that cause nan in the output
# for float32.
if PRECISION_TO_TYPE[precision] == np.float64:
_test_unary_op(precision, device_id, elu, operand,
expected_forward, expected_backward)
_test_unary_op(precision, device_id, elu, operand,
expected_forward, expected_backward)
@pytest.mark.parametrize("operand", TENSORS)
def test_op_leaky_relu(operand, device_id, precision):
@ -347,6 +344,21 @@ def test_op_param_relu(operand, device_id, precision):
_test_unary_op(precision, device_id, prelu, operand,
expected_forward, expected_backward)
@pytest.mark.parametrize("operand", TENSORS)
def test_op_softplus(operand, device_id, precision):
    # softplus(x) = log(1 + exp(x)); np.logaddexp(x, 0) computes this in a
    # numerically stable way and is already a ufunc, so the np.vectorize
    # wrappers (which loop over scalars in Python) are unnecessary.
    # The derivative of softplus is the logistic sigmoid 1 / (1 + exp(-x)).
    t = AA(operand, dtype=PRECISION_TO_TYPE[precision])

    expected_forward = [[np.logaddexp(t, 0)]]
    expected_backward = {
        'arg': [[1.0 / (1.0 + np.exp(-t))]]
    }

    from .. import softplus
    _test_unary_op(precision, device_id, softplus, operand,
                   expected_forward, expected_backward)
SAMPLES = [ # 2 samples having 4 classes
[1, 1, 2, 3],

Some files were not shown because too many files have changed in this diff. Show more