Merge branch 'master' into qiwye/asgd-dev
Conflicts: Source/1BitSGD Source/SGDLib/SGD.cpp
This commit is contained in:
Коммит
5a33a35eac
4
CNTK.sln
4
CNTK.sln
|
@ -1133,6 +1133,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Scripts", "Scripts", "{6826
|
|||
ProjectSection(SolutionItems) = preProject
|
||||
Scripts\pytest.ini = Scripts\pytest.ini
|
||||
Scripts\txt2ctf.py = Scripts\txt2ctf.py
|
||||
Scripts\uci2ctf.py = Scripts\uci2ctf.py
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ManagedEvalTests", "Tests\UnitTests\ManagedEvalTests\ManagedEvalTests.csproj", "{CC8DDDCB-D53A-4B30-8596-AEF1C493DB31}"
|
||||
|
@ -1142,6 +1143,9 @@ EndProject
|
|||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CSEvalClient", "Examples\Evaluation\CSEvalClient\CSEvalClient.csproj", "{1C6E6C53-1AA7-4B69-913E-B97BB5A872CF}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CPPEvalClient", "Examples\Evaluation\CPPEvalClient\CPPEvalClient.vcxproj", "{CCC07E8E-F33A-4AF7-9F60-93E2AA61C75E}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{482999D1-B7E2-466E-9F8D-2119F93EAFD9} = {482999D1-B7E2-466E-9F8D-2119F93EAFD9}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
|
|
|
@ -863,38 +863,27 @@ The dimension reduced matrix consisting of the maximum value within each pooling
|
|||
|
||||
This function is often associated with Convolution() operations.
|
||||
|
||||
### Delay
|
||||
### PastValue, FutureValue
|
||||
|
||||
Delay node used in recurrent networks, allows creation of a loop in the convolutional network that will repeat a specified number of times.
|
||||
PastValue and FutureValue nodes are used in recurrent networks, allow creation of a loop in the convolutional network that will repeat a specified number of times. PastValue retrieves the value of a node several steps away in the past, while FutureValue retrieves the value of a node from future.
|
||||
|
||||
`Delay(rows, [cols], delayNode, delayTime=1, needGradient=true, defaultHiddenActivity=0.1)`
|
||||
`PastValue(rows, [cols], node, timeStep=1, defaultHiddenActivity=0.1)`
|
||||
`FutureValue(rows, [cols], node, timeStep=1, defaultHiddenActivity=0.1)`
|
||||
|
||||
#### Parameters
|
||||
|
||||
`cvweight` – convolution weight matrix, it has the dimensions of \[outputChannels, kernelWidth \* kernelHeight \* inputChannels\]
|
||||
`rows` – number of rows in the node
|
||||
|
||||
`kernelWidth` – width of the kernel
|
||||
`cols` – number of cols in the node. This value is often omitted since the length of a sequence varies
|
||||
|
||||
`kernelHeight` – height of the kernel
|
||||
`timeStep` – \[default = 1\] number of time steps toward the past and future
|
||||
|
||||
`outputChannels` – number of output channels
|
||||
|
||||
`horizontalSubsample` – subsamples in the horizontal direction
|
||||
|
||||
`verticalSubsample` – subsamples in the vertical direction
|
||||
|
||||
#### Optional Parameters
|
||||
|
||||
`delayTime` – \[default = 1\] the amount of delay that will be introduced (number of times the loop will happen)
|
||||
|
||||
`needGradient` – \[default = true\] does the gradient need to be computed for this node
|
||||
|
||||
`defaultHiddenActivity` – \[default = 0.1\] the numerical amount for the defaultHiddenActivity
|
||||
`defaultHiddenActivity` – \[default = 0.1\] default value to use when crossing the sequence boundary or when the value is missing.
|
||||
|
||||
#### Returns
|
||||
|
||||
The results of the completed Delay loop
|
||||
Either the past or future value of a node
|
||||
|
||||
#### Notes
|
||||
|
||||
This node is used in recurrent networks, where a delay is introduced to examine values from a previous time, such as the prior value (t-1). This has the effect of creating a loop in the computational network that will repeat delayTime number of iterations.
|
||||
This node is used in recurrent networks, where a past value is introduced to examine values from a previous time, such as the prior value (t-1). This has the effect of creating a loop in the computational network.
|
||||
|
|
|
@ -37,34 +37,23 @@ int main(int argc, char* argv[])
|
|||
std::string app = argv[0];
|
||||
std::string path;
|
||||
IEvaluateModel<float> *model;
|
||||
size_t pos;
|
||||
|
||||
#ifdef _WIN32
|
||||
path = app.substr(0, app.rfind("\\"));
|
||||
// Load the eval library
|
||||
auto hModule = LoadLibrary(L"evaldll.dll");
|
||||
if (hModule == nullptr)
|
||||
{
|
||||
fprintf(stderr, "Cannot find evaldll.dll library.");
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Get the factory method to the evaluation engine
|
||||
std::string func = "GetEvalF";
|
||||
auto procAddress = GetProcAddress(hModule, func.c_str());
|
||||
auto getEvalProc = (GetEvalProc<float>)procAddress;
|
||||
|
||||
// Native model evaluation instance
|
||||
getEvalProc(&model);
|
||||
pos = app.rfind("\\");
|
||||
path = (pos == std::string::npos) ? "." : app.substr(0, pos);
|
||||
|
||||
// This relative path assumes launching from CNTK's binary folder, e.g. x64\Release
|
||||
const std::string modelWorkingDirectory = path + "/../../Examples/Image/MNIST/Data/";
|
||||
#else // on Linux
|
||||
path = app.substr(0, app.rfind("/"));
|
||||
GetEvalF(&model);
|
||||
pos = app.rfind("/");
|
||||
path = (pos == std::string::npos) ? "." : app.substr(0, pos);
|
||||
|
||||
// This relative path assumes launching from CNTK's binary folder, e.g. build/release/bin/
|
||||
const std::string modelWorkingDirectory = path + "/../../../Examples/Image/MNIST/Data/";
|
||||
#endif
|
||||
|
||||
GetEvalF(&model);
|
||||
|
||||
const std::string modelFilePath = modelWorkingDirectory + "../Output/Models/01_OneHidden";
|
||||
|
||||
|
|
|
@ -69,7 +69,7 @@
|
|||
<AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalDependencies>EvalDLL.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<DelayLoadDLLs>%(DelayLoadDLLs)</DelayLoadDLLs>
|
||||
<Profile>true</Profile>
|
||||
</Link>
|
||||
|
|
|
@ -110,9 +110,15 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
|
|||
public static bool Evaluate(string record)
|
||||
{
|
||||
var model = Models.Take();
|
||||
var outcome = model.EvaluateRecord(record);
|
||||
Models.Add(model);
|
||||
return outcome;
|
||||
try
|
||||
{
|
||||
var outcome = model.EvaluateRecord(record);
|
||||
return outcome;
|
||||
}
|
||||
finally
|
||||
{
|
||||
Models.Add(model);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -123,9 +129,15 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
|
|||
public static List<float> Evaluate(List<float> inputs)
|
||||
{
|
||||
var model = Models.Take();
|
||||
var outcome = model.EvaluateInput(inputs);
|
||||
Models.Add(model);
|
||||
return outcome;
|
||||
try
|
||||
{
|
||||
var outcome = model.EvaluateInput(inputs);
|
||||
return outcome;
|
||||
}
|
||||
finally
|
||||
{
|
||||
Models.Add(model);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
|
|
@ -0,0 +1,226 @@
|
|||
# The configuration file to build language understanding model with ATIS corpus.
|
||||
# An LSTM model is built to tag each word in sentences with its semantic label.
|
||||
|
||||
WorkDir = work
|
||||
DataDir = data
|
||||
|
||||
modelPath = $WorkDir$/ATIS.slot.lstm
|
||||
parallelTrain = true
|
||||
|
||||
#stderr = $WorkDir$/log
|
||||
|
||||
command = Train:Output:Test
|
||||
|
||||
precision = "float"
|
||||
deviceId = "-1" # change to "auto" to use GPUs
|
||||
|
||||
wordCount = 944 # number of words
|
||||
labelCount = 127 # number of labels
|
||||
|
||||
# The command to train the LSTM model
|
||||
Train = [
|
||||
action = train
|
||||
BrainScriptNetworkBuilder = [
|
||||
inputDim = $wordCount$
|
||||
labelDim = $labelCount$
|
||||
featDim = inputDim*3 # contextual words are used as features: previous word, current word, next word.
|
||||
embDim = 150
|
||||
hiddenDim = 300
|
||||
maxLayer = 1
|
||||
initScale = 6
|
||||
featuresPW = Input(inputDim) # the previous word
|
||||
featuresCW = Input(inputDim) # the current word
|
||||
featuresNW = Input(inputDim) # the next word
|
||||
features = RowStack(featuresPW : featuresCW : featuresNW)
|
||||
|
||||
labels = Input(labelDim, tag = "label")
|
||||
|
||||
# embedding layer
|
||||
emb = Parameter(embDim, featDim)
|
||||
featEmbedded = emb * features
|
||||
|
||||
# build the LSTM stack
|
||||
lstmDims[i:0..maxLayer-1] = hiddenDim
|
||||
NoAuxInputHook (input, lstmState) = BS.Constants.None
|
||||
lstmStack = BS.RNNs.RecurrentLSTMPStack (lstmDims,
|
||||
cellDims=lstmDims,
|
||||
featEmbedded,
|
||||
inputDim=embDim,
|
||||
previousHook=BS.RNNs.PreviousHC,
|
||||
augmentInputHook=BS.RNNs.NoAuxInputHook,
|
||||
augmentInputDim=0,
|
||||
enableSelfStabilization=false)
|
||||
|
||||
lstmOutputLayer = Length (lstmStack)-1
|
||||
LSTMoutput = lstmStack[lstmOutputLayer].h
|
||||
|
||||
W = Parameter(labelDim, hiddenDim, init = "uniform", initValueScale=initScale)
|
||||
b = Parameter(labelDim, 1, init = "fixedValue", value=0)
|
||||
outputs = W * LSTMoutput + b
|
||||
|
||||
cr = CrossEntropyWithSoftmax(labels, outputs)
|
||||
|
||||
criterionNodes = (cr)
|
||||
evaluationNodes = (cr)
|
||||
outputNodes = (outputs)
|
||||
]
|
||||
|
||||
SGD = [
|
||||
# maximum number of epochs
|
||||
maxEpochs = 1 # set to 1 so this can be added to regression test. Increase to 20 get a good accuracy
|
||||
|
||||
# for each epoch, maximum number of input samples(words) is set below
|
||||
epochSize = 36000
|
||||
|
||||
# minibatchSize should be larger than the maximum sentence length
|
||||
minibatchSize = 70
|
||||
|
||||
learningRatesPerSample = 0.01*2:0.005*12:0.001
|
||||
gradUpdateType = "FSAdaGrad"
|
||||
|
||||
gradientClippingWithTruncation = true
|
||||
clippingThresholdPerSample = 15.0
|
||||
|
||||
# number of minibatches to report progress
|
||||
numMBsToShowResult = 100
|
||||
|
||||
firstMBsToShowResult = 10
|
||||
|
||||
# if validation shows that the model has no improvement, then do back-up to the previously
|
||||
# estimated model and reduce learning rate
|
||||
loadBestModel = true
|
||||
|
||||
parallelTrain = [
|
||||
parallelizationMethod = "DataParallelSGD"
|
||||
parallelizationStartEpoch = 2
|
||||
distributedMBReading = true
|
||||
dataParallelSGD = [
|
||||
gradientBits = 1
|
||||
]
|
||||
]
|
||||
]
|
||||
|
||||
reader = [
|
||||
readerType = "CNTKTextFormatReader"
|
||||
file = "$DataDir$/ATIS.train.cntk.sparse"
|
||||
randomize = true
|
||||
input = [
|
||||
featuresPW = [
|
||||
alias = "PW" # previous word
|
||||
dim = $wordCount$
|
||||
format = "sparse"
|
||||
]
|
||||
featuresCW = [
|
||||
alias = "CW" # current word
|
||||
dim = $wordCount$
|
||||
format = "sparse"
|
||||
]
|
||||
featuresNW = [
|
||||
alias = "NW" # next word
|
||||
dim = $wordCount$
|
||||
format = "sparse"
|
||||
]
|
||||
|
||||
labels = [
|
||||
alias = "L" # label
|
||||
dim = $labelCount$
|
||||
format = "sparse"
|
||||
]
|
||||
]
|
||||
]
|
||||
]
|
||||
|
||||
# Evaluate the model to predict labels
|
||||
Output = [
|
||||
action = "write"
|
||||
|
||||
traceLevel = 1
|
||||
epochSize = 0
|
||||
|
||||
defaultHiddenActivity = 0.1
|
||||
BrainScriptNetworkBuilder = [
|
||||
modelAsTrained = BS.Network.Load ("$modelPath$")
|
||||
final = Hardmax(modelAsTrained.outputs)
|
||||
]
|
||||
|
||||
outputPath = $WorkDir$/model.writeaction
|
||||
outputNodeNames = final
|
||||
|
||||
reader = [
|
||||
readerType = "CNTKTextFormatReader"
|
||||
file = "$DataDir$/ATIS.test.cntk.sparse"
|
||||
|
||||
randomize = false
|
||||
input = [
|
||||
featuresPW = [
|
||||
alias = "PW" # previous word
|
||||
dim = $wordCount$
|
||||
format = "sparse"
|
||||
]
|
||||
featuresCW = [
|
||||
alias = "CW" # current word
|
||||
dim = $wordCount$
|
||||
format = "sparse"
|
||||
]
|
||||
featuresNW = [
|
||||
alias = "NW" # next word
|
||||
dim = $wordCount$
|
||||
format = "sparse"
|
||||
]
|
||||
|
||||
labels = [
|
||||
alias = "L" # label
|
||||
dim = $labelCount$
|
||||
format = "sparse"
|
||||
]
|
||||
]
|
||||
]
|
||||
]
|
||||
|
||||
# Evaluate the model's accuracy
|
||||
Test = [
|
||||
action = "test"
|
||||
|
||||
traceLevel = 1
|
||||
epochSize = 0
|
||||
|
||||
defaultHiddenActivity = 0.1
|
||||
BrainScriptNetworkBuilder = [
|
||||
labels = Input($labelCount$, tag = "label")
|
||||
modelAsTrained = BS.Network.Load ("$modelPath$")
|
||||
final = Hardmax(modelAsTrained.outputs)
|
||||
errorRate = ErrorPrediction(labels, final, tag='evaluation')
|
||||
]
|
||||
|
||||
evalNodeNames = errorRate
|
||||
|
||||
reader = [
|
||||
readerType = "CNTKTextFormatReader"
|
||||
file = "$DataDir$/ATIS.test.cntk.sparse"
|
||||
|
||||
randomize = false
|
||||
input = [
|
||||
featuresPW = [
|
||||
alias = "PW" # previous word
|
||||
dim = $wordCount$
|
||||
format = "sparse"
|
||||
]
|
||||
featuresCW = [
|
||||
alias = "CW" # current word
|
||||
dim = $wordCount$
|
||||
format = "sparse"
|
||||
]
|
||||
featuresNW = [
|
||||
alias = "NW" # next word
|
||||
dim = $wordCount$
|
||||
format = "sparse"
|
||||
]
|
||||
|
||||
labels = [
|
||||
alias = "L" # label
|
||||
dim = $labelCount$
|
||||
format = "sparse"
|
||||
]
|
||||
]
|
||||
]
|
||||
]
|
|
@ -0,0 +1,168 @@
|
|||
# Build Language Understanding Models with CNTK
|
||||
|
||||
This example demonstrates how to build a language understanding model with CNTK using the ATIS data set. This example is similar to
|
||||
[SLU example](https://github.com/Microsoft/CNTK/tree/master/Examples/Text/Miscellaneous/SLU). They are different in that
|
||||
- CNTKTextFormatReader is used here, instead of LUSequenceReader
|
||||
- With CNTKTextFormatReader, the input format is much more flexible. In the example setting, sparse contextual feature vectors are explored
|
||||
- Sparse label input is used.
|
||||
|
||||
The Air travel information system (ATIS) corpus is used for training and testing.
|
||||
## Download the example
|
||||
The data and configuration is checked in to github. You can get it by command:
|
||||
|
||||
`git clone https://github.com/Microsoft/cntk`
|
||||
|
||||
The example is under folder:
|
||||
`<cntk_root>\Examples\Text\ATIS`
|
||||
|
||||
## Data File Format
|
||||
There are four files under `data` sub-folder
|
||||
|
||||
|Files |Content |
|
||||
|:----------------------|:--------|
|
||||
|ATIS.train.cntk.sparse |featurized training data set
|
||||
|ATIS.test.cntk.sparse |featurized test data set
|
||||
|ATIS.vocab |all words extracted from training data. Vocab size: 944
|
||||
|ATIS.labels |all semantic labels extracted from training data. Total labels: 127
|
||||
|
||||
We preprocess ATIS data by converting words into word indexes, and labels into label IDs in order to use
|
||||
[CNTKTextFormatReader](https://github.com/Microsoft/CNTK/wiki/CNTKTextFormat-Reader). You can use any
|
||||
script/tool to preprocess your text data files. In this example, data is already preprocessed.
|
||||
|
||||
The last two files ATIS.vocab and ATIS.labels are not really required to run the example. They are included for evaluation and debugging purpose.
|
||||
E.g. they can be used to convert .sparse files back to original text files.
|
||||
|
||||
To understand the data format (two .sparse files), let's start with a sample sentence:
|
||||
```
|
||||
BOS i would like to find a flight from charlotte to Las Vegas that makes a stop in St. Louis EOS
|
||||
```
|
||||
it is converted into the following text:
|
||||
```
|
||||
1 |PW 1:1 |CW 1:1 |NW 12:1 |L 126:1
|
||||
1 |PW 1:1 |CW 12:1 |NW 39:1 |L 126:1
|
||||
1 |PW 12:1 |CW 39:1 |NW 28:1 |L 126:1
|
||||
1 |PW 39:1 |CW 28:1 |NW 3:1 |L 126:1
|
||||
1 |PW 28:1 |CW 3:1 |NW 86:1 |L 126:1
|
||||
1 |PW 3:1 |CW 86:1 |NW 15:1 |L 126:1
|
||||
1 |PW 86:1 |CW 15:1 |NW 10:1 |L 126:1
|
||||
1 |PW 15:1 |CW 10:1 |NW 4:1 |L 126:1
|
||||
1 |PW 10:1 |CW 4:1 |NW 101:1 |L 126:1
|
||||
1 |PW 4:1 |CW 101:1 |NW 3:1 |L 48:1
|
||||
1 |PW 101:1 |CW 3:1 |NW 92:1 |L 126:1
|
||||
1 |PW 3:1 |CW 92:1 |NW 90:1 |L 78:1
|
||||
1 |PW 92:1 |CW 90:1 |NW 33:1 |L 123:1
|
||||
1 |PW 90:1 |CW 33:1 |NW 338:1 |L 126:1
|
||||
1 |PW 33:1 |CW 338:1 |NW 15:1 |L 126:1
|
||||
1 |PW 338:1 |CW 15:1 |NW 132:1 |L 126:1
|
||||
1 |PW 15:1 |CW 132:1 |NW 17:1 |L 126:1
|
||||
1 |PW 132:1 |CW 17:1 |NW 72:1 |L 126:1
|
||||
1 |PW 17:1 |CW 72:1 |NW 144:1 |L 71:1
|
||||
1 |PW 72:1 |CW 144:1 |NW 2:1 |L 119:1
|
||||
1 |PW 144:1 |CW 2:1 |NW 2:1 |L 126:1
|
||||
```
|
||||
where the first column identifies the sequence (sentence) ID, which is the same for all words of the same sentence. There are four input streams: PW, CW, NW, L.
|
||||
The input "PW" represents the previous word ID, "CW" for current word, and "NW" for next word. Input name "L" is for labels. The input names can be anything you
|
||||
like and you can add more input as needed, e.g. words in a bigger window.
|
||||
|
||||
Words "BOS" and "EOS" denote beginning of sentence and end of sentences respectively.
|
||||
|
||||
Each line above represents one sample (word). E.g. the meaning of this line: `1 |PW 4:1 |CW 101:1 |NW 3:1 |L 48:1`:
|
||||
* the sequence ID is 1
|
||||
* the current word is "charlotte" whose word ID is 101
|
||||
* the previous word is "from" whose ID is 4
|
||||
* the next word is "to" whose ID is 3
|
||||
* the semantic label is "B-fromloc.city_name" whose label Id is 48.
|
||||
|
||||
All word IDs, label IDs and corresponding words and labels are stored in ATIS.vocab and ATIS.labels.
|
||||
|
||||
## CNTK Configuration
|
||||
|
||||
In this example, we use BrainScript to create one-layer LSTM with embedding for slot tagging. The consolidated config file is ATIS.cntk. One can check the file (with some comments)
|
||||
for details, especially how the reader is configured in ATIS.cntk.
|
||||
|
||||
reader=[
|
||||
readerType = "CNTKTextFormatReader"
|
||||
file = "$DataDir$/ATIS.train.cntk.sparse"
|
||||
|
||||
miniBatchMode = "partial"
|
||||
randomize = true
|
||||
input = [
|
||||
featuresPW = [
|
||||
alias = "PW" # previous word
|
||||
dim = $wordCount$
|
||||
format = "sparse"
|
||||
]
|
||||
featuresCW = [
|
||||
alias = "CW" # current word
|
||||
dim = $wordCount$
|
||||
format = "sparse"
|
||||
]
|
||||
featuresNW = [
|
||||
alias = "NW" # next word
|
||||
dim = $wordCount$
|
||||
format = "sparse"
|
||||
]
|
||||
|
||||
labels = [
|
||||
alias = "L" # label
|
||||
dim = $labelCount$
|
||||
format = "sparse"
|
||||
]
|
||||
]
|
||||
]
|
||||
|
||||
The above section tells CNTK to use CNTKTextFormatReader to read data from the file "$DataDir$/ATIS.train.cntk.sparse". The same input names (PW, CW, NW, L) are used to refer to inputs (features and labels) provided in data files. The input is read into different
|
||||
feature vectors: featuresPW, featuresCW, featuresNW and labels. These vectors are later used to build LSTM node with BrainScript as follows.
|
||||
```
|
||||
featuresPW = Input(inputDim)
|
||||
featuresCW = Input(inputDim)
|
||||
featuresNW = Input(inputDim)
|
||||
features = RowStack(featuresPW : featuresCW : featuresNW)
|
||||
labels=Input(labelDim, tag="label")
|
||||
# embedding layer
|
||||
emb = LearnableParameter(embDim, featDim)
|
||||
featEmbedded = Times(emb, features)
|
||||
# build the LSTM stack
|
||||
lstmDims[i:0..maxLayer] = hiddenDim
|
||||
NoAuxInputHook (input, lstmState) = BS.Constants.None
|
||||
lstmStack = BS.RNNs.RecurrentLSTMPStack (lstmDims,
|
||||
cellDims=lstmDims,
|
||||
featEmbedded,
|
||||
inputDim=embDim,
|
||||
previousHook=BS.RNNs.PreviousHC,
|
||||
augmentInputHook=BS.RNNs.NoAuxInputHook,
|
||||
augmentInputDim=0,
|
||||
enableSelfStabilization=false)
|
||||
lstmOutputLayer = Length (lstmStack)-1
|
||||
LSTMoutput = lstmStack[lstmOutputLayer].h
|
||||
|
||||
```
|
||||
A few other notes about the config:
|
||||
- it is important to specify the format is "sparse".
|
||||
- the gradUpdateType is set to FSAdaGrad. This setting yields better model accuracy compared to the other update methods.
|
||||
- multiple LSTM layers can be used by changing the value of maxLayer.
|
||||
|
||||
Three commands are configured: Train, Output and Test. The command "Train" is used to train a model, "Output" is used to evaluate the model against a test set and store
|
||||
the model output, and the command "Test" is to calculate the model's accuracy.
|
||||
|
||||
## Run the example
|
||||
|
||||
One can run the example locally or on Philly (for Microsoft internal users).
|
||||
|
||||
To run locally,
|
||||
|
||||
```sh
|
||||
> mkdir work # the default work_dir
|
||||
> open ATIS.cntk and update the value of deviceId: -1 for CPU, auto for GPU
|
||||
> cntk.exe configFile=ATIS.cntk
|
||||
```
|
||||
|
||||
By default, the maxEpochs is set to 1 to save training time. One can change it to larger value such as 20 in order to get a good model accuracy.
|
||||
Depending on the GPU, it normally takes about 20 minutes to run 20 epochs on a single GPU, and the slot F1 score is about 93.
|
||||
|
||||
**For Microsoft users only**, to run the job on Philly:
|
||||
- first upload data folder to philly cloud. e.g. `\\storage.gcr.philly.selfhost.corp.microsoft.com\pnrsy\<your_alias>\ATIS `
|
||||
- upload the config file to the philly cloud, e.g. `\\storage.gcr.philly.selfhost.corp.microsoft.com\pnrsy_scratch\<your_alias>\ATIS`
|
||||
- go to http://philly/ to create a new job by specifying data folder and config file, and start the job.
|
||||
|
||||
More details about Philly, including how to upload data to Philly and start jobs, can be found [here](https://microsoft.sharepoint.com/teams/ATISG/SitePages/Philly%20Users%20Guide.aspx)
|
|
@ -0,0 +1,127 @@
|
|||
B-aircraft_code
|
||||
B-airline_code
|
||||
B-airline_name
|
||||
B-airport_code
|
||||
B-airport_name
|
||||
B-arrive_date.date_relative
|
||||
B-arrive_date.day_name
|
||||
B-arrive_date.day_number
|
||||
B-arrive_date.month_name
|
||||
B-arrive_date.today_relative
|
||||
B-arrive_time.end_time
|
||||
B-arrive_time.period_mod
|
||||
B-arrive_time.period_of_day
|
||||
B-arrive_time.start_time
|
||||
B-arrive_time.time
|
||||
B-arrive_time.time_relative
|
||||
B-booking_class
|
||||
B-city_name
|
||||
B-class_type
|
||||
B-compartment
|
||||
B-connect
|
||||
B-cost_relative
|
||||
B-day_name
|
||||
B-day_number
|
||||
B-days_code
|
||||
B-depart_date.date_relative
|
||||
B-depart_date.day_name
|
||||
B-depart_date.day_number
|
||||
B-depart_date.month_name
|
||||
B-depart_date.today_relative
|
||||
B-depart_date.year
|
||||
B-depart_time.end_time
|
||||
B-depart_time.period_mod
|
||||
B-depart_time.period_of_day
|
||||
B-depart_time.start_time
|
||||
B-depart_time.time
|
||||
B-depart_time.time_relative
|
||||
B-economy
|
||||
B-fare_amount
|
||||
B-fare_basis_code
|
||||
B-flight
|
||||
B-flight_days
|
||||
B-flight_mod
|
||||
B-flight_number
|
||||
B-flight_stop
|
||||
B-flight_time
|
||||
B-fromloc.airport_code
|
||||
B-fromloc.airport_name
|
||||
B-fromloc.city_name
|
||||
B-fromloc.state_code
|
||||
B-fromloc.state_name
|
||||
B-meal
|
||||
B-meal_code
|
||||
B-meal_description
|
||||
B-mod
|
||||
B-month_name
|
||||
B-or
|
||||
B-period_of_day
|
||||
B-restriction_code
|
||||
B-return_date.date_relative
|
||||
B-return_date.day_name
|
||||
B-return_date.day_number
|
||||
B-return_date.month_name
|
||||
B-return_date.today_relative
|
||||
B-return_time.period_mod
|
||||
B-return_time.period_of_day
|
||||
B-round_trip
|
||||
B-state_code
|
||||
B-state_name
|
||||
B-stoploc.airport_code
|
||||
B-stoploc.airport_name
|
||||
B-stoploc.city_name
|
||||
B-stoploc.state_code
|
||||
B-time
|
||||
B-time_relative
|
||||
B-today_relative
|
||||
B-toloc.airport_code
|
||||
B-toloc.airport_name
|
||||
B-toloc.city_name
|
||||
B-toloc.country_name
|
||||
B-toloc.state_code
|
||||
B-toloc.state_name
|
||||
B-transport_type
|
||||
I-airline_name
|
||||
I-airport_name
|
||||
I-arrive_date.day_number
|
||||
I-arrive_time.end_time
|
||||
I-arrive_time.period_of_day
|
||||
I-arrive_time.start_time
|
||||
I-arrive_time.time
|
||||
I-arrive_time.time_relative
|
||||
I-city_name
|
||||
I-class_type
|
||||
I-cost_relative
|
||||
I-depart_date.day_number
|
||||
I-depart_date.today_relative
|
||||
I-depart_time.end_time
|
||||
I-depart_time.period_of_day
|
||||
I-depart_time.start_time
|
||||
I-depart_time.time
|
||||
I-depart_time.time_relative
|
||||
I-economy
|
||||
I-fare_amount
|
||||
I-fare_basis_code
|
||||
I-flight_mod
|
||||
I-flight_number
|
||||
I-flight_stop
|
||||
I-flight_time
|
||||
I-fromloc.airport_name
|
||||
I-fromloc.city_name
|
||||
I-fromloc.state_name
|
||||
I-meal_code
|
||||
I-meal_description
|
||||
I-restriction_code
|
||||
I-return_date.date_relative
|
||||
I-return_date.day_number
|
||||
I-return_date.today_relative
|
||||
I-round_trip
|
||||
I-state_name
|
||||
I-stoploc.city_name
|
||||
I-time
|
||||
I-today_relative
|
||||
I-toloc.airport_name
|
||||
I-toloc.city_name
|
||||
I-toloc.state_name
|
||||
I-transport_type
|
||||
O
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,944 @@
|
|||
</s>
|
||||
BOS
|
||||
EOS
|
||||
to
|
||||
from
|
||||
flights
|
||||
the
|
||||
on
|
||||
what
|
||||
me
|
||||
flight
|
||||
show
|
||||
i
|
||||
boston
|
||||
san
|
||||
a
|
||||
denver
|
||||
in
|
||||
and
|
||||
francisco
|
||||
atlanta
|
||||
is
|
||||
pittsburgh
|
||||
dallas
|
||||
all
|
||||
baltimore
|
||||
list
|
||||
philadelphia
|
||||
like
|
||||
are
|
||||
airlines
|
||||
of
|
||||
between
|
||||
that
|
||||
washington
|
||||
pm
|
||||
leaving
|
||||
please
|
||||
morning
|
||||
would
|
||||
fly
|
||||
for
|
||||
city
|
||||
fare
|
||||
wednesday
|
||||
first
|
||||
need
|
||||
after
|
||||
trip
|
||||
oakland
|
||||
there
|
||||
ground
|
||||
round
|
||||
does
|
||||
transportation
|
||||
'd
|
||||
which
|
||||
cheapest
|
||||
you
|
||||
arriving
|
||||
class
|
||||
before
|
||||
available
|
||||
american
|
||||
new
|
||||
fares
|
||||
milwaukee
|
||||
with
|
||||
give
|
||||
have
|
||||
afternoon
|
||||
york
|
||||
st.
|
||||
one
|
||||
dc
|
||||
at
|
||||
way
|
||||
monday
|
||||
leave
|
||||
arrive
|
||||
airport
|
||||
thursday
|
||||
how
|
||||
want
|
||||
tuesday
|
||||
nonstop
|
||||
find
|
||||
am
|
||||
earliest
|
||||
go
|
||||
vegas
|
||||
miami
|
||||
las
|
||||
united
|
||||
information
|
||||
orlando
|
||||
phoenix
|
||||
chicago
|
||||
sunday
|
||||
saturday
|
||||
evening
|
||||
charlotte
|
||||
twenty
|
||||
newark
|
||||
can
|
||||
delta
|
||||
toronto
|
||||
seattle
|
||||
diego
|
||||
kansas
|
||||
indianapolis
|
||||
houston
|
||||
airline
|
||||
noon
|
||||
any
|
||||
friday
|
||||
lake
|
||||
salt
|
||||
's
|
||||
next
|
||||
us
|
||||
o'clock
|
||||
cleveland
|
||||
continental
|
||||
air
|
||||
angeles
|
||||
los
|
||||
august
|
||||
worth
|
||||
do
|
||||
fort
|
||||
july
|
||||
stop
|
||||
code
|
||||
5
|
||||
seventh
|
||||
early
|
||||
memphis
|
||||
tell
|
||||
aircraft
|
||||
downtown
|
||||
or
|
||||
june
|
||||
6
|
||||
louis
|
||||
montreal
|
||||
cincinnati
|
||||
around
|
||||
tomorrow
|
||||
cost
|
||||
going
|
||||
latest
|
||||
petersburg
|
||||
tampa
|
||||
many
|
||||
minneapolis
|
||||
nashville
|
||||
8
|
||||
get
|
||||
mean
|
||||
jose
|
||||
detroit
|
||||
10
|
||||
an
|
||||
departing
|
||||
stopover
|
||||
tacoma
|
||||
by
|
||||
about
|
||||
twa
|
||||
much
|
||||
7
|
||||
leaves
|
||||
may
|
||||
long
|
||||
type
|
||||
burbank
|
||||
see
|
||||
expensive
|
||||
ticket
|
||||
international
|
||||
12
|
||||
travel
|
||||
could
|
||||
dollars
|
||||
than
|
||||
daily
|
||||
columbus
|
||||
service
|
||||
beach
|
||||
'm
|
||||
california
|
||||
9
|
||||
night
|
||||
least
|
||||
know
|
||||
economy
|
||||
time
|
||||
4
|
||||
depart
|
||||
into
|
||||
meal
|
||||
paul
|
||||
coach
|
||||
book
|
||||
april
|
||||
airports
|
||||
northwest
|
||||
la
|
||||
lowest
|
||||
now
|
||||
december
|
||||
less
|
||||
westchester
|
||||
day
|
||||
serves
|
||||
it
|
||||
serve
|
||||
november
|
||||
okay
|
||||
arrives
|
||||
used
|
||||
field
|
||||
love
|
||||
last
|
||||
ontario
|
||||
second
|
||||
county
|
||||
return
|
||||
kind
|
||||
september
|
||||
mitchell
|
||||
general
|
||||
as
|
||||
stops
|
||||
flying
|
||||
2
|
||||
third
|
||||
be
|
||||
direct
|
||||
fifth
|
||||
eighth
|
||||
stopping
|
||||
times
|
||||
breakfast
|
||||
out
|
||||
make
|
||||
capacity
|
||||
car
|
||||
take
|
||||
schedule
|
||||
seating
|
||||
sixth
|
||||
1000
|
||||
number
|
||||
goes
|
||||
cities
|
||||
dinner
|
||||
connecting
|
||||
3
|
||||
dl
|
||||
fourth
|
||||
airfare
|
||||
possible
|
||||
this
|
||||
has
|
||||
served
|
||||
meals
|
||||
ninth
|
||||
looking
|
||||
also
|
||||
restriction
|
||||
week
|
||||
late
|
||||
eastern
|
||||
returning
|
||||
back
|
||||
today
|
||||
interested
|
||||
price
|
||||
business
|
||||
most
|
||||
prices
|
||||
1991
|
||||
two
|
||||
types
|
||||
flies
|
||||
twentieth
|
||||
will
|
||||
through
|
||||
limousine
|
||||
ua
|
||||
bwi
|
||||
via
|
||||
tenth
|
||||
using
|
||||
stand
|
||||
plane
|
||||
ap
|
||||
fifteenth
|
||||
guardia
|
||||
same
|
||||
1
|
||||
should
|
||||
other
|
||||
arrangements
|
||||
f
|
||||
only
|
||||
rental
|
||||
then
|
||||
display
|
||||
your
|
||||
shortest
|
||||
wednesdays
|
||||
listing
|
||||
canadian
|
||||
classes
|
||||
again
|
||||
numbers
|
||||
thirtieth
|
||||
florida
|
||||
express
|
||||
midwest
|
||||
tickets
|
||||
where
|
||||
twelfth
|
||||
sixteenth
|
||||
h
|
||||
north
|
||||
eleventh
|
||||
carolina
|
||||
seventeenth
|
||||
under
|
||||
smallest
|
||||
mco
|
||||
distance
|
||||
lunch
|
||||
either
|
||||
makes
|
||||
if
|
||||
qx
|
||||
transport
|
||||
far
|
||||
hp
|
||||
57
|
||||
october
|
||||
no
|
||||
my
|
||||
m80
|
||||
thank
|
||||
arizona
|
||||
jfk
|
||||
colorado
|
||||
jersey
|
||||
q
|
||||
weekday
|
||||
airplane
|
||||
y
|
||||
planes
|
||||
some
|
||||
departure
|
||||
use
|
||||
ewr
|
||||
their
|
||||
ohio
|
||||
thirty
|
||||
nineteenth
|
||||
when
|
||||
fourteenth
|
||||
explain
|
||||
layover
|
||||
alaska
|
||||
march
|
||||
stopovers
|
||||
live
|
||||
people
|
||||
traveling
|
||||
serving
|
||||
rent
|
||||
hi
|
||||
offer
|
||||
later
|
||||
yes
|
||||
january
|
||||
area
|
||||
logan
|
||||
right
|
||||
booking
|
||||
sfo
|
||||
midnight
|
||||
yn
|
||||
but
|
||||
during
|
||||
landings
|
||||
february
|
||||
dfw
|
||||
abbreviation
|
||||
630
|
||||
both
|
||||
're
|
||||
230
|
||||
qw
|
||||
boeing
|
||||
coming
|
||||
passengers
|
||||
arrange
|
||||
hours
|
||||
qo
|
||||
codes
|
||||
trying
|
||||
tower
|
||||
466
|
||||
canada
|
||||
each
|
||||
530
|
||||
over
|
||||
uses
|
||||
arrivals
|
||||
11
|
||||
southwest
|
||||
281
|
||||
trips
|
||||
838
|
||||
days
|
||||
those
|
||||
takeoffs
|
||||
lufthansa
|
||||
west
|
||||
1100
|
||||
arrival
|
||||
757
|
||||
minnesota
|
||||
anywhere
|
||||
america
|
||||
430
|
||||
thrift
|
||||
let
|
||||
mornings
|
||||
nationair
|
||||
'll
|
||||
kinds
|
||||
cheap
|
||||
close
|
||||
seats
|
||||
pennsylvania
|
||||
name
|
||||
quebec
|
||||
indiana
|
||||
michigan
|
||||
saturdays
|
||||
different
|
||||
taxi
|
||||
provided
|
||||
rates
|
||||
utah
|
||||
these
|
||||
starting
|
||||
sometime
|
||||
costs
|
||||
making
|
||||
bh
|
||||
eighteenth
|
||||
following
|
||||
another
|
||||
ff
|
||||
near
|
||||
747
|
||||
ea
|
||||
1992
|
||||
connect
|
||||
help
|
||||
choices
|
||||
sa
|
||||
maximum
|
||||
wish
|
||||
1115
|
||||
six
|
||||
weekdays
|
||||
more
|
||||
total
|
||||
s
|
||||
dc10
|
||||
d9s
|
||||
2100
|
||||
snack
|
||||
1245
|
||||
georgia
|
||||
72s
|
||||
73s
|
||||
f28
|
||||
heading
|
||||
departures
|
||||
amount
|
||||
825
|
||||
737
|
||||
813
|
||||
ap57
|
||||
sixteen
|
||||
m
|
||||
sorry
|
||||
serviced
|
||||
three
|
||||
miles
|
||||
departs
|
||||
1700
|
||||
requesting
|
||||
718
|
||||
land
|
||||
nevada
|
||||
100
|
||||
so
|
||||
tennessee
|
||||
tuesdays
|
||||
hello
|
||||
destination
|
||||
reservation
|
||||
texas
|
||||
rentals
|
||||
co
|
||||
meaning
|
||||
ap80
|
||||
1500
|
||||
270
|
||||
thursdays
|
||||
philly
|
||||
thirteenth
|
||||
services
|
||||
sundays
|
||||
turboprop
|
||||
stands
|
||||
415
|
||||
provide
|
||||
cars
|
||||
we
|
||||
great
|
||||
mondays
|
||||
include
|
||||
sure
|
||||
't
|
||||
well
|
||||
2134
|
||||
fn
|
||||
555
|
||||
ord
|
||||
934
|
||||
connection
|
||||
296
|
||||
abbreviations
|
||||
755
|
||||
highest
|
||||
hold
|
||||
720
|
||||
fit
|
||||
80
|
||||
soon
|
||||
four
|
||||
ten
|
||||
noontime
|
||||
too
|
||||
offers
|
||||
options
|
||||
within
|
||||
difference
|
||||
c
|
||||
restrictions
|
||||
plan
|
||||
originating
|
||||
describe
|
||||
nw
|
||||
1110
|
||||
connections
|
||||
dulles
|
||||
21
|
||||
733
|
||||
say
|
||||
approximately
|
||||
define
|
||||
852
|
||||
1291
|
||||
rate
|
||||
who
|
||||
proper
|
||||
beginning
|
||||
being
|
||||
329
|
||||
352
|
||||
don
|
||||
1024
|
||||
such
|
||||
wanted
|
||||
615
|
||||
mealtime
|
||||
provides
|
||||
prefer
|
||||
1288
|
||||
257
|
||||
across
|
||||
continent
|
||||
overnight
|
||||
local
|
||||
route
|
||||
746
|
||||
off
|
||||
j31
|
||||
closest
|
||||
19
|
||||
lax
|
||||
l10
|
||||
be1
|
||||
1994
|
||||
red
|
||||
eye
|
||||
not
|
||||
aa
|
||||
dca
|
||||
determine
|
||||
1200
|
||||
1205
|
||||
dtw
|
||||
airfares
|
||||
capacities
|
||||
200
|
||||
town
|
||||
lga
|
||||
300
|
||||
1993
|
||||
database
|
||||
1765
|
||||
eight
|
||||
up
|
||||
originate
|
||||
look
|
||||
cp
|
||||
carries
|
||||
here
|
||||
201
|
||||
located
|
||||
dinnertime
|
||||
1039
|
||||
lastest
|
||||
1222
|
||||
they
|
||||
just
|
||||
d
|
||||
limo
|
||||
3724
|
||||
210
|
||||
stapleton
|
||||
343
|
||||
1145
|
||||
schedules
|
||||
932
|
||||
nonstops
|
||||
without
|
||||
landing
|
||||
b
|
||||
midway
|
||||
217
|
||||
bound
|
||||
727
|
||||
takeoff
|
||||
324
|
||||
train
|
||||
along
|
||||
friends
|
||||
transcontinental
|
||||
missouri
|
||||
reservations
|
||||
lives
|
||||
767
|
||||
269
|
||||
ac
|
||||
atl
|
||||
month
|
||||
taking
|
||||
repeat
|
||||
845
|
||||
airplanes
|
||||
buy
|
||||
still
|
||||
itinerary
|
||||
actually
|
||||
earlier
|
||||
various
|
||||
reaching
|
||||
very
|
||||
names
|
||||
505
|
||||
grounds
|
||||
ap68
|
||||
must
|
||||
kennedy
|
||||
operation
|
||||
4400
|
||||
1201
|
||||
297
|
||||
question
|
||||
combination
|
||||
basis
|
||||
laying
|
||||
1133
|
||||
650
|
||||
tonight
|
||||
43
|
||||
ls
|
||||
sam
|
||||
ap58
|
||||
once
|
||||
nighttime
|
||||
yx
|
||||
kw
|
||||
212
|
||||
1600
|
||||
tpa
|
||||
prior
|
||||
good
|
||||
1800
|
||||
819
|
||||
inform
|
||||
k
|
||||
dc9
|
||||
305
|
||||
anything
|
||||
771
|
||||
459
|
||||
calling
|
||||
designate
|
||||
417
|
||||
spend
|
||||
hou
|
||||
1220
|
||||
directly
|
||||
jet
|
||||
reverse
|
||||
staying
|
||||
l1011
|
||||
belong
|
||||
445
|
||||
515
|
||||
travels
|
||||
order
|
||||
mci
|
||||
150
|
||||
110
|
||||
connects
|
||||
charges
|
||||
minimum
|
||||
intercontinental
|
||||
497766
|
||||
sounds
|
||||
811
|
||||
seat
|
||||
final
|
||||
phl
|
||||
20
|
||||
start
|
||||
823
|
||||
1059
|
||||
271
|
||||
382
|
||||
able
|
||||
put
|
||||
locate
|
||||
hartfield
|
||||
scheduled
|
||||
run
|
||||
225
|
||||
1158
|
||||
equipment
|
||||
begins
|
||||
lands
|
||||
reaches
|
||||
carried
|
||||
wn
|
||||
bn
|
||||
try
|
||||
included
|
||||
130
|
||||
continuing
|
||||
india
|
||||
lester
|
||||
pearson
|
||||
listings
|
||||
1209
|
||||
everywhere
|
||||
sd
|
||||
whether
|
||||
offered
|
||||
486
|
||||
1300
|
||||
950
|
||||
usa
|
||||
1045
|
||||
al
|
||||
currently
|
||||
enroute
|
||||
visit
|
||||
them
|
||||
takes
|
||||
55
|
||||
thing
|
||||
705
|
||||
fridays
|
||||
catch
|
||||
straight
|
||||
advertises
|
||||
having
|
||||
planning
|
||||
listed
|
||||
1055
|
||||
405
|
||||
468
|
||||
equal
|
||||
working
|
||||
sb
|
||||
hopefully
|
||||
dh8
|
||||
symbols
|
||||
sort
|
||||
cover
|
||||
810
|
||||
operating
|
||||
320
|
||||
639
|
||||
seventeen
|
||||
1207
|
||||
608
|
||||
besides
|
||||
companies
|
||||
've
|
||||
got
|
||||
somebody
|
||||
else
|
||||
wants
|
||||
level
|
||||
vicinity
|
||||
1940
|
||||
311
|
||||
mia
|
||||
instead
|
||||
priced
|
||||
eleven
|
||||
comes
|
||||
greatest
|
||||
summer
|
||||
economic
|
||||
bay
|
||||
402
|
||||
gets
|
||||
date
|
||||
1020
|
||||
730
|
||||
400
|
||||
doesn
|
||||
toward
|
||||
home
|
||||
1850
|
||||
1505
|
||||
runs
|
||||
673
|
||||
723
|
||||
thanks
|
||||
bring
|
||||
zone
|
||||
yyz
|
||||
afternoons
|
||||
non
|
||||
largest
|
||||
500
|
||||
come
|
||||
428
|
||||
98
|
||||
qualify
|
||||
279
|
||||
137338
|
||||
d10
|
||||
539
|
||||
fine
|
||||
while
|
||||
665
|
||||
concerning
|
||||
iah
|
||||
1230
|
||||
oak
|
||||
preferably
|
||||
twelve
|
||||
3357
|
||||
323
|
||||
nights
|
||||
229
|
||||
regarding
|
||||
seven
|
||||
inexpensive
|
||||
420
|
||||
416
|
||||
repeating
|
||||
scenario
|
||||
139
|
||||
82
|
||||
kindly
|
||||
limousines
|
||||
345
|
||||
afterwards
|
||||
734
|
||||
place
|
||||
includes
|
||||
106
|
||||
1026
|
||||
124
|
||||
fifteen
|
||||
bna
|
||||
supper
|
||||
oh
|
||||
71
|
||||
thereafter
|
||||
2153
|
||||
year
|
||||
discount
|
||||
1130
|
||||
1030
|
||||
world
|
||||
trans
|
||||
including
|
||||
represented
|
||||
o
|
||||
'hare
|
||||
exceeding
|
||||
815
|
||||
928
|
||||
163
|
||||
bur
|
||||
419
|
||||
cvg
|
||||
1017
|
||||
315
|
||||
842
|
||||
1083
|
||||
0900
|
||||
longest
|
||||
called
|
||||
snacks
|
||||
645
|
||||
ever
|
||||
single
|
131
Makefile
131
Makefile
|
@ -71,7 +71,7 @@ INCLUDEPATH:= $(addprefix $(SOURCEDIR)/, Common/Include CNTKv2LibraryDll CNTKv2L
|
|||
# COMMON_FLAGS include settings that are passed both to NVCC and C++ compilers.
|
||||
COMMON_FLAGS:= -D_POSIX_SOURCE -D_XOPEN_SOURCE=600 -D__USE_XOPEN2K -std=c++11
|
||||
CPPFLAGS:=
|
||||
CXXFLAGS:= -msse3 -mssse3 -std=c++0x -fopenmp -fpermissive -fPIC -Werror -fcheck-new
|
||||
CXXFLAGS:= -msse4.1 -mssse3 -std=c++0x -fopenmp -fpermissive -fPIC -Werror -fcheck-new
|
||||
LIBPATH:=
|
||||
LIBS:=
|
||||
LDFLAGS:=
|
||||
|
@ -375,6 +375,8 @@ CNTKLIBRARY_SRC =\
|
|||
$(SOURCEDIR)/CNTKv2LibraryDll/Utils.cpp \
|
||||
$(SOURCEDIR)/CNTKv2LibraryDll/Value.cpp \
|
||||
$(SOURCEDIR)/CNTKv2LibraryDll/Variable.cpp \
|
||||
$(SOURCEDIR)/CNTKv2LibraryDll/Learner.cpp \
|
||||
|
||||
|
||||
CNTKLIBRARY_SRC+=$(CNTK_COMMON_SRC)
|
||||
CNTKLIBRARY_SRC+=$(COMPUTATION_NETWORK_LIB_SRC)
|
||||
|
@ -437,7 +439,7 @@ EVAL_SRC=\
|
|||
$(SOURCEDIR)/ActionsLib/NetworkFactory.cpp \
|
||||
$(SOURCEDIR)/ActionsLib/NetworkDescriptionLanguage.cpp \
|
||||
$(SOURCEDIR)/ActionsLib/SimpleNetworkBuilder.cpp \
|
||||
$(SOURCEDIR)/ActionsLib/NDLNetworkBuilder.cpp
|
||||
$(SOURCEDIR)/ActionsLib/NDLNetworkBuilder.cpp \
|
||||
|
||||
EVAL_SRC+=$(SGDLIB_SRC)
|
||||
EVAL_SRC+=$(COMPUTATION_NETWORK_LIB_SRC)
|
||||
|
@ -814,6 +816,127 @@ $(CNTK_CORE_BS): $(SOURCEDIR)/CNTK/BrainScript/CNTKCoreLib/CNTK.core.bs
|
|||
@echo bin-placing deployable resource files
|
||||
cp -f $^ $@
|
||||
|
||||
########################################
|
||||
# Unit Tests
|
||||
########################################
|
||||
|
||||
# use system pre-installed Boost libraries
|
||||
# Todo: use our own version of boost libraries
|
||||
BOOSTLIB_PATH = /usr/lib/x86_64-linux-gnu
|
||||
BOOSTLIBS := boost_unit_test_framework boost_filesystem boost_system
|
||||
|
||||
UNITTEST_EVAL_SRC = \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/EvalTests/EvalExtendedTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/EvalTests/stdafx.cpp
|
||||
|
||||
UNITTEST_EVAL_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(UNITTEST_EVAL_SRC))
|
||||
|
||||
UNITTEST_EVAL := $(BINDIR)/evaltests
|
||||
# Temporarily not build unit tests as the docker image does not include boost.
|
||||
#ALL += $(UNITTEST_EVAL)
|
||||
#SRC += $(UNITTEST_EVAL_SRC)
|
||||
|
||||
$(UNITTEST_EVAL) : $(UNITTEST_EVAL_OBJ) | $(EVAL_LIB) $(CNTKMATH_LIB)
|
||||
@echo $(SEPARATOR)
|
||||
@mkdir -p $(dir $@)
|
||||
@echo building $@ for $(ARCH) with build type $(BUILDTYPE)
|
||||
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(LIBDIR) $(BOOSTLIB_PATH)) -o $@ $^ $(patsubst %, -l%, $(BOOSTLIBS)) -l$(EVAL) -l$(CNTKMATH)
|
||||
|
||||
#TODO: create project specific makefile or rules to avoid adding project specific path to the global path
|
||||
INCLUDEPATH += $(SOURCEDIR)/Readers/CNTKTextFormatReader
|
||||
|
||||
UNITTEST_READER_SRC = \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/CNTKTextFormatReaderTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/HTKLMFReaderTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/ImageReaderTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/ReaderLibTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/UCIFastReaderTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/stdafx.cpp \
|
||||
$(SOURCEDIR)/Readers/CNTKTextFormatReader/Indexer.cpp \
|
||||
$(SOURCEDIR)/Readers/CNTKTextFormatReader/TextParser.cpp \
|
||||
|
||||
UNITTEST_READER_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(UNITTEST_READER_SRC))
|
||||
|
||||
UNITTEST_READER := $(BINDIR)/readertests
|
||||
# Temporarily not build unit tests as the docker image does not include boost.
|
||||
#ALL += $(UNITTEST_READER)
|
||||
#SRC += $(UNITTEST_READER_SRC)
|
||||
|
||||
$(UNITTEST_READER): $(UNITTEST_READER_OBJ) | $(HTKMLFREADER) $(HTKDESERIALIZERS) $(UCIFASTREADER) $(COMPOSITEDATAREADER) $(IMAGEREADER) $(CNTKMATH_LIB)
|
||||
@echo $(SEPARATOR)
|
||||
@mkdir -p $(dir $@)
|
||||
@echo building $@ for $(ARCH) with build type $(BUILDTYPE)
|
||||
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(LIBDIR) $(BOOSTLIB_PATH)) -o $@ $^ $(patsubst %, -l%, $(BOOSTLIBS)) -l$(CNTKMATH)
|
||||
|
||||
UNITTEST_NETWORK_SRC = \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/NetworkTests/OperatorEvaluation.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/NetworkTests/stdafx.cpp \
|
||||
$(SOURCEDIR)/CNTK/ModelEditLanguage.cpp \
|
||||
$(SOURCEDIR)/ActionsLib/TrainActions.cpp \
|
||||
$(SOURCEDIR)/ActionsLib/EvalActions.cpp \
|
||||
$(SOURCEDIR)/ActionsLib/OtherActions.cpp \
|
||||
$(SOURCEDIR)/ActionsLib/SpecialPurposeActions.cpp \
|
||||
$(SOURCEDIR)/ActionsLib/NetworkFactory.cpp \
|
||||
$(SOURCEDIR)/ActionsLib/NetworkDescriptionLanguage.cpp \
|
||||
$(SOURCEDIR)/ActionsLib/SimpleNetworkBuilder.cpp \
|
||||
$(SOURCEDIR)/ActionsLib/NDLNetworkBuilder.cpp \
|
||||
$(SOURCEDIR)/CNTK/BrainScript/BrainScriptEvaluator.cpp \
|
||||
$(SOURCEDIR)/CNTK/BrainScript/BrainScriptParser.cpp \
|
||||
$(SOURCEDIR)/CNTK/BrainScript/BrainScriptTest.cpp \
|
||||
|
||||
UNITTEST_NETWORK_SRC += $(COMPUTATION_NETWORK_LIB_SRC)
|
||||
UNITTEST_NETWORK_SRC += $(CNTK_COMMON_SRC)
|
||||
UNITTEST_NETWORK_SRC += $(SEQUENCE_TRAINING_LIB_SRC)
|
||||
UNITTEST_NETWORK_SRC += $(SGDLIB_SRC)
|
||||
|
||||
UNITTEST_NETWORK_OBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(patsubst %.cpp, $(OBJDIR)/%.o, $(UNITTEST_NETWORK_SRC)))
|
||||
|
||||
UNITTEST_NETWORK := $(BINDIR)/networktests
|
||||
# Temporarily not build unit tests as the docker image does not include boost.
|
||||
#ALL += $(UNITTEST_NETWORK)
|
||||
#SRC += $(UNITTEST_NETWORK_SRC)
|
||||
|
||||
$(UNITTEST_NETWORK): $(UNITTEST_NETWORK_OBJ) | $(CNTKMATH_LIB) $(CNTKTEXTFORMATREADER)
|
||||
@echo $(SEPARATOR)
|
||||
@mkdir -p $(dir $@)
|
||||
@echo building $@ for $(ARCH) with build type $(BUILDTYPE)
|
||||
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(NVMLLIBPATH) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(LIBDIR) $(LIBPATH) $(BOOSTLIB_PATH)) -o $@ $^ $(patsubst %, -l%, $(BOOSTLIBS)) -l$(CNTKMATH) $(LIBS)
|
||||
|
||||
UNITTEST_MATH_SRC = \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/MathTests/BatchNormalizationEngineTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/MathTests/BlockMultiplierTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/MathTests/constants.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/MathTests/ConvolutionEngineTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/MathTests/CPUMatrixTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/MathTests/CPUSparseMatrixTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/MathTests/fixtures.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/MathTests/GPUMatrixCudaBlasTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/MathTests/GPUMatrixTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/MathTests/GPUSparseMatrixTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/MathTests/MatrixBlasTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/MathTests/MatrixDataSynchronizationTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/MathTests/MatrixFileWriteReadTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/MathTests/MatrixQuantizerTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/MathTests/MatrixSparseDenseInteractionsTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/MathTests/MatrixTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/MathTests/stdafx.cpp \
|
||||
|
||||
UNITTEST_MATH_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(UNITTEST_MATH_SRC))
|
||||
|
||||
UNITTEST_MATH := $(BINDIR)/mathtests
|
||||
# Temporarily not build unit tests as the docker image does not include boost.
|
||||
#ALL += $(UNITTEST_MATH)
|
||||
#SRC += $(UNITTEST_MATH_SRC)
|
||||
|
||||
$(UNITTEST_MATH): $(UNITTEST_MATH_OBJ) | $(CNTKMATH_LIB)
|
||||
@echo $(SEPARATOR)
|
||||
@mkdir -p $(dir $@)
|
||||
@echo building $@ for $(ARCH) with build type $(BUILDTYPE)
|
||||
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(NVMLLIBPATH) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(LIBDIR) $(LIBPATH) $(BOOSTLIB_PATH)) -o $@ $^ $(patsubst %, -l%, $(BOOSTLIBS)) $(LIBS) -l$(CNTKMATH)
|
||||
|
||||
unittests: $(UNITTEST_EVAL) $(UNITTEST_READER) $(UNITTEST_NETWORK) $(UNITTEST_MATH)
|
||||
|
||||
|
||||
########################################
|
||||
# General compile and dependency rules
|
||||
########################################
|
||||
|
@ -838,13 +961,13 @@ $(OBJDIR)/%.o : %.cu $(BUILD_CONFIGURATION)
|
|||
@mkdir -p $(dir $@)
|
||||
$(NVCC) -c $< -o $@ $(COMMON_FLAGS) $(CUFLAGS) $(INCLUDEPATH:%=-I%) -Xcompiler "-fPIC -Werror"
|
||||
|
||||
$(OBJDIR)/%.o : %.cpp $(BUILD_CONFIGURATION)
|
||||
$(OBJDIR)/%.o : %.cpp $(BUILD_CONFIGURATION)
|
||||
@echo $(SEPARATOR)
|
||||
@echo creating $@ for $(ARCH) with build type $(BUILDTYPE)
|
||||
@mkdir -p $(dir $@)
|
||||
$(CXX) -c $< -o $@ $(COMMON_FLAGS) $(CPPFLAGS) $(CXXFLAGS) $(INCLUDEPATH:%=-I%) -MD -MP -MF ${@:.o=.d}
|
||||
|
||||
.PHONY: clean buildall all
|
||||
.PHONY: clean buildall all unittests
|
||||
|
||||
clean:
|
||||
@echo $(SEPARATOR)
|
||||
|
|
11
README.md
11
README.md
|
@ -1,6 +1,11 @@
|
|||
# CNTK
|
||||
|
||||
## Latest news
|
||||
*2016-07-15.* V 1.6 Binary release
|
||||
CNTK v.1.6 binaries are on the [CNTK Releases page](https://github.com/Microsoft/CNTK/releases)
|
||||
|
||||
*2016-07-12.* We have further expanded Licensing options for CNTK 1bit-SGD and related components. See the details at the [Wiki page](https://github.com/microsoft/cntk/wiki/CNTK-1bit-SGD-License). These new options are an extension of the new CNTK 1bit-SGD License that we have announced on Jun 23, 2016.
|
||||
|
||||
*2016-07-05.* CNTK now supports *Deconvolution* and *Unpooling*. See the usage example in the Network number 4 in [MNIST Sample](https://github.com/Microsoft/CNTK/blob/master/Examples/Image/MNIST/README.md).
|
||||
|
||||
*2016-06-23.* New License Terms for CNTK 1bit-SGD and related components.
|
||||
|
@ -8,12 +13,6 @@ Effective immediately the License Terms for CNTK 1bit-SGD and related components
|
|||
|
||||
*2016-06-20.* A [post](http://itpeernetwork.intel.com/accelerating-the-computational-network-tool-kit-with-intel-mkl/) on Intel MKL and CNTK is published in the [Intel IT Peer Network](http://itpeernetwork.intel.com/accelerating-the-computational-network-tool-kit-with-intel-mkl/)
|
||||
|
||||
*2016-06-16.* V 1.5 Binary release. NuGet Package with CNTK Model Evaluation Libraries.
|
||||
NuGet Package is added to CNTK v.1.5 binaries. See [CNTK Releases page](https://github.com/Microsoft/CNTK/releases) and [NuGet Package description](https://github.com/Microsoft/CNTK/wiki/Nuget-Package-for-Evaluation).
|
||||
|
||||
*2016-06-15.* CNTK now supports building against a custom Intel® Math Kernel Library (MKL).
|
||||
See [setup instructions](https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-your-machine) on how to set this up for your platform.
|
||||
|
||||
See [all news](https://github.com/Microsoft/CNTK/wiki/News).
|
||||
|
||||
## What is CNTK
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
This directory contains different script helping using different components of CNTK.
|
||||
|
||||
### CNTK Text format Converters
|
||||
Two Python Scripts for converting Data to CNTK Text format for using as an input for CNTK Text Format Reader (see https://github.com/microsoft/cnTK/wiki/CNTKTextFormat-Reader).
|
||||
```
|
||||
txt2ctf.py
|
||||
```
|
||||
Converts a set of dictionary files and a plain text file to CNTK Text format. Run ```python txt2ctf.py -h``` to see usage instructions. See the comments in the beginning of the script file for the specific usage example.
|
||||
|
||||
```
|
||||
uci2ctf.py
|
||||
```
|
||||
Converts data stored in a text file in UCI format to CNTK Text format. Run ```python uci2ctf.py -h``` to see usage instructions and example. Also see a usage example below:
|
||||
```
|
||||
python Scripts/uci2ctf.py --input_file Examples/Image/MNIST/Data/Train-28x28.txt --features_start 1 --features_dim 784 --labels_start 0 --labels_dim 1 --num_labels 10 --output_file Examples/Image/MNIST/Data/Train-28x28_cntk_text.txt
|
||||
```
|
||||
```input_file``` – original dataset in the (columnar) UCI format
|
||||
```features_start``` – index of the first feature column (start parameter in the UCIFastReader config, see https://github.com/Microsoft/CNTK/wiki/UCI-Fast-Reader)
|
||||
```features_dim``` – number of feature columns (dim parameter in the UCIFastReader config)
|
||||
```labels_start``` - index of the first label column
|
||||
```labels_dim``` – number of label columns
|
||||
```num_labels``` – number of possible label values (labelDim parameter in the UCIFastReader config)
|
||||
```output_file``` – path and filename of the resulting dataset.
|
||||
|
|
@ -191,6 +191,7 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
|
|||
else if (EqualInsensitive(nodeType, OperationNameOf(KhatriRaoProductNode), L"ColumnwiseCrossProduct")) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(LearnableParameter), L"Parameter")) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(LogNode))) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(LogPlusNode))) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(LogSoftmaxNode))) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(LogisticNode), L"Logistic")) ret = true;
|
||||
else if (EqualInsensitive(nodeType, OperationNameOf(LookupTableNode))) ret = true;
|
||||
|
|
|
@ -53,7 +53,6 @@ public:
|
|||
|
||||
__declspec_noreturn static inline void EvaluationError(const wstring &msg, TextLocation where)
|
||||
{
|
||||
//Microsoft::MSR::CNTK::DebugUtil::PrintCallStack();
|
||||
throw EvaluationException(msg, where);
|
||||
}
|
||||
|
||||
|
|
|
@ -89,9 +89,18 @@ struct Issue
|
|||
// Because it is often hard to recognize an issue only from the point where it occurred, we also report the history in compact visual form.
|
||||
// Since often multiple contexts are on the same source line, we only print each source line once in a consecutive row of contexts.
|
||||
/*static*/ void TextLocation::PrintIssue(const vector<TextLocation>& locations, const wchar_t* errorKind, const wchar_t* kind, const wchar_t* what)
|
||||
{
|
||||
wstring error = CreateIssueMessage(locations, errorKind, kind, what);
|
||||
fprintf(stderr, "%ls", error.c_str());
|
||||
fflush(stderr);
|
||||
}
|
||||
|
||||
/*static*/ wstring TextLocation::CreateIssueMessage(const vector<TextLocation>& locations, const wchar_t* errorKind, const wchar_t* kind, const wchar_t* what)
|
||||
{
|
||||
vector<Issue> issues; // tracing the error backwards
|
||||
size_t symbolIndex = 0;
|
||||
wstring message;
|
||||
|
||||
for (size_t n = 0; n < locations.size(); n++)
|
||||
{
|
||||
let& location = locations[n];
|
||||
|
@ -125,20 +134,23 @@ struct Issue
|
|||
if (!locations.empty()) // (be resilient to some throwers not having a TextLocation; to be avoided)
|
||||
{
|
||||
let& firstLoc = issues.front().location;
|
||||
fprintf(stderr, "[CALL STACK]\n");
|
||||
message += wstrprintf(L"[CALL STACK]\n");
|
||||
for (auto i = issues.rbegin(); i != issues.rend(); i++)
|
||||
{
|
||||
let& issue = *i;
|
||||
auto& where = issue.location;
|
||||
const auto& lines = where.GetSourceFile().lines;
|
||||
const auto line = (where.lineNo == lines.size()) ? L"(end)" : lines[where.lineNo].c_str();
|
||||
fprintf(stderr, " %ls\n %ls\n", line, issue.markup.c_str());
|
||||
message += wstrprintf(L" %ls\n %ls\n", line, issue.markup.c_str());
|
||||
}
|
||||
fprintf(stderr, "%ls while %ls: %ls(%d)", errorKind, kind, firstLoc.GetSourceFile().path.c_str(), (int)firstLoc.lineNo + 1 /*report 1-based*/);
|
||||
message += wstrprintf(L"%ls while %ls: %ls(%d)", errorKind, kind, firstLoc.GetSourceFile().path.c_str(), (int)firstLoc.lineNo + 1 /*report 1-based*/);
|
||||
}
|
||||
else
|
||||
fprintf(stderr, "%ls while %ls", errorKind, kind);
|
||||
fprintf(stderr, ": %ls\n", what), fflush(stderr);
|
||||
{
|
||||
message += wstrprintf(L"%ls while %ls", errorKind, kind);
|
||||
}
|
||||
message += wstrprintf(L": %ls\n", what);
|
||||
return message;
|
||||
}
|
||||
/*static*/ vector<SourceFile> TextLocation::sourceFileMap;
|
||||
|
||||
|
|
|
@ -37,6 +37,7 @@ struct TextLocation // position in the text. Lightweight value struct that we ca
|
|||
|
||||
// helpers for pretty-printing errors: Show source-code line with ...^ under it to mark up the point of error
|
||||
static void PrintIssue(const vector<TextLocation>& locations, const wchar_t* errorKind, const wchar_t* kind, const wchar_t* what);
|
||||
static std::wstring CreateIssueMessage(const vector<TextLocation>& locations, const wchar_t* errorKind, const wchar_t* kind, const wchar_t* what);
|
||||
static void Trace(TextLocation, const wchar_t* traceKind, const wchar_t* op, const wchar_t* exprPath);
|
||||
|
||||
// construction
|
||||
|
@ -77,8 +78,12 @@ public:
|
|||
} // where the error happened
|
||||
virtual const wchar_t* kind() const = 0; // e.g. "warning" or "error"
|
||||
|
||||
wstring GetError(const std::wstring& linePrefix) const override
|
||||
{
|
||||
return TextLocation::CreateIssueMessage(locations, linePrefix.c_str(), kind(), msra::strfun::utf16(what()).c_str());
|
||||
}
|
||||
// pretty-print this as an error message
|
||||
void /*ScriptingException::*/ PrintError(const std::wstring& linePrefix) const
|
||||
void /*ScriptingException::*/ PrintError(const std::wstring& linePrefix) const override
|
||||
{
|
||||
TextLocation::PrintIssue(locations, linePrefix.c_str(), kind(), msra::strfun::utf16(what()).c_str());
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "NDLNetworkBuilder.h"
|
||||
#include "ModelEditLanguage.h"
|
||||
#include "CPUMatrix.h" // used for SetNumThreads()
|
||||
#include "GPUMatrix.h" // used for SyncGuard::EnableSync()
|
||||
#include "CommonMatrix.h"
|
||||
#include "SGD.h"
|
||||
#include "MPIWrapper.h"
|
||||
|
@ -117,6 +118,23 @@ size_t GetMaxEpochs(const ConfigParameters& configParams)
|
|||
return maxEpochs;
|
||||
}
|
||||
|
||||
#ifndef CPUONLY
|
||||
// abort execution is GPU is not supported (e.g. compute capability not supported)
|
||||
void CheckSupportForGpu(DEVICEID_TYPE deviceId)
|
||||
{
|
||||
auto gpuData = GetGpuData(deviceId);
|
||||
if (gpuData.validity == GpuValidity::ComputeCapabilityNotSupported)
|
||||
{
|
||||
InvalidArgument("CNTK: The GPU (%s) has compute capability %d.%d. CNTK is only supported on GPUs with compute capability 3.0 or greater",
|
||||
gpuData.name.c_str(), gpuData.versionMajor, gpuData.versionMinor);
|
||||
}
|
||||
else if (gpuData.validity == GpuValidity::UnknownDevice)
|
||||
{
|
||||
InvalidArgument("CNTK: Unknown GPU with Device ID %d.", gpuData.deviceId);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// special temporary function to guard against a now invalid usage of "truncated" which exists in some IPG production setups
|
||||
static void DisableLegacyTruncationSettings(const ConfigParameters& TopLevelConfig, const ConfigParameters& commandConfig)
|
||||
{
|
||||
|
@ -373,6 +391,30 @@ void PrintUsageInfo()
|
|||
LOGPRINTF(stderr, "-------------------------------------------------------------------\n");
|
||||
}
|
||||
|
||||
// print gpu info for current gpu devices (e.g. Device[0]: cores = 2496; computeCapability = 5.2; type = "Quadro M4000"; memory = 8192 MB)
|
||||
void PrintGpuInfo()
|
||||
{
|
||||
#ifndef CPUONLY
|
||||
std::vector<GpuData> gpusData = GetAllGpusData();
|
||||
|
||||
if (gpusData.empty())
|
||||
{
|
||||
LOGPRINTF(stderr, "No GPUs found\n");
|
||||
return;
|
||||
}
|
||||
|
||||
LOGPRINTF(stderr, "-------------------------------------------------------------------\n");
|
||||
LOGPRINTF(stderr, "GPU info:\n\n");
|
||||
|
||||
for (GpuData& data : gpusData)
|
||||
{
|
||||
LOGPRINTF(stderr, "\t\tDevice[%d]: cores = %d; computeCapability = %d.%d; type = \"%s\"; memory = %lu MB\n",
|
||||
data.deviceId, data.cudaCores, data.versionMajor, data.versionMinor, data.name.c_str(), data.totalMemory);
|
||||
}
|
||||
LOGPRINTF(stderr, "-------------------------------------------------------------------\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// main() for use with BrainScript
|
||||
// ---------------------------------------------------------------------------
|
||||
|
@ -464,6 +506,21 @@ int wmainWithBS(int argc, wchar_t* argv[]) // called from wmain which is a wrapp
|
|||
let valp = BS::Evaluate(expr); // evaluate parse into a dictionary
|
||||
let& config = valp.AsRef<ScriptableObjects::IConfigRecord>(); // this is the dictionary
|
||||
|
||||
#ifndef CPUONLY
|
||||
auto valpp = config.Find(L"deviceId");
|
||||
if (valpp)
|
||||
{
|
||||
auto valp = *valpp;
|
||||
if (!valp.Is<ScriptableObjects::String>()) // if it's not string 'auto' or 'cpu', then it's a gpu
|
||||
{
|
||||
if (static_cast<int>(valp) >= 0) // gpu (id >= 0)
|
||||
{
|
||||
CheckSupportForGpu(valp); // throws if gpu is not supported
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// legacy parameters that have changed spelling
|
||||
if (config.Find(L"DoneFile")) // variables follow camel case (start with lower-case letters)
|
||||
InvalidArgument("Legacy spelling of 'DoneFile' no longer allowed. Use 'doneFile'.");
|
||||
|
@ -485,6 +542,10 @@ int wmainWithBS(int argc, wchar_t* argv[]) // called from wmain which is a wrapp
|
|||
|
||||
TracingGPUMemoryAllocator::SetTraceLevel(config(L"traceGPUMemoryAllocations", 0));
|
||||
|
||||
bool synchronizeCUDAKernelExecutions = config(L"synchronizeCUDAKernelExecutions", false);
|
||||
if (synchronizeCUDAKernelExecutions)
|
||||
SyncGuard::EnableSync();
|
||||
|
||||
// logging
|
||||
wstring logpath = config(L"stderr", L"");
|
||||
if (logpath != L"")
|
||||
|
@ -502,6 +563,9 @@ int wmainWithBS(int argc, wchar_t* argv[]) // called from wmain which is a wrapp
|
|||
// echo config info to log
|
||||
PrintBuiltInfo();
|
||||
|
||||
// echo gpu info to log
|
||||
PrintGpuInfo();
|
||||
|
||||
// execute the actions
|
||||
// std::string type = config(L"precision", "float");
|
||||
int numCPUThreads = config(L"numCPUThreads", 0);
|
||||
|
@ -559,6 +623,18 @@ int wmainOldCNTKConfig(int argc, wchar_t* argv[])
|
|||
{
|
||||
ConfigParameters config;
|
||||
std::string rawConfigString = ConfigParameters::ParseCommandLine(argc, argv, config); // get the command param set they want
|
||||
|
||||
#ifndef CPUONLY
|
||||
ConfigValue val = config("deviceId", "auto");
|
||||
if (!EqualCI(val, "cpu") && !EqualCI(val, "auto"))
|
||||
{
|
||||
if (static_cast<int>(val) >= 0) // gpu (id >= 0)
|
||||
{
|
||||
CheckSupportForGpu(static_cast<int>(val)); // throws if gpu is not supported
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
bool timestamping = config(L"timestamping", false);
|
||||
if (timestamping)
|
||||
{
|
||||
|
@ -602,6 +678,8 @@ int wmainOldCNTKConfig(int argc, wchar_t* argv[])
|
|||
}
|
||||
|
||||
PrintBuiltInfo(); // this one goes to log file
|
||||
PrintGpuInfo();
|
||||
|
||||
std::string timestamp = TimeDateStamp();
|
||||
|
||||
// dump config info
|
||||
|
|
|
@ -144,6 +144,7 @@
|
|||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\Common\CrossProcessMutex.h" />
|
||||
<ClInclude Include="..\Common\Include\basetypes.h" />
|
||||
<ClInclude Include="..\Common\Include\Basics.h" />
|
||||
<ClInclude Include="..\Common\Include\BestGpu.h" />
|
||||
<ClInclude Include="..\Common\Include\DataReader.h" />
|
||||
|
@ -222,4 +223,4 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets" />
|
||||
</Project>
|
||||
</Project>
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -47,6 +47,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
template <typename ElementType>
|
||||
class ComputationNode;
|
||||
|
||||
class File;
|
||||
}}}
|
||||
|
||||
// TODO: The following should be reconciled with the equivalent code in the CNTK implementation
|
||||
|
@ -135,352 +137,30 @@ namespace CNTK
|
|||
// Forward declarations
|
||||
class CompositeFunction;
|
||||
class Function;
|
||||
class Variable;
|
||||
|
||||
namespace Internal
|
||||
// Similar to make_shared except that it associates a custom deleter with the shared_ptr to ensure
|
||||
// that objects are deleted on the same side of the library DLL where they are allocated
|
||||
template <typename T, typename ...CtorArgTypes>
|
||||
inline std::shared_ptr<T> MakeSharedObject(CtorArgTypes&& ...ctorArgs)
|
||||
{
|
||||
// A reference count to be used as the base class for all reference counted types.
|
||||
class CNTK_API ReferenceCount
|
||||
{
|
||||
public:
|
||||
|
||||
ReferenceCount();
|
||||
virtual ~ReferenceCount();
|
||||
|
||||
size_t AddReference();
|
||||
size_t RemoveReference();
|
||||
size_t GetReferenceCount();
|
||||
|
||||
private:
|
||||
std::atomic<size_t>* m_rc;
|
||||
};
|
||||
|
||||
// A smart pointer to a reference counted object
|
||||
// T must be a type derived from ReferenceCount
|
||||
template <class T>
|
||||
class CNTK_API ReferenceCountedPtr final
|
||||
{
|
||||
typedef void(*ReferenceCountedObjectDeleter)(ReferenceCount* obj);
|
||||
|
||||
public:
|
||||
|
||||
ReferenceCountedPtr(T* ptr = nullptr, ReferenceCountedObjectDeleter deleter = nullptr) : m_objPtr(ptr), m_deleter(deleter)
|
||||
{
|
||||
AddReferenceIfNeeded();
|
||||
}
|
||||
|
||||
ReferenceCountedPtr(const ReferenceCountedPtr& other) : m_objPtr(nullptr), m_deleter(nullptr)
|
||||
{
|
||||
*this = other;
|
||||
}
|
||||
|
||||
ReferenceCountedPtr(ReferenceCountedPtr&& other) : m_objPtr(nullptr), m_deleter(nullptr)
|
||||
{
|
||||
*this = std::move(other);
|
||||
}
|
||||
|
||||
~ReferenceCountedPtr()
|
||||
{
|
||||
DeleteReferenceIfNeeded(m_objPtr, m_deleter);
|
||||
}
|
||||
|
||||
ReferenceCountedPtr& operator=(const ReferenceCountedPtr& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
T* oldPtr = m_objPtr;
|
||||
ReferenceCountedObjectDeleter oldDeleter = m_deleter;
|
||||
|
||||
m_objPtr = other.m_objPtr;
|
||||
m_deleter = other.m_deleter;
|
||||
AddReferenceIfNeeded();
|
||||
|
||||
DeleteReferenceIfNeeded(oldPtr, oldDeleter);
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
ReferenceCountedPtr& operator=(ReferenceCountedPtr&& other)
|
||||
{
|
||||
assert(this != &other);
|
||||
|
||||
T* oldPtr = m_objPtr;
|
||||
ReferenceCountedObjectDeleter oldDeleter = m_deleter;
|
||||
|
||||
m_objPtr = other.m_objPtr;
|
||||
m_deleter = other.m_deleter;
|
||||
// No change to ref-count of the adopted pointer.
|
||||
|
||||
other.m_objPtr = nullptr;
|
||||
other.m_deleter = nullptr;
|
||||
|
||||
DeleteReferenceIfNeeded(oldPtr, oldDeleter);
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Conversion to a ReferenceCountedSharedPtr instance of a base type
|
||||
template <typename Base, typename std::enable_if<std::is_base_of<Base, T>::value>::type* = nullptr>
|
||||
operator ReferenceCountedPtr<Base>()
|
||||
{
|
||||
return ReferenceCountedPtr<Base>(m_objPtr, m_deleter);
|
||||
}
|
||||
|
||||
T* operator->() const
|
||||
{
|
||||
return m_objPtr;
|
||||
}
|
||||
|
||||
T& operator*() const
|
||||
{
|
||||
return *m_objPtr;
|
||||
}
|
||||
|
||||
operator T*() const
|
||||
{
|
||||
return m_objPtr;
|
||||
}
|
||||
|
||||
T* GetPtr() const
|
||||
{
|
||||
return m_objPtr;
|
||||
}
|
||||
|
||||
private:
|
||||
void AddReferenceIfNeeded()
|
||||
{
|
||||
static_assert(std::is_base_of<ReferenceCount, T>::value, "ReferenceCountedPtr<T> can only be used when ReferenceCount is a base type of T!");
|
||||
|
||||
if (m_objPtr != nullptr)
|
||||
reinterpret_cast<ReferenceCount*>(m_objPtr)->AddReference();
|
||||
}
|
||||
|
||||
static void DeleteReferenceIfNeeded(T* objPtr, ReferenceCountedObjectDeleter deleter)
|
||||
{
|
||||
static_assert(std::is_base_of<ReferenceCount, T>::value, "ReferenceCountedPtr<T> can only be used when ReferenceCount is a base type of T!");
|
||||
|
||||
if (objPtr != nullptr)
|
||||
{
|
||||
size_t refCountRemaining = reinterpret_cast<ReferenceCount*>(objPtr)->RemoveReference();
|
||||
if (refCountRemaining == 0)
|
||||
{
|
||||
if (deleter != nullptr)
|
||||
deleter(reinterpret_cast<ReferenceCount*>(objPtr));
|
||||
else
|
||||
delete objPtr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
T* m_objPtr;
|
||||
ReferenceCountedObjectDeleter m_deleter;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
bool operator==(const ReferenceCountedPtr<T>& first, const ReferenceCountedPtr<T>& second)
|
||||
{
|
||||
return first.GetPtr() == second.GetPtr();
|
||||
}
|
||||
|
||||
// A wrapper around the STL vector implementation with a safe ABI to allow usage across the library DLL boundary
|
||||
// as STL vectors cannot be used across the DLL boundary
|
||||
template <typename T>
|
||||
class CNTK_API SimpleVector final
|
||||
{
|
||||
template <typename ValueType>
|
||||
friend CNTK_API bool operator==(const SimpleVector<ValueType>& first, const SimpleVector<ValueType>& second);
|
||||
|
||||
friend class CNTK::Function;
|
||||
|
||||
public:
|
||||
SimpleVector();
|
||||
|
||||
template <typename ContainerType, typename std::enable_if<std::is_same<ContainerType, std::vector<T>>::value ||
|
||||
std::is_same<ContainerType, std::initializer_list<T>>::value ||
|
||||
std::is_same<ContainerType, std::array<T, sizeof(ContainerType) / sizeof(T)>>::value>::type* = nullptr>
|
||||
SimpleVector(const ContainerType& initList)
|
||||
: SimpleVector(initList.size())
|
||||
{
|
||||
std::copy(initList.begin(), initList.end(), Data());
|
||||
}
|
||||
|
||||
SimpleVector(size_t numElements, const T& initVal = T());
|
||||
~SimpleVector();
|
||||
|
||||
SimpleVector(const SimpleVector& other);
|
||||
SimpleVector& operator=(const SimpleVector& other);
|
||||
|
||||
SimpleVector(SimpleVector&& other);
|
||||
SimpleVector& operator=(SimpleVector&& other);
|
||||
|
||||
T& operator[](size_t idx);
|
||||
const T& operator[](size_t idx) const;
|
||||
|
||||
size_t Size() const;
|
||||
|
||||
T* Data();
|
||||
const T* Data() const;
|
||||
|
||||
void PushBack(const T& value);
|
||||
void PushBack(T&& value);
|
||||
|
||||
operator std::vector<T>() const
|
||||
{
|
||||
std::vector<T> retVector(Size());
|
||||
for (size_t i = 0; i < Size(); ++i)
|
||||
retVector[i] = this->operator[](i);
|
||||
|
||||
return retVector;
|
||||
}
|
||||
|
||||
std::unordered_set<T> GetAsUnorderedSet(bool ensureUnique = true)
|
||||
{
|
||||
std::unordered_set<T> retSet;
|
||||
for (size_t i = 0; i < Size(); ++i)
|
||||
{
|
||||
auto insertRet = retSet.insert(this->operator[](i));
|
||||
if (ensureUnique && !insertRet.second)
|
||||
RuntimeError("A SimpleVector with duplicate elements cannot be converted to an unordered_set");
|
||||
}
|
||||
|
||||
return retSet;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<T>* m_vector;
|
||||
};
|
||||
|
||||
template <typename ValueType>
|
||||
CNTK_API bool operator==(const SimpleVector<ValueType>& first, const SimpleVector<ValueType>& second);
|
||||
|
||||
template <typename ValueType>
|
||||
bool operator!=(const SimpleVector<ValueType>& first, const SimpleVector<ValueType>& second)
|
||||
{
|
||||
return !(first == second);
|
||||
}
|
||||
|
||||
// A wrapper around the STL set implementation with a safe ABI to allow usage across the library DLL boundary
|
||||
// as STL sets cannot be used across the DLL boundary
|
||||
template <typename KeyType>
|
||||
class CNTK_API SimpleSet final
|
||||
{
|
||||
friend class CNTK::CompositeFunction;
|
||||
|
||||
template <typename T>
|
||||
friend CNTK_API bool operator==(const SimpleSet<T>& first, const SimpleSet<T>& second);
|
||||
|
||||
public:
|
||||
SimpleSet();
|
||||
~SimpleSet();
|
||||
|
||||
SimpleSet(const SimpleSet& other);
|
||||
SimpleSet& operator=(const SimpleSet& other);
|
||||
|
||||
SimpleSet(SimpleSet&& other);
|
||||
SimpleSet& operator=(SimpleSet&& other);
|
||||
|
||||
bool Insert(const KeyType& key);
|
||||
bool Contains(const KeyType& key) const;
|
||||
|
||||
size_t Size() const;
|
||||
|
||||
operator SimpleVector<KeyType>() const;
|
||||
|
||||
operator std::unordered_set<KeyType>() const
|
||||
{
|
||||
return ((SimpleVector<KeyType>)(*this)).GetAsUnorderedSet();
|
||||
}
|
||||
|
||||
static SimpleSet<KeyType> CreateSimpleSet(const std::unordered_set<KeyType>& initSet)
|
||||
{
|
||||
SimpleSet<KeyType> simpleSet;
|
||||
for (auto key : initSet)
|
||||
simpleSet.Insert(key);
|
||||
|
||||
return simpleSet;
|
||||
}
|
||||
|
||||
private:
|
||||
std::unordered_set<KeyType>* m_set;
|
||||
};
|
||||
|
||||
template <typename KeyType>
|
||||
CNTK_API bool operator==(const SimpleSet<KeyType>& first, const SimpleSet<KeyType>& second);
|
||||
|
||||
template <typename KeyType>
|
||||
bool operator!=(const SimpleSet<KeyType>& first, const SimpleSet<KeyType>& second)
|
||||
{
|
||||
return !(first == second);
|
||||
}
|
||||
|
||||
// A wrapper aroound the STL map implementation with a safe ABI to allow usage across the library DLL boundary
|
||||
// as STL maps cannot be used across the DLL boundary
|
||||
template <typename KeyType, typename ValueType>
|
||||
class CNTK_API SimpleMap final
|
||||
{
|
||||
friend class CNTK::CompositeFunction;
|
||||
friend class CNTK::Function;
|
||||
|
||||
public:
|
||||
SimpleMap();
|
||||
~SimpleMap();
|
||||
|
||||
SimpleMap(const SimpleMap& other);
|
||||
SimpleMap& operator=(const SimpleMap& other);
|
||||
|
||||
SimpleMap(SimpleMap&& other);
|
||||
SimpleMap& operator=(SimpleMap&& other);
|
||||
|
||||
ValueType& operator[](const KeyType& key);
|
||||
const ValueType& operator[](const KeyType& key) const;
|
||||
|
||||
bool Insert(const KeyType& key, const ValueType& value);
|
||||
bool Contains(const KeyType& key) const;
|
||||
size_t Size() const;
|
||||
|
||||
SimpleSet<KeyType> Keys() const;
|
||||
|
||||
static SimpleMap<KeyType, ValueType> CreateSimpleMap(const std::unordered_map<KeyType, ValueType>& initMap)
|
||||
{
|
||||
SimpleMap<KeyType, ValueType> simpleMap;
|
||||
for (auto keyValuePair : initMap)
|
||||
simpleMap.Insert(keyValuePair.first, keyValuePair.second);
|
||||
|
||||
return simpleMap;
|
||||
}
|
||||
|
||||
private:
|
||||
std::unordered_map<KeyType, ValueType>* m_map;
|
||||
};
|
||||
auto objPtr = new T(std::forward<CtorArgTypes>(ctorArgs)...);
|
||||
return std::shared_ptr<T>(objPtr, [](T* ptr) { delete ptr; });
|
||||
}
|
||||
|
||||
// Forward declarations
|
||||
class NDArrayView;
|
||||
typedef Internal::ReferenceCountedPtr<NDArrayView> NDArrayViewPtr;
|
||||
typedef std::shared_ptr<NDArrayView> NDArrayViewPtr;
|
||||
|
||||
class NDMask;
|
||||
typedef Internal::ReferenceCountedPtr<NDMask> NDMaskPtr;
|
||||
typedef std::shared_ptr<NDMask> NDMaskPtr;
|
||||
|
||||
class Value;
|
||||
typedef Internal::ReferenceCountedPtr<Value> ValuePtr;
|
||||
typedef std::shared_ptr<Value> ValuePtr;
|
||||
|
||||
class Function;
|
||||
typedef Internal::ReferenceCountedPtr<Function> FunctionPtr;
|
||||
typedef std::shared_ptr<Function> FunctionPtr;
|
||||
|
||||
namespace Internal
|
||||
{
|
||||
CNTK_API FunctionPtr Combine(const Internal::SimpleVector<FunctionPtr>& operands, const std::wstring& name = L"");
|
||||
}
|
||||
}
|
||||
|
||||
namespace std {
|
||||
template <typename T>
|
||||
struct hash<CNTK::Internal::ReferenceCountedPtr<T>>
|
||||
{
|
||||
size_t operator()(const CNTK::Internal::ReferenceCountedPtr<T>& x) const
|
||||
{
|
||||
return std::hash<const void*>()(x.GetPtr());
|
||||
}
|
||||
};
|
||||
class Learner;
|
||||
typedef std::shared_ptr<Learner> LearnerPtr;
|
||||
}
|
||||
|
|
|
@ -128,6 +128,7 @@
|
|||
<ClInclude Include="API\CNTKLibrary.h" />
|
||||
<ClInclude Include="API\CNTKLibraryInternals.h" />
|
||||
<ClInclude Include="Function.h" />
|
||||
<ClInclude Include="Learner.h" />
|
||||
<ClInclude Include="Utils.h" />
|
||||
<ClInclude Include="stdafx.h" />
|
||||
<ClInclude Include="targetver.h" />
|
||||
|
@ -140,6 +141,7 @@
|
|||
</PrecompiledHeader>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Function.cpp" />
|
||||
<ClCompile Include="Learner.cpp" />
|
||||
<ClCompile Include="NDArrayView.cpp" />
|
||||
<ClCompile Include="NDMask.cpp" />
|
||||
<ClCompile Include="stdafx.cpp">
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
<ClCompile Include="Variable.cpp" />
|
||||
<ClCompile Include="Utils.cpp" />
|
||||
<ClCompile Include="NDMask.cpp" />
|
||||
<ClCompile Include="Learner.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="stdafx.h" />
|
||||
|
@ -22,6 +23,7 @@
|
|||
<Filter>API</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Function.h" />
|
||||
<ClInclude Include="Learner.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Filter Include="API">
|
||||
|
|
|
@ -14,7 +14,21 @@ namespace CNTK
|
|||
return GPUDevice(0);
|
||||
}
|
||||
|
||||
/*static*/ Axis Axis::DefaultDynamicAxis = Axis(L"defaultDynamicAxis");
|
||||
/*static*/ Axis Axis::BatchAxis = Axis(L"batchAxis");
|
||||
/*static*/ Axis Axis::AllAxes = Axis(L"allAxes");
|
||||
/*static*/ const Axis& Axis::DefaultDynamicAxis()
|
||||
{
|
||||
static Axis s_defaultDynamicAxis(L"defaultDynamicAxis");
|
||||
return s_defaultDynamicAxis;
|
||||
}
|
||||
|
||||
/*static*/ const Axis& Axis::BatchAxis()
|
||||
{
|
||||
static Axis s_batchAxis(L"batchAxis");
|
||||
return s_batchAxis;
|
||||
}
|
||||
|
||||
/*static*/ const Axis& Axis::AllAxes()
|
||||
{
|
||||
static Axis s_allAxes(L"allAxes");
|
||||
return s_allAxes;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,34 +17,82 @@ bool g_shareNodeValueMatrices = true;
|
|||
|
||||
namespace CNTK
|
||||
{
|
||||
Internal::SimpleVector<Variable> Function::InputsImpl() const
|
||||
std::shared_ptr<std::vector<Variable>> Function::InputsImpl() const
|
||||
{
|
||||
const CompositeFunction* compositeFunction = dynamic_cast<const CompositeFunction*>(this);
|
||||
std::vector<Variable> inputs;
|
||||
if (compositeFunction == nullptr)
|
||||
return m_inputs;
|
||||
inputs = m_inputs;
|
||||
else
|
||||
return Internal::SimpleVector<Variable>(compositeFunction->DetermineInputs());
|
||||
inputs = compositeFunction->DetermineInputs();
|
||||
|
||||
return std::shared_ptr<std::vector<Variable>>(new std::vector<Variable>(std::move(inputs)), [](std::vector<Variable>* ptr) { delete ptr; });
|
||||
}
|
||||
|
||||
/*virtual*/ void Function::_ReplacePlaceholders(const Internal::SimpleMap<Placeholder, Variable>& placeholderReplacements,
|
||||
Internal::SimpleSet<const Function*>& visitedFunctions,
|
||||
Internal::SimpleSet<Placeholder>& replacedPlaceholders)
|
||||
FunctionPtr Function::ReplacePlaceholders(const std::unordered_map<Placeholder, Variable>& placeholderReplacements)
|
||||
{
|
||||
visitedFunctions.Insert(this);
|
||||
// Cannot be called on primitive functions
|
||||
if (RootFunction() == nullptr)
|
||||
InvalidArgument("ReplacePlaceholders should never be called on primitive functions");
|
||||
|
||||
for (auto& inputVar : *(m_inputs.m_vector))
|
||||
std::unordered_set<const Function*> visitedFunctions;
|
||||
std::unordered_set<Placeholder> replacedPlaceholders;
|
||||
ReplacePlaceholders(placeholderReplacements, visitedFunctions, replacedPlaceholders);
|
||||
|
||||
for (auto replacementPair : placeholderReplacements)
|
||||
{
|
||||
if (replacedPlaceholders.find(replacementPair.first) == replacedPlaceholders.end())
|
||||
InvalidArgument("At least one of the placeholders specified for replacement was not found in the function");
|
||||
}
|
||||
|
||||
return this->shared_from_this();
|
||||
}
|
||||
|
||||
// Placeholders can be replaced incrementally - i.e. not all placeholders need to replaced in one go.
|
||||
// The only requirement is that they must all be replaced before making any 'Forward' calls on the Function instance.
|
||||
/*virtual*/ void Function::ReplacePlaceholders(const std::unordered_map<Placeholder, Variable>& placeholderReplacements,
|
||||
std::unordered_set<const Function*>& visitedFunctions,
|
||||
std::unordered_set<Placeholder>& replacedPlaceholders)
|
||||
{
|
||||
visitedFunctions.insert(this);
|
||||
|
||||
for (auto& inputVar : m_inputs)
|
||||
{
|
||||
if (inputVar.IsPlaceholder())
|
||||
{
|
||||
Placeholder placeholder(inputVar);
|
||||
if (placeholderReplacements.Contains(placeholder))
|
||||
if (placeholderReplacements.find(placeholder) != placeholderReplacements.end())
|
||||
{
|
||||
inputVar = placeholderReplacements[placeholder];
|
||||
replacedPlaceholders.Insert(placeholder);
|
||||
inputVar = placeholderReplacements.at(placeholder);
|
||||
replacedPlaceholders.insert(placeholder);
|
||||
}
|
||||
}
|
||||
else if (inputVar.IsOutput() && !visitedFunctions.Contains(inputVar.Owner()))
|
||||
inputVar.Owner()->_ReplacePlaceholders(placeholderReplacements, visitedFunctions, replacedPlaceholders);
|
||||
else if (inputVar.IsOutput() && (visitedFunctions.find(inputVar.Owner().get()) == visitedFunctions.end()))
|
||||
inputVar.Owner()->ReplacePlaceholders(placeholderReplacements, visitedFunctions, replacedPlaceholders);
|
||||
}
|
||||
}
|
||||
|
||||
// Replace any PlaceHolder Variables in the graph of Functions underlying 'this' CompositeFunction. All PlaceHolder variables
|
||||
// should have been replaced before performing any Forward compute of 'this' Function.
|
||||
/*virtual*/ void CompositeFunction::ReplacePlaceholders(const std::unordered_map<Placeholder, Variable>& placeholderReplacements,
|
||||
std::unordered_set<const Function*>& visitedFunctions,
|
||||
std::unordered_set<Placeholder>& replacedPlaceholders)
|
||||
{
|
||||
RootFunction()->ReplacePlaceholders(placeholderReplacements, visitedFunctions, replacedPlaceholders);
|
||||
|
||||
// If any of the placeholders were replaced with Output variables, let's add the graph of function underneath each of those to 'm_allPrimitiveFunctions' set
|
||||
for (auto replacedPlaceholder : replacedPlaceholders)
|
||||
{
|
||||
auto replacingVariable = placeholderReplacements.at(replacedPlaceholder);
|
||||
if (replacingVariable.IsOutput())
|
||||
{
|
||||
auto ownerFunc = replacingVariable.Owner();
|
||||
std::unordered_set<FunctionPtr> visitedFunctions;
|
||||
DetermineInputs(ownerFunc, visitedFunctions);
|
||||
|
||||
// Add the newly visited functions to 'm_allPrimitiveFunctions' set
|
||||
m_allPrimitiveFunctions.insert(visitedFunctions.begin(), visitedFunctions.end());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -79,7 +127,7 @@ namespace CNTK
|
|||
else if (variable.IsInput())
|
||||
{
|
||||
// TODO: Specify dynamic axis
|
||||
if (variable.IsSparseInput())
|
||||
if (IsSparseInput(variable))
|
||||
computationNodePtr = builder.CreateSparseInputNode(variable.Name(), AsTensorShape(variable.Shape()));
|
||||
else
|
||||
computationNodePtr = builder.CreateInputNode(variable.Name(), AsTensorShape(variable.Shape()));
|
||||
|
@ -111,7 +159,7 @@ namespace CNTK
|
|||
{
|
||||
assert(variable.IsOutput());
|
||||
|
||||
Function* function = variable.Owner();
|
||||
Function* function = variable.Owner().get();
|
||||
ComputationNodeBasePtr computationNodePtr;
|
||||
if (dynamic_cast<PrimitiveFunction*>(function))
|
||||
{
|
||||
|
@ -222,14 +270,14 @@ namespace CNTK
|
|||
}
|
||||
|
||||
template <typename ElementType>
|
||||
ComputationNetworkPtr CompositeFunction::GetComputationNetwork(const DeviceDescriptor& device, const Internal::SimpleSet<Variable>& backpropRoots)
|
||||
ComputationNetworkPtr CompositeFunction::GetComputationNetwork(const DeviceDescriptor& device, const std::unordered_set<Variable>& backpropRoots)
|
||||
{
|
||||
if (m_computationNetwork != nullptr)
|
||||
{
|
||||
// TODO: We should either invalidate and readapt the network if he backpropRoots change compared to what was specified when the network
|
||||
// was last constructed, to just recreate a new network.
|
||||
// For now just disallow changing the backpropRoots after the network is created
|
||||
if (m_currentBackpropRoots != *backpropRoots.m_set)
|
||||
if (m_currentBackpropRoots != backpropRoots)
|
||||
LogicError("Changing backprop roots across different Forward calls on a CNTK composite Function is currently unsupported");
|
||||
|
||||
// TODO: Support changing the device across different invocations of the forward method on a Function instance
|
||||
|
@ -244,7 +292,7 @@ namespace CNTK
|
|||
ComputationNetworkBuilder<ElementType> builder(*m_computationNetwork);
|
||||
|
||||
// TODO: We current only support one backprop root
|
||||
if (backpropRoots.Size() > 1)
|
||||
if (backpropRoots.size() > 1)
|
||||
LogicError("More than one backprop roots is currently unsupported");
|
||||
|
||||
ComputationNodeBasePtr backpropRootNode;
|
||||
|
@ -258,7 +306,7 @@ namespace CNTK
|
|||
auto currentRootNode = GetNode(rootOutput, m_computationNetwork, builder, m_variableToNodeMap, m_isVariableRootMap);
|
||||
forwardRootNodes.push_back(currentRootNode);
|
||||
|
||||
if (backpropRoots.Contains(rootOutput))
|
||||
if (backpropRoots.find(rootOutput) != backpropRoots.end())
|
||||
backpropRootNode = m_variableToNodeMap[rootOutput];
|
||||
}
|
||||
|
||||
|
@ -281,7 +329,7 @@ namespace CNTK
|
|||
if (std::find(currentComputationNodeInputs.begin(), currentComputationNodeInputs.end(), nullptr) != currentComputationNodeInputs.end())
|
||||
{
|
||||
// We found a null input; this variable must correspond to a PastValue or FutureValue function
|
||||
const PrimitiveFunction* primitiveFunc = dynamic_cast<const PrimitiveFunction*>(varNodePair.first.Owner().GetPtr());
|
||||
const PrimitiveFunction* primitiveFunc = dynamic_cast<const PrimitiveFunction*>(varNodePair.first.Owner().get());
|
||||
if ((primitiveFunc == nullptr) || ((primitiveFunc->OpType() != PrimitiveOpType::PastValue) && (primitiveFunc->OpType() != PrimitiveOpType::FutureValue)))
|
||||
InvalidArgument("Invalid Function graph detected; recurrence found at a Function that is not a PastValue/FutureValue function");
|
||||
|
||||
|
@ -326,10 +374,10 @@ namespace CNTK
|
|||
LogicError("The specified ElementType %s does not match the DataType %s", typeid(ElementType).name(), DataTypeName(value->Data()->GetDataType()));
|
||||
|
||||
// TODO: Is supplying dense data for an Input variable tagged as sparse, a fatal error?
|
||||
if (var.IsSparseInput() && !value->Data()->IsSparse())
|
||||
if (IsSparseInput(var) && !value->Data()->IsSparse())
|
||||
InvalidArgument("Dense input data supplied for a sparse input Variable");
|
||||
|
||||
if (var.IsSparseInput() && (value->Data()->GetStorageFormat() != StorageFormat::SparseCSC))
|
||||
if (IsSparseInput(var) && (value->Data()->GetStorageFormat() != StorageFormat::SparseCSC))
|
||||
InvalidArgument("Sparse Input data must be in SparseCSC format");
|
||||
|
||||
if (value->Data()->Shape().NumAxes() == var.Shape().NumAxes())
|
||||
|
@ -413,7 +461,7 @@ namespace CNTK
|
|||
layout->GetNumCols(),
|
||||
AsCNTKImplDeviceId(value->Data()->Device()),
|
||||
value->Data()->IsSparse() ? MatrixType::SPARSE : MatrixType::DENSE,
|
||||
AsCNTKMatrixFormat(value->Data()->GetStorageFormat()));
|
||||
AsCNTKImplMatrixFormat(value->Data()->GetStorageFormat()));
|
||||
|
||||
std::vector<size_t> sequencesShorterThanLongestSequence;
|
||||
for (size_t i = 0; i < numSequences; ++i)
|
||||
|
@ -458,8 +506,8 @@ namespace CNTK
|
|||
{
|
||||
// Just create a view over the existing matrix itself
|
||||
auto tensorView = new TensorView<ElementType>(std::make_shared<Matrix<ElementType>>(matrix.AsReference()), AsTensorShape(valueDataShape));
|
||||
auto data = NDArrayViewPtr(new NDArrayView(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(matrix.GetFormat()), valueDataShape, true, tensorView), [](ReferenceCount* ptr) { delete ptr; });
|
||||
return ValuePtr(new Value(data), [](ReferenceCount* ptr) { delete ptr; });
|
||||
auto data = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(matrix.GetFormat()), valueDataShape, true, tensorView);
|
||||
return MakeSharedObject<Value>(data);
|
||||
}
|
||||
|
||||
if (layout->GetNumCols() != matrix.GetNumCols())
|
||||
|
@ -509,7 +557,7 @@ namespace CNTK
|
|||
NDMaskPtr mask;
|
||||
if (!sequencesShorterThanLongestSequence.empty())
|
||||
{
|
||||
mask = NDMaskPtr(new NDMask({ maxNumTimeSteps, numSequences }, AsDeviceDescriptor(matrix.GetDeviceId())), [](ReferenceCount* ptr) { delete ptr; });
|
||||
mask = MakeSharedObject<NDMask>(NDShape({ maxNumTimeSteps, numSequences }), AsDeviceDescriptor(matrix.GetDeviceId()));
|
||||
for (auto shortSequenceIdx : sequencesShorterThanLongestSequence)
|
||||
{
|
||||
mask->MaskSection({ sequenceLengths[shortSequenceIdx], shortSequenceIdx }, { NDShape::InferredDimension, 1 });
|
||||
|
@ -517,97 +565,89 @@ namespace CNTK
|
|||
}
|
||||
|
||||
auto tensorView = new TensorView<ElementType>(shuffledMatrixData, AsTensorShape(valueDataShape));
|
||||
auto data = NDArrayViewPtr(new NDArrayView(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), StorageFormat::Dense, valueDataShape, true, tensorView), [](ReferenceCount* ptr) { delete ptr; });
|
||||
return ValuePtr(new Value(data, mask), [](ReferenceCount* ptr) { delete ptr; });
|
||||
auto data = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), StorageFormat::Dense, valueDataShape, true, tensorView);
|
||||
return MakeSharedObject<Value>(data, mask);
|
||||
}
|
||||
|
||||
void CompositeFunction::PopulateNetworkInputs(const Internal::SimpleMap<Variable, const ValuePtr>& arguments)
|
||||
template <typename ElementType>
|
||||
/*static*/ void CompositeFunction::PopulateComputationNodeValue(const std::pair<Variable, ValuePtr>& variableValue, ComputationNodeBasePtr& computationNode)
|
||||
{
|
||||
auto CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject<ElementType>(variableValue.first, variableValue.second);
|
||||
MBLayoutPtr layout = CNTKMatrixAndMBLayout.second;
|
||||
|
||||
auto& nodeData = computationNode->As<ComputationNode<ElementType>>()->Value();
|
||||
|
||||
// Switch the node matrix to the right matrix type
|
||||
nodeData.SwitchToMatrixType(CNTKMatrixAndMBLayout.first->GetMatrixType(), CNTKMatrixAndMBLayout.first->GetFormat(), false);
|
||||
nodeData.AssignValuesOf(*CNTKMatrixAndMBLayout.first);
|
||||
computationNode->GetMBLayout()->CopyFrom(layout);
|
||||
}
|
||||
|
||||
void CompositeFunction::PopulateNetworkInputs(const std::unordered_map<Variable, const ValuePtr>& arguments)
|
||||
{
|
||||
auto functionArguments = this->Arguments();
|
||||
std::vector<ComputationNodeBasePtr> inputNodes;
|
||||
for (auto argument : functionArguments)
|
||||
{
|
||||
// Ensure we have values for all arguments of the function
|
||||
if (!arguments.Contains(argument))
|
||||
if (arguments.find(argument) == arguments.end())
|
||||
InvalidArgument("Value not specified for required Function Argument");
|
||||
|
||||
auto argumentComputationNode = m_variableToNodeMap[argument];
|
||||
inputNodes.push_back(argumentComputationNode);
|
||||
|
||||
ValuePtr argumentValue = arguments[argument];
|
||||
ValuePtr argumentValue = arguments.at(argument);
|
||||
|
||||
MBLayoutPtr layout;
|
||||
switch (argumentValue->Data()->GetDataType())
|
||||
{
|
||||
case DataType::Float:
|
||||
{
|
||||
auto CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject<float>(argument, argumentValue);
|
||||
layout = CNTKMatrixAndMBLayout.second;
|
||||
|
||||
auto& nodeData = argumentComputationNode->As<ComputationNode<float>>()->Value();
|
||||
// Switch the node matrix to the right matrix type
|
||||
nodeData.SwitchToMatrixType(CNTKMatrixAndMBLayout.first->GetMatrixType(), CNTKMatrixAndMBLayout.first->GetFormat(), false);
|
||||
nodeData.AssignValuesOf(*CNTKMatrixAndMBLayout.first);
|
||||
PopulateComputationNodeValue<float>({ argument, argumentValue }, argumentComputationNode);
|
||||
break;
|
||||
}
|
||||
case DataType::Double:
|
||||
{
|
||||
auto CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject<double>(argument, argumentValue);
|
||||
layout = CNTKMatrixAndMBLayout.second;
|
||||
|
||||
auto& nodeData = argumentComputationNode->As<ComputationNode<double>>()->Value();
|
||||
// Switch the node matrix to the right matrix type
|
||||
nodeData.SwitchToMatrixType(CNTKMatrixAndMBLayout.first->GetMatrixType(), CNTKMatrixAndMBLayout.first->GetFormat(), false);
|
||||
nodeData.AssignValuesOf(*CNTKMatrixAndMBLayout.first);
|
||||
PopulateComputationNodeValue<double>({ argument, argumentValue }, argumentComputationNode);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LogicError("Unsupported DataType %s", DataTypeName(argumentValue->Data()->GetDataType()));
|
||||
break;
|
||||
}
|
||||
|
||||
argumentComputationNode->GetMBLayout()->CopyFrom(layout);
|
||||
}
|
||||
|
||||
m_computationNetwork->BumpEvalTimeStamp(inputNodes);
|
||||
}
|
||||
|
||||
void CompositeFunction::PopulateNetworkGradients(const Internal::SimpleMap<Variable, const ValuePtr>& gradients)
|
||||
template <typename ElementType>
|
||||
/*static*/ void CompositeFunction::PopulateComputationNodeGradient(const std::pair<Variable, ValuePtr>& variableGradient, Microsoft::MSR::CNTK::ComputationNodeBasePtr& computationNode)
|
||||
{
|
||||
auto CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject<ElementType>(variableGradient.first, variableGradient.second);
|
||||
MBLayoutPtr layout = CNTKMatrixAndMBLayout.second;
|
||||
auto nodeLayout = computationNode->GetMBLayout();
|
||||
if (((layout == nullptr) != (nodeLayout == nullptr)) || ((layout != nullptr) && (*layout != *nodeLayout)))
|
||||
InvalidArgument("The layout of the specified gradient Value in incompatible with the layout of the corresponding Variable computed during Forward call");
|
||||
computationNode->As<ComputationNode<ElementType>>()->AssignGradient(*CNTKMatrixAndMBLayout.first);
|
||||
}
|
||||
|
||||
// Assign the supplied gradients corresponding to the root(s) of the network to be backpropagated through the graph
|
||||
void CompositeFunction::PopulateNetworkGradients(const std::unordered_map<Variable, const ValuePtr>& gradients)
|
||||
{
|
||||
auto functionOutputs = this->Outputs();
|
||||
std::unordered_map<Variable, const ValuePtr>& gradientsValueMap = *gradients.m_map;
|
||||
for (auto gradientVarValuePair : gradientsValueMap)
|
||||
for (auto gradientVarValuePair : gradients)
|
||||
{
|
||||
// Only gradients for roots of the function can be specified
|
||||
if (std::find(functionOutputs.begin(), functionOutputs.end(), gradientVarValuePair.first) == functionOutputs.end())
|
||||
InvalidArgument("Gradients cannot be specified for a Variable that is not an Output of the Function");
|
||||
|
||||
auto outputComputationNode = m_variableToNodeMap[gradientVarValuePair.first];
|
||||
auto nodeLayout = outputComputationNode->GetMBLayout();
|
||||
|
||||
ValuePtr gradientValue = gradientVarValuePair.second;
|
||||
|
||||
MBLayoutPtr layout;
|
||||
switch (gradientValue->Data()->GetDataType())
|
||||
{
|
||||
case DataType::Float:
|
||||
{
|
||||
auto CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject<float>(gradientVarValuePair.first, gradientValue);
|
||||
layout = CNTKMatrixAndMBLayout.second;
|
||||
if (((layout == nullptr) != (nodeLayout == nullptr)) || ((layout != nullptr) && (*layout != *nodeLayout)))
|
||||
InvalidArgument("The layout of the specified gradient Value in incompatible with the layout of the corresponding Variable computed during Forward call");
|
||||
outputComputationNode->As<ComputationNode<float>>()->AssignGradient(*CNTKMatrixAndMBLayout.first);
|
||||
PopulateComputationNodeGradient<float>(gradientVarValuePair, outputComputationNode);
|
||||
break;
|
||||
}
|
||||
case DataType::Double:
|
||||
{
|
||||
auto CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject<double>(gradientVarValuePair.first, gradientValue);
|
||||
layout = CNTKMatrixAndMBLayout.second;
|
||||
if (((layout == nullptr) != (nodeLayout == nullptr)) || ((layout != nullptr) && (*layout != *nodeLayout)))
|
||||
InvalidArgument("The layout of the specified gradient Value in incompatible with the layout of the corresponding Variable computed during Forward call");
|
||||
outputComputationNode->As<ComputationNode<double>>()->AssignGradient(*CNTKMatrixAndMBLayout.first);
|
||||
PopulateComputationNodeGradient<double>(gradientVarValuePair, outputComputationNode);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LogicError("Unsupported DataType %s", DataTypeName(gradientValue->Data()->GetDataType()));
|
||||
break;
|
||||
|
@ -618,6 +658,8 @@ namespace CNTK
|
|||
static NDShape GetValueShape(const Variable& var, const ComputationNodeBasePtr& computationNodePtr)
|
||||
{
|
||||
size_t outputValueNumAxes = var.Shape().NumAxes();
|
||||
|
||||
// Add the batch and dynamic axes if needed
|
||||
if (computationNodePtr->GetMBLayout() != nullptr)
|
||||
outputValueNumAxes += 2;
|
||||
|
||||
|
@ -650,37 +692,27 @@ namespace CNTK
|
|||
InvalidArgument("The shape %s of the specified Value object for output does not match the actual output shape %s", AsString(outputValuePtr->Data()->Shape()).c_str(), AsString(outputShape).c_str());
|
||||
}
|
||||
|
||||
ValuePtr nodeValue;
|
||||
switch (outputVarValuePair.first.GetDataType())
|
||||
{
|
||||
case DataType::Float:
|
||||
{
|
||||
auto nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<float>(outputVarValuePair.first, computationNodePtr->As<ComputationNode<float>>()->Value(), computationNodePtr->GetMBLayout());
|
||||
if (outputValuePtr == nullptr)
|
||||
{
|
||||
auto data = NDArrayViewPtr(new NDArrayView(outputVarValuePair.first.GetDataType(), outputShape, AsDeviceDescriptor(computationNodePtr->ValuePtr()->GetDeviceId())), [](ReferenceCount* ptr) { delete ptr; });
|
||||
auto mask = (nodeValue->Mask() != nullptr) ? NDMaskPtr(new NDMask(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()), [](ReferenceCount* ptr) { delete ptr; }) : nullptr;
|
||||
outputValuePtr = ValuePtr(new Value(data, mask), [](ReferenceCount* ptr) { delete ptr; });
|
||||
}
|
||||
outputValuePtr->CopyFrom(*nodeValue);
|
||||
nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<float>(outputVarValuePair.first, computationNodePtr->As<ComputationNode<float>>()->Value(), computationNodePtr->GetMBLayout());
|
||||
break;
|
||||
}
|
||||
case DataType::Double:
|
||||
{
|
||||
auto nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<double>(outputVarValuePair.first, computationNodePtr->As<ComputationNode<double>>()->Value(), computationNodePtr->GetMBLayout());
|
||||
if (outputValuePtr == nullptr)
|
||||
{
|
||||
auto data = NDArrayViewPtr(new NDArrayView(outputVarValuePair.first.GetDataType(), outputShape, AsDeviceDescriptor(computationNodePtr->ValuePtr()->GetDeviceId())), [](ReferenceCount* ptr) { delete ptr; });
|
||||
auto mask = (nodeValue->Mask() != nullptr) ? NDMaskPtr(new NDMask(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()), [](ReferenceCount* ptr) { delete ptr; }) : nullptr;
|
||||
outputValuePtr = ValuePtr(new Value(data, mask), [](ReferenceCount* ptr) { delete ptr; });
|
||||
}
|
||||
outputValuePtr->CopyFrom(*nodeValue);
|
||||
nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<double>(outputVarValuePair.first, computationNodePtr->As<ComputationNode<double>>()->Value(), computationNodePtr->GetMBLayout());
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LogicError("Unsupported DataType %s", DataTypeName(outputVarValuePair.first.GetDataType()));
|
||||
break;
|
||||
}
|
||||
|
||||
if (outputValuePtr == nullptr)
|
||||
{
|
||||
auto data = MakeSharedObject<NDArrayView>(outputVarValuePair.first.GetDataType(), outputShape, AsDeviceDescriptor(computationNodePtr->ValuePtr()->GetDeviceId()));
|
||||
auto mask = (nodeValue->Mask() != nullptr) ? MakeSharedObject<NDMask>(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()) : nullptr;
|
||||
outputValuePtr = MakeSharedObject<Value>(data, mask);
|
||||
}
|
||||
outputValuePtr->CopyFrom(*nodeValue);
|
||||
outputs[outputVarValuePair.first] = outputValuePtr;
|
||||
}
|
||||
}
|
||||
|
@ -713,50 +745,40 @@ namespace CNTK
|
|||
if (!computationNodePtr->NeedsGradient())
|
||||
LogicError("Backpropagated gradient value cannot be read from a ComputationNode that has NeedsGradient set to false");
|
||||
|
||||
ValuePtr nodeValue;
|
||||
switch (gradientVarValuePair.first.GetDataType())
|
||||
{
|
||||
case DataType::Float:
|
||||
{
|
||||
auto nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<float>(gradientVarValuePair.first, computationNodePtr->As<ComputationNode<float>>()->Gradient(), computationNodePtr->GetMBLayout());
|
||||
if (gradientValuePtr == nullptr)
|
||||
{
|
||||
auto data = NDArrayViewPtr(new NDArrayView(gradientVarValuePair.first.GetDataType(), gradientShape, AsDeviceDescriptor(computationNodePtr->ValuePtr()->GetDeviceId())), [](ReferenceCount* ptr) { delete ptr; });
|
||||
auto mask = NDMaskPtr((nodeValue->Mask() != nullptr) ? new NDMask(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()) : nullptr, [](ReferenceCount* ptr) { delete ptr; });
|
||||
gradientValuePtr = ValuePtr(new Value(data, mask), [](ReferenceCount* ptr) { delete ptr; });
|
||||
}
|
||||
gradientValuePtr->CopyFrom(*nodeValue);
|
||||
nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<float>(gradientVarValuePair.first, computationNodePtr->As<ComputationNode<float>>()->Gradient(), computationNodePtr->GetMBLayout());
|
||||
break;
|
||||
}
|
||||
case DataType::Double:
|
||||
{
|
||||
auto nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<double>(gradientVarValuePair.first, computationNodePtr->As<ComputationNode<double>>()->Gradient(), computationNodePtr->GetMBLayout());
|
||||
if (gradientValuePtr == nullptr)
|
||||
{
|
||||
auto data = NDArrayViewPtr(new NDArrayView(gradientVarValuePair.first.GetDataType(), gradientShape, AsDeviceDescriptor(computationNodePtr->ValuePtr()->GetDeviceId())), [](ReferenceCount* ptr) { delete ptr; });
|
||||
auto mask = NDMaskPtr((nodeValue->Mask() != nullptr) ? new NDMask(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()) : nullptr, [](ReferenceCount* ptr) { delete ptr; });
|
||||
gradientValuePtr = ValuePtr(new Value(data, mask), [](ReferenceCount* ptr) { delete ptr; });
|
||||
|
||||
}
|
||||
gradientValuePtr->CopyFrom(*nodeValue);
|
||||
nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<double>(gradientVarValuePair.first, computationNodePtr->As<ComputationNode<double>>()->Gradient(), computationNodePtr->GetMBLayout());
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LogicError("Unsupported DataType %s", DataTypeName(gradientVarValuePair.first.GetDataType()));
|
||||
break;
|
||||
}
|
||||
|
||||
if (gradientValuePtr == nullptr)
|
||||
{
|
||||
auto data = MakeSharedObject<NDArrayView>(gradientVarValuePair.first.GetDataType(), gradientShape, AsDeviceDescriptor(computationNodePtr->ValuePtr()->GetDeviceId()));
|
||||
auto mask = (nodeValue->Mask() != nullptr) ? MakeSharedObject<NDMask>(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()) : nullptr;
|
||||
gradientValuePtr = MakeSharedObject<Value>(data, mask);
|
||||
}
|
||||
|
||||
gradientValuePtr->CopyFrom(*nodeValue);
|
||||
gradients[gradientVarValuePair.first] = gradientValuePtr;
|
||||
}
|
||||
}
|
||||
|
||||
/*virtual*/ BackPropStatePtr CompositeFunction::Forward(const Internal::SimpleMap<Variable, const ValuePtr>& arguments,
|
||||
Internal::SimpleMap<Variable, ValuePtr>& outputs,
|
||||
const Internal::SimpleSet<Variable>& outputsToRetainBackwardStateFor,
|
||||
const DeviceDescriptor& computeDevice)
|
||||
/*virtual*/ BackPropStatePtr CompositeFunction::Forward(const std::unordered_map<Variable, const ValuePtr>& arguments,
|
||||
std::unordered_map<Variable, ValuePtr>& outputs,
|
||||
const DeviceDescriptor& computeDevice,
|
||||
const std::unordered_set<Variable>& outputsToRetainBackwardStateFor)
|
||||
{
|
||||
// TODO: How about zero argument functions?
|
||||
// TODO: We need a better way to determine the ElementType for the network
|
||||
auto dataType = arguments.m_map->begin()->second->Data()->GetDataType();
|
||||
auto dataType = arguments.begin()->second->Data()->GetDataType();
|
||||
if (dataType == DataType::Float)
|
||||
GetComputationNetwork<float>(computeDevice, outputsToRetainBackwardStateFor);
|
||||
else
|
||||
|
@ -767,10 +789,10 @@ namespace CNTK
|
|||
// Feed data into the arguments of the network
|
||||
PopulateNetworkInputs(arguments);
|
||||
|
||||
std::unordered_set<Variable> functionOutputs = Internal::SimpleVector<Variable>(this->Outputs()).GetAsUnorderedSet();
|
||||
std::unordered_set<Variable> functionOutputs(this->Outputs().begin(), this->Outputs().end());
|
||||
std::vector<ComputationNodeBasePtr> outputsToEvaluate;
|
||||
|
||||
for (auto outputVarValuePair : *outputs.m_map)
|
||||
for (auto outputVarValuePair : outputs)
|
||||
{
|
||||
// Ensure that only a subset of this function's outputs are being asked to be evaluated
|
||||
if (functionOutputs.find(outputVarValuePair.first) == functionOutputs.end())
|
||||
|
@ -781,128 +803,105 @@ namespace CNTK
|
|||
}
|
||||
|
||||
// The 'outputsToRetainBackwardStateFor' nodes also need to be evaluated if not already specified in 'outputs'
|
||||
for (auto rootVarForBackprop : *outputsToRetainBackwardStateFor.m_set)
|
||||
for (auto rootVarForBackprop : outputsToRetainBackwardStateFor)
|
||||
{
|
||||
if (outputs.m_map->find(rootVarForBackprop) == outputs.m_map->end())
|
||||
if (outputs.find(rootVarForBackprop) == outputs.end())
|
||||
outputsToEvaluate.push_back(m_variableToNodeMap[rootVarForBackprop]);
|
||||
}
|
||||
|
||||
m_computationNetwork->ForwardProp(outputsToEvaluate);
|
||||
|
||||
GetNetworkOutputs(*(outputs.m_map));
|
||||
GetNetworkOutputs(outputs);
|
||||
|
||||
// TODO: How to deal with the specified 'computeDevice'
|
||||
|
||||
return (outputsToRetainBackwardStateFor.Size() > 0) ? BackPropStatePtr(new CNTKBackPropState(this, { arguments.m_map->begin()->first, m_variableToNodeMap[arguments.m_map->begin()->first]->GetEvalTimeStamp() }), [](ReferenceCount* ptr) { delete ptr; }) : nullptr;
|
||||
return (outputsToRetainBackwardStateFor.size() > 0) ? MakeSharedObject<CNTKBackPropState>(this->shared_from_this(), std::make_pair(arguments.begin()->first, m_variableToNodeMap[arguments.begin()->first]->GetEvalTimeStamp())) : nullptr;
|
||||
}
|
||||
|
||||
/*virtual*/ void CompositeFunction::Backward(const BackPropStatePtr& state,
|
||||
const Internal::SimpleMap<Variable, const ValuePtr>& rootGradientValues,
|
||||
Internal::SimpleMap<Variable, ValuePtr>& backPropagatedGradientValuesForInputs)
|
||||
const std::unordered_map<Variable, const ValuePtr>& rootGradientValues,
|
||||
std::unordered_map<Variable, ValuePtr>& backPropagatedGradientValuesForInputs)
|
||||
{
|
||||
if ((state == nullptr) || (dynamic_cast<const CNTKBackPropState*>(state.GetPtr()) == nullptr))
|
||||
auto backpropState = dynamic_cast<const CNTKBackPropState*>(state.get());
|
||||
if (backpropState == nullptr)
|
||||
InvalidArgument("Invalid backprop state specified");
|
||||
|
||||
// TODO: Support multiple concurrent backprop states
|
||||
auto backpropState = dynamic_cast<const CNTKBackPropState*>(state.GetPtr());
|
||||
if (backpropState->EvalTimeStamp().second != m_variableToNodeMap[backpropState->EvalTimeStamp().first]->GetEvalTimeStamp())
|
||||
LogicError("The specified backprop state specified cannot be used for backpropagation as the Function's internal state was modified by subsequent Forward calls to the function."
|
||||
"This is not a user error but a shortcoming of the current implementation where multiple independent backprop states are not simultaneously supported");
|
||||
|
||||
if (rootGradientValues.Size() > 1)
|
||||
if (rootGradientValues.size() > 1)
|
||||
LogicError("Currently gradient backprop from only one of the Function Outputs is supported");
|
||||
|
||||
// TODO: Avoid copying the data when possible
|
||||
|
||||
// Zero all gradients of nodes below the root nodes
|
||||
for (auto rootGradientVarValuePair : *rootGradientValues.m_map)
|
||||
for (auto rootGradientVarValuePair : rootGradientValues)
|
||||
m_computationNetwork->ZeroInputGradients(m_variableToNodeMap[rootGradientVarValuePair.first]);
|
||||
|
||||
// Feed data into the arguments of the network
|
||||
PopulateNetworkGradients(rootGradientValues);
|
||||
|
||||
// Backpropagate through the network
|
||||
auto rootComputationNodePtr = m_variableToNodeMap[rootGradientValues.m_map->begin()->first];
|
||||
auto rootComputationNodePtr = m_variableToNodeMap[rootGradientValues.begin()->first];
|
||||
m_computationNetwork->GetNestedNetwork(rootComputationNodePtr)->Backprop(FrameRange(nullptr), true, true);
|
||||
|
||||
GetNetworkGradients(*(backPropagatedGradientValuesForInputs.m_map));
|
||||
GetNetworkGradients(backPropagatedGradientValuesForInputs);
|
||||
|
||||
// TODO: How to deal with the specified 'computeDevice'
|
||||
}
|
||||
|
||||
/*virtual*/ void CompositeFunction::_ReplacePlaceholders(const Internal::SimpleMap<Placeholder, Variable>& placeholderReplacements, Internal::SimpleSet<const Function*>& visitedFunctions, Internal::SimpleSet<Placeholder>& replacedPlaceholders)
|
||||
{
|
||||
RootFunction()->_ReplacePlaceholders(placeholderReplacements, visitedFunctions, replacedPlaceholders);
|
||||
|
||||
// If any of the placeholders were replaced with Output variables, let's add the graph of function underneath each of those to 'm_allPrimitiveFunctions' set
|
||||
for (auto replacedPlaceholder : *replacedPlaceholders.m_set)
|
||||
{
|
||||
auto replacingVariable = placeholderReplacements[replacedPlaceholder];
|
||||
if (replacingVariable.IsOutput())
|
||||
{
|
||||
auto ownerFunc = replacingVariable.Owner();
|
||||
Internal::SimpleSet<FunctionPtr> visitedFunctions;
|
||||
DetermineInputs(ownerFunc, visitedFunctions);
|
||||
|
||||
// Add the newly visited functions to 'm_allPrimitiveFunctions' set
|
||||
m_allPrimitiveFunctions.m_set->insert(visitedFunctions.m_set->begin(), visitedFunctions.m_set->end());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FunctionPtr Times(const Variable& leftOperand, const Variable& rightOperand, const std::wstring& name/* = L""*/)
|
||||
{
|
||||
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::Times, { leftOperand, rightOperand }, Dictionary(), name), name);
|
||||
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Times, std::vector<Variable>({ leftOperand, rightOperand }), Dictionary(), name), name);
|
||||
}
|
||||
|
||||
FunctionPtr Plus(const Variable& leftOperand, const Variable& rightOperand, const std::wstring& name/* = L""*/)
|
||||
{
|
||||
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::Plus, { leftOperand, rightOperand }, Dictionary(), name), name);
|
||||
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Plus, std::vector<Variable>({ leftOperand, rightOperand }), Dictionary(), name), name);
|
||||
}
|
||||
|
||||
FunctionPtr Sigmoid(const Variable& operand, const std::wstring& name/* = L""*/)
|
||||
{
|
||||
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::Sigmoid, { operand }, Dictionary(), name), name);
|
||||
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Sigmoid, std::vector<Variable>({ operand }), Dictionary(), name), name);
|
||||
}
|
||||
|
||||
FunctionPtr Tanh(const Variable& operand, const std::wstring& name/* = L""*/)
|
||||
{
|
||||
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::Tanh, { operand }, Dictionary(), name), name);
|
||||
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Tanh, std::vector<Variable>({ operand }), Dictionary(), name), name);
|
||||
}
|
||||
|
||||
namespace Internal
|
||||
FunctionPtr Combine(const std::initializer_list<FunctionPtr>& operands, const std::wstring& name/* = L""*/)
|
||||
{
|
||||
FunctionPtr Combine(const Internal::SimpleVector<FunctionPtr>& operands, const std::wstring& name/* = L""*/)
|
||||
std::unordered_set<FunctionPtr> uniqueOperands;
|
||||
std::vector<Variable> inputs;
|
||||
for (auto operand : operands)
|
||||
{
|
||||
Internal::SimpleSet<FunctionPtr> uniqueOperands;
|
||||
std::vector<Variable> inputs;
|
||||
for (size_t i = 0; i < operands.Size(); ++i)
|
||||
{
|
||||
if (uniqueOperands.Contains(operands[i]))
|
||||
LogicError("All function operands specified to Combine must be unique");
|
||||
if (uniqueOperands.find(operand) != uniqueOperands.end())
|
||||
LogicError("All function operands specified to Combine must be unique");
|
||||
|
||||
uniqueOperands.Insert(operands[i]);
|
||||
auto currentFunctionOutputs = operands[i]->Outputs();
|
||||
std::copy(currentFunctionOutputs.begin(), currentFunctionOutputs.end(), std::back_inserter(inputs));
|
||||
}
|
||||
|
||||
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::Combine, inputs, Dictionary(), name), name);
|
||||
uniqueOperands.insert(operand);
|
||||
auto currentFunctionOutputs = operand->Outputs();
|
||||
std::copy(currentFunctionOutputs.begin(), currentFunctionOutputs.end(), std::back_inserter(inputs));
|
||||
}
|
||||
|
||||
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Combine, inputs, Dictionary(), name), name);
|
||||
}
|
||||
|
||||
FunctionPtr CrossEntropyWithSoftmax(const Variable& output, const Variable& labels, const std::wstring& name/* = L""*/)
|
||||
{
|
||||
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::CrossEntropyWithSoftmax, { output, labels }, Dictionary(), name), name);
|
||||
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::CrossEntropyWithSoftmax, std::vector<Variable>({ output, labels }), Dictionary(), name), name);
|
||||
}
|
||||
|
||||
FunctionPtr ClassificationError(const Variable& prediction, const Variable& labels, const std::wstring& name/* = L""*/)
|
||||
{
|
||||
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::ClassificationError, { prediction, labels }, Dictionary(), name), name);
|
||||
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::ClassificationError, std::vector<Variable>({ prediction, labels }), Dictionary(), name), name);
|
||||
}
|
||||
|
||||
FunctionPtr Exp(const Variable& operand, const std::wstring& name/* = L""*/)
|
||||
{
|
||||
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::Exp, { operand }, Dictionary(), name), name);
|
||||
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Exp, std::vector<Variable>({ operand }), Dictionary(), name), name);
|
||||
}
|
||||
|
||||
FunctionPtr PastValue(const Variable& initialState, const Variable& operand, size_t stepSize, const std::wstring& name/* = L""*/)
|
||||
|
@ -912,7 +911,7 @@ namespace CNTK
|
|||
|
||||
auto additionalProperties = Dictionary();
|
||||
additionalProperties[L"stepSize"] = DictionaryValue(stepSize);
|
||||
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::PastValue, { initialState, operand }, std::move(additionalProperties), name), name);
|
||||
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::PastValue, std::vector<Variable>({ initialState, operand }), std::move(additionalProperties), name), name);
|
||||
}
|
||||
|
||||
FunctionPtr FutureValue(const Variable& initialState, const Variable& operand, size_t stepSize, const std::wstring& name/* = L""*/)
|
||||
|
@ -922,16 +921,16 @@ namespace CNTK
|
|||
|
||||
auto additionalProperties = Dictionary();
|
||||
additionalProperties[L"stepSize"] = DictionaryValue(stepSize);
|
||||
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::FutureValue, { initialState, operand }, std::move(additionalProperties), name), name);
|
||||
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::FutureValue, std::vector<Variable>({ initialState, operand }), std::move(additionalProperties), name), name);
|
||||
}
|
||||
|
||||
FunctionPtr ElementTimes(const Variable& leftOperand, const Variable& rightOperand, const std::wstring& name/* = L""*/)
|
||||
{
|
||||
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::ElementTimes, { leftOperand, rightOperand }, Dictionary(), name), name);
|
||||
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::ElementTimes, std::vector<Variable>({ leftOperand, rightOperand }), Dictionary(), name), name);
|
||||
}
|
||||
|
||||
FunctionPtr ReduceSum(const Variable& operand, const std::wstring& name/* = L""*/)
|
||||
{
|
||||
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::ReduceSum, { operand }, Dictionary(), name), name);
|
||||
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::ReduceSum, std::vector<Variable>({ operand }), Dictionary(), name), name);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -68,17 +68,17 @@ namespace CNTK
|
|||
{
|
||||
}
|
||||
|
||||
virtual BackPropStatePtr Forward(const Internal::SimpleMap<Variable, const ValuePtr>& /*arguments*/,
|
||||
Internal::SimpleMap<Variable, ValuePtr>& /*outputs*/,
|
||||
const Internal::SimpleSet<Variable>& /*outputsToRetainBackwardStateFor*/,
|
||||
const DeviceDescriptor& /*computeDevice*/) override
|
||||
virtual BackPropStatePtr Forward(const std::unordered_map<Variable, const ValuePtr>& /*arguments*/,
|
||||
std::unordered_map<Variable, ValuePtr>& /*outputs*/,
|
||||
const DeviceDescriptor& /*computeDevice*/,
|
||||
const std::unordered_set<Variable>& /*outputsToRetainBackwardStateFor*/) override
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
virtual void Backward(const BackPropStatePtr& /*state*/,
|
||||
const Internal::SimpleMap<Variable, const ValuePtr>& /*rootGradientValues*/,
|
||||
Internal::SimpleMap<Variable, ValuePtr>& /*backPropagatedGradientValuesForInputs*/) override
|
||||
const std::unordered_map<Variable, const ValuePtr>& /*rootGradientValues*/,
|
||||
std::unordered_map<Variable, ValuePtr>& /*backPropagatedGradientValuesForInputs*/) override
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
|
@ -280,54 +280,58 @@ namespace CNTK
|
|||
private:
|
||||
std::pair<Variable, int64_t> m_evalTimeStamp;
|
||||
};
|
||||
typedef Internal::ReferenceCountedPtr<CNTKBackPropState> CNTKBackPropStatePtr;
|
||||
typedef std::shared_ptr<CNTKBackPropState> CNTKBackPropStatePtr;
|
||||
|
||||
class CompositeFunction;
|
||||
typedef Internal::ReferenceCountedPtr<CompositeFunction> CompositeFunctionPtr;
|
||||
typedef std::shared_ptr<CompositeFunction> CompositeFunctionPtr;
|
||||
|
||||
class CompositeFunction final : public Function
|
||||
{
|
||||
friend class Function;
|
||||
|
||||
template <typename T, typename ...CtorArgTypes>
|
||||
friend inline std::shared_ptr<T> MakeSharedObject(CtorArgTypes&& ...ctorArgs);
|
||||
|
||||
public:
|
||||
static CompositeFunctionPtr Create(const FunctionPtr& rootFunction, const std::wstring& name = L"")
|
||||
{
|
||||
Internal::SimpleSet<FunctionPtr> visitedFunctions;
|
||||
std::unordered_set<FunctionPtr> visitedFunctions;
|
||||
|
||||
// Call DetermineInputs to get the set of all functions in the graph
|
||||
DetermineInputs(rootFunction, visitedFunctions);
|
||||
|
||||
auto func = new CompositeFunction(rootFunction, std::move(visitedFunctions), name);
|
||||
return CompositeFunctionPtr(func, [](ReferenceCount* ptr) { delete ptr; });
|
||||
return MakeSharedObject<CompositeFunction>(rootFunction, std::move(visitedFunctions), name);
|
||||
}
|
||||
|
||||
virtual BackPropStatePtr Forward(const Internal::SimpleMap<Variable, const ValuePtr>& arguments,
|
||||
Internal::SimpleMap<Variable, ValuePtr>& outputs,
|
||||
const Internal::SimpleSet<Variable>& outputsToRetainBackwardStateFor,
|
||||
const DeviceDescriptor& computeDevice) override;
|
||||
virtual BackPropStatePtr Forward(const std::unordered_map<Variable, const ValuePtr>& arguments,
|
||||
std::unordered_map<Variable, ValuePtr>& outputs,
|
||||
const DeviceDescriptor& computeDevice,
|
||||
const std::unordered_set<Variable>& outputsToRetainBackwardStateFor) override;
|
||||
|
||||
virtual void Backward(const BackPropStatePtr& state,
|
||||
const Internal::SimpleMap<Variable, const ValuePtr>& rootGradientValues,
|
||||
Internal::SimpleMap<Variable, ValuePtr>& backPropagatedGradientValuesForInputs) override;
|
||||
const std::unordered_map<Variable, const ValuePtr>& rootGradientValues,
|
||||
std::unordered_map<Variable, ValuePtr>& backPropagatedGradientValuesForInputs) override;
|
||||
|
||||
private:
|
||||
virtual void _ReplacePlaceholders(const Internal::SimpleMap<Placeholder, Variable>& placeholderReplacements, Internal::SimpleSet<const Function*>& visitedFunctions, Internal::SimpleSet<Placeholder>& replacedPlaceholders) override;
|
||||
virtual void ReplacePlaceholders(const std::unordered_map<Placeholder, Variable>& placeholderReplacements,
|
||||
std::unordered_set<const Function*>& visitedFunctions,
|
||||
std::unordered_set<Placeholder>& replacedPlaceholders) override;
|
||||
|
||||
CompositeFunction(const FunctionPtr& rootFunction, Internal::SimpleSet<FunctionPtr>&& allPrimitiveFunctions, const std::wstring& name)
|
||||
CompositeFunction(const FunctionPtr& rootFunction, std::unordered_set<FunctionPtr>&& allPrimitiveFunctions, const std::wstring& name)
|
||||
: Function({}, rootFunction->Outputs(), rootFunction, name), m_allPrimitiveFunctions(std::move(allPrimitiveFunctions))
|
||||
{
|
||||
}
|
||||
|
||||
std::vector<Variable> DetermineInputs() const
|
||||
{
|
||||
Internal::SimpleSet<FunctionPtr> visitedFunctions;
|
||||
std::unordered_set<FunctionPtr> visitedFunctions;
|
||||
return DetermineInputs(RootFunction(), visitedFunctions);
|
||||
}
|
||||
|
||||
// Recursively traverses the Function graph underlying the 'rootFunction' to determine all the leaves (aka inputs) of the graph
|
||||
static std::vector<Variable> DetermineInputs(const FunctionPtr& rootFunction, Internal::SimpleSet<FunctionPtr>& visitedFunctions)
|
||||
static std::vector<Variable> DetermineInputs(const FunctionPtr& rootFunction, std::unordered_set<FunctionPtr>& visitedFunctions)
|
||||
{
|
||||
visitedFunctions.Insert(rootFunction);
|
||||
visitedFunctions.insert(rootFunction);
|
||||
|
||||
std::vector<Variable> inputs;
|
||||
std::vector<Variable> rootFunctionInputs = rootFunction->Inputs();
|
||||
|
@ -335,7 +339,7 @@ namespace CNTK
|
|||
{
|
||||
if (!rootInput.IsOutput())
|
||||
inputs.push_back(rootInput);
|
||||
else if (!visitedFunctions.Contains(rootInput.Owner()))
|
||||
else if (visitedFunctions.find(rootInput.Owner()) == visitedFunctions.end())
|
||||
{
|
||||
FunctionPtr function = rootInput.Owner();
|
||||
std::vector<Variable> functionInputs = DetermineInputs(function, visitedFunctions);
|
||||
|
@ -347,7 +351,7 @@ namespace CNTK
|
|||
}
|
||||
|
||||
template <typename ElementType>
|
||||
Microsoft::MSR::CNTK::ComputationNetworkPtr GetComputationNetwork(const DeviceDescriptor& device, const Internal::SimpleSet<Variable>& backpropRoots);
|
||||
Microsoft::MSR::CNTK::ComputationNetworkPtr GetComputationNetwork(const DeviceDescriptor& device, const std::unordered_set<Variable>& backpropRoots);
|
||||
|
||||
template <typename ElementType>
|
||||
static Microsoft::MSR::CNTK::ComputationNodeBasePtr GetOutputVariableNode(const Variable& variable, Microsoft::MSR::CNTK::ComputationNetworkPtr& network, Microsoft::MSR::CNTK::ComputationNetworkBuilder<ElementType>& builder, std::unordered_map<Variable, Microsoft::MSR::CNTK::ComputationNodeBasePtr>& variableToNodeMap, std::unordered_map<Variable, bool>& isVariableRootMap);
|
||||
|
@ -355,8 +359,13 @@ namespace CNTK
|
|||
template <typename ElementType>
|
||||
static Microsoft::MSR::CNTK::ComputationNodeBasePtr GetNode(const Variable& variable, Microsoft::MSR::CNTK::ComputationNetworkPtr& network, Microsoft::MSR::CNTK::ComputationNetworkBuilder<ElementType>& builder, std::unordered_map<Variable, Microsoft::MSR::CNTK::ComputationNodeBasePtr>& variableToNodeMap, std::unordered_map<Variable, bool>& isVariableRootMap);
|
||||
|
||||
void PopulateNetworkInputs(const Internal::SimpleMap<Variable, const ValuePtr>& arguments);
|
||||
void PopulateNetworkGradients(const Internal::SimpleMap<Variable, const ValuePtr>& gradients);
|
||||
template <typename ElementType>
|
||||
static void PopulateComputationNodeValue(const std::pair<Variable, ValuePtr>& variableValue, Microsoft::MSR::CNTK::ComputationNodeBasePtr& computationNode);
|
||||
void PopulateNetworkInputs(const std::unordered_map<Variable, const ValuePtr>& arguments);
|
||||
|
||||
template <typename ElementType>
|
||||
static void PopulateComputationNodeGradient(const std::pair<Variable, ValuePtr>& variableGradient, Microsoft::MSR::CNTK::ComputationNodeBasePtr& computationNode);
|
||||
void PopulateNetworkGradients(const std::unordered_map<Variable, const ValuePtr>& gradients);
|
||||
|
||||
void GetNetworkOutputs(std::unordered_map<Variable, ValuePtr>& outputs);
|
||||
void GetNetworkGradients(std::unordered_map<Variable, ValuePtr>& gradients);
|
||||
|
@ -371,7 +380,7 @@ namespace CNTK
|
|||
|
||||
// Set of all primitive functions in the graph underlying 'this' Function. Also keeps the primitive Function objects alive
|
||||
// by holding strong references to them
|
||||
Internal::SimpleSet<FunctionPtr> m_allPrimitiveFunctions;
|
||||
std::unordered_set<FunctionPtr> m_allPrimitiveFunctions;
|
||||
|
||||
// A map from Variable objects to ComputationNode objects in the ComputationNetwork instance that implements 'this' Composite Function
|
||||
std::unordered_map<Variable, Microsoft::MSR::CNTK::ComputationNodeBasePtr> m_variableToNodeMap;
|
||||
|
|
|
@ -0,0 +1,464 @@
|
|||
//
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
|
||||
//
|
||||
|
||||
#include "Learner.h"
|
||||
#include "TensorView.h"
|
||||
#include "Utils.h"
|
||||
|
||||
#define UPDATE_FUNCTION \
|
||||
switch (smoothedGradientValue->Data()->GetDataType()) \
|
||||
{ \
|
||||
case DataType::Float: \
|
||||
Update<float>(parameter, smoothedGradientValue, gradientValue, parameterValue, trainingSampleCount); \
|
||||
break; \
|
||||
case DataType::Double: \
|
||||
Update<double>(parameter, smoothedGradientValue, gradientValue, parameterValue, trainingSampleCount); \
|
||||
break; \
|
||||
default: \
|
||||
NOT_IMPLEMENTED; \
|
||||
}
|
||||
|
||||
|
||||
using namespace Microsoft::MSR::CNTK;
|
||||
using namespace std;
|
||||
|
||||
namespace CNTK
|
||||
{
|
||||
template <typename ElementType>
|
||||
/*static*/ shared_ptr<const Matrix<ElementType>> LearnerBase::GetMatrix(const NDArrayViewPtr arrayView)
|
||||
{
|
||||
return arrayView->GetMatrix<ElementType>();
|
||||
}
|
||||
|
||||
template <typename ElementType>
|
||||
/*static*/ shared_ptr<Matrix<ElementType>> LearnerBase::GetWritableMatrix(NDArrayViewPtr arrayView)
|
||||
{
|
||||
return arrayView->GetWritableMatrix<ElementType>();
|
||||
}
|
||||
|
||||
template <typename ElementType>
|
||||
/*static*/ const TensorView<ElementType>* LearnerBase::GetTensorView(const NDArrayViewPtr arrayView)
|
||||
{
|
||||
return arrayView->GetTensorView<ElementType>();
|
||||
}
|
||||
|
||||
/*static*/ bool LearnerBase::HasNan(const ValuePtr& value, const char* name)
|
||||
{
|
||||
const auto& data = value->Data();
|
||||
switch (data->GetDataType())
|
||||
{
|
||||
case DataType::Float:
|
||||
return data->GetMatrix<float>()->HasNan(name);
|
||||
case DataType::Double:
|
||||
return data->GetMatrix<double>()->HasNan(name);
|
||||
default:
|
||||
LogicError("Unsupported DataType %s", DataTypeName(data->GetDataType()));
|
||||
}
|
||||
}
|
||||
|
||||
/*static*/ void LearnerBase::Print(const ValuePtr& value, const char* msg)
|
||||
{
|
||||
const auto& data = value->Data();
|
||||
switch (data->GetDataType())
|
||||
{
|
||||
case DataType::Float:
|
||||
data->GetMatrix<float>()->Print(msg);
|
||||
break;
|
||||
case DataType::Double:
|
||||
data->GetMatrix<double>()->Print(msg);
|
||||
break;
|
||||
default:
|
||||
LogicError("Unsupported DataType %s", DataTypeName(data->GetDataType()));
|
||||
}
|
||||
}
|
||||
|
||||
// Clipping gradients to prevent outliers,
|
||||
template <typename ElementType>
|
||||
void LearnerBase::ClipGradient(Matrix<ElementType>& gradient, size_t actualMBSize) const
|
||||
{
|
||||
if (m_additionalOptions.gradientClippingThresholdPerSample != numeric_limits<double>::infinity())
|
||||
{
|
||||
double maxGradientPerMB = m_additionalOptions.gradientClippingThresholdPerSample * actualMBSize;
|
||||
if (m_additionalOptions.gradientClippingWithTruncation)
|
||||
gradient.InplaceTruncate(ElementType(maxGradientPerMB));
|
||||
else
|
||||
{
|
||||
// norm2 normalized
|
||||
double gradientNorm = gradient.FrobeniusNorm();
|
||||
if (gradientNorm > maxGradientPerMB)
|
||||
{
|
||||
double normFactor = maxGradientPerMB / gradientNorm;
|
||||
gradient *= ElementType(normFactor);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Performs additional preprocessing before calling the update method
|
||||
// (gradient clipping and L2 regularization depending on the additional learning parameters).
|
||||
template <typename ElementType>
|
||||
void LearnerBase::PreProcess(const ValuePtr& gradientValue,const ValuePtr& parameterValue, size_t actualMBSize) const
|
||||
{
|
||||
const auto& gradientMatrix = gradientValue->Data()->GetWritableMatrix<ElementType>();
|
||||
|
||||
// clipping gradients to prevent outliers
|
||||
ClipGradient<ElementType>(*gradientMatrix, actualMBSize);
|
||||
|
||||
// L2 regularizer
|
||||
if (m_additionalOptions.l2RegularizationWeight > 0)
|
||||
{
|
||||
// multiply by actualMBSize so that it's invariant to minibatch size since learning rate is per sample
|
||||
auto weight = ElementType(m_additionalOptions.l2RegularizationWeight * actualMBSize);
|
||||
const auto& parameterMatrix = parameterValue->Data()->GetWritableMatrix<ElementType>();
|
||||
Matrix<ElementType>::ScaleAndAdd(weight, *parameterMatrix, *gradientMatrix);
|
||||
}
|
||||
}
|
||||
|
||||
// Performs additional postprocessing after the update method has been executed
|
||||
// (noise injection and L1 regularization specified by the additional learning parameters).
|
||||
template <typename ElementType>
|
||||
void LearnerBase::PostProcess(const Variable& parameter, const ValuePtr& gradientValue,
|
||||
const ValuePtr& parameterValue, size_t actualMBSize) const
|
||||
{
|
||||
const auto& parameterMatrix = parameterValue->Data()->GetWritableMatrix<ElementType>();
|
||||
if (m_additionalOptions.gaussianNoiseInjectionStdDev > 0)
|
||||
{
|
||||
const auto& gradientMatrix = gradientValue->Data()->GetWritableMatrix<ElementType>();
|
||||
|
||||
Matrix<ElementType> sgdUpdateNoise((DEVICEID_TYPE)parameterMatrix->GetDeviceId());
|
||||
|
||||
// get the gradient structure since gradient is sparse
|
||||
sgdUpdateNoise.SetValue(*gradientMatrix);
|
||||
|
||||
auto noiseStdDev = ElementType(m_additionalOptions.gaussianNoiseInjectionStdDev);
|
||||
|
||||
// reset its value to random
|
||||
sgdUpdateNoise.SetGaussianRandomValue(ElementType(0.0), noiseStdDev);
|
||||
|
||||
Matrix<ElementType>::ScaleAndAdd(ElementType(1.0), sgdUpdateNoise, *parameterMatrix);
|
||||
}
|
||||
|
||||
// L1 regularizer with proximal gradient descent method
|
||||
if (m_additionalOptions.l1RegularizationWeight > 0)
|
||||
{
|
||||
auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
|
||||
// multiply by actualMBSize so that it's invariant to minibatch size since learning rate is per sample
|
||||
auto weight = ElementType(learningRate * m_additionalOptions.l1RegularizationWeight * actualMBSize);
|
||||
parameterValue->Data()->GetWritableMatrix<ElementType>()->InplaceSoftThreshold(weight);
|
||||
}
|
||||
}
|
||||
|
||||
// Thin static forwarder exposing NDArrayView's private GetWritableTensorView
// (NDArrayView declares LearnerBase as a friend) to learner subclasses.
template <typename ElementType>
/*static*/ TensorView<ElementType>* LearnerBase::GetWritableTensorView(NDArrayViewPtr arrayView)
{
    return arrayView->GetWritableTensorView<ElementType>();
}
|
||||
|
||||
// Constructs the base learner: allocates a zero-initialized smoothed-gradient
// buffer for every parameter on the given device and sets each parameter's
// learning-rate multiplier to 1.0.
LearnerBase::LearnerBase(const unordered_set<Variable>& parameters, const DeviceDescriptor& device)
    : Learner(parameters),
    m_learningRatePerSample(0.0),
    m_sampleCount(0)
{
    for (const auto& parameter : parameters)
    {
        // TODO: using the same device to allocate data for all smoothed gradients. Is this correct?
        // Should the device be specified on the per-parameter basis?
        NDArrayViewPtr view;
        switch (parameter.GetDataType())
        {
        case DataType::Float:
            view = MakeSharedObject<NDArrayView>(0.0f, parameter.Shape(), device);
            break;
        case DataType::Double:
            view = MakeSharedObject<NDArrayView>(0.0, parameter.Shape(), device);
            break;
        default:
            // Fail loudly instead of silently allocating a double buffer for an
            // unknown element type (consistent with ResetSmoothedGradients).
            LogicError("Unsupported DataType %s", ::CNTK::DataTypeName(parameter.GetDataType()));
        }

        m_smoothedGradientValues.insert(make_pair(parameter, MakeSharedObject<Value>(view)));
        m_additionalOptions.learningRateMultipliers.insert(make_pair(parameter, 1.0));
    }
}
|
||||
|
||||
void LearnerBase::ResetSmoothedGradients()
|
||||
{
|
||||
for (const auto& parameter : Parameters())
|
||||
{
|
||||
const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
|
||||
const auto& data = smoothedGradientValue->Data();
|
||||
switch (data->GetDataType())
|
||||
{
|
||||
case DataType::Float:
|
||||
data->SetValue(0.0f);
|
||||
break;
|
||||
case DataType::Double:
|
||||
data->SetValue(0.0);
|
||||
break;
|
||||
default:
|
||||
LogicError("Unsupported DataType %s", ::CNTK::DataTypeName(data->GetDataType()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Applies one update step to every parameter: dispatches on the parameter's
// element type (via UPDATE_FUNCTION) to the templatized Update, which performs
// pre-processing, the subclass-specific rule, and post-processing.
// In _DEBUG builds, NaN checks run before and after each parameter update.
// Always returns false after accumulating trainingSampleCount into m_sampleCount.
/*virtual*/ bool LearnerBase::Update(const unordered_map<Variable, ValuePtr>& parameterValues,
                                     const unordered_map<Variable, const ValuePtr>& gradientValues,
                                     size_t trainingSampleCount) /*override*/
{
    // make sure trainingSampleCount is a valid value
    assert(trainingSampleCount > 0);

    for (const auto& parameter : Parameters())
    {
        const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
        const auto& gradientValue = gradientValues.at(parameter);
        const auto& parameterValue = parameterValues.at(parameter);

// TODO: make this a runtime parameter.
#if DUMPOUTPUT
        LOGPRINTF(stderr, "Update_%ls\n", parameter.Name().c_str());
#endif

#ifdef _DEBUG
        if (HasNan(smoothedGradientValue, "TrainOneEpoch/UpdateWeights/Learner::Update(): "))
            LogicError("%ls has NaNs in smoothedGradient.", parameter.Name().c_str());
#endif

#if DUMPOUTPUT
        LOGPRINTF(stderr, "learnRatePerSample=%0.8f, momentum=%0.8f, actualMBSize=%ld\n",
                  m_learningRatePerSample, m_momentumPerSample, trainingSampleCount);
        LOGPRINTF(stderr, "GradUpdateType()=%s, GradientUpdateNoiseStd()=%0.8f\n",
                  LearnerType().c_str(), m_GaussianNoiseInjectStd);
        Print(gradientValue, "Gradient Update");
        Print(smoothedGradientValue, "Smoothed Gradient Input");
#endif
        // Element-type dispatch to the templatized private Update.
        UPDATE_FUNCTION;

#if DUMPOUTPUT
        Print(parameterValue, "Parameter Update");
#endif

#ifdef _DEBUG
        if (HasNan(parameterValue, "TrainOneEpoch/UpdateWeights/Learner::Update(): "))
            LogicError("%ls has NaNs in parameter values after parameter update.", parameter.Name().c_str());
#endif
    }
    m_sampleCount += trainingSampleCount;
    return false;
}
|
||||
|
||||
// Templatized update: sandwiches the subclass-specific virtual Update between
// PreProcess (gradient clipping, L2) and PostProcess (noise injection, L1).
template <typename ElementType>
void LearnerBase::Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                         const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const
{
    PreProcess<ElementType>(gradientValue, parameterValue, trainingSampleCount);
    Update(parameter, smoothedGradientValue, gradientValue, parameterValue, trainingSampleCount);
    PostProcess<ElementType>(parameter, gradientValue, parameterValue, trainingSampleCount);
}
|
||||
|
||||
// Returns a human-readable learner type name derived from RTTI.
// MSVC's typeid(...).name() prepends "class "; strip that prefix when present.
string LearnerBase::LearnerType() const
{
    const char* typeName = typeid(*this).name();
    const char prefix[] = "class ";
    const size_t prefixLen = sizeof(prefix) - 1;
    if (strncmp(typeName, prefix, prefixLen) == 0)
        return typeName + prefixLen;
    return typeName;
}
|
||||
|
||||
// Serializes every smoothed-gradient buffer into a Dictionary keyed by the
// parameter name. Currently disabled via NOT_IMPLEMENTED.
/*virtual*/ Dictionary LearnerBase::GetCheckpointState() const /*override*/
{
    NOT_IMPLEMENTED; // Until the new checkpointing is fully fleshed out, nobody should be calling this.
    Dictionary checkpoint;

    for (const auto& parameter : Parameters())
    {
        // TODO: parameter name is not guaranteed to be unique. Instead, all serializable objects
        // need to expose "UId" property -- a persistent unique internal name.
        // Switch to UId as soon as it's available.
        if (checkpoint.Contains(parameter.Name()))
            LogicError("Parameter names must be unique");

        // Potentially, could store things like dimensions, element size, format, etc., but
        // that seems to be redundant, since all of that is passed in the constructor.
        checkpoint[parameter.Name()] = SerializeToVector(m_smoothedGradientValues.at(parameter)->Data());
    }
    return checkpoint;
}
|
||||
|
||||
/*virtual*/ void LearnerBase::RestoreFromCheckpoint(const Dictionary& checkpoint) /*override*/
|
||||
{
|
||||
NOT_IMPLEMENTED; // Until the new checkpointing is fully fleshed out, nobody should be calling this.
|
||||
for (const auto& parameter : Parameters())
|
||||
{
|
||||
if (!checkpoint.Contains(parameter.Name()))
|
||||
{
|
||||
LogicError("Checkpoint does not contain state for parameter %ls", parameter.Name().c_str());
|
||||
}
|
||||
const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
|
||||
|
||||
const DictionaryValue& state = checkpoint[parameter.Name()];
|
||||
|
||||
const auto& data = smoothedGradientValue->Data();
|
||||
|
||||
DeserializeFromVector(data, state.GetValue<vector<DictionaryValue>>());
|
||||
}
|
||||
}
|
||||
|
||||
/*virtual*/ void LearnerSGD::Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
|
||||
const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const /*override*/
|
||||
{
|
||||
UPDATE_FUNCTION;
|
||||
}
|
||||
|
||||
template <typename ElementType>
|
||||
void LearnerSGD::Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
|
||||
const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const
|
||||
{
|
||||
UNUSED(trainingSampleCount);
|
||||
|
||||
const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue->Data());
|
||||
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue->Data());
|
||||
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue->Data());
|
||||
|
||||
const auto& learningRate = ElementType(ParameterDependentLearningRate(parameter));
|
||||
|
||||
// TODO: break up the NormalGrad into 3 different functions, each with its own set of parameters
|
||||
// (one for vanilla SGD, the other for momentum SGD, and the third one for NAG).
|
||||
smoothedGradientMatrix->NormalGrad(*gradientMatrix, *parameterMatrix,
|
||||
learningRate, ElementType(m_momentumPerSample), m_useNesterovAcceleration);
|
||||
}
|
||||
|
||||
LearnerAdaGrad::LearnerAdaGrad(const unordered_set<Variable>& parameters, bool needAveMultiplier, const DeviceDescriptor& device)
|
||||
: LearnerBase(parameters, device),
|
||||
m_needAveMultiplier(needAveMultiplier)
|
||||
{
|
||||
}
|
||||
|
||||
/*virtual*/ void LearnerAdaGrad::Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
|
||||
const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const /*override*/
|
||||
{
|
||||
UPDATE_FUNCTION;
|
||||
}
|
||||
|
||||
template <typename ElementType>
|
||||
void LearnerAdaGrad::Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
|
||||
const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const
|
||||
{
|
||||
UNUSED(trainingSampleCount);
|
||||
|
||||
const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue->Data());
|
||||
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue->Data());
|
||||
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue->Data());
|
||||
|
||||
auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
|
||||
|
||||
auto aveMultiplier = smoothedGradientMatrix->Adagrad(*gradientMatrix, m_needAveMultiplier);
|
||||
Matrix<ElementType>::ScaleAndAdd(ElementType(-learningRate / aveMultiplier), *gradientMatrix, *parameterMatrix);
|
||||
}
|
||||
|
||||
LearnerFSAdaGrad::LearnerFSAdaGrad(const unordered_set<Variable>& parameters, const DeviceDescriptor& device)
|
||||
: LearnerMomentumSGD(parameters, device)
|
||||
{
|
||||
}
|
||||
|
||||
/*virtual*/ void LearnerFSAdaGrad::Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
|
||||
const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const /*override*/
|
||||
{
|
||||
UPDATE_FUNCTION;
|
||||
}
|
||||
|
||||
template <typename ElementType>
|
||||
void LearnerFSAdaGrad::Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
|
||||
const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const
|
||||
{
|
||||
UNUSED(trainingSampleCount);
|
||||
|
||||
const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue->Data());
|
||||
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue->Data());
|
||||
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue->Data());
|
||||
|
||||
//const double momentum = MomentumPerMB(m_momentumPerSample, trainingSampleCount);
|
||||
|
||||
auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
|
||||
|
||||
smoothedGradientMatrix->FSAdagrad(trainingSampleCount, *gradientMatrix, *parameterMatrix,
|
||||
learningRate, ElementType(m_momentumPerSample));
|
||||
}
|
||||
|
||||
LearnerRMSProp::LearnerRMSProp(const unordered_set<Variable>& parameters,
|
||||
double gamma, double inc, double dec, double max, double min,
|
||||
bool needAveMultiplier, const DeviceDescriptor& device)
|
||||
: LearnerBase(parameters, device),
|
||||
m_gamma(gamma), m_inc(inc), m_dec(dec), m_max(max), m_min(min),
|
||||
m_needAveMultiplier(needAveMultiplier)
|
||||
{
|
||||
}
|
||||
|
||||
/*virtual*/ void LearnerRMSProp::Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
|
||||
const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const /*override*/
|
||||
{
|
||||
UPDATE_FUNCTION;
|
||||
}
|
||||
|
||||
template <typename ElementType>
|
||||
void LearnerRMSProp::Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
|
||||
const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const
|
||||
{
|
||||
UNUSED(trainingSampleCount);
|
||||
|
||||
const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue->Data());
|
||||
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue->Data());
|
||||
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue->Data());
|
||||
|
||||
auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
|
||||
|
||||
auto aveMultiplier = smoothedGradientMatrix->RmsProp(*gradientMatrix,
|
||||
ElementType(m_gamma), ElementType(m_inc),
|
||||
ElementType(m_max), ElementType(m_dec),
|
||||
ElementType(m_min), m_needAveMultiplier);
|
||||
Matrix<ElementType>::ScaleAndAdd(ElementType(-learningRate / aveMultiplier), *gradientMatrix, *parameterMatrix);
|
||||
}
|
||||
|
||||
// Explicit template instantiations
template shared_ptr<Matrix<float>> LearnerBase::GetWritableMatrix<float>(const NDArrayViewPtr arrayView);
template shared_ptr<Matrix<double>> LearnerBase::GetWritableMatrix<double>(const NDArrayViewPtr arrayView);

// Factory functions for the built-in learners. Each returns a LearnerPtr
// owning a freshly constructed learner over the given parameter set.

LearnerPtr SGDLearner(const unordered_set<Variable>& parameters, const DeviceDescriptor& device)
{
    return MakeSharedObject<LearnerSGD>(parameters, device);
}

LearnerPtr MomentumSGDLearner(const unordered_set<Variable>& parameters, const DeviceDescriptor& device)
{
    return MakeSharedObject<LearnerMomentumSGD>(parameters, device);
}

LearnerPtr NesterovLearner(const unordered_set<Variable>& parameters, const DeviceDescriptor& device)
{
    return MakeSharedObject<LearnerNesterov>(parameters, device);
}

LearnerPtr AdaGradLearner(const unordered_set<Variable>& parameters, bool needAveMultiplier, const DeviceDescriptor& device)
{
    return MakeSharedObject<LearnerAdaGrad>(parameters, needAveMultiplier, device);
}

LearnerPtr FSAdaGradLearner(const unordered_set<Variable>& parameters, const DeviceDescriptor& device)
{
    return MakeSharedObject<LearnerFSAdaGrad>(parameters, device);
}

LearnerPtr RMSPropLearner(const unordered_set<Variable>& parameters,
                          double gamma, double inc, double dec, double max, double min, bool needAveMultiplier,
                          const DeviceDescriptor& device)
{
    return MakeSharedObject<LearnerRMSProp>(parameters, gamma, inc, dec, max, min, needAveMultiplier, device);
}
|
||||
|
||||
}
|
|
@ -0,0 +1,224 @@
|
|||
//
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
|
||||
//
|
||||
|
||||
#include "stdafx.h"
#include "CNTKLibrary.h"

#include <limits>
|
||||
|
||||
namespace CNTK
|
||||
{
|
||||
// A collection of additional options that are applicable for all standard learners
|
||||
// (after these options are set, they retain their value for the entire lifespan of a learner).
|
||||
struct AdditionalLearningOptions
|
||||
{
|
||||
double l1RegularizationWeight = 0.0;
|
||||
double l2RegularizationWeight = 0.0;
|
||||
double gaussianNoiseInjectionStdDev = 0.0;
|
||||
bool gradientClippingWithTruncation = false;
|
||||
double gradientClippingThresholdPerSample = 0.0;
|
||||
std::unordered_map<Variable, double> learningRateMultipliers;
|
||||
};
|
||||
|
||||
// An abstract base class at the root of the standard learners hierarchy
// It implements most of the learner functionality, except for the actual update function,
// and adds a few pre-/postprocessing methods (which are invoked before and after the update).
class LearnerBase : public Learner
{
public:
    // Applies one update step to every parameter using the matching gradient;
    // accumulates trainingSampleCount into the internal sample counter.
    CNTK_API virtual bool Update(const std::unordered_map<Variable, ValuePtr>& parameterValues,
                                 const std::unordered_map<Variable, const ValuePtr>& gradientValues,
                                 size_t trainingSampleCount) override final;

    // Checkpointing is not implemented yet; both methods currently raise NOT_IMPLEMENTED.
    CNTK_API virtual Dictionary GetCheckpointState() const override;

    CNTK_API virtual void RestoreFromCheckpoint(const Dictionary& checkpoint) override;

    // Replaces the additional options (regularization weights, noise std-dev,
    // clipping settings, per-parameter learning-rate multipliers) wholesale.
    CNTK_API void SetAdditionalOptions(const AdditionalLearningOptions& additionalOptions)
    {
        m_additionalOptions = additionalOptions;
    }

    // TODO: should this be called ResetMomentum?
    // needed for BlockMomemtumSGD to reset SGD momentum after aggregation.
    CNTK_API void ResetSmoothedGradients();

    // TODO: move learning rate and momentum scheduling and adjustment functionality
    // inside the learner and drop these setters.
    void SetLearningRate(double value) { m_learningRatePerSample = value; }

protected:
    LearnerBase(const std::unordered_set<Variable>& parameters,
                const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice());

    // Subclass-specific update rule for a single parameter.
    virtual void Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                        const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const = 0;

    // Effective per-sample learning rate: base rate scaled by this parameter's multiplier.
    double ParameterDependentLearningRate(const Variable& parameter) const
    {
        return m_learningRatePerSample * m_additionalOptions.learningRateMultipliers.at(parameter);
    }

    // Human-readable learner type name derived from RTTI.
    std::string LearnerType() const;

    double m_learningRatePerSample;

    AdditionalLearningOptions m_additionalOptions;

    // One smoothed-gradient (state) buffer per parameter, zero-initialized.
    std::unordered_map<Variable, ValuePtr> m_smoothedGradientValues;

    // The following four static protected methods expose private methods of NDArrayView class
    // (which declares LearnerBase as friend class), so that they are available to subclasses.
    template <typename ElementType>
    static std::shared_ptr<const Microsoft::MSR::CNTK::Matrix<ElementType>> GetMatrix(const NDArrayViewPtr arrayView);

    template <typename ElementType>
    static std::shared_ptr<Microsoft::MSR::CNTK::Matrix<ElementType>> GetWritableMatrix(NDArrayViewPtr arrayView);

    template <typename ElementType>
    static const Microsoft::MSR::CNTK::TensorView<ElementType>* GetTensorView(const NDArrayViewPtr arrayView);

    template <typename ElementType>
    static Microsoft::MSR::CNTK::TensorView<ElementType>* GetWritableTensorView(NDArrayViewPtr arrayView);

    // Clips the gradient in place (element-wise truncation or norm rescaling)
    // according to m_additionalOptions.
    template <typename ElementType>
    void ClipGradient(Microsoft::MSR::CNTK::Matrix<ElementType>& gradient, size_t actualMBSize) const;

    // Performs additional preprocessing before calling the update method
    // (gradient clipping and L2 regularization depending on the additional learning parameters).
    template <typename ElementType>
    void PreProcess(const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t actualMBSize) const;

    // Performs additional postprocessing after the update method has been executed
    // (noise injection and L1 regularization specified by the additional learning parameters).
    template <typename ElementType>
    void PostProcess(const Variable& parameter, const ValuePtr& gradientValue,
                     const ValuePtr& parameterValue, size_t actualMBSize) const;

private:
    // Templatized update function, it invokes preprocess and postprocess using the provided
    // template parameter and also invokes virtual Update method implemented in one of the subclasses.
    template <typename ElementType>
    void Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const;

    // TODO: make these functions friends of NDViewArray and move to Utils?
    static bool HasNan(const ValuePtr& value, const char* name);
    static void Print(const ValuePtr& value, const char* msg);

    // Total number of training samples seen across all Update calls.
    size_t m_sampleCount;
};
|
||||
|
||||
// Vanilla gradient descent optimization algorithm.
class LearnerSGD : public LearnerBase
{
public:
    LearnerSGD(const std::unordered_set<Variable>& parameters,
               const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice())
        : LearnerBase(parameters, device),
        m_momentumPerSample(0.0),
        m_useNesterovAcceleration(false)
    {
    }

protected:
    virtual void Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                        const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const override;

    template <typename ElementType>
    void Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const;

    // Per-sample momentum (0.0 = plain SGD) and Nesterov flag; reused by the
    // momentum/Nesterov subclasses below.
    double m_momentumPerSample;
    bool m_useNesterovAcceleration;
};

// SGD optimization with momentum.
class LearnerMomentumSGD : public LearnerSGD
{
public:
    LearnerMomentumSGD(const std::unordered_set<Variable>& parameters,
                       const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice())
        : LearnerSGD(parameters, device)
    {
    }

    void SetMomentum(double value) { m_momentumPerSample = value; }
};

// Nesterov's accelerated stochastic gradient descent.
class LearnerNesterov : public LearnerSGD
{
public:
    LearnerNesterov(const std::unordered_set<Variable>& parameters,
                    const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice())
        : LearnerSGD(parameters, device)
    {
        m_useNesterovAcceleration = true;
    }
};
|
||||
|
||||
// AdaGrad learner: delegates the adaptive-gradient step to Matrix::Adagrad.
class LearnerAdaGrad : public LearnerBase
{
public:
    LearnerAdaGrad(const std::unordered_set<Variable>& parameters, bool needAveMultiplier,
                   const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice());

protected:
    // Forwarded to Matrix::Adagrad; see the matrix implementation for exact semantics.
    bool m_needAveMultiplier;

    virtual void Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                        const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const override;

    template <typename ElementType>
    void Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const;
};

// FSAdaGrad learner: inherits momentum handling from LearnerMomentumSGD and
// delegates the step to Matrix::FSAdagrad.
class LearnerFSAdaGrad : public LearnerMomentumSGD
{
public:
    LearnerFSAdaGrad(const std::unordered_set<Variable>& parameters,
                     const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice());

protected:
    virtual void Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                        const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const override;

    template <typename ElementType>
    void Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const;
};

// RMSProp learner: delegates the step to Matrix::RmsProp.
class LearnerRMSProp : public LearnerBase
{
public:
    LearnerRMSProp(const std::unordered_set<Variable>& parameters,
                   double gamma, double inc, double dec, double max, double min, bool needAveMultiplier,
                   const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice());

protected:
    // RmsProp hyperparameters, passed straight through to Matrix::RmsProp;
    // see the matrix implementation for their exact semantics.
    double m_gamma;
    double m_inc;
    double m_dec;
    double m_max;
    double m_min;
    bool m_needAveMultiplier;

    virtual void Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                        const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const override;

    template <typename ElementType>
    void Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const;
};
|
||||
}
|
|
@ -60,7 +60,7 @@ namespace CNTK
|
|||
matrixDims.second,
|
||||
AsCNTKImplDeviceId(device),
|
||||
IsSparseStorageFormat(storageType) ? MatrixType::SPARSE : MatrixType::DENSE,
|
||||
AsCNTKMatrixFormat(storageType));
|
||||
AsCNTKImplMatrixFormat(storageType));
|
||||
return new TensorView<ElementType>(matrix, AsTensorShape(viewShape));
|
||||
}
|
||||
|
||||
|
@ -99,8 +99,22 @@ namespace CNTK
|
|||
}
|
||||
|
||||
NDArrayView::NDArrayView(CNTK::DataType dataType, const DeviceDescriptor& device, CNTK::StorageFormat storageType, const NDShape& viewShape, bool readOnly, void* tensorView)
|
||||
: m_dataType(dataType), m_device(device), m_storageFormat(storageType), m_viewShape(viewShape), m_isReadOnly(readOnly), m_tensorView(tensorView)
|
||||
: m_dataType(dataType), m_device(device), m_storageFormat(storageType), m_viewShape(viewShape), m_isReadOnly(readOnly)
|
||||
{
|
||||
m_tensorView = std::shared_ptr<void>(tensorView, [this](void*) {
|
||||
switch (m_dataType)
|
||||
{
|
||||
case DataType::Float:
|
||||
delete GetTensorView<float>();
|
||||
break;
|
||||
case DataType::Double:
|
||||
delete GetTensorView<double>();
|
||||
break;
|
||||
default:
|
||||
LogicError("Unsupported DataType %s", DataTypeName(m_dataType));
|
||||
break;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
NDArrayView::NDArrayView(CNTK::DataType dataType, CNTK::StorageFormat storageType, const NDShape& viewShape, const DeviceDescriptor& device)
|
||||
|
@ -108,6 +122,10 @@ namespace CNTK
|
|||
{
|
||||
}
|
||||
|
||||
NDArrayView::~NDArrayView()
|
||||
{
|
||||
}
|
||||
|
||||
void NDArrayView::SetValue(float value)
|
||||
{
|
||||
if (IsSparse())
|
||||
|
@ -124,22 +142,6 @@ namespace CNTK
|
|||
GetWritableMatrix<double>()->SetValue(value);
|
||||
}
|
||||
|
||||
NDArrayView::~NDArrayView()
|
||||
{
|
||||
switch (m_dataType)
|
||||
{
|
||||
case DataType::Float:
|
||||
delete GetTensorView<float>();
|
||||
break;
|
||||
case DataType::Double:
|
||||
delete GetTensorView<double>();
|
||||
break;
|
||||
default:
|
||||
LogicError("Unsupported DataType %s", DataTypeName(m_dataType));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename ElementType>
|
||||
/*static*/ std::shared_ptr<Matrix<ElementType>> NDArrayView::GetMatrixImpl(const TensorView<ElementType>* tensorView, size_t rowColSplitPoint)
|
||||
{
|
||||
|
@ -150,7 +152,8 @@ namespace CNTK
|
|||
size_t splitPoint = rowColSplitPoint;
|
||||
if (splitPoint == NDArrayView::AutoSelectRowColSplitPoint)
|
||||
{
|
||||
// Determine the split point
|
||||
// Determine the split point by determining which of the axes can be
|
||||
// folded and selecting the non-foldable axis as the split point
|
||||
std::vector<bool> dimsToDrop(tensorShape.GetRank(), false);
|
||||
for (size_t k = 1; k < tensorShape.GetRank(); ++k)
|
||||
if (tensorShape.CanFlatten(k))
|
||||
|
@ -197,7 +200,7 @@ namespace CNTK
|
|||
if (AsDataType<ElementType>() != m_dataType)
|
||||
LogicError("NDArrayView::GetTensorView: The specified ElementType %s does not match the DataType %s", typeid(ElementType).name(), DataTypeName(m_dataType));
|
||||
|
||||
return (const TensorView<ElementType>*)(m_tensorView);
|
||||
return (const TensorView<ElementType>*)(m_tensorView.get());
|
||||
}
|
||||
|
||||
template <typename ElementType>
|
||||
|
@ -211,7 +214,7 @@ namespace CNTK
|
|||
|
||||
NDArrayViewPtr NDArrayView::DeepClone(bool readOnly/* = false*/) const
|
||||
{
|
||||
NDArrayViewPtr newView(new NDArrayView(this->GetDataType(), this->GetStorageFormat(), this->Shape(), this->Device()), [](ReferenceCount* ptr) { delete ptr; });
|
||||
NDArrayViewPtr newView = MakeSharedObject<NDArrayView>(this->GetDataType(), this->GetStorageFormat(), this->Shape(), this->Device());
|
||||
switch (m_dataType)
|
||||
{
|
||||
case DataType::Float:
|
||||
|
@ -234,9 +237,7 @@ namespace CNTK
|
|||
}
|
||||
|
||||
newView->m_isReadOnly = readOnly;
|
||||
return NDArrayViewPtr(newView, [](ReferenceCount* ptr) {
|
||||
delete ptr;
|
||||
});
|
||||
return newView;
|
||||
}
|
||||
|
||||
void NDArrayView::CopyFrom(const NDArrayView& source)
|
||||
|
@ -285,8 +286,7 @@ namespace CNTK
|
|||
break;
|
||||
}
|
||||
|
||||
auto aliasView = new NDArrayView(GetDataType(), Device(), GetStorageFormat(), Shape(), IsReadOnly() || readOnly, tensorView);;
|
||||
return NDArrayViewPtr(aliasView, [](ReferenceCount* ptr) { delete ptr; });
|
||||
return MakeSharedObject<NDArrayView>(GetDataType(), Device(), GetStorageFormat(), Shape(), IsReadOnly() || readOnly, tensorView);
|
||||
}
|
||||
|
||||
// TODO: This could actually be strided?
|
||||
|
@ -322,8 +322,7 @@ namespace CNTK
|
|||
auto randomUniformMatrix = std::make_shared<Matrix<ElementType>>(Matrix<ElementType>::RandomUniform(matrixDims.first, matrixDims.second, AsCNTKImplDeviceId(device), (ElementType)rangeBegin, (ElementType)rangeEnd, seed));
|
||||
auto tensorView = new TensorView<ElementType>(randomUniformMatrix, AsTensorShape(shape));
|
||||
|
||||
auto view = new NDArrayView(AsDataType<ElementType>(), device, StorageFormat::Dense, shape, false, tensorView);
|
||||
return NDArrayViewPtr(view, [](ReferenceCount* ptr) { delete ptr; });
|
||||
return MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), device, StorageFormat::Dense, shape, false, tensorView);
|
||||
}
|
||||
|
||||
// Explicit template instantiations
|
||||
|
@ -339,8 +338,10 @@ namespace CNTK
|
|||
template std::shared_ptr<const Matrix<float>> NDArrayView::GetMatrix(size_t rowColSplitPoint/* = AutoSelectRowColSplitPoint*/) const;
|
||||
template std::shared_ptr<const Matrix<double>> NDArrayView::GetMatrix(size_t rowColSplitPoint/* = AutoSelectRowColSplitPoint*/) const;
|
||||
|
||||
template std::shared_ptr<Matrix<float>> NDArrayView::GetWritableMatrix(size_t rowColSplitPoint/* = AutoSelectRowColSplitPoint*/);
|
||||
template std::shared_ptr<Matrix<double>> NDArrayView::GetWritableMatrix(size_t rowColSplitPoint/* = AutoSelectRowColSplitPoint*/);
|
||||
template std::shared_ptr<Matrix<float>> NDArrayView::GetWritableMatrix<float>(size_t rowColSplitPoint/* = AutoSelectRowColSplitPoint*/);
|
||||
template std::shared_ptr<Matrix<double>> NDArrayView::GetWritableMatrix<double>(size_t rowColSplitPoint/* = AutoSelectRowColSplitPoint*/);
|
||||
template TensorView<float>* NDArrayView::GetWritableTensorView<float>();
|
||||
template TensorView<double>* NDArrayView::GetWritableTensorView<double>();
|
||||
|
||||
template CNTK_API NDArrayView::NDArrayView(const NDShape& viewShape, const SparseIndexType* colStarts, const SparseIndexType* rowIndices, const float* nonZeroValues, size_t numNonZeroValues, const DeviceDescriptor& device, bool readOnly/* = false*/);
|
||||
template CNTK_API NDArrayView::NDArrayView(const NDShape& viewShape, const SparseIndexType* colStarts, const SparseIndexType* rowIndices, const double* nonZeroValues, size_t numNonZeroValues, const DeviceDescriptor& device, bool readOnly/* = false*/);
|
||||
|
|
|
@ -17,15 +17,13 @@ namespace CNTK
|
|||
static Matrix<char>* AllocateMatrix(const NDShape& viewShape, const DeviceDescriptor& device)
|
||||
{
|
||||
auto matrixDims = GetMatrixDimensions(viewShape);
|
||||
auto maskMatrix = new Matrix<char>(matrixDims.first, matrixDims.second, AsCNTKImplDeviceId(device));
|
||||
maskMatrix->SetValue(1);
|
||||
|
||||
return maskMatrix;
|
||||
return new Matrix<char>(matrixDims.first, matrixDims.second, AsCNTKImplDeviceId(device));
|
||||
}
|
||||
|
||||
NDMask::NDMask(const NDShape& shape, Matrix<char>* matrix)
|
||||
: m_device(AsDeviceDescriptor(matrix->GetDeviceId())), m_maskShape(shape), m_matrixView(matrix)
|
||||
: m_device(AsDeviceDescriptor(matrix->GetDeviceId())), m_maskShape(shape)
|
||||
{
|
||||
m_matrixView = std::shared_ptr<Matrix<char>>(matrix, [](Matrix<char>* ptr) { delete ptr; });
|
||||
}
|
||||
|
||||
NDMask::NDMask(const NDShape& shape, const DeviceDescriptor& device/* = DeviceDescriptor::DefaultDevice()*/)
|
||||
|
@ -33,16 +31,17 @@ namespace CNTK
|
|||
{
|
||||
if (shape.NumAxes() > 2)
|
||||
LogicError("NDMask instances with more than 2 axes are currently unsupported");
|
||||
|
||||
Clear();
|
||||
}
|
||||
|
||||
NDMask::~NDMask()
|
||||
{
|
||||
delete m_matrixView;
|
||||
}
|
||||
|
||||
void NDMask::MaskSection(const std::vector<size_t>& sectionOffset, const NDShape& sectionShape)
|
||||
{
|
||||
// TODO: Implement batching of masking operation for masks residing on GPUs to avoid making
|
||||
// TODO: Implement batching of masking operation for masks residing on GPUs to avoid making
|
||||
// GPU invocations for each MaskSection call.
|
||||
|
||||
if (sectionOffset.size() > m_maskShape.NumAxes())
|
||||
|
@ -78,12 +77,13 @@ namespace CNTK
|
|||
|
||||
void NDMask::Clear()
|
||||
{
|
||||
// Clear the mask by marking all samples as Valid; i.e. a value of 1
|
||||
GetMatrix()->SetValue(1);
|
||||
}
|
||||
|
||||
Matrix<char>* NDMask::GetMatrix() const
|
||||
{
|
||||
return m_matrixView;
|
||||
return m_matrixView.get();
|
||||
}
|
||||
|
||||
void NDMask::CopyFrom(const NDMask& source)
|
||||
|
@ -96,14 +96,14 @@ namespace CNTK
|
|||
|
||||
NDMaskPtr NDMask::DeepClone() const
|
||||
{
|
||||
NDMaskPtr newMask = new NDMask(this->Shape(), this->Device());
|
||||
NDMaskPtr newMask = MakeSharedObject<NDMask>(this->Shape(), this->Device());
|
||||
newMask->CopyFrom(*this);
|
||||
|
||||
return NDMaskPtr(newMask, [](ReferenceCount* ptr) { delete ptr; });
|
||||
return newMask;
|
||||
}
|
||||
|
||||
NDMaskPtr NDMask::Alias() const
|
||||
{
|
||||
return NDMaskPtr(new NDMask(this->Shape(), new Matrix<char>(GetMatrix()->AsReference())), [](ReferenceCount* ptr) { delete ptr; });
|
||||
return MakeSharedObject<NDMask>(this->Shape(), new Matrix<char>(GetMatrix()->AsReference()));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,354 +6,138 @@
|
|||
#include "stdafx.h"
|
||||
#include "CNTKLibrary.h"
|
||||
#include "Utils.h"
|
||||
#include "File.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace CNTK
|
||||
{
|
||||
namespace Internal
|
||||
template <typename T>
|
||||
void DictionaryValue::AllocateDataPtr(const T& value)
|
||||
{
|
||||
ReferenceCount::ReferenceCount()
|
||||
: m_rc(new std::atomic<size_t>(0))
|
||||
{}
|
||||
static_assert(is_same<T, NDShape>::value || is_same<T, vector<DictionaryValue>>::value, "AllocateDataPtr called with invalid type");
|
||||
m_data.m_ptr = new T(value);
|
||||
}
|
||||
|
||||
/*virtual*/ ReferenceCount::~ReferenceCount()
|
||||
template <typename T>
|
||||
void DictionaryValue::FreePtrAsType()
|
||||
{
|
||||
T* typedPtr = reinterpret_cast<T*>(m_data.m_ptr);
|
||||
delete typedPtr;
|
||||
|
||||
m_data.m_ptr = nullptr;
|
||||
}
|
||||
|
||||
void DictionaryValue::FreeDataPtr()
|
||||
{
|
||||
if (m_valueType == Type::NDShape)
|
||||
FreePtrAsType<NDShape>();
|
||||
else if (m_valueType == Type::Vector)
|
||||
FreePtrAsType<vector<DictionaryValue>>();
|
||||
}
|
||||
|
||||
Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, DictionaryValue& us)
|
||||
{
|
||||
size_t version;
|
||||
stream >> version;
|
||||
|
||||
stream >> us.m_valueType;
|
||||
|
||||
switch (us.ValueType())
|
||||
{
|
||||
delete m_rc;
|
||||
}
|
||||
|
||||
size_t ReferenceCount::AddReference()
|
||||
case DictionaryValue::Type::Bool:
|
||||
stream >> us.m_data.m_boolean;
|
||||
break;
|
||||
case DictionaryValue::Type::SizeT:
|
||||
stream >> us.m_data.m_sizeT;
|
||||
break;
|
||||
case DictionaryValue::Type::Float:
|
||||
stream >> us.m_data.m_float;
|
||||
break;
|
||||
case DictionaryValue::Type::Double:
|
||||
stream >> us.m_data.m_double;
|
||||
break;
|
||||
case DictionaryValue::Type::NDShape:
|
||||
{
|
||||
return ++(*m_rc);
|
||||
}
|
||||
|
||||
size_t ReferenceCount::RemoveReference()
|
||||
{
|
||||
assert(m_rc->load() > 0);
|
||||
return --(*m_rc);
|
||||
}
|
||||
|
||||
size_t ReferenceCount::GetReferenceCount()
|
||||
{
|
||||
return m_rc->load();
|
||||
}
|
||||
|
||||
#pragma region SimpleVector
|
||||
|
||||
template <typename T>
|
||||
SimpleVector<T>::SimpleVector()
|
||||
: m_vector(new std::vector<T>())
|
||||
{
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
SimpleVector<T>::SimpleVector(size_t numElements, const T& initVal/* = T()*/)
|
||||
: m_vector(new std::vector<T>(numElements, initVal))
|
||||
{
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
SimpleVector<T>::~SimpleVector()
|
||||
{
|
||||
delete m_vector;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
SimpleVector<T>::SimpleVector(const SimpleVector<T>& other)
|
||||
: m_vector(new std::vector<T>(*other.m_vector))
|
||||
{
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
SimpleVector<T>& SimpleVector<T>::operator=(const SimpleVector<T>& other)
|
||||
{
|
||||
if (this != &other)
|
||||
size_t size;
|
||||
stream >> size;
|
||||
vector<size_t> dims(size);
|
||||
for (auto i = 0; i < size; i++)
|
||||
{
|
||||
delete m_vector;
|
||||
m_vector = new std::vector<T>(*other.m_vector);
|
||||
stream >> dims[i];
|
||||
}
|
||||
|
||||
return *this;
|
||||
us.AllocateDataPtr(NDShape(dims));
|
||||
break;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
SimpleVector<T>::SimpleVector(SimpleVector<T>&& other)
|
||||
: m_vector(nullptr)
|
||||
case DictionaryValue::Type::Vector:
|
||||
{
|
||||
*this = std::move(other);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
SimpleVector<T>& SimpleVector<T>::operator=(SimpleVector<T>&& other)
|
||||
{
|
||||
assert(this != &other);
|
||||
|
||||
delete m_vector;
|
||||
m_vector = other.m_vector;
|
||||
|
||||
other.m_vector = nullptr;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T& SimpleVector<T>::operator[](size_t idx)
|
||||
{
|
||||
assert(idx < Size());
|
||||
return (*m_vector)[idx];
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const T& SimpleVector<T>::operator[](size_t idx) const
|
||||
{
|
||||
assert(idx < Size());
|
||||
return (*m_vector)[idx];
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
size_t SimpleVector<T>::Size() const
|
||||
{
|
||||
return m_vector->size();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T* SimpleVector<T>::Data()
|
||||
{
|
||||
return m_vector->data();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const T* SimpleVector<T>::Data() const
|
||||
{
|
||||
return m_vector->data();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void SimpleVector<T>::PushBack(const T& value)
|
||||
{
|
||||
m_vector->push_back(value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void SimpleVector<T>::PushBack(T&& value)
|
||||
{
|
||||
m_vector->push_back(std::move(value));
|
||||
}
|
||||
|
||||
template <typename ValueType>
|
||||
bool operator==(const SimpleVector<ValueType>& first, const SimpleVector<ValueType>& second)
|
||||
{
|
||||
return *first.m_vector == *second.m_vector;
|
||||
}
|
||||
|
||||
// Explicit template instantiations
|
||||
template class SimpleVector<Variable>;
|
||||
template class SimpleVector<size_t>;
|
||||
template class SimpleVector<Axis>;
|
||||
template class SimpleVector<FunctionPtr>;
|
||||
|
||||
template bool operator==(const SimpleVector<size_t>& first, const SimpleVector<size_t>& second);
|
||||
|
||||
#pragma endregion SimpleVector
|
||||
|
||||
#pragma region SimpleSet
|
||||
|
||||
template <typename KeyType>
|
||||
SimpleSet<KeyType>::SimpleSet()
|
||||
: m_set(new std::unordered_set<KeyType>())
|
||||
{
|
||||
}
|
||||
|
||||
template <typename KeyType>
|
||||
SimpleSet<KeyType>::~SimpleSet()
|
||||
{
|
||||
delete m_set;
|
||||
}
|
||||
|
||||
template <typename KeyType>
|
||||
SimpleSet<KeyType>::SimpleSet(const SimpleSet& other)
|
||||
: m_set(nullptr)
|
||||
{
|
||||
*this = other;
|
||||
}
|
||||
|
||||
template <typename KeyType>
|
||||
SimpleSet<KeyType>& SimpleSet<KeyType>::operator=(const SimpleSet& other)
|
||||
{
|
||||
if (this != &other)
|
||||
size_t size;
|
||||
stream >> size;
|
||||
vector<DictionaryValue> values(size);
|
||||
for (auto i = 0; i < size; i++)
|
||||
{
|
||||
delete m_set;
|
||||
m_set = new std::unordered_set<KeyType>(*(other.m_set));
|
||||
stream >> values[i];
|
||||
}
|
||||
|
||||
return *this;
|
||||
us.AllocateDataPtr(values);
|
||||
break;
|
||||
}
|
||||
|
||||
template <typename KeyType>
|
||||
SimpleSet<KeyType>::SimpleSet(SimpleSet&& other)
|
||||
: m_set(nullptr)
|
||||
{
|
||||
*this = std::move(other);
|
||||
default:
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
return stream;
|
||||
}
|
||||
|
||||
template <typename KeyType>
|
||||
SimpleSet<KeyType>& SimpleSet<KeyType>::operator=(SimpleSet&& other)
|
||||
Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const DictionaryValue& us)
|
||||
{
|
||||
stream << us.version;
|
||||
|
||||
stream << us.ValueType();
|
||||
|
||||
switch (us.ValueType())
|
||||
{
|
||||
assert(this != &other);
|
||||
|
||||
delete m_set;
|
||||
m_set = other.m_set;
|
||||
other.m_set = nullptr;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename KeyType>
|
||||
bool SimpleSet<KeyType>::Insert(const KeyType& key)
|
||||
case DictionaryValue::Type::Bool:
|
||||
stream << us.m_data.m_boolean;
|
||||
break;
|
||||
case DictionaryValue::Type::SizeT:
|
||||
stream << us.m_data.m_sizeT;
|
||||
break;
|
||||
case DictionaryValue::Type::Float:
|
||||
stream << us.m_data.m_float;
|
||||
break;
|
||||
case DictionaryValue::Type::Double:
|
||||
stream << us.m_data.m_double;
|
||||
break;
|
||||
case DictionaryValue::Type::NDShape:
|
||||
{
|
||||
return m_set->insert(key).second;
|
||||
}
|
||||
|
||||
template <typename KeyType>
|
||||
bool SimpleSet<KeyType>::Contains(const KeyType& key) const
|
||||
{
|
||||
return (m_set->find(key) != m_set->end());
|
||||
}
|
||||
|
||||
template <typename KeyType>
|
||||
size_t SimpleSet<KeyType>::Size() const
|
||||
{
|
||||
return m_set->size();
|
||||
}
|
||||
|
||||
template <typename KeyType>
|
||||
SimpleSet<KeyType>::operator SimpleVector<KeyType>() const
|
||||
{
|
||||
SimpleVector<KeyType> retVector;
|
||||
for (auto key : *m_set)
|
||||
retVector.PushBack(key);
|
||||
|
||||
return retVector;
|
||||
}
|
||||
|
||||
template <typename KeyType>
|
||||
bool operator==(const SimpleSet<KeyType>& first, const SimpleSet<KeyType>& second)
|
||||
{
|
||||
return *first.m_set == *second.m_set;
|
||||
}
|
||||
|
||||
// Explicit template instantiations
|
||||
template class SimpleSet<FunctionPtr>;
|
||||
template class SimpleSet<Variable>;
|
||||
template class SimpleSet<Placeholder>;
|
||||
template class SimpleSet<const Function*>;
|
||||
|
||||
template bool operator==(const SimpleSet<Variable>& first, const SimpleSet<Variable>& second);
|
||||
template bool operator==(const SimpleSet<Placeholder>& first, const SimpleSet<Placeholder>& second);
|
||||
|
||||
#pragma endregion SimpleSet
|
||||
|
||||
#pragma region SimpleMap
|
||||
|
||||
template <typename KeyType, typename ValueType>
|
||||
SimpleMap<KeyType, ValueType>::SimpleMap()
|
||||
: m_map(new std::unordered_map<KeyType, ValueType>())
|
||||
{
|
||||
}
|
||||
|
||||
template <typename KeyType, typename ValueType>
|
||||
SimpleMap<KeyType, ValueType>::~SimpleMap()
|
||||
{
|
||||
delete m_map;
|
||||
}
|
||||
|
||||
template <typename KeyType, typename ValueType>
|
||||
SimpleMap<KeyType, ValueType>::SimpleMap(const SimpleMap& other)
|
||||
: m_map(nullptr)
|
||||
{
|
||||
*this = other;
|
||||
}
|
||||
|
||||
template <typename KeyType, typename ValueType>
|
||||
SimpleMap<KeyType, ValueType>& SimpleMap<KeyType, ValueType>::operator=(const SimpleMap& other)
|
||||
{
|
||||
if (this != &other)
|
||||
NDShape* shapePtr = reinterpret_cast<NDShape*>(us.m_data.m_ptr);
|
||||
auto size = shapePtr->NumAxes();
|
||||
stream << size;
|
||||
for (auto i = 0; i < size; i++)
|
||||
{
|
||||
delete m_map;
|
||||
m_map = new std::unordered_map<KeyType, ValueType>(*(other.m_map));
|
||||
stream << shapePtr->operator[](i);
|
||||
}
|
||||
|
||||
return *this;
|
||||
break;
|
||||
}
|
||||
|
||||
template <typename KeyType, typename ValueType>
|
||||
SimpleMap<KeyType, ValueType>::SimpleMap(SimpleMap&& other)
|
||||
: m_map(nullptr)
|
||||
case DictionaryValue::Type::Vector:
|
||||
{
|
||||
*this = std::move(other);
|
||||
vector<DictionaryValue>* vectorPtr =
|
||||
reinterpret_cast<vector<DictionaryValue>*>(us.m_data.m_ptr);
|
||||
auto size = vectorPtr->size();
|
||||
stream << size;
|
||||
for (auto i = 0; i < size; i++)
|
||||
{
|
||||
stream << vectorPtr->operator[](i);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
template <typename KeyType, typename ValueType>
|
||||
SimpleMap<KeyType, ValueType>& SimpleMap<KeyType, ValueType>::operator=(SimpleMap&& other)
|
||||
{
|
||||
assert(this != &other);
|
||||
|
||||
delete m_map;
|
||||
m_map = other.m_map;
|
||||
other.m_map = nullptr;
|
||||
|
||||
return *this;
|
||||
default:
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
template <typename KeyType, typename ValueType>
|
||||
ValueType& SimpleMap<KeyType, ValueType>::operator[](const KeyType& key)
|
||||
{
|
||||
return (*m_map)[key];
|
||||
}
|
||||
|
||||
template <typename KeyType, typename ValueType>
|
||||
const ValueType& SimpleMap<KeyType, ValueType>::operator[](const KeyType& key) const
|
||||
{
|
||||
return (*m_map)[key];
|
||||
}
|
||||
|
||||
template <typename KeyType, typename ValueType>
|
||||
bool SimpleMap<KeyType, ValueType>::Insert(const KeyType& key, const ValueType& value)
|
||||
{
|
||||
return m_map->insert({ key, value }).second;
|
||||
}
|
||||
|
||||
template <typename KeyType, typename ValueType>
|
||||
bool SimpleMap<KeyType, ValueType>::Contains(const KeyType& key) const
|
||||
{
|
||||
return (m_map->find(key) != m_map->end());
|
||||
}
|
||||
|
||||
template <typename KeyType, typename ValueType>
|
||||
size_t SimpleMap<KeyType, ValueType>::Size() const
|
||||
{
|
||||
return m_map->size();
|
||||
}
|
||||
|
||||
template <typename KeyType, typename ValueType>
|
||||
SimpleSet<KeyType> SimpleMap<KeyType, ValueType>::Keys() const
|
||||
{
|
||||
SimpleSet<KeyType> keys;
|
||||
for (auto keyValuePair : *m_map)
|
||||
keys.Insert(keyValuePair.first);
|
||||
|
||||
return keys;
|
||||
}
|
||||
|
||||
// Explicit template instantiations
|
||||
template class SimpleMap<Variable, ValuePtr>;
|
||||
template class SimpleMap<Variable, const ValuePtr>;
|
||||
template class SimpleMap<Placeholder, Variable>;
|
||||
|
||||
#pragma endregion SimpleMap
|
||||
|
||||
return stream;
|
||||
}
|
||||
|
||||
Dictionary::Dictionary()
|
||||
: m_dictionaryData(new std::unordered_map < std::wstring, DictionaryValue>)
|
||||
: m_dictionaryData(new unordered_map <wstring, DictionaryValue>)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -365,7 +149,7 @@ namespace CNTK
|
|||
Dictionary::Dictionary(Dictionary&& other)
|
||||
: m_dictionaryData(nullptr)
|
||||
{
|
||||
*this = std::move(other);
|
||||
*this = move(other);
|
||||
}
|
||||
|
||||
Dictionary& Dictionary::operator=(Dictionary&& other)
|
||||
|
@ -394,4 +178,130 @@ namespace CNTK
|
|||
{
|
||||
return (m_dictionaryData->find(key) != m_dictionaryData->end());
|
||||
}
|
||||
|
||||
Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const Dictionary& us)
|
||||
{
|
||||
stream << us.version;
|
||||
stream << us.m_dictionaryData->size();
|
||||
for (auto it = us.m_dictionaryData->begin(); it != us.m_dictionaryData->end(); ++it)
|
||||
{
|
||||
stream << it->first;
|
||||
stream << it->second;
|
||||
}
|
||||
return stream;
|
||||
}
|
||||
|
||||
Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, Dictionary& us)
|
||||
{
|
||||
size_t version;
|
||||
stream >> version;
|
||||
size_t size;
|
||||
stream >> size;
|
||||
us.m_dictionaryData->reserve(size);
|
||||
for (auto i = 0; i < size; i++)
|
||||
{
|
||||
wstring key;
|
||||
stream >> key;
|
||||
DictionaryValue value;
|
||||
stream >> value;
|
||||
us.m_dictionaryData->insert(make_pair(key, value));
|
||||
}
|
||||
return stream;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
vector<DictionaryValue> SerializeToVector(const NDArrayViewPtr& viewPtr)
|
||||
{
|
||||
if (viewPtr->IsSparse())
|
||||
{
|
||||
LogicError("Sparse NDArrayView cannot be serialized into a vector.");
|
||||
}
|
||||
|
||||
auto numElements = viewPtr->Shape().TotalSize();
|
||||
|
||||
vector<DictionaryValue> values(numElements);
|
||||
|
||||
NDArrayViewPtr cpuDataViewPtr = viewPtr;
|
||||
if ((viewPtr->Device().Type() != DeviceKind::CPU))
|
||||
{
|
||||
cpuDataViewPtr = MakeSharedObject<NDArrayView>(viewPtr->GetDataType(), viewPtr->Shape(), DeviceDescriptor::CPUDevice());
|
||||
cpuDataViewPtr->CopyFrom(*viewPtr);
|
||||
}
|
||||
|
||||
const T* buffer = cpuDataViewPtr->DataBuffer<T>();
|
||||
for (auto i = 0; i < numElements; ++i)
|
||||
{
|
||||
T v = buffer[i];
|
||||
values[i] = DictionaryValue(v);
|
||||
}
|
||||
|
||||
return values;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void DeserializeFromVector(const NDArrayViewPtr& viewPtr, const vector<DictionaryValue>& values)
|
||||
{
|
||||
if (viewPtr->IsSparse())
|
||||
{
|
||||
LogicError("Sparse NDArrayView cannot be deserialized from a vector.");
|
||||
}
|
||||
|
||||
auto numElements = viewPtr->Shape().TotalSize();
|
||||
|
||||
if (values.size() != numElements)
|
||||
{
|
||||
LogicError("Number of elements (%lu) in the deserialized representation does not match the expected value (%lu)",
|
||||
values.size(), numElements);
|
||||
}
|
||||
|
||||
NDArrayViewPtr cpuDataViewPtr = viewPtr;
|
||||
if ((viewPtr->Device().Type() != DeviceKind::CPU))
|
||||
{
|
||||
cpuDataViewPtr = MakeSharedObject<NDArrayView>(viewPtr->GetDataType(), viewPtr->Shape(), DeviceDescriptor::CPUDevice());
|
||||
}
|
||||
|
||||
T* buffer = cpuDataViewPtr->WritableDataBuffer<T>();
|
||||
for (auto i = 0; i < numElements; ++i)
|
||||
{
|
||||
buffer[i] = values[i].GetValue<T>();
|
||||
}
|
||||
|
||||
if ((viewPtr->Device().Type() != DeviceKind::CPU))
|
||||
{
|
||||
viewPtr->CopyFrom(*cpuDataViewPtr);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: we store the type info for every element in the vector, which is extremely redundant.
|
||||
// Instead, it'd be nice to introduce some sort of DictionaryValueVector.
|
||||
vector<DictionaryValue> SerializeToVector(const NDArrayViewPtr& viewPtr)
|
||||
{
|
||||
switch (viewPtr->GetDataType())
|
||||
{
|
||||
case DataType::Float:
|
||||
return SerializeToVector<float>(viewPtr);
|
||||
case DataType::Double:
|
||||
return SerializeToVector<double>(viewPtr);
|
||||
default:
|
||||
LogicError("Unsupported DataType %s", DataTypeName(viewPtr->GetDataType()));
|
||||
}
|
||||
}
|
||||
|
||||
void DeserializeFromVector(const NDArrayViewPtr& viewPtr, const vector<DictionaryValue>& values)
|
||||
{
|
||||
switch (viewPtr->GetDataType())
|
||||
{
|
||||
case DataType::Float:
|
||||
DeserializeFromVector<float>(viewPtr, values);
|
||||
break;
|
||||
case DataType::Double:
|
||||
DeserializeFromVector<double>(viewPtr, values);
|
||||
break;
|
||||
default:
|
||||
LogicError("Unsupported DataType %s", DataTypeName(viewPtr->GetDataType()));
|
||||
}
|
||||
}
|
||||
|
||||
template void DictionaryValue::AllocateDataPtr<NDShape>(const NDShape& value);
|
||||
template void DictionaryValue::AllocateDataPtr<vector<DictionaryValue>>(const vector<DictionaryValue>& value);
|
||||
}
|
||||
|
|
|
@ -15,245 +15,6 @@ namespace CNTK
|
|||
// Forward declarations
|
||||
class Dictionary;
|
||||
|
||||
class DictionaryValue
|
||||
{
|
||||
public:
|
||||
enum class Type : unsigned int
|
||||
{
|
||||
None,
|
||||
Bool,
|
||||
SizeT,
|
||||
Double,
|
||||
NDShape,
|
||||
Vector
|
||||
};
|
||||
|
||||
static const char* TypeName(Type type)
|
||||
{
|
||||
if (type == Type::None)
|
||||
return "None";
|
||||
else if (type == Type::Bool)
|
||||
return "Bool";
|
||||
else if (type == Type::SizeT)
|
||||
return "SizeT";
|
||||
else if (type == Type::Double)
|
||||
return "Double";
|
||||
else if (type == Type::NDShape)
|
||||
return "NDShape";
|
||||
else if (type == Type::Vector)
|
||||
return "Vector";
|
||||
else
|
||||
LogicError("Unknown DictionaryValue::Type");
|
||||
}
|
||||
|
||||
public:
|
||||
DictionaryValue()
|
||||
: m_valueType(Type::None)
|
||||
{
|
||||
}
|
||||
|
||||
DictionaryValue(bool value)
|
||||
: m_valueType(GetValueType<bool>())
|
||||
{
|
||||
m_data.m_boolean = value;
|
||||
}
|
||||
|
||||
DictionaryValue(size_t value)
|
||||
: m_valueType(GetValueType<size_t>())
|
||||
{
|
||||
m_data.m_sizeT = value;
|
||||
}
|
||||
|
||||
DictionaryValue(double value)
|
||||
: m_valueType(GetValueType<double>())
|
||||
{
|
||||
m_data.m_double = value;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
DictionaryValue(const T& value)
|
||||
: m_valueType(GetValueType<T>())
|
||||
{
|
||||
static_assert(std::is_same<T, NDShape>::value ||
|
||||
std::is_same<T, Internal::SimpleVector<DictionaryValue>>::value,
|
||||
"Unsupported ValueType");
|
||||
|
||||
AllocateDataPtr(value);
|
||||
}
|
||||
|
||||
DictionaryValue(const DictionaryValue& other)
|
||||
: m_valueType(Type::Bool)
|
||||
{
|
||||
// The m_valueType must hvae been set to a non-ptr type to prevent an attempt to interpret
|
||||
// the underlying underlying uninitialized value as a ptr and free it.
|
||||
*this = other;
|
||||
}
|
||||
|
||||
DictionaryValue& operator=(const DictionaryValue& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
FreeDataPtr();
|
||||
|
||||
m_valueType = other.m_valueType;
|
||||
m_data = other.m_data;
|
||||
|
||||
if (other.m_valueType == Type::NDShape)
|
||||
AllocateDataPtr(other.GetValue<NDShape>());
|
||||
else if (other.m_valueType == Type::Vector)
|
||||
AllocateDataPtr(other.GetValue<Internal::SimpleVector<DictionaryValue>>());
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
~DictionaryValue()
|
||||
{
|
||||
FreeDataPtr();
|
||||
}
|
||||
|
||||
template <typename T, typename std::enable_if<std::is_same<T, bool>::value>::type* = nullptr>
|
||||
const T& GetValue() const
|
||||
{
|
||||
VerifyType<T>();
|
||||
return m_data.m_boolean;
|
||||
}
|
||||
|
||||
template <typename T, typename std::enable_if<std::is_same<T, size_t>::value>::type* = nullptr>
|
||||
const T& GetValue() const
|
||||
{
|
||||
VerifyType<T>();
|
||||
return m_data.m_sizeT;
|
||||
}
|
||||
|
||||
template <typename T, typename std::enable_if<std::is_same<T, double>::value>::type* = nullptr>
|
||||
const T& GetValue() const
|
||||
{
|
||||
VerifyType<T>();
|
||||
return m_data.m_double;
|
||||
}
|
||||
|
||||
template <typename T, typename std::enable_if<std::is_same<T, NDShape>::value || std::is_same<T, Internal::SimpleVector<DictionaryValue>>::value>::type* = nullptr>
|
||||
const T& GetValue() const
|
||||
{
|
||||
VerifyType<T>();
|
||||
return *(reinterpret_cast<T*>(m_data.m_ptr));
|
||||
}
|
||||
|
||||
bool HasValue() const
|
||||
{
|
||||
return m_valueType != Type::None;
|
||||
}
|
||||
|
||||
Type ValueType() const
|
||||
{
|
||||
return m_valueType;
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
static Type GetValueType()
|
||||
{
|
||||
static_assert(std::is_same<T, bool>::value ||
|
||||
std::is_same<T, size_t>::value ||
|
||||
std::is_same<T, double>::value ||
|
||||
std::is_same<T, NDShape>::value ||
|
||||
std::is_same<T, Internal::SimpleVector<DictionaryValue>>::value ||
|
||||
std::is_same<T, CNTK::Dictionary>::value,
|
||||
"Unsupported ValueType");
|
||||
|
||||
if (std::is_same<T, bool>::value)
|
||||
return Type::Bool;
|
||||
else if (std::is_same<T, size_t>::value)
|
||||
return Type::SizeT;
|
||||
else if (std::is_same<T, double>::value)
|
||||
return Type::Double;
|
||||
else if (std::is_same<T, NDShape>::value)
|
||||
return Type::NDShape;
|
||||
else if (std::is_same<T, Internal::SimpleVector<DictionaryValue>>::value)
|
||||
return Type::Vector;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void VerifyType() const
|
||||
{
|
||||
if (GetValueType<T>() != m_valueType)
|
||||
RuntimeError("Reading a DictionaryValue as the wrong type; Reading as type %s when actual type is %s", typeid(T).name(), DictionaryValue::TypeName(m_valueType));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void AllocateDataPtr(const T& value)
|
||||
{
|
||||
static_assert(std::is_same<T, NDShape>::value || std::is_same<T, Internal::SimpleVector<DictionaryValue>>::value, "AllocateDataPtr called with invalid type");
|
||||
m_data.m_ptr = new T(value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void FreePtrAsType()
|
||||
{
|
||||
T* typedPtr = reinterpret_cast<T*>(m_data.m_ptr);
|
||||
delete typedPtr;
|
||||
|
||||
m_data.m_ptr = nullptr;
|
||||
}
|
||||
|
||||
void FreeDataPtr()
|
||||
{
|
||||
if (m_valueType == Type::NDShape)
|
||||
FreePtrAsType<NDShape>();
|
||||
else if (m_valueType == Type::Vector)
|
||||
FreePtrAsType<Internal::SimpleVector<DictionaryValue>>();
|
||||
}
|
||||
|
||||
private:
|
||||
Type m_valueType;
|
||||
|
||||
union ValueData
|
||||
{
|
||||
bool m_boolean;
|
||||
size_t m_sizeT;
|
||||
double m_double;
|
||||
void* m_ptr;
|
||||
} m_data;
|
||||
};
|
||||
|
||||
class Dictionary
|
||||
{
|
||||
public:
|
||||
Dictionary();
|
||||
~Dictionary();
|
||||
|
||||
// Disallow copy contruction and assignment
|
||||
Dictionary(const Dictionary&) = delete; Dictionary& operator=(const Dictionary&) = delete;
|
||||
|
||||
Dictionary(Dictionary&& other);
|
||||
Dictionary& operator=(Dictionary&& other);
|
||||
|
||||
DictionaryValue& operator[](const std::wstring& key)
|
||||
{
|
||||
return operator[](key.c_str());
|
||||
}
|
||||
|
||||
DictionaryValue& operator[](const wchar_t* key);
|
||||
|
||||
DictionaryValue operator[](const std::wstring& key) const
|
||||
{
|
||||
return operator[](key.c_str());
|
||||
}
|
||||
|
||||
DictionaryValue operator[](const wchar_t* key) const;
|
||||
|
||||
bool Contains(const std::wstring& key) const
|
||||
{
|
||||
return Contains(key.c_str());
|
||||
}
|
||||
|
||||
bool Contains(const wchar_t* key) const;
|
||||
|
||||
private:
|
||||
std::unordered_map<std::wstring, DictionaryValue>* m_dictionaryData;
|
||||
};
|
||||
|
||||
// Helper to get the size of an element of the specified DataType
|
||||
inline size_t ElementSize(DataType dataType)
|
||||
{
|
||||
|
@ -275,7 +36,7 @@ namespace CNTK
|
|||
NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
inline Microsoft::MSR::CNTK::MatrixFormat AsCNTKMatrixFormat(StorageFormat storageFormat)
|
||||
inline Microsoft::MSR::CNTK::MatrixFormat AsCNTKImplMatrixFormat(StorageFormat storageFormat)
|
||||
{
|
||||
if (storageFormat == StorageFormat::Dense)
|
||||
return Microsoft::MSR::CNTK::MatrixFormat::matrixFormatDense;
|
||||
|
@ -358,4 +119,13 @@ namespace CNTK
|
|||
|
||||
return{ matrixRowSize, matrixColSize };
|
||||
}
|
||||
|
||||
inline bool IsSparseInput(const Variable& var)
|
||||
{
|
||||
return var.IsInput() && var.IsSparse();
|
||||
}
|
||||
|
||||
std::vector<DictionaryValue> SerializeToVector(const NDArrayViewPtr& viewPtr);
|
||||
|
||||
void DeserializeFromVector(const NDArrayViewPtr& viewPtr, const std::vector<DictionaryValue>& values);
|
||||
}
|
||||
|
|
|
@ -21,15 +21,15 @@ namespace CNTK
|
|||
auto maskShape = mask->Shape();
|
||||
|
||||
if (maskShape.NumAxes() > dataShape.NumAxes())
|
||||
InvalidArgument("The number of axes of the mask of a Value object cannot exceed the number of axes of the data NDArrayView object");
|
||||
InvalidArgument("The number of axes (%d) of the mask of a Value object cannot exceed the number of axes (%d) of the data NDArrayView object", (int)maskShape.NumAxes(), (int)dataShape.NumAxes());
|
||||
|
||||
if (dataShape.SubShape(dataShape.NumAxes() - maskShape.NumAxes()) != maskShape)
|
||||
InvalidArgument("Invalid Value object; the data and mask are incompatible. The trailing dimensions of the data do not match the dimensions of the mask");
|
||||
InvalidArgument("Invalid Value object; the data and mask are incompatible. The trailing dimensions of the data (%S) do not match the dimensions of the mask (%S)", dataShape.AsString().c_str(), maskShape.AsString().c_str());
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static NDMaskPtr CreateMask(size_t sampleSize, const std::vector<std::vector<T>>& sequences, const DeviceDescriptor& device)
|
||||
static NDMaskPtr CreateMask(size_t numElementsPerSample, const std::vector<std::vector<T>>& sequences, const DeviceDescriptor& device)
|
||||
{
|
||||
size_t numSequences = sequences.size();
|
||||
std::vector<size_t> sequenceLengths(numSequences);
|
||||
|
@ -37,7 +37,7 @@ namespace CNTK
|
|||
bool needsMask = false;
|
||||
for (size_t i = 0; i < numSequences; ++i)
|
||||
{
|
||||
sequenceLengths[i] = sequences[i].size() / sampleSize;
|
||||
sequenceLengths[i] = sequences[i].size() / numElementsPerSample;
|
||||
|
||||
if (maxSequenceLength < sequenceLengths[i])
|
||||
maxSequenceLength = sequenceLengths[i];
|
||||
|
@ -46,11 +46,12 @@ namespace CNTK
|
|||
needsMask = true;
|
||||
}
|
||||
|
||||
// If needed, create a mask to account for variability in lengths of specified sequences
|
||||
NDMaskPtr deviceValueMask;
|
||||
if (needsMask)
|
||||
{
|
||||
NDShape valueMaskShape = { maxSequenceLength, numSequences };
|
||||
deviceValueMask = NDMaskPtr(new NDMask(valueMaskShape, device), [](Internal::ReferenceCount* ptr) {delete ptr; });
|
||||
deviceValueMask = MakeSharedObject<NDMask>(valueMaskShape, device);
|
||||
for (size_t i = 0; i < numSequences; ++i)
|
||||
deviceValueMask->MaskSection({ sequenceLengths[i], i }, { NDShape::InferredDimension, 1 });
|
||||
}
|
||||
|
@ -87,23 +88,23 @@ namespace CNTK
|
|||
}
|
||||
|
||||
colStarts[numSequences * maxSequenceLength] = (SparseIndexType)(nonZeroValues.size());
|
||||
NDArrayViewPtr deviceValueData(new NDArrayView(valueDataShape, colStarts.data(), rowIndices.data(), nonZeroValues.data(), nonZeroValues.size(), device, readOnly), [](ReferenceCount* ptr) { delete ptr; });
|
||||
return ValuePtr(new Value(deviceValueData, deviceValueMask), [](ReferenceCount* ptr) { delete ptr; });
|
||||
NDArrayViewPtr deviceValueData = MakeSharedObject<NDArrayView>(valueDataShape, colStarts.data(), rowIndices.data(), nonZeroValues.data(), nonZeroValues.size(), device, readOnly);
|
||||
return MakeSharedObject<Value>(deviceValueData, deviceValueMask);
|
||||
}
|
||||
|
||||
template <typename ElementType>
|
||||
/*static*/ ValuePtr Value::Create(const NDShape& sampleShape, const std::vector<std::vector<ElementType>>& sequences, const DeviceDescriptor& device, bool readOnly/* = false*/)
|
||||
{
|
||||
size_t sampleSize = sampleShape.TotalSize();
|
||||
NDMaskPtr deviceValueMask = CreateMask(sampleSize, sequences, device);
|
||||
size_t numElementsPerSample = sampleShape.TotalSize();
|
||||
NDMaskPtr deviceValueMask = CreateMask(numElementsPerSample, sequences, device);
|
||||
size_t maxSequenceLength = (deviceValueMask == nullptr) ? sequences[0].size() : deviceValueMask->Shape()[0];
|
||||
|
||||
size_t numSequences = sequences.size();
|
||||
NDShape valueDataShape = sampleShape.AppendShape({ maxSequenceLength, numSequences });
|
||||
NDArrayViewPtr valueData(new NDArrayView(AsDataType<ElementType>(), valueDataShape, DeviceDescriptor::CPUDevice()), [](ReferenceCount* ptr) { delete ptr; });
|
||||
NDArrayViewPtr valueData = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), valueDataShape, DeviceDescriptor::CPUDevice());
|
||||
ElementType* dataBuffer = valueData->WritableDataBuffer<ElementType>();
|
||||
for (size_t i = 0; i < numSequences; ++i)
|
||||
std::copy(sequences[i].data(), sequences[i].data() + sequences[i].size(), dataBuffer + (maxSequenceLength * i * sampleSize));
|
||||
std::copy(sequences[i].data(), sequences[i].data() + sequences[i].size(), dataBuffer + (maxSequenceLength * i * numElementsPerSample));
|
||||
|
||||
NDArrayViewPtr deviceValueData;
|
||||
if (device == DeviceDescriptor::CPUDevice())
|
||||
|
@ -115,13 +116,13 @@ namespace CNTK
|
|||
}
|
||||
else
|
||||
{
|
||||
deviceValueData = NDArrayViewPtr(new NDArrayView(AsDataType<ElementType>(), valueDataShape, device), [](ReferenceCount* ptr) { delete ptr; });
|
||||
deviceValueData = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), valueDataShape, device);
|
||||
deviceValueData->CopyFrom(*valueData);
|
||||
if (readOnly)
|
||||
deviceValueData = deviceValueData->Alias(true);
|
||||
}
|
||||
|
||||
return ValuePtr(new Value(deviceValueData, deviceValueMask), [](ReferenceCount* ptr) { delete ptr; });
|
||||
return MakeSharedObject<Value>(deviceValueData, deviceValueMask);
|
||||
}
|
||||
|
||||
/*virtual*/ Value::~Value()
|
||||
|
@ -143,13 +144,13 @@ namespace CNTK
|
|||
/*virtual*/ ValuePtr Value::DeepClone(bool readOnly/* = false*/) const
|
||||
{
|
||||
// TODO: Check if this is a derived type and throw an exception in that case
|
||||
return ValuePtr(new Value(Data()->DeepClone(readOnly), (Mask() != nullptr) ? Mask()->DeepClone() : nullptr), [](ReferenceCount* ptr) { delete ptr; });
|
||||
return MakeSharedObject<Value>(Data()->DeepClone(readOnly), (Mask() != nullptr) ? Mask()->DeepClone() : nullptr);
|
||||
}
|
||||
|
||||
/*virtual*/ ValuePtr Value::Alias(bool readOnly/* = false*/) const
|
||||
{
|
||||
// TODO: Check if this is a derived type and throw an exception in that case
|
||||
return ValuePtr(new Value(Data()->Alias(readOnly), (Mask() != nullptr) ? Mask()->Alias() : nullptr), [](ReferenceCount* ptr) { delete ptr; });
|
||||
return MakeSharedObject<Value>(Data()->Alias(readOnly), (Mask() != nullptr) ? Mask()->Alias() : nullptr);
|
||||
}
|
||||
|
||||
/*virtual*/ void Value::CopyFrom(const Value& source)
|
||||
|
|
|
@ -11,4 +11,9 @@ namespace CNTK
|
|||
: Variable(function->Output())
|
||||
{
|
||||
}
|
||||
|
||||
FunctionPtr Variable::Owner() const
|
||||
{
|
||||
return m_dataFields->m_ownerFunction->shared_from_this();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -106,22 +106,18 @@ public:
|
|||
~BestGpu();
|
||||
void Init();
|
||||
void SetAllowedDevices(const std::vector<int>& devices); // only allow certain GPUs
|
||||
bool DeviceAllowed(int device);
|
||||
void DisallowDevice(int device)
|
||||
{
|
||||
assert((device >= -1) && (device <= 31));
|
||||
|
||||
if (device < 0)
|
||||
m_disallowCPUDevice = true;
|
||||
else
|
||||
m_allowedDevices &= ~(1 << device);
|
||||
}
|
||||
bool DeviceAllowed(int deviceId);
|
||||
void DisallowUnsupportedDevices();
|
||||
void DisallowDevice(int deviceId);
|
||||
void AllowAll(); // reset to allow all GPUs (no allowed list)
|
||||
bool UseMultiple(); // using multiple GPUs?
|
||||
int GetDevice(BestGpuFlags flags = bestGpuNormal); // get a single device
|
||||
static const int AllDevices = -1; // can be used to specify all GPUs in GetDevices() call
|
||||
static const int RequeryDevices = -2; // Requery refreshing statistics and picking the same number as last query
|
||||
static const int MininumCCMajorForGpu = 3; // cntk supports GPUs with Compute Capability > 3.0
|
||||
std::vector<int> GetDevices(int number = AllDevices, BestGpuFlags flags = bestGpuNormal); // get multiple devices
|
||||
std::vector<ProcessorData *> GetProcessorData();
|
||||
|
||||
private:
|
||||
bool LockDevice(int deviceId, bool trial = true);
|
||||
};
|
||||
|
@ -156,6 +152,8 @@ static DEVICEID_TYPE SelectDevice(DEVICEID_TYPE deviceId, bool bLockGPU, const i
|
|||
{
|
||||
g_bestGpu->DisallowDevice(excludedDevices[i]);
|
||||
}
|
||||
|
||||
g_bestGpu->DisallowUnsupportedDevices();
|
||||
}
|
||||
|
||||
bestDeviceId = (DEVICEID_TYPE)g_bestGpu->GetDevice(BestGpuFlags(bLockGPU ? (bestGpuAvoidSharing | bestGpuExclusiveLock) : bestGpuAvoidSharing));
|
||||
|
@ -345,22 +343,32 @@ int BestGpu::GetDevice(BestGpuFlags bestFlags)
|
|||
void BestGpu::SetAllowedDevices(const std::vector<int>& devices)
|
||||
{
|
||||
m_allowedDevices = 0;
|
||||
for (int device : devices)
|
||||
for (int deviceId : devices)
|
||||
{
|
||||
m_allowedDevices |= (1 << device);
|
||||
m_allowedDevices |= (1 << deviceId);
|
||||
}
|
||||
}
|
||||
|
||||
// DeviceAllowed - is a particular device allowed?
|
||||
// returns: true if the device is allowed, otherwise false
|
||||
bool BestGpu::DeviceAllowed(int device)
|
||||
bool BestGpu::DeviceAllowed(int deviceId)
|
||||
{
|
||||
assert((device >= -1) && (device <= 31));
|
||||
assert((deviceId >= -1) && (deviceId <= 31));
|
||||
|
||||
if (device < 0)
|
||||
if (deviceId < 0)
|
||||
return !m_disallowCPUDevice;
|
||||
else
|
||||
return !!(m_allowedDevices & (1 << device));
|
||||
return !!(m_allowedDevices & (1 << deviceId));
|
||||
}
|
||||
|
||||
void BestGpu::DisallowDevice(int deviceId)
|
||||
{
|
||||
assert((deviceId >= -1) && (deviceId <= 31));
|
||||
|
||||
if (deviceId < 0)
|
||||
m_disallowCPUDevice = true;
|
||||
else
|
||||
m_allowedDevices &= ~(1 << deviceId);
|
||||
}
|
||||
|
||||
// AllowAll - Reset the allowed filter to allow all GPUs
|
||||
|
@ -527,6 +535,68 @@ std::vector<int> BestGpu::GetDevices(int number, BestGpuFlags p_bestFlags)
|
|||
return best; // return the array of the best GPUs
|
||||
}
|
||||
|
||||
// disallow devices wich don't comply with compute capability restriction when cntk runs with deviceId = 'auto'
|
||||
void BestGpu::DisallowUnsupportedDevices()
|
||||
{
|
||||
for (auto pd : m_procData)
|
||||
{
|
||||
if (pd->deviceProp.major < BestGpu::MininumCCMajorForGpu)
|
||||
{
|
||||
DisallowDevice(pd->deviceId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GpuData GetGpuData(DEVICEID_TYPE deviceId)
|
||||
{
|
||||
std::vector<GpuData> gpusData = GetAllGpusData();
|
||||
|
||||
auto it = std::find_if(gpusData.begin(), gpusData.end(), [&deviceId](const GpuData& gpu){return gpu.deviceId == deviceId;});
|
||||
|
||||
if (it != gpusData.end())
|
||||
{
|
||||
return *it;
|
||||
}
|
||||
|
||||
return GpuData(0, 0, deviceId, 0, GpuValidity::UnknownDevice, "", 0);
|
||||
}
|
||||
|
||||
// populate a vector with data (id, major/minor version, cuda cores, name and memory) for each gpu device in the machine
|
||||
std::vector<GpuData> GetAllGpusData()
|
||||
{
|
||||
std::vector<GpuData> data;
|
||||
|
||||
auto bestGpu = make_unique<BestGpu>();
|
||||
|
||||
std::vector<ProcessorData*> processorData = bestGpu->GetProcessorData();
|
||||
|
||||
for (ProcessorData* pd : processorData)
|
||||
{
|
||||
|
||||
GpuValidity validity = GpuValidity::UnknownDevice;
|
||||
|
||||
if (pd->deviceProp.major < BestGpu::MininumCCMajorForGpu)
|
||||
{
|
||||
validity = GpuValidity::ComputeCapabilityNotSupported;
|
||||
}
|
||||
else
|
||||
{
|
||||
validity = GpuValidity::Valid;
|
||||
}
|
||||
|
||||
size_t totalMemory = pd->deviceProp.totalGlobalMem/(1024*1024); //From bytes to MBytes
|
||||
GpuData gpuData = GpuData(pd->deviceProp.major, pd->deviceProp.minor, pd->deviceId, pd->cores, validity, string(pd->deviceProp.name), totalMemory);
|
||||
data.push_back(gpuData);
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
std::vector<ProcessorData*> BestGpu::GetProcessorData()
|
||||
{
|
||||
return m_procData;
|
||||
}
|
||||
|
||||
// QueryNvmlData - Query data from the Nvidia Management Library, and accumulate counters,
|
||||
// In case failure, this function simply backs out without filling in the data structure and without setting m_nvmlData.
|
||||
void BestGpu::QueryNvmlData()
|
||||
|
|
|
@ -8,15 +8,46 @@
|
|||
// #define CPUONLY // #define this to build without GPU support nor needing the SDK installed
|
||||
#include "CommonMatrix.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
// define IConfigRecord and ConfigParameters as incomplete types, in order to avoid having to include "ScriptableObjects.h" and "Config.h", as that confuses some .CU code
|
||||
namespace Microsoft { namespace MSR { namespace ScriptableObjects { struct IConfigRecord; }}}
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
using namespace std;
|
||||
|
||||
#ifndef CPUONLY
|
||||
enum class GpuValidity
|
||||
{
|
||||
Valid,
|
||||
UnknownDevice,
|
||||
ComputeCapabilityNotSupported
|
||||
};
|
||||
|
||||
struct GpuData
|
||||
{
|
||||
int versionMajor;
|
||||
int versionMinor;
|
||||
int deviceId;
|
||||
int cudaCores;
|
||||
GpuValidity validity;
|
||||
string name;
|
||||
size_t totalMemory;
|
||||
GpuData(int versionMajor, int versionMinor, int deviceId, int cudaCores, GpuValidity validity, const string& name, size_t totalMemory)
|
||||
:versionMajor(versionMajor), versionMinor(versionMinor), deviceId(deviceId), cudaCores(cudaCores), validity(validity), name(name), totalMemory(totalMemory)
|
||||
{
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
std::vector<GpuData> GetAllGpusData();
|
||||
GpuData GetGpuData(DEVICEID_TYPE deviceId);
|
||||
|
||||
class ConfigParameters;
|
||||
DEVICEID_TYPE DeviceFromConfig(const ConfigParameters& config);
|
||||
DEVICEID_TYPE DeviceFromConfig(const ScriptableObjects::IConfigRecord& config);
|
||||
|
||||
#else
|
||||
template <class ConfigRecordType>
|
||||
static inline DEVICEID_TYPE DeviceFromConfig(const ConfigRecordType& /*config*/)
|
||||
|
|
|
@ -282,7 +282,7 @@ class VariableSchema : public std::vector<VariableLayout>
|
|||
Values<ElemType> CreateBuffers(const std::vector<size_t>& maxLengths)
|
||||
{
|
||||
if (maxLengths.size() != size())
|
||||
throw std::exception("Expected max lengths for all variables.");
|
||||
throw std::runtime_error("Expected max lengths for all variables.");
|
||||
|
||||
Values<ElemType> buffers(size());
|
||||
for (size_t i = 0; i < size(); ++i)
|
||||
|
|
|
@ -29,7 +29,8 @@ public:
|
|||
runtime_error(msg)
|
||||
{
|
||||
}
|
||||
virtual void PrintError(const std::wstring& linePrefix) const = 0;
|
||||
virtual std::wstring GetError(const std::wstring& /*linePrefix*/) const = 0;
|
||||
virtual void PrintError(const std::wstring& /*linePrefix*/) const = 0;
|
||||
};
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
|
|
|
@ -411,7 +411,7 @@ static inline void byteswap(V &v) throw()
|
|||
|
||||
// execute a block with retry
|
||||
// Block must be restartable.
|
||||
// Use this when writing small files to those unreliable Windows servers.
|
||||
// Use this when writing/reading small files to those unreliable Windows servers.
|
||||
// TODO: This will fail to compile under VS 2008--we need an #ifdef around this
|
||||
template <typename FUNCTION>
|
||||
static void attempt(int retries, const FUNCTION &body)
|
|
@ -30,6 +30,7 @@
|
|||
#include <assert.h>
|
||||
#include <string.h> // for strerror()
|
||||
#include <stdexcept> // for exception
|
||||
#include <fcntl.h>
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// fopenOrDie(): like fopen() but terminate with err msg in case of error.
|
||||
|
@ -591,7 +592,8 @@ void fgetfile(const std::wstring& pathname, std::vector<char>& buffer);
|
|||
void fgetfile(FILE* f, std::vector<char>& buffer);
|
||||
namespace msra { namespace files {
|
||||
|
||||
void fgetfilelines(const std::wstring& pathname, std::vector<char>& readbuffer, std::vector<std::string>& lines);
|
||||
void fgetfilelines(const std::wstring& pathname, std::vector<char>& readbuffer, std::vector<std::string>& lines, int numberOfTries = 1);
|
||||
|
||||
static inline std::vector<std::string> fgetfilelines(const std::wstring& pathname)
|
||||
{
|
||||
std::vector<char> buffer;
|
||||
|
@ -599,7 +601,7 @@ static inline std::vector<std::string> fgetfilelines(const std::wstring& pathnam
|
|||
fgetfilelines(pathname, buffer, lines);
|
||||
return lines;
|
||||
}
|
||||
std::vector<char*> fgetfilelines(const std::wstring& pathname, std::vector<char>& readbuffer);
|
||||
std::vector<char*> fgetfilelines(const std::wstring& pathname, std::vector<char>& readbuffer, int numberOfTries = 1);
|
||||
|
||||
}}
|
||||
|
||||
|
@ -698,8 +700,18 @@ class auto_file_ptr
|
|||
{
|
||||
if (f && f != stdin && f != stdout && f != stderr)
|
||||
{
|
||||
bool readMode = false;
|
||||
|
||||
#ifdef _WIN32
|
||||
if ((f->_flag&_IOREAD) == _IOREAD)
|
||||
readMode = true;
|
||||
#else
|
||||
int mode = fcntl(fileno(f), F_GETFL);
|
||||
if ((mode & O_ACCMODE) == O_RDONLY)
|
||||
readMode = true;
|
||||
#endif
|
||||
int rc = ::fclose(f);
|
||||
if ((rc != 0) && !std::uncaught_exception())
|
||||
if (!readMode && (rc != 0) && !std::uncaught_exception())
|
||||
RuntimeError("auto_file_ptr: failed to close file: %s", strerror(errno));
|
||||
|
||||
f = NULL;
|
||||
|
|
|
@ -1251,7 +1251,7 @@ public:
|
|||
// BUGBUG: we only really support one archive file at this point
|
||||
// read the TOC in one swoop
|
||||
std::vector<char> textbuffer;
|
||||
auto toclines = msra::files::fgetfilelines(tocpath, textbuffer);
|
||||
auto toclines = msra::files::fgetfilelines(tocpath, textbuffer, 3);
|
||||
|
||||
// parse it one by one
|
||||
size_t archiveindex = SIZE_MAX; // its index
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#endif
|
||||
#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
|
||||
#include "Basics.h"
|
||||
#include "basetypes.h" //for attemp()
|
||||
#include "fileutil.h"
|
||||
#include "ProgressTracing.h"
|
||||
|
||||
|
@ -1632,6 +1633,11 @@ static size_t fgetfilechars(const std::wstring& path, vector<char>& buffer)
|
|||
return len;
|
||||
}
|
||||
|
||||
static void fgetfilechars(const std::wstring& path, vector<char>& buffer, size_t& len)
|
||||
{
|
||||
len = fgetfilechars(path, buffer);
|
||||
}
|
||||
|
||||
template <class LINES>
|
||||
static void strtoklines(char* s, LINES& lines)
|
||||
{
|
||||
|
@ -1639,10 +1645,14 @@ static void strtoklines(char* s, LINES& lines)
|
|||
lines.push_back(p);
|
||||
}
|
||||
|
||||
void msra::files::fgetfilelines(const std::wstring& path, vector<char>& buffer, std::vector<std::string>& lines)
|
||||
void msra::files::fgetfilelines(const std::wstring& path, vector<char>& buffer, std::vector<std::string>& lines, int numberOfTries)
|
||||
{
|
||||
// load it into RAM in one huge chunk
|
||||
const size_t len = fgetfilechars(path, buffer);
|
||||
size_t len = 0;
|
||||
msra::util::attempt(numberOfTries, [&]() // (can be reading from network)
|
||||
{
|
||||
// load it into RAM in one huge chunk
|
||||
fgetfilechars(path, buffer, len);
|
||||
});
|
||||
|
||||
// parse into lines
|
||||
lines.resize(0);
|
||||
|
@ -1651,11 +1661,15 @@ void msra::files::fgetfilelines(const std::wstring& path, vector<char>& buffer,
|
|||
}
|
||||
|
||||
// same as above but returning const char* (avoiding the memory allocation)
|
||||
vector<char*> msra::files::fgetfilelines(const wstring& path, vector<char>& buffer)
|
||||
vector<char*> msra::files::fgetfilelines(const wstring& path, vector<char>& buffer, int numberOfTries)
|
||||
{
|
||||
// load it into RAM in one huge chunk
|
||||
const size_t len = fgetfilechars(path, buffer);
|
||||
|
||||
size_t len = 0;
|
||||
msra::util::attempt(numberOfTries, [&]() // (can be reading from network)
|
||||
{
|
||||
// load it into RAM in one huge chunk
|
||||
fgetfilechars(path, buffer, len);
|
||||
});
|
||||
|
||||
// parse into lines
|
||||
vector<char*> lines;
|
||||
lines.reserve(len / 20);
|
||||
|
|
|
@ -72,6 +72,7 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
|
|||
else if (nodeType == OperationNameOf(InvStdDevNode)) return New<InvStdDevNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(KhatriRaoProductNode)) return New<KhatriRaoProductNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(LogNode)) return New<LogNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(LogPlusNode)) return New<LogPlusNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(LogSoftmaxNode)) return New<LogSoftmaxNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(LookupTableNode)) return New<LookupTableNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
else if (nodeType == OperationNameOf(MatrixL1RegNode)) return New<MatrixL1RegNode<ElemType>>(forward<_Types>(_Args)...);
|
||||
|
@ -657,6 +658,12 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Plus(
|
|||
return net.AddNodeToNetAndAttachInputs(New<PlusNode<ElemType>>(net.GetDeviceId(), nodeName), { a, b });
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::LogPlus(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName)
|
||||
{
|
||||
return net.AddNodeToNetAndAttachInputs(New<LogPlusNode<ElemType>>(net.GetDeviceId(), nodeName), { a, b });
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Less(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName)
|
||||
{
|
||||
|
|
|
@ -134,6 +134,7 @@ public:
|
|||
ComputationNodePtr InvStdDev(const ComputationNodePtr a, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr KhatriRaoProduct(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr Log(const ComputationNodePtr a, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr LogPlus(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr LogSoftmax(const ComputationNodePtr a, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr Logistic(const ComputationNodePtr a, const ComputationNodePtr b, const ComputationNodePtr c, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr Logistic(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
|
||||
|
|
|
@ -76,7 +76,7 @@ void ComputationNetwork::CopySubTree(const ComputationNetwork& fromNet,
|
|||
|
||||
ComputationNodeBasePtr fromRoot = fromNet.GetNodeFromName(fromName);
|
||||
|
||||
for (const auto& fromNode : GetEvalOrder(fromRoot)) // BUGBUG: This probably will fail because the precomputed eval orders are invalid at this point.
|
||||
for (const auto& fromNode : fromNet.GetEvalOrder(fromRoot)) // BUGBUG: This probably will fail because the precomputed eval orders are invalid at this point.
|
||||
{
|
||||
wstring fromNodeName = fromNode->NodeName();
|
||||
wstring toNodeName = toNamePrefix + fromNodeName;
|
||||
|
|
|
@ -67,6 +67,8 @@ template class PlusNode<double>;
|
|||
|
||||
// -----------------------------------------------------------------------
|
||||
// LogPlusNode (summand1, summand2)
|
||||
// Computes ln(exp(summand1) + exp(summand2)) in an overflow safe way.
|
||||
// Useful e.g. for computing softmax over sequence.
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
template <class ElemType>
|
||||
|
@ -105,8 +107,16 @@ public:
|
|||
if (Input(inputIndex)->ReducesInTimeWrt(Input(1 - inputIndex)))
|
||||
Input(1 - inputIndex)->MaskMissingValueColumnsToZero(fr);
|
||||
|
||||
// TODO: would be nice to state the derivative here in a comment
|
||||
inputGradient.AddElementwiseProductWithLogSumDerivativeOf(gradient, input0, input1);
|
||||
if (inputIndex == 0)
|
||||
{
|
||||
// d/dx (ln( exp(x) + (exp(y)) = exp(x) / (exp(x) + exp(y)) = 1 / (1 + exp(y-x)) = sigmoid(x-y)
|
||||
inputGradient.AddElementwiseProductWithLogSumDerivativeOf(gradient, input1, input0);
|
||||
}
|
||||
else
|
||||
{
|
||||
// d/dy (ln( exp(x) + (exp(y)) = exp(y) / (exp(x) + exp(y)) = 1 / (1 + exp(x-y)) = sigmoid(y-x)
|
||||
inputGradient.AddElementwiseProductWithLogSumDerivativeOf(gradient, input0, input1);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -321,15 +321,17 @@ void CNTKEvalExtended<ElemType>::ForwardPassT(const std::vector<ValueBuffer<Elem
|
|||
RuntimeError("Expected %d outputs, but got %d.", (int)m_outputNodes.size(), (int)outputs.size());
|
||||
|
||||
size_t i = 0;
|
||||
for (auto& input : m_inputMatrices)
|
||||
for (auto& inputNode : m_inputNodes)
|
||||
{
|
||||
// const cast: The matrix class takes this over without copying and could theoretically change the contents,
|
||||
// though it doesn't in this case.
|
||||
auto& buffer = const_cast<ValueBuffer<ElemType, ValueContainer>&>(inputs[i]);
|
||||
shared_ptr<Matrix<ElemType>> matrix = dynamic_pointer_cast<Matrix<ElemType>>(input.second.matrix);
|
||||
auto matrix = dynamic_pointer_cast<Matrix<ElemType>>(inputNode->ValuePtr());
|
||||
auto type = matrix->GetMatrixType();
|
||||
size_t numRows = input.second.sampleLayout.GetNumElements();
|
||||
size_t numRows = inputNode->GetSampleLayout().GetNumElements();
|
||||
|
||||
if (buffer.m_buffer.data() == nullptr)
|
||||
RuntimeError("Input %ls: Buffer is not allocated.", m_inputNodes[i]->GetName().c_str());
|
||||
if (type == MatrixType::DENSE)
|
||||
{
|
||||
if (buffer.m_buffer.size() % numRows != 0)
|
||||
|
@ -340,8 +342,12 @@ void CNTKEvalExtended<ElemType>::ForwardPassT(const std::vector<ValueBuffer<Elem
|
|||
}
|
||||
else if (type == MatrixType::SPARSE)
|
||||
{
|
||||
if (buffer.m_colIndices.data() == nullptr)
|
||||
RuntimeError("Input %ls: Due to sparse input format, expected colIndices array, but was nullptr.", m_inputNodes[i]->GetName().c_str());
|
||||
if (buffer.m_indices.data() == nullptr)
|
||||
RuntimeError("Input %ls: Due to sparse input format, expected Indices array, but was nullptr.", m_inputNodes[i]->GetName().c_str());
|
||||
if (buffer.m_colIndices.size() < 2)
|
||||
RuntimeError("Input %ls: Expected at least one element.", m_inputNodes[i]->GetName().c_str());
|
||||
RuntimeError("Input %ls: Expected at least one element (2 entries in colIndices array).", m_inputNodes[i]->GetName().c_str());
|
||||
if (buffer.m_colIndices[0] != 0)
|
||||
RuntimeError("Input %ls: First element of column indices must be 0", m_inputNodes[i]->GetName().c_str());
|
||||
if (buffer.m_colIndices[buffer.m_colIndices.size() - 1] != buffer.m_indices.size())
|
||||
|
@ -352,8 +358,8 @@ void CNTKEvalExtended<ElemType>::ForwardPassT(const std::vector<ValueBuffer<Elem
|
|||
|
||||
int numCols = type == MatrixType::DENSE ? buffer.m_buffer.size() / numRows : buffer.m_colIndices.size() - 1;
|
||||
assert(numCols >= 1);
|
||||
input.second.pMBLayout->Init(1, numCols);
|
||||
input.second.pMBLayout->AddSequence(0, 0, 0, numCols);
|
||||
inputNode->GetMBLayout()->Init(1, numCols);
|
||||
inputNode->GetMBLayout()->AddSequence(0, 0, 0, numCols);
|
||||
|
||||
if (type == MatrixType::DENSE)
|
||||
matrix->SetValue(numRows, numCols, matrix->GetDeviceId(), buffer.m_buffer.data(), matrixFlagNormal);
|
||||
|
|
|
@ -14,6 +14,11 @@
|
|||
#include <msclr\marshal_cppstd.h>
|
||||
|
||||
#include "CNTKException.h"
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4793) // Function compiled as native
|
||||
#include "Basics.h"
|
||||
#include "ScriptableObjects.h"
|
||||
#pragma warning(pop)
|
||||
#include "EvalCommon.h"
|
||||
#include "Eval.h"
|
||||
|
||||
|
@ -172,23 +177,10 @@ public:
|
|||
/// <param name="funcName">Factory function name for retrieving the native model from the dll.</param>
|
||||
ModelEvaluationExtended(String^ funcName)
|
||||
{
|
||||
auto dir = System::IO::Path::GetDirectoryName(System::Reflection::Assembly::GetExecutingAssembly()->Location);
|
||||
auto dllFileName = System::IO::Path::Combine(dir, "evaldll.dll");
|
||||
pin_ptr<const WCHAR> dllname = PtrToStringChars(dllFileName);
|
||||
auto hModule = LoadLibrary(dllname);
|
||||
if (hModule == nullptr)
|
||||
{
|
||||
throw gcnew CNTKException(System::String::Format("Cannot find library: {0}", gcnew String(dllname)));
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
msclr::interop::marshal_context context;
|
||||
const std::string func = context.marshal_as<std::string>(funcName);
|
||||
auto procAddress = GetProcAddress(hModule, func.c_str());
|
||||
auto getEvalProc = (GetEvalProc<ElemType>)procAddress;
|
||||
pin_ptr <IEvaluateModelExtended<ElemType>*> p_eval = &m_eval;
|
||||
getEvalProc(p_eval);
|
||||
GetEvalExtended<ElemType>(p_eval);
|
||||
}
|
||||
catch (const exception& ex)
|
||||
{
|
||||
|
@ -263,7 +255,14 @@ public:
|
|||
outputNodeNames.push_back(context.marshal_as<std::wstring>(output));
|
||||
}
|
||||
|
||||
m_eval->StartForwardEvaluation(outputNodeNames);
|
||||
try
|
||||
{
|
||||
m_eval->StartForwardEvaluation(outputNodeNames);
|
||||
}
|
||||
catch (const exception& ex)
|
||||
{
|
||||
throw GetCustomException(ex);
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
|
@ -367,6 +366,11 @@ private:
|
|||
{
|
||||
return gcnew CNTKBadAllocException(gcnew System::String(ex.what()));
|
||||
}
|
||||
else if (dynamic_cast<const ScriptableObjects::ScriptingException*>(&ex) != nullptr) // Includes derived classes
|
||||
{
|
||||
const auto& err = dynamic_cast<const ScriptableObjects::ScriptingException&>(ex);
|
||||
return gcnew CNTKLogicErrorException(gcnew System::String(wstrprintf(L"%ls\n%ls", utf16(err.what()).c_str(), err.GetError(L"").c_str()).c_str()), nullptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
return gcnew CNTKException(gcnew System::String(ex.what()));
|
||||
|
|
|
@ -43,21 +43,10 @@ public:
|
|||
/// <param name="funcName">Factory function name for retrieving the native model from the dll.</param>
|
||||
IEvaluateModelManaged(String^ funcName)
|
||||
{
|
||||
pin_ptr<const WCHAR> dllname = PtrToStringChars("evaldll.dll");
|
||||
auto hModule = LoadLibrary(dllname);
|
||||
if (hModule == nullptr)
|
||||
{
|
||||
throw gcnew CNTKException(System::String::Format("Cannot find library: {0}", gcnew String(dllname)));
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
msclr::interop::marshal_context context;
|
||||
const std::string func = context.marshal_as<std::string>(funcName);
|
||||
auto procAddress = GetProcAddress(hModule, func.c_str());
|
||||
auto getEvalProc = (GetEvalProc<ElemType>)procAddress;
|
||||
pin_ptr <IEvaluateModel<ElemType>*> p_eval = &m_eval;
|
||||
getEvalProc(p_eval);
|
||||
GetEval<ElemType>(p_eval);
|
||||
}
|
||||
catch (const exception& ex)
|
||||
{
|
||||
|
|
|
@ -56,6 +56,8 @@
|
|||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>EvalDLL.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<DelayLoadDLLs>EvalDll.dll</DelayLoadDLLs>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="$(DebugBuild)">
|
||||
|
@ -66,8 +68,6 @@
|
|||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<DelayLoadDLLs>
|
||||
</DelayLoadDLLs>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="$(ReleaseBuild)">
|
||||
|
@ -77,8 +77,6 @@
|
|||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<DelayLoadDLLs>
|
||||
</DelayLoadDLLs>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include <emmintrin.h>
|
||||
#include <tmmintrin.h>
|
||||
#include <immintrin.h>
|
||||
#include <smmintrin.h>
|
||||
#include <assert.h>
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
|
|
|
@ -116,6 +116,44 @@ const char* CudaErrString<curandStatus>(curandStatus)
|
|||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
/*static*/ bool SyncGuard::s_isSyncEnabled = false;
|
||||
|
||||
/*static*/ void SyncGuard::EnableSync()
|
||||
{
|
||||
s_isSyncEnabled = true;
|
||||
}
|
||||
|
||||
SyncGuard::SyncGuard(bool forceSync /*= false*/)
|
||||
: m_forceSync(forceSync)
|
||||
{
|
||||
m_done = nullptr;
|
||||
if (m_forceSync || s_isSyncEnabled)
|
||||
{
|
||||
CUDA_CALL(cudaGetLastError());
|
||||
CUDA_CALL(cudaEventCreate(&m_done));
|
||||
}
|
||||
}
|
||||
|
||||
SyncGuard::~SyncGuard()
|
||||
{
|
||||
if (m_forceSync || s_isSyncEnabled)
|
||||
{
|
||||
// The regular use of this destructor is to synchronize the GPU, but also
|
||||
// to check for errors. So this destructor is where CUDA errors would be thrown.
|
||||
// If this destructor runs during stack unwinding, then a different error has
|
||||
// already happened that should be reported; so we only clean up the resource.
|
||||
if (std::uncaught_exception())
|
||||
cudaEventDestroy(m_done);
|
||||
else
|
||||
{
|
||||
// failures in a prior launch might be reported here
|
||||
CUDA_CALL(cudaEventRecord(m_done));
|
||||
CUDA_CALL(cudaEventSynchronize(m_done));
|
||||
CUDA_CALL(cudaEventDestroy(m_done));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename AllocatedElemType>
|
||||
AllocatedElemType* TracingGPUMemoryAllocator::Allocate(int deviceId, size_t numRows, size_t numCols)
|
||||
{
|
||||
|
@ -4278,11 +4316,16 @@ void GPUMatrix<ElemType>::RCRFTransGrdCompute(const GPUMatrix<ElemType>& lbls,
|
|||
template <class ElemType>
|
||||
static shared_ptr<GPUMatrix<ElemType>> GetOnesVector(size_t N, DEVICEID_TYPE deviceId)
|
||||
{
|
||||
// using an array of shared_ptrs because those are thread-safe. The objects themselves are immutable.
|
||||
// And using a plain array so this will never get freed, avoiding free-after-DLL-unload issues.
|
||||
static shared_ptr<GPUMatrix<ElemType>> onesCache[32]; // cache of objects
|
||||
if (deviceId >= _countof(onesCache))
|
||||
LogicError("GetOnesVector: onesCache[] too small (%d entries), increase (you need %d) and recompile.", (int) _countof(onesCache), (int) deviceId + 1);
|
||||
// using a dynamically allocated array so this will never get freed, avoiding free-after-DLL-unload issues.
|
||||
// and using shared_ptrs since we don't want to leak more than CacheSize elements
|
||||
// when using a plain array we would have to control lifetime of the object and destructor would be called for every element in the array at the end
|
||||
const int CacheSize = 32;
|
||||
static shared_ptr<GPUMatrix<ElemType>> * onesCache = new shared_ptr<GPUMatrix<ElemType>>[CacheSize]; // cache of objects
|
||||
|
||||
if (deviceId >= CacheSize){
|
||||
LogicError("GetOnesVector: onesCache[] too small (%d entries), increase (you need %d) and recompile.", CacheSize, (int)deviceId + 1);
|
||||
}
|
||||
|
||||
auto p = onesCache[deviceId];
|
||||
if (!p || p->GetNumRows() < N) // must (re-)allocate
|
||||
{
|
||||
|
|
|
@ -61,6 +61,27 @@ cudaStream_t MATH_API GetStream();
|
|||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// SyncGuard -- synchronize around CUDA calls
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
class SyncGuard
|
||||
{
|
||||
private:
|
||||
static bool s_isSyncEnabled;
|
||||
|
||||
bool m_forceSync;
|
||||
#ifndef CPUONLY
|
||||
cudaEvent_t m_done;
|
||||
#endif
|
||||
|
||||
public:
|
||||
static MATH_API void EnableSync();
|
||||
|
||||
SyncGuard(bool forceSync = false);
|
||||
~SyncGuard();
|
||||
};
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// DeviceBoundNumber -- This class represents a number which resides on a particular device. Use it to avoid unnecessary transfers between CPU and GPU
|
||||
// -----------------------------------------------------------------------
|
||||
|
@ -623,51 +644,4 @@ static void CudaCall(ERRTYPE retCode, const char* exprString, const char* libNam
|
|||
#define CURAND_CALL(expr) (CudaCall((expr), #expr, "CURAND", CURAND_STATUS_SUCCESS))
|
||||
#define CUDNN_CALL(expr) (CudaCall((expr), #expr, "cuDNN", CUDNN_STATUS_SUCCESS))
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// SyncGuard -- synchronize around CUDA calls
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
class SyncGuard
|
||||
{
|
||||
static bool DoSync()
|
||||
{
|
||||
#ifdef NO_SYNC // this strange way of writing it allows modifying this variable at runtime in the debugger
|
||||
static bool do_sync = false;
|
||||
#else
|
||||
static bool do_sync = true;
|
||||
#endif
|
||||
return do_sync;
|
||||
}
|
||||
cudaEvent_t m_done;
|
||||
public:
|
||||
SyncGuard()
|
||||
{
|
||||
m_done = nullptr;
|
||||
if (DoSync())
|
||||
{
|
||||
CUDA_CALL(cudaGetLastError());
|
||||
CUDA_CALL(cudaEventCreate(&m_done));
|
||||
}
|
||||
}
|
||||
~SyncGuard()
|
||||
{
|
||||
if (DoSync())
|
||||
{
|
||||
// The regular use of this destructor is to synchronize the GPU, but also
|
||||
// to check for errors. So this destructor is where CUDA errors would be thrown.
|
||||
// If this destructor runs during stack unwinding, then a different error has
|
||||
// already happened that should be reported; so we only clean up the resource.
|
||||
if (std::uncaught_exception())
|
||||
cudaEventDestroy(m_done);
|
||||
else
|
||||
{
|
||||
// failures in a prior launch might be reported here
|
||||
CUDA_CALL(cudaEventRecord(m_done));
|
||||
CUDA_CALL(cudaEventSynchronize(m_done));
|
||||
CUDA_CALL(cudaEventDestroy(m_done));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#endif // CPUONLY
|
||||
|
|
|
@ -2276,6 +2276,9 @@ float CudaTimer::Elapsed()
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*static*/ void SyncGuard::EnableSync()
|
||||
{
|
||||
}
|
||||
} } }
|
||||
|
||||
// define a dummy GPUWatcher class too
|
||||
|
|
|
@ -14,6 +14,10 @@
|
|||
#pragma warning(push)
|
||||
#pragma warning(disable : 4251) // needs to have dll-interface to be used by clients of... caused by TensorView::m_shape which is only private. We use the same compiler everywhere.
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK { namespace Test {
|
||||
template <class ElemType> struct TensorTest;
|
||||
}}}}
|
||||
|
||||
// This class is exported from the Math.dll.
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
|
@ -148,7 +152,8 @@ private:
|
|||
// -------------------------------------------------------------------
|
||||
|
||||
const Matrix<ElemType>& GetSOB() const { return *m_sob; }
|
||||
Matrix<ElemType>& GetSOB() { return *m_sob; }
|
||||
Matrix<ElemType>& GetSOB() { return *m_sob; }
|
||||
friend Test::TensorTest<ElemType>;
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// sob members
|
||||
|
|
|
@ -110,9 +110,6 @@
|
|||
<PrecompiledHeader>Create</PrecompiledHeader>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="uci_to_cntk_text_format_converter.py" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets" />
|
||||
</Project>
|
|
@ -47,13 +47,5 @@
|
|||
<Filter Include="Common\Include">
|
||||
<UniqueIdentifier>{C6F55578-121A-4D7C-8F57-4172BC5C463B}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Scripts">
|
||||
<UniqueIdentifier>{cd70d891-88aa-40a4-8e47-0e31e4cac48e}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="uci_to_cntk_text_format_converter.py">
|
||||
<Filter>Scripts</Filter>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
</Project>
|
||||
|
|
|
@ -78,6 +78,12 @@ public:
|
|||
return &m_utterances[index];
|
||||
}
|
||||
|
||||
// Get utterance description by its index.
|
||||
UtteranceDescription* GetUtterance(size_t index)
|
||||
{
|
||||
return &m_utterances[index];
|
||||
}
|
||||
|
||||
// Get start frame index inside chunk.
|
||||
size_t GetStartFrameIndexInsideChunk(size_t index) const
|
||||
{
|
||||
|
@ -105,7 +111,7 @@ public:
|
|||
}
|
||||
|
||||
const size_t ts = m_firstFrames[index];
|
||||
const size_t n = GetUtterance(index)->GetNumberOfFrames();
|
||||
const size_t n = m_utterances[index].GetNumberOfFrames();
|
||||
return msra::dbn::matrixstripe(m_frames, ts, n);
|
||||
}
|
||||
|
||||
|
|
|
@ -9,7 +9,6 @@
|
|||
#include "HTKDataDeserializer.h"
|
||||
#include "ConfigHelper.h"
|
||||
#include "Basics.h"
|
||||
#include <numeric>
|
||||
|
||||
// TODO: This will be removed when dependency on old code is eliminated.
|
||||
// Currently this fixes the linking.
|
||||
|
@ -46,6 +45,12 @@ HTKDataDeserializer::HTKDataDeserializer(
|
|||
ConfigParameters input = inputs.front();
|
||||
auto inputName = input.GetMemberIds().front();
|
||||
|
||||
m_expandToPrimary = cfg(L"expandToUtterance", false);
|
||||
if (m_expandToPrimary && m_primary)
|
||||
{
|
||||
InvalidArgument("Cannot expand utterances of the primary stream %ls, please change your configuration.", inputName.c_str());
|
||||
}
|
||||
|
||||
ConfigParameters streamConfig = input(inputName);
|
||||
|
||||
ConfigHelper config(streamConfig);
|
||||
|
@ -85,6 +90,12 @@ HTKDataDeserializer::HTKDataDeserializer(
|
|||
m_dimension = config.GetFeatureDimension();
|
||||
m_dimension = m_dimension * (1 + context.first + context.second);
|
||||
|
||||
m_expandToPrimary = feature(L"expandToUtterance", false);
|
||||
if (m_expandToPrimary && m_primary)
|
||||
{
|
||||
InvalidArgument("Cannot expand utterances of the primary stream %ls, please change your configuration.", featureName.c_str());
|
||||
}
|
||||
|
||||
InitializeChunkDescriptions(config);
|
||||
InitializeStreams(featureName);
|
||||
InitializeFeatureInformation();
|
||||
|
@ -118,6 +129,13 @@ void HTKDataDeserializer::InitializeChunkDescriptions(ConfigHelper& config)
|
|||
UtteranceDescription description(move(msra::asr::htkfeatreader::parsedpath(u)));
|
||||
size_t numberOfFrames = description.GetNumberOfFrames();
|
||||
|
||||
if (m_expandToPrimary && numberOfFrames != 1)
|
||||
{
|
||||
RuntimeError("Expanded stream should only contain sequences of length 1, utterance '%s' has %d",
|
||||
description.GetKey().c_str(),
|
||||
(int)numberOfFrames);
|
||||
}
|
||||
|
||||
// For logging, also account for utterances and frames that we skip
|
||||
allUtterances++;
|
||||
allFrames += numberOfFrames;
|
||||
|
@ -470,7 +488,8 @@ void HTKDataDeserializer::GetSequenceById(ChunkIdType chunkId, size_t id, vector
|
|||
|
||||
// wrapper that allows m[j].size() and m[j][i] as required by augmentneighbors()
|
||||
MatrixAsVectorOfVectors utteranceFramesWrapper(utteranceFrames);
|
||||
FeatureMatrix features(m_dimension, m_frameMode ? 1 : utterance->GetNumberOfFrames());
|
||||
size_t utteranceLength = m_frameMode ? 1 : (m_expandToPrimary ? utterance->GetExpansionLength() : utterance->GetNumberOfFrames());
|
||||
FeatureMatrix features(m_dimension, utteranceLength);
|
||||
|
||||
if (m_frameMode)
|
||||
{
|
||||
|
@ -479,9 +498,16 @@ void HTKDataDeserializer::GetSequenceById(ChunkIdType chunkId, size_t id, vector
|
|||
auto fillIn = features.col(0);
|
||||
AugmentNeighbors(utteranceFramesWrapper, frameIndex, m_augmentationWindow.first, m_augmentationWindow.second, fillIn);
|
||||
}
|
||||
else
|
||||
else if (m_expandToPrimary) // Broadcast a single frame to the complete utterance.
|
||||
{
|
||||
for (size_t resultingIndex = 0; resultingIndex < utterance->GetExpansionLength(); ++resultingIndex)
|
||||
{
|
||||
auto fillIn = features.col(resultingIndex);
|
||||
AugmentNeighbors(utteranceFramesWrapper, 0, m_augmentationWindow.first, m_augmentationWindow.second, fillIn);
|
||||
}
|
||||
}
|
||||
else // Augment the complete utterance.
|
||||
{
|
||||
// Augment complete utterance.
|
||||
for (size_t frameIndex = 0; frameIndex < utterance->GetNumberOfFrames(); ++frameIndex)
|
||||
{
|
||||
auto fillIn = features.col(frameIndex);
|
||||
|
@ -508,10 +534,10 @@ void HTKDataDeserializer::GetSequenceById(ChunkIdType chunkId, size_t id, vector
|
|||
}
|
||||
|
||||
// Gets sequence description by its key.
|
||||
bool HTKDataDeserializer::GetSequenceDescriptionByKey(const KeyType& key, SequenceDescription& d)
|
||||
bool HTKDataDeserializer::GetSequenceDescription(const SequenceDescription& primary, SequenceDescription& d)
|
||||
{
|
||||
assert(!m_primary);
|
||||
auto iter = m_keyToChunkLocation.find(key.m_sequence);
|
||||
auto iter = m_keyToChunkLocation.find(primary.m_key.m_sequence);
|
||||
if (iter == m_keyToChunkLocation.end())
|
||||
{
|
||||
return false;
|
||||
|
@ -519,11 +545,29 @@ bool HTKDataDeserializer::GetSequenceDescriptionByKey(const KeyType& key, Sequen
|
|||
|
||||
auto chunkId = iter->second.first;
|
||||
auto utteranceIndexInsideChunk = iter->second.second;
|
||||
const auto& chunk = m_chunks[chunkId];
|
||||
const auto& sequence = chunk.GetUtterance(utteranceIndexInsideChunk);
|
||||
auto& chunk = m_chunks[chunkId];
|
||||
auto utterance = chunk.GetUtterance(utteranceIndexInsideChunk);
|
||||
|
||||
d.m_chunkId = (ChunkIdType)chunkId;
|
||||
d.m_id = m_frameMode ? chunk.GetStartFrameIndexInsideChunk(utteranceIndexInsideChunk) + key.m_sample : utteranceIndexInsideChunk;
|
||||
d.m_numberOfSamples = m_frameMode ? 1 : (uint32_t)sequence->GetNumberOfFrames();
|
||||
|
||||
// TODO: When we move frame mode from deserializer, expanding should go away and be done on the higher level.
|
||||
// TODO: Currently for the frame mode the secondary deserializer does not know the size of the full utterance,
|
||||
// becase each frame has its own sequence description. So we get the length by the max sample we have seen.
|
||||
if (m_expandToPrimary)
|
||||
{
|
||||
// Expanding for sequence length/or max seen frame.
|
||||
size_t maxLength = max(primary.m_numberOfSamples, (uint32_t)primary.m_key.m_sample + 1);
|
||||
if (utterance->GetExpansionLength() < maxLength)
|
||||
{
|
||||
utterance->SetExpansionLength(maxLength);
|
||||
}
|
||||
d.m_id = utteranceIndexInsideChunk;
|
||||
}
|
||||
else
|
||||
{
|
||||
d.m_id = m_frameMode ? chunk.GetStartFrameIndexInsideChunk(utteranceIndexInsideChunk) + primary.m_key.m_sample : utteranceIndexInsideChunk;
|
||||
}
|
||||
d.m_numberOfSamples = m_frameMode ? 1 : (uint32_t)utterance->GetNumberOfFrames();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -34,8 +34,8 @@ public:
|
|||
// Retrieves data for a chunk.
|
||||
virtual ChunkPtr GetChunk(ChunkIdType chunkId) override;
|
||||
|
||||
// Gets sequence description by its key.
|
||||
virtual bool GetSequenceDescriptionByKey(const KeyType&, SequenceDescription&) override;
|
||||
// Gets sequence description by the primary one.
|
||||
virtual bool GetSequenceDescription(const SequenceDescription& primary, SequenceDescription&) override;
|
||||
|
||||
private:
|
||||
class HTKChunk;
|
||||
|
@ -84,6 +84,10 @@ private:
|
|||
unsigned int m_samplePeriod = 0;
|
||||
size_t m_ioFeatureDimension = 0;
|
||||
std::string m_featureKind;
|
||||
|
||||
// A flag that indicates whether the utterance should be extended to match the lenght of the utterance from the primary deserializer.
|
||||
// TODO: This should be moved to the packers when deserializers work in sequence mode only.
|
||||
bool m_expandToPrimary;
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<HTKDataDeserializer> HTKDataDeserializerPtr;
|
||||
|
|
|
@ -20,9 +20,12 @@ class UtteranceDescription
|
|||
// Utterance id.
|
||||
size_t m_id;
|
||||
|
||||
// Expansion length in case if utterance should be expanded.
|
||||
size_t m_expansionLength;
|
||||
|
||||
public:
|
||||
UtteranceDescription(msra::asr::htkfeatreader::parsedpath&& path)
|
||||
: m_path(std::move(path))
|
||||
: m_path(std::move(path)), m_expansionLength(0)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -48,6 +51,9 @@ public:
|
|||
|
||||
size_t GetId() const { return m_id; }
|
||||
void SetId(size_t id) { m_id = id; }
|
||||
|
||||
size_t GetExpansionLength() const { return m_expansionLength; }
|
||||
void SetExpansionLength(size_t length) { m_expansionLength = length; }
|
||||
};
|
||||
|
||||
}}}
|
||||
|
|
|
@ -107,7 +107,6 @@
|
|||
<ClInclude Include="..\..\Common\Include\DataReader.h" />
|
||||
<ClInclude Include="..\..\Common\Include\DataWriter.h" />
|
||||
<ClInclude Include="..\..\Common\Include\ssematrix.h" />
|
||||
<ClInclude Include="basetypes.h" />
|
||||
<ClInclude Include="biggrowablevectors.h" />
|
||||
<ClInclude Include="chunkevalsource.h" />
|
||||
<ClInclude Include="..\..\Common\Include\fileutil.h" />
|
||||
|
|
|
@ -32,9 +32,6 @@
|
|||
<ClInclude Include="stdafx.h" />
|
||||
<ClInclude Include="targetver.h" />
|
||||
<ClInclude Include="utterancesourcemulti.h" />
|
||||
<ClInclude Include="basetypes.h">
|
||||
<Filter>Duplicates to remove</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\Common\Include\DataReader.h">
|
||||
<Filter>Common\Include</Filter>
|
||||
</ClInclude>
|
||||
|
|
|
@ -71,6 +71,11 @@ protected:
|
|||
const unsigned char* b = (const unsigned char*) &v;
|
||||
return (short) ((b[0] << 8) + b[1]);
|
||||
}
|
||||
static unsigned short swapunsignedshort(unsigned short v) throw()
|
||||
{
|
||||
const unsigned char* b = (const unsigned char*)&v;
|
||||
return (unsigned short)((b[0] << 8) + b[1]);
|
||||
}
|
||||
static int swapint(int v) throw()
|
||||
{
|
||||
const unsigned char* b = (const unsigned char*) &v;
|
||||
|
@ -81,13 +86,13 @@ protected:
|
|||
{
|
||||
int nsamples;
|
||||
int sampperiod;
|
||||
short sampsize;
|
||||
unsigned short sampsize;
|
||||
short sampkind;
|
||||
void read(FILE* f)
|
||||
{
|
||||
nsamples = fgetint(f);
|
||||
sampperiod = fgetint(f);
|
||||
sampsize = fgetshort(f);
|
||||
sampsize =(unsigned short) fgetshort(f);
|
||||
sampkind = fgetshort(f);
|
||||
}
|
||||
|
||||
|
@ -102,21 +107,24 @@ protected:
|
|||
sampkind = (short) 9; // user type
|
||||
int nRows = swapint(fgetint(f));
|
||||
int nCols = swapint(fgetint(f));
|
||||
sampsize = (short) (nRows * nCols); // features are stored as bytes;
|
||||
int rawsampsize = nRows * nCols;
|
||||
if (rawsampsize > UINT16_MAX)
|
||||
RuntimeError("reading idx feature cache header: sample size overflow");
|
||||
sampsize = (unsigned short)rawsampsize; // features are stored as bytes;
|
||||
}
|
||||
|
||||
void write(FILE* f)
|
||||
{
|
||||
fputint(f, nsamples);
|
||||
fputint(f, sampperiod);
|
||||
fputshort(f, sampsize);
|
||||
fputshort(f, (short) sampsize);
|
||||
fputshort(f, sampkind);
|
||||
}
|
||||
void byteswap()
|
||||
{
|
||||
nsamples = swapint(nsamples);
|
||||
sampperiod = swapint(sampperiod);
|
||||
sampsize = swapshort(sampsize);
|
||||
sampsize = swapunsignedshort(sampsize);
|
||||
sampkind = swapshort(sampkind);
|
||||
}
|
||||
};
|
||||
|
@ -215,7 +223,10 @@ public:
|
|||
H.nsamples = 0; // unknown for now, updated in close()
|
||||
H.sampperiod = period;
|
||||
const int bytesPerValue = sizeof(float); // we do not support compression for now
|
||||
H.sampsize = (short) featdim * bytesPerValue;
|
||||
size_t rawsampsize = featdim * bytesPerValue;
|
||||
if (rawsampsize > UINT16_MAX)
|
||||
RuntimeError("htkfeatwriter: sample size overflow");
|
||||
H.sampsize = (unsigned short)rawsampsize;
|
||||
H.sampkind = parsekind(kind);
|
||||
if (needbyteswapping)
|
||||
H.byteswap();
|
||||
|
|
|
@ -60,6 +60,12 @@ void Bundler::CreateChunkDescriptions()
|
|||
RuntimeError("Driving deserializer provided too many chunks.");
|
||||
}
|
||||
|
||||
// Creating a table of weak chunks for non driving deserializers.
|
||||
for (size_t i = 0; i < m_deserializers.size(); ++i)
|
||||
{
|
||||
m_weakChunkTable.push_back(std::vector<std::weak_ptr<Chunk>>(m_deserializers[i]->GetChunkDescriptions().size()));
|
||||
}
|
||||
|
||||
m_chunks.reserve(chunks.size());
|
||||
|
||||
if (m_verbosity)
|
||||
|
@ -105,7 +111,7 @@ void Bundler::CreateChunkDescriptions()
|
|||
size_t sequenceSamples = sequence.m_numberOfSamples;
|
||||
for (size_t deserializerIndex = 1; deserializerIndex < m_deserializers.size(); ++deserializerIndex)
|
||||
{
|
||||
isValid = m_deserializers[deserializerIndex]->GetSequenceDescriptionByKey(sequenceDescriptions[sequenceIndex].m_key, s);
|
||||
isValid = m_deserializers[deserializerIndex]->GetSequenceDescription(sequenceDescriptions[sequenceIndex], s);
|
||||
if (!isValid)
|
||||
{
|
||||
invalid.insert(sequenceIndex);
|
||||
|
@ -193,7 +199,7 @@ void Bundler::GetSequencesForChunk(ChunkIdType chunkId, std::vector<SequenceDesc
|
|||
uint32_t sequenceSamples = sequence.m_numberOfSamples;
|
||||
for (size_t deserializerIndex = 1; deserializerIndex < m_deserializers.size(); ++deserializerIndex)
|
||||
{
|
||||
m_deserializers[deserializerIndex]->GetSequenceDescriptionByKey(sequence.m_key, s);
|
||||
m_deserializers[deserializerIndex]->GetSequenceDescription(sequence, s);
|
||||
sequenceSamples = std::max(sequenceSamples, s.m_numberOfSamples);
|
||||
}
|
||||
sequence.m_numberOfSamples = sequenceSamples;
|
||||
|
@ -251,10 +257,9 @@ public:
|
|||
|
||||
// Creating sequence mapping and requiring underlying chunks.
|
||||
SequenceDescription s;
|
||||
for (size_t deserializerIndex = 1; deserializerIndex < m_parent->m_deserializers.size(); ++deserializerIndex)
|
||||
for (size_t deserializerIndex = 1; deserializerIndex < deserializers.size(); ++deserializerIndex)
|
||||
{
|
||||
std::map<size_t, ChunkPtr> secondaryChunks;
|
||||
|
||||
auto& chunkTable = m_parent->m_weakChunkTable[deserializerIndex];
|
||||
for (size_t sequenceIndex = 0; sequenceIndex < sequences.size(); ++sequenceIndex)
|
||||
{
|
||||
if (chunk->m_invalid.find(sequenceIndex) != chunk->m_invalid.end())
|
||||
|
@ -263,19 +268,14 @@ public:
|
|||
}
|
||||
|
||||
size_t currentIndex = sequenceIndex * deserializers.size() + deserializerIndex;
|
||||
deserializers[deserializerIndex]->GetSequenceDescriptionByKey(sequences[sequenceIndex].m_key, s);
|
||||
deserializers[deserializerIndex]->GetSequenceDescription(sequences[sequenceIndex], s);
|
||||
m_sequenceToSequence[currentIndex] = s.m_id;
|
||||
|
||||
ChunkPtr secondaryChunk;
|
||||
auto it = secondaryChunks.find(s.m_chunkId);
|
||||
if (it == secondaryChunks.end())
|
||||
ChunkPtr secondaryChunk = chunkTable[s.m_chunkId].lock();
|
||||
if (!secondaryChunk)
|
||||
{
|
||||
secondaryChunk = deserializers[deserializerIndex]->GetChunk(s.m_chunkId);
|
||||
secondaryChunks.insert(make_pair(s.m_chunkId, secondaryChunk));
|
||||
}
|
||||
else
|
||||
{
|
||||
secondaryChunk = it->second;
|
||||
chunkTable[s.m_chunkId] = secondaryChunk;
|
||||
}
|
||||
|
||||
m_innerChunks[currentIndex] = secondaryChunk;
|
||||
|
|
|
@ -59,6 +59,10 @@ private:
|
|||
// (i.e. often in speech)
|
||||
bool m_takePrimarySequenceLength;
|
||||
|
||||
// A table of loaded chunks to make sure we do not load same chunk twice.
|
||||
// Inner vector is the table of chunk id into weak pointer, the outer vector has an element per deserializer.
|
||||
std::vector<std::vector<std::weak_ptr<Chunk>>> m_weakChunkTable;
|
||||
|
||||
// General configuration
|
||||
int m_verbosity;
|
||||
};
|
||||
|
|
|
@ -37,9 +37,9 @@ public:
|
|||
return m_deserializer->GetSequencesForChunk(chunkId, descriptions);
|
||||
}
|
||||
|
||||
virtual bool GetSequenceDescriptionByKey(const KeyType& key, SequenceDescription& description) override
|
||||
virtual bool GetSequenceDescription(const SequenceDescription& primary, SequenceDescription& description) override
|
||||
{
|
||||
return m_deserializer->GetSequenceDescriptionByKey(key, description);
|
||||
return m_deserializer->GetSequenceDescription(primary, description);
|
||||
}
|
||||
|
||||
// Gets chunk data given its id.
|
||||
|
|
|
@ -161,11 +161,11 @@ public:
|
|||
// Gets sequence descriptions for a given a chunk.
|
||||
virtual void GetSequencesForChunk(ChunkIdType chunkId, std::vector<SequenceDescription>& descriptions) = 0;
|
||||
|
||||
// Gets sequence description by its key.
|
||||
// Used by deserializers not in driving/primary mode.
|
||||
// Returns false if provided sequence is not valid.
|
||||
// Gets sequence description given the sequence description of the primary deserializer.
|
||||
// Used for deserializers not in driving/primary mode.
|
||||
// Returns false if the corresponding secondary sequence is not valid.
|
||||
// TODO: Possibly move this out into a separate interface.
|
||||
virtual bool GetSequenceDescriptionByKey(const KeyType& key, SequenceDescription& description) = 0;
|
||||
virtual bool GetSequenceDescription(const SequenceDescription& primary, SequenceDescription& description) = 0;
|
||||
|
||||
// Gets chunk data given its id.
|
||||
virtual ChunkPtr GetChunk(ChunkIdType chunkId) = 0;
|
||||
|
|
|
@ -17,9 +17,9 @@ public:
|
|||
DataDeserializerBase()
|
||||
{}
|
||||
|
||||
virtual bool GetSequenceDescriptionByKey(const KeyType&, SequenceDescription&) override
|
||||
virtual bool GetSequenceDescription(const SequenceDescription& primary, SequenceDescription& result) override
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
return GetSequenceDescriptionByKey(primary.m_key, result);
|
||||
}
|
||||
|
||||
virtual std::vector<StreamDescriptionPtr> GetStreamDescriptions() const override
|
||||
|
@ -28,6 +28,11 @@ public:
|
|||
}
|
||||
|
||||
protected:
|
||||
virtual bool GetSequenceDescriptionByKey(const KeyType&, SequenceDescription&)
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
// Streams this data deserializer can produce.
|
||||
std::vector<StreamDescriptionPtr> m_streams;
|
||||
|
||||
|
|
|
@ -900,6 +900,14 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
|
|||
bool noMoreSamplesToProcess = false;
|
||||
for (;;)
|
||||
{
|
||||
// Per-minibatch performance measurements; only enabled when perfTraceLevel > 0
|
||||
Timer fineGrainedPerfMeasurementTimer;
|
||||
double readTime = 0;
|
||||
double computeTime = 0;
|
||||
double parameterUpdateTime = 0;
|
||||
if (m_perfTraceLevel > 0)
|
||||
fineGrainedPerfMeasurementTimer.Start();
|
||||
|
||||
// get minibatch
|
||||
// TODO: is it guaranteed that the GPU is already completed at this point, is it safe to overwrite the buffers?
|
||||
size_t actualMBSize = 0;
|
||||
|
@ -908,6 +916,13 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
|
|||
if (!wasDataRead && (!useDistributedMBReading || noMoreSamplesToProcess)) // in case of distributed reading, we do a few more loops until all ranks have completed
|
||||
break; // end of epoch
|
||||
|
||||
if (m_perfTraceLevel > 0)
|
||||
{
|
||||
fineGrainedPerfMeasurementTimer.Stop();
|
||||
readTime = fineGrainedPerfMeasurementTimer.ElapsedSeconds();
|
||||
fineGrainedPerfMeasurementTimer.Start();
|
||||
}
|
||||
|
||||
// Note: If !wasDataRead then the data that GetMinibatchIntoNetwork() was supposed to full in are undefined.
|
||||
// Must not touch them.
|
||||
|
||||
|
@ -998,6 +1013,15 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
|
|||
smbDispatcher.DoneWithCurrentMinibatch();
|
||||
} // if (actualMBSize > 0)
|
||||
|
||||
if (m_perfTraceLevel > 0)
|
||||
{
|
||||
std::unique_ptr<MatrixComputeStreamEvent> mainStreamSyncEvent(MatrixComputeStreamEvent::Create(net->GetDeviceId()));
|
||||
mainStreamSyncEvent->SynchronizeEvent();
|
||||
fineGrainedPerfMeasurementTimer.Stop();
|
||||
computeTime = fineGrainedPerfMeasurementTimer.ElapsedSeconds();
|
||||
fineGrainedPerfMeasurementTimer.Start();
|
||||
}
|
||||
|
||||
// for momentum/clipping/regularization/etc., as well as for progress and statistics, we should only count frames that are not gaps
|
||||
// #samples according to the default dynamic axis, for use with criterion nodes that do not have an MBLayout
|
||||
size_t numSamplesWithLabelOfNetwork = wasDataRead ? net->GetNumSamplesWithLabelOfNetwork(actualMBSize) : 0;
|
||||
|
@ -1105,6 +1129,17 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
|
|||
}
|
||||
}
|
||||
|
||||
if (m_perfTraceLevel > 0)
|
||||
{
|
||||
std::unique_ptr<MatrixComputeStreamEvent> mainStreamSyncEvent(MatrixComputeStreamEvent::Create(net->GetDeviceId()));
|
||||
mainStreamSyncEvent->SynchronizeEvent();
|
||||
fineGrainedPerfMeasurementTimer.Stop();
|
||||
parameterUpdateTime = fineGrainedPerfMeasurementTimer.ElapsedSeconds();
|
||||
|
||||
PREPENDTS(stderr);
|
||||
fprintf(stderr, "Perf trace: Read = %.5gs; Compute = %.5gs; Parameter update = %.5gs\n", readTime, computeTime, parameterUpdateTime);
|
||||
}
|
||||
|
||||
commTimer.Start();
|
||||
// aggregation by model averaging or block momentum
|
||||
if (useModelAggregation)
|
||||
|
@ -1131,7 +1166,7 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
|
|||
if (useDistributedMBReading)
|
||||
{
|
||||
noMoreSamplesToProcess = !wasDataRead;
|
||||
}
|
||||
}
|
||||
|
||||
if (nSamplesSinceLastModelSync >= m_nFramesBetweenASGDSync[epochNumber])
|
||||
{
|
||||
|
@ -2629,6 +2664,8 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType)
|
|||
// BUGBUG: these are not passed to Init()
|
||||
m_doUnitTest = configSGD(L"unitTest", false);
|
||||
|
||||
m_perfTraceLevel = configSGD(L"perfTraceLevel", (int)0);
|
||||
|
||||
// parallel training
|
||||
m_parallelizationMethod = ParallelizationMethod::none;
|
||||
m_numGradientBits = 32;
|
||||
|
@ -2650,27 +2687,27 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType)
|
|||
else
|
||||
{
|
||||
size_t numMPIWorkers = pMPI->NumNodesInUse();
|
||||
const ConfigRecordType& configParallelTrain(configSGD(L"ParallelTrain", ConfigRecordType::Record()));
|
||||
m_parallelizationMethod = ParseParallelizationMethod(configParallelTrain(L"parallelizationMethod", L"none"));
|
||||
const ConfigRecordType& configParallelTrain(configSGD(L"ParallelTrain", ConfigRecordType::Record()));
|
||||
m_parallelizationMethod = ParseParallelizationMethod(configParallelTrain(L"parallelizationMethod", L"none"));
|
||||
m_parallelizationStartEpochNum = configParallelTrain(L"parallelizationStartEpoch", (int)1) - 1; // Epoch numbers internally are 0 based
|
||||
m_enableDistributedMBReading = configParallelTrain(L"distributedMBReading", false);
|
||||
m_enableDistributedMBReading = configParallelTrain(L"distributedMBReading", false);
|
||||
m_syncStatsTrace = configParallelTrain(L"syncPerfStats", (int)0);
|
||||
|
||||
if (configParallelTrain.Exists(L"DataParallelSGD"))
|
||||
{
|
||||
const ConfigRecordType& configDataParallelSGD(configParallelTrain(L"DataParallelSGD", ConfigRecordType::Record()));
|
||||
size_t defaultGradientBits = 8 * sizeofElemType;
|
||||
m_numGradientBits = configDataParallelSGD(L"gradientBits", defaultGradientBits);
|
||||
m_zeroThresholdFor1Bit = configDataParallelSGD(L"useZeroThresholdFor1BitQuantization", true);
|
||||
m_bufferedAsyncGradientAggregation = configDataParallelSGD(L"useBufferedAsyncGradientAggregation", false);
|
||||
if ( m_numGradientBits < 1 || m_numGradientBits > (8 * sizeofElemType) )
|
||||
if (configParallelTrain.Exists(L"DataParallelSGD"))
|
||||
{
|
||||
InvalidArgument("gradientBits must be in the range [1, 32] when using precision=float and in range [1, 64] when using precision=double!");
|
||||
const ConfigRecordType& configDataParallelSGD(configParallelTrain(L"DataParallelSGD", ConfigRecordType::Record()));
|
||||
size_t defaultGradientBits = 8 * sizeofElemType;
|
||||
m_numGradientBits = configDataParallelSGD(L"gradientBits", defaultGradientBits);
|
||||
m_zeroThresholdFor1Bit = configDataParallelSGD(L"useZeroThresholdFor1BitQuantization", true);
|
||||
m_bufferedAsyncGradientAggregation = configDataParallelSGD(L"useBufferedAsyncGradientAggregation", false);
|
||||
if ( m_numGradientBits < 1 || m_numGradientBits > (8 * sizeofElemType) )
|
||||
{
|
||||
InvalidArgument("gradientBits must be in the range [1, 32] when using precision=float and in range [1, 64] when using precision=double!");
|
||||
}
|
||||
}
|
||||
}
|
||||
if (configParallelTrain.Exists(L"ModelAveragingSGD"))
|
||||
{
|
||||
const ConfigRecordType& configMASGD(configParallelTrain(L"ModelAveragingSGD", ConfigRecordType::Record()));
|
||||
if (configParallelTrain.Exists(L"ModelAveragingSGD"))
|
||||
{
|
||||
const ConfigRecordType& configMASGD(configParallelTrain(L"ModelAveragingSGD", ConfigRecordType::Record()));
|
||||
if (configMASGD.Exists(L"blockSizePerWorker") && configMASGD.Exists(L"blockSize"))
|
||||
{
|
||||
InvalidArgument("It is only allowed to set blockSizePerWorker or blockSize, not both of them");
|
||||
|
@ -2689,8 +2726,8 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType)
|
|||
m_modelAggregationBlockSize = 40000 * numMPIWorkers; // default value
|
||||
}
|
||||
#if 1 // legacy option
|
||||
if (configMASGD.Exists(L"syncFrequencyInFrames"))
|
||||
{
|
||||
if (configMASGD.Exists(L"syncFrequencyInFrames"))
|
||||
{
|
||||
if (configMASGD.Exists(L"blockSizePerWorker") || configMASGD.Exists(L"blockSize"))
|
||||
InvalidArgument("syncFrequencyInFrames is a deprecated alias of blockSizePerWorker. It is not allowed to specify both of them");
|
||||
m_modelAggregationBlockSize = configMASGD(L"syncFrequencyInFrames");
|
||||
|
@ -2706,15 +2743,15 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType)
|
|||
m_modelAggregationBlockSize = configMASGD(L"syncPeriod");
|
||||
m_modelAggregationBlockSize *= numMPIWorkers;
|
||||
fprintf(stderr, "WARNING: option syncPeroid in ModelAveragingSGD is going to be deprecated. Please use blockSizePerWorker instead in the future.\n");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (configParallelTrain.Exists(L"BlockMomentumSGD"))
|
||||
{
|
||||
}
|
||||
if (configParallelTrain.Exists(L"BlockMomentumSGD"))
|
||||
{
|
||||
#ifndef CNTK_PARALLEL_TRAINING_SUPPORT
|
||||
InvalidArgument("BlockMomentumSGD is not enabled in this version.\n");
|
||||
InvalidArgument("BlockMomentumSGD is not enabled in this version.\n");
|
||||
#else
|
||||
const ConfigRecordType& configBMSGD(configParallelTrain(L"BlockMomentumSGD", ConfigRecordType::Record()));
|
||||
const ConfigRecordType& configBMSGD(configParallelTrain(L"BlockMomentumSGD", ConfigRecordType::Record()));
|
||||
if (configBMSGD.Exists(L"blockSize") && configBMSGD.Exists(L"blockSizePerWorker"))
|
||||
{
|
||||
InvalidArgument("It is only allowed to set blockSizePerWorker or blockSize, not both of them");
|
||||
|
@ -2744,33 +2781,33 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType)
|
|||
fprintf(stderr, "WARNING: option syncPeroid in BlockMomentumSGD is going to be deprecated. Please use blockSizePerWorker instead in the future.\n");
|
||||
}
|
||||
#endif
|
||||
m_resetSGDMomentum = configBMSGD(L"resetSGDMomentum", true);
|
||||
m_useNesterovBlockMomentum = configBMSGD(L"useNesterovMomentum", true);
|
||||
m_blockLearningRate = configBMSGD(L"blockLearningRate", 1.0);
|
||||
m_resetSGDMomentum = configBMSGD(L"resetSGDMomentum", true);
|
||||
m_useNesterovBlockMomentum = configBMSGD(L"useNesterovMomentum", true);
|
||||
m_blockLearningRate = configBMSGD(L"blockLearningRate", 1.0);
|
||||
|
||||
if (configBMSGD.Exists(L"blockMomentumPerSync") && configBMSGD.Exists(L"blockMomentumAsTimeConstant"))
|
||||
{
|
||||
InvalidArgument("It is only allowed to set either blockMomentumPerSync or blockMomentumAsTimeConstant, not both of them");
|
||||
}
|
||||
else if (configBMSGD.Exists(L"blockMomentumAsTimeConstant"))
|
||||
{
|
||||
m_blockMomentumAsTimeConstant = configBMSGD(L"blockMomentumAsTimeConstant");
|
||||
}
|
||||
if (configBMSGD.Exists(L"blockMomentumPerSync") && configBMSGD.Exists(L"blockMomentumAsTimeConstant"))
|
||||
{
|
||||
InvalidArgument("It is only allowed to set either blockMomentumPerSync or blockMomentumAsTimeConstant, not both of them");
|
||||
}
|
||||
else if (configBMSGD.Exists(L"blockMomentumAsTimeConstant"))
|
||||
{
|
||||
m_blockMomentumAsTimeConstant = configBMSGD(L"blockMomentumAsTimeConstant");
|
||||
}
|
||||
#if 1 // This option "blockMomentumPerSync" is going to be deprecated in the future
|
||||
else if (configBMSGD.Exists(L"blockMomentumPerSync"))
|
||||
{
|
||||
double blockMomentum = configBMSGD(L"blockMomentumPerSync");
|
||||
else if (configBMSGD.Exists(L"blockMomentumPerSync"))
|
||||
{
|
||||
double blockMomentum = configBMSGD(L"blockMomentumPerSync");
|
||||
m_blockMomentumAsTimeConstant = BlockMomentumSGD<double>::Momentum2TimeConstant(blockMomentum, m_modelAggregationBlockSize);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
else /*if (!configBMSGD.Exists(L"blockMomentumPerSync") && !configBMSGD.Exists(L"blockMomentumAsTimeConstant"))*/
|
||||
{
|
||||
else /*if (!configBMSGD.Exists(L"blockMomentumPerSync") && !configBMSGD.Exists(L"blockMomentumAsTimeConstant"))*/
|
||||
{
|
||||
double blockMomentum = 1.0 - 1.0 / (double)numMPIWorkers; // this is a default value which ensures each block update contributes equally
|
||||
m_blockMomentumAsTimeConstant = BlockMomentumSGD<double>::Momentum2TimeConstant(blockMomentum, m_modelAggregationBlockSize);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
if (configParallelTrain.Exists(L"DataParallelASGD"))
|
||||
{
|
||||
const ConfigRecordType & configDataParallelASGD(configParallelTrain(L"DataParallelASGD", ConfigRecordType::Record()));
|
||||
|
@ -2784,7 +2821,7 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType)
|
|||
m_adjustcoefficient = configAdjustLearningRateAtBeginning(L"adjustCoefficient", (double)0.1);
|
||||
m_adjustnbminibatch = configAdjustLearningRateAtBeginning(L"adjustNbMinibatch", (size_t)256);
|
||||
}
|
||||
}
|
||||
}
|
||||
} // if (!pMPI)
|
||||
} // if (configSGD.Exists(L"ParallelTrain"))
|
||||
}
|
||||
|
|
|
@ -253,6 +253,8 @@ protected:
|
|||
|
||||
bool m_useAllDataForPreComputedNode;
|
||||
|
||||
int m_perfTraceLevel;
|
||||
|
||||
// Parallel training
|
||||
MPIWrapperPtr m_mpi;
|
||||
|
||||
|
|
|
@ -0,0 +1,189 @@
|
|||
#!/usr/bin/env python
|
||||
# ----------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
|
||||
# ---------------------------------------------------------
|
||||
# This script extracts information (hardware used, final results) contained in the baselines files
|
||||
# and generates a markdown file (wiki page)
|
||||
|
||||
import sys, os, re
|
||||
import TestDriver as td
|
||||
|
||||
try:
|
||||
import six
|
||||
except ImportError:
|
||||
print("Python package 'six' not installed. Please run 'pip install six'.")
|
||||
sys.exit(1)
|
||||
|
||||
thisDir = os.path.dirname(os.path.realpath(__file__))
|
||||
windows = os.getenv("OS")=="Windows_NT"
|
||||
|
||||
class Baseline:
|
||||
def __init__(self, fullPath, testResult = "", trainResult = ""):
|
||||
self.fullPath = fullPath
|
||||
self.cpuInfo = ""
|
||||
self.gpuInfo = ""
|
||||
self.testResult = testResult
|
||||
self.trainResult = trainResult
|
||||
|
||||
# extracts results info. e.g.
|
||||
# Finished Epoch[ 5 of 5]: [Training] ce = 2.32253198 * 1000 err = 0.90000000 * 1000 totalSamplesSeen = 5000 learningRatePerSample = 2e-06 epochTime=0.175781
|
||||
# Final Results: Minibatch[1-1]: err = 0.90000000 * 100 ce = 2.32170486 * 100 perplexity = 10.1930372
|
||||
def extractResultsInfo(self, baselineContent):
|
||||
trainResults = re.findall('.*(Finished Epoch\[ *\d+ of \d+\]\: \[Training\]) (.*)', baselineContent)
|
||||
if trainResults:
|
||||
self.trainResult = Baseline.formatLastTrainResult(trainResults[-1])[0:-2]
|
||||
testResults = re.findall('.*(Final Results: Minibatch\[1-\d+\]:)(\s+\* \d+)?\s+(.*)', baselineContent)
|
||||
if testResults:
|
||||
self.testResult = Baseline.formatLastTestResult(testResults[-1])[0:-2]
|
||||
|
||||
# extracts cpu and gpu info from baseline content. e.g.:
|
||||
#CPU info:
|
||||
# CPU Model Name: Intel(R) Xeon(R) CPU E5-2620 v3 @ 2.40GHz
|
||||
# Hardware threads: 12
|
||||
#GPU info:
|
||||
#
|
||||
#Device[0]: cores = 2496; computeCapability = 5.2; type = "Quadro M4000"; memory = 8192 MB
|
||||
#Device[1]: cores = 96; computeCapability = 2.1; type = "Quadro 600"; memory = 1024 MB
|
||||
# Total Memory: 33474872 kB
|
||||
def extractHardwareInfo(self, baselineContent):
|
||||
startCpuInfoIndex = baselineContent.find("CPU info:")
|
||||
endCpuInfoIndex = baselineContent.find("----------", startCpuInfoIndex)
|
||||
cpuInfo = re.search("^CPU info:\s+"
|
||||
"CPU Model (Name:\s*.*)\s+"
|
||||
"(Hardware threads: \d+)\s+"
|
||||
"Total (Memory:\s*.*)\s+", baselineContent[startCpuInfoIndex:endCpuInfoIndex], re.MULTILINE)
|
||||
if cpuInfo is None:
|
||||
return
|
||||
self.cpuInfo = "\n".join(cpuInfo.groups())
|
||||
|
||||
startGpuInfoIndex = baselineContent.find("GPU info:")
|
||||
endGpuInfoIndex = baselineContent.find("----------", startGpuInfoIndex)
|
||||
gpuInfoSnippet = baselineContent[startGpuInfoIndex:endGpuInfoIndex]
|
||||
|
||||
gpuDevices = re.findall("\t\t(Device\[\d+\]: cores = \d+; computeCapability = \d\.\d; type = .*; memory = \d+ MB)[\r\n]?", gpuInfoSnippet)
|
||||
if not gpuDevices:
|
||||
return
|
||||
gpuInfo = [ device for device in gpuDevices ]
|
||||
self.gpuInfo = "\n".join(gpuInfo)
|
||||
|
||||
@staticmethod
|
||||
def formatLastTestResult(line):
|
||||
return line[0] + line[1] + "\n" + line[2].replace('; ', '\n').replace(' ','\n')
|
||||
|
||||
@staticmethod
|
||||
def formatLastTrainResult(line):
|
||||
epochsInfo, parameters = line[0], line[1]
|
||||
return epochsInfo + '\n' + parameters.replace('; ', '\n')
|
||||
|
||||
class Example:
|
||||
|
||||
allExamplesIndexedByFullName = {}
|
||||
|
||||
def __init__(self, suite, name, testDir):
|
||||
self.suite = suite
|
||||
self.name = name
|
||||
self.fullName = suite + "/" + name
|
||||
self.testDir = testDir
|
||||
self.baselineList = []
|
||||
|
||||
self.gitHash = ""
|
||||
|
||||
@staticmethod
|
||||
def discoverAllExamples():
|
||||
testsDir = thisDir
|
||||
for dirName, subdirList, fileList in os.walk(testsDir):
|
||||
if 'testcases.yml' in fileList:
|
||||
testDir = dirName
|
||||
exampleName = os.path.basename(dirName)
|
||||
suiteDir = os.path.dirname(dirName)
|
||||
# suite name will be derived from the path components
|
||||
suiteName = os.path.relpath(suiteDir, testsDir).replace('\\', '/')
|
||||
|
||||
example = Example(suiteName, exampleName, testDir)
|
||||
Example.allExamplesIndexedByFullName[example.fullName.lower()] = example
|
||||
|
||||
# it returns a list with all baseline files for current example
|
||||
def findBaselineFilesList(self):
|
||||
baselineFilesList = []
|
||||
|
||||
oses = [".windows", ".linux", ""]
|
||||
devices = [".cpu", ".gpu", ""]
|
||||
flavors = [".debug", ".release", ""]
|
||||
|
||||
for o in oses:
|
||||
for device in devices:
|
||||
for flavor in flavors:
|
||||
candidateName = "baseline" + o + flavor + device + ".txt"
|
||||
fullPath = td.cygpath(os.path.join(self.testDir, candidateName), relative=True)
|
||||
if os.path.isfile(fullPath):
|
||||
baseline = Baseline(fullPath);
|
||||
baselineFilesList.append(baseline)
|
||||
|
||||
return baselineFilesList
|
||||
|
||||
# extracts information for every example and stores it in Example.allExamplesIndexedByFullName
|
||||
def getExamplesMetrics():
|
||||
Example.allExamplesIndexedByFullName = list(sorted(Example.allExamplesIndexedByFullName.values(), key=lambda test: test.fullName))
|
||||
allExamples = Example.allExamplesIndexedByFullName
|
||||
|
||||
print ("CNTK - Metrics collector")
|
||||
|
||||
for example in allExamples:
|
||||
baselineListForExample = example.findBaselineFilesList()
|
||||
six.print_("Example: " + example.fullName)
|
||||
for baseline in baselineListForExample:
|
||||
with open(baseline.fullPath, "r") as f:
|
||||
baselineContent = f.read()
|
||||
gitHash = re.search('.*Build SHA1:\s([a-z0-9]{40})[\r\n]+', baselineContent, re.MULTILINE)
|
||||
if gitHash is None:
|
||||
continue
|
||||
example.gitHash = gitHash.group(1)
|
||||
baseline.extractHardwareInfo(baselineContent)
|
||||
baseline.extractResultsInfo(baselineContent)
|
||||
example.baselineList.append(baseline)
|
||||
|
||||
# creates a list with links to each example result
|
||||
def createAsciidocExampleList(file):
|
||||
for example in Example.allExamplesIndexedByFullName:
|
||||
if not example.baselineList:
|
||||
continue
|
||||
file.write("".join(["<<", example.fullName.replace("/","").lower(),",", example.fullName, ">> +\n"]))
|
||||
file.write("\n")
|
||||
|
||||
def writeMetricsToAsciidoc():
|
||||
metricsFile = open("metrics.adoc",'wb')
|
||||
|
||||
createAsciidocExampleList(metricsFile)
|
||||
|
||||
for example in Example.allExamplesIndexedByFullName:
|
||||
if not example.baselineList:
|
||||
continue
|
||||
metricsFile.write("".join(["===== ", example.fullName, "\n"]))
|
||||
metricsFile.write("".join(["**Git Hash: **", example.gitHash, "\n\n"]))
|
||||
metricsFile.write("[cols=3, options=\"header\"]\n")
|
||||
metricsFile.write("|====\n")
|
||||
metricsFile.write("|Log file / Configuration | Train Result | Test Result\n")
|
||||
for baseline in example.baselineList:
|
||||
pathInDir=baseline.fullPath.split(thisDir)[1][1:]
|
||||
metricsFile.write("".join(["|link:../blob/", example.gitHash[:7],"/Tests/EndToEndTests/", pathInDir, "[",
|
||||
baseline.fullPath.split("/")[-1], "] .2+|", baseline.trainResult.replace("\n", " "), " .2+|",
|
||||
baseline.testResult.replace("\n", " "), "|\n"]))
|
||||
cpuInfo = "".join(["CPU: ", re.sub("[\r]?\n", ' ', baseline.cpuInfo)])
|
||||
|
||||
gpuInfo = re.sub("[\r]?\n", ' ', baseline.gpuInfo)
|
||||
if gpuInfo:
|
||||
metricsFile.write("".join([cpuInfo, " GPU: ", gpuInfo]))
|
||||
else:
|
||||
metricsFile.write(cpuInfo)
|
||||
|
||||
metricsFile.write("\n|====\n\n")
|
||||
|
||||
# ======================= Entry point =======================
|
||||
six.print_("==============================================================================")
|
||||
|
||||
Example.discoverAllExamples()
|
||||
|
||||
getExamplesMetrics()
|
||||
|
||||
writeMetricsToAsciidoc()
|
|
@ -1,4 +1,4 @@
|
|||
#!/usr/bin/env python
|
||||
#!/usr/bin/env python
|
||||
# ----------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# ---------------------------------------------------------
|
||||
|
@ -687,89 +687,90 @@ def runCommand(args):
|
|||
sys.exit(10)
|
||||
|
||||
# ======================= Entry point =======================
|
||||
parser = argparse.ArgumentParser(description="TestDriver - CNTK Test Driver")
|
||||
subparsers = parser.add_subparsers(help="command to execute. Run TestDriver.py <command> --help for command-specific help")
|
||||
runSubparser = subparsers.add_parser("run", help="run test(s)")
|
||||
runSubparser.add_argument("test", nargs="*",
|
||||
help="optional test name(s) to run, specified as Suite/TestName. "
|
||||
"Use list command to list available tests. "
|
||||
"If not specified then all tests will be run.")
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="TestDriver - CNTK Test Driver")
|
||||
subparsers = parser.add_subparsers(help="command to execute. Run TestDriver.py <command> --help for command-specific help")
|
||||
runSubparser = subparsers.add_parser("run", help="run test(s)")
|
||||
runSubparser.add_argument("test", nargs="*",
|
||||
help="optional test name(s) to run, specified as Suite/TestName. "
|
||||
"Use list command to list available tests. "
|
||||
"If not specified then all tests will be run.")
|
||||
|
||||
defaultBuildSKU = "gpu"
|
||||
defaultBuildSKU = "gpu"
|
||||
|
||||
runSubparser.add_argument("-b", "--build-location", help="location of the CNTK build to run")
|
||||
runSubparser.add_argument("-t", "--tag", help="runs tests which match the specified tag")
|
||||
runSubparser.add_argument("-d", "--device", help="cpu|gpu - run on a specified device")
|
||||
runSubparser.add_argument("-f", "--flavor", help="release|debug - run only a specified flavor")
|
||||
runSubparser.add_argument("-s", "--build-sku", default=defaultBuildSKU, help="cpu|gpu|1bitsgd - run tests only for a specified build SKU")
|
||||
tmpDir = os.getenv("TEMP") if windows else "/tmp"
|
||||
defaultRunDir=os.path.join(tmpDir, "cntk-test-{0}.{1}".format(time.strftime("%Y%m%d%H%M%S"), random.randint(0,1000000)))
|
||||
runSubparser.add_argument("-r", "--run-dir", default=defaultRunDir, help="directory where to store test output, default: a random dir within /tmp")
|
||||
runSubparser.add_argument("--update-baseline", action='store_true', help="update baseline file(s) instead of matching them")
|
||||
runSubparser.add_argument("--create-baseline", action='store_true', help="create new baseline file(s) (named as baseline.<os>.<device>.txt) for tests that do not currently have baselines")
|
||||
runSubparser.add_argument("-v", "--verbose", action='store_true', help="verbose output - dump all output of test script")
|
||||
runSubparser.add_argument("-n", "--dry-run", action='store_true', help="do not run the tests, only print test names and configurations to be run along with full command lines")
|
||||
runSubparser.add_argument("-b", "--build-location", help="location of the CNTK build to run")
|
||||
runSubparser.add_argument("-t", "--tag", help="runs tests which match the specified tag")
|
||||
runSubparser.add_argument("-d", "--device", help="cpu|gpu - run on a specified device")
|
||||
runSubparser.add_argument("-f", "--flavor", help="release|debug - run only a specified flavor")
|
||||
runSubparser.add_argument("-s", "--build-sku", default=defaultBuildSKU, help="cpu|gpu|1bitsgd - run tests only for a specified build SKU")
|
||||
tmpDir = os.getenv("TEMP") if windows else "/tmp"
|
||||
defaultRunDir=os.path.join(tmpDir, "cntk-test-{0}.{1}".format(time.strftime("%Y%m%d%H%M%S"), random.randint(0,1000000)))
|
||||
runSubparser.add_argument("-r", "--run-dir", default=defaultRunDir, help="directory where to store test output, default: a random dir within /tmp")
|
||||
runSubparser.add_argument("--update-baseline", action='store_true', help="update baseline file(s) instead of matching them")
|
||||
runSubparser.add_argument("--create-baseline", action='store_true', help="create new baseline file(s) (named as baseline.<os>.<device>.txt) for tests that do not currently have baselines")
|
||||
runSubparser.add_argument("-v", "--verbose", action='store_true', help="verbose output - dump all output of test script")
|
||||
runSubparser.add_argument("-n", "--dry-run", action='store_true', help="do not run the tests, only print test names and configurations to be run along with full command lines")
|
||||
|
||||
runSubparser.set_defaults(func=runCommand)
|
||||
runSubparser.set_defaults(func=runCommand)
|
||||
|
||||
listSubparser = subparsers.add_parser("list", help="list available tests")
|
||||
listSubparser.add_argument("-t", "--tag", help="limits a resulting list to tests matching the specified tag")
|
||||
listSubparser.add_argument("-d", "--device", help="cpu|gpu - tests for a specified device")
|
||||
listSubparser.add_argument("-f", "--flavor", help="release|debug - tests for specified flavor")
|
||||
listSubparser.add_argument("-s", "--build-sku", default=defaultBuildSKU, help="cpu|gpu|1bitsgd - list tests only for a specified build SKU")
|
||||
listSubparser.add_argument("--os", help="windows|linux - tests for a specified operating system")
|
||||
listSubparser = subparsers.add_parser("list", help="list available tests")
|
||||
listSubparser.add_argument("-t", "--tag", help="limits a resulting list to tests matching the specified tag")
|
||||
listSubparser.add_argument("-d", "--device", help="cpu|gpu - tests for a specified device")
|
||||
listSubparser.add_argument("-f", "--flavor", help="release|debug - tests for specified flavor")
|
||||
listSubparser.add_argument("-s", "--build-sku", default=defaultBuildSKU, help="cpu|gpu|1bitsgd - list tests only for a specified build SKU")
|
||||
listSubparser.add_argument("--os", help="windows|linux - tests for a specified operating system")
|
||||
|
||||
listSubparser.set_defaults(func=listCommand)
|
||||
listSubparser.set_defaults(func=listCommand)
|
||||
|
||||
if len(sys.argv)==1:
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
|
||||
args = parser.parse_args(sys.argv[1:])
|
||||
|
||||
# parsing a --device, --flavor and --os options:
|
||||
args.devices = ["cpu", "gpu"]
|
||||
if (args.device):
|
||||
args.device = args.device.lower()
|
||||
if not args.device in args.devices:
|
||||
six.print_("--device must be one of", args.devices, file=sys.stderr)
|
||||
sys.exit(1)
|
||||
args.devices = [args.device]
|
||||
|
||||
args.flavors = ["debug", "release"]
|
||||
if (args.flavor):
|
||||
args.flavor = args.flavor.lower()
|
||||
if not args.flavor in args.flavors:
|
||||
six.print_("--flavor must be one of", args.flavors, file=sys.stderr)
|
||||
sys.exit(1)
|
||||
args.flavors = [args.flavor]
|
||||
|
||||
args.buildSKUs = ["cpu", "gpu", "1bitsgd"]
|
||||
if (args.build_sku):
|
||||
args.build_sku = args.build_sku.lower()
|
||||
if not args.build_sku in args.buildSKUs:
|
||||
six.print_("--build-sku must be one of", args.buildSKUs, file=sys.stderr)
|
||||
sys.exit(1)
|
||||
args.buildSKUs = [args.build_sku]
|
||||
if args.build_sku == "cpu" and args.devices == ["gpu"]:
|
||||
print >>sys.stderr, "Invalid combination: --build-sku cpu and --device gpu"
|
||||
sys.exit(1)
|
||||
|
||||
if args.func == runCommand and not args.build_location:
|
||||
args.build_location = os.path.realpath(os.path.join(thisDir, "../..", "x64" if windows else "build/"))
|
||||
|
||||
if args.func == listCommand:
|
||||
args.oses = ["windows", "linux"]
|
||||
if (args.os):
|
||||
args.os = args.os.lower()
|
||||
if not args.os in args.oses:
|
||||
six.print_("--os must be one of", args.oses, file=sys.stderr)
|
||||
if len(sys.argv)==1:
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
args.oses = [args.os]
|
||||
|
||||
# discover all the tests
|
||||
Test.discoverAllTests()
|
||||
args = parser.parse_args(sys.argv[1:])
|
||||
|
||||
# execute the command
|
||||
args.func(args)
|
||||
# parsing a --device, --flavor and --os options:
|
||||
args.devices = ["cpu", "gpu"]
|
||||
if (args.device):
|
||||
args.device = args.device.lower()
|
||||
if not args.device in args.devices:
|
||||
six.print_("--device must be one of", args.devices, file=sys.stderr)
|
||||
sys.exit(1)
|
||||
args.devices = [args.device]
|
||||
|
||||
args.flavors = ["debug", "release"]
|
||||
if (args.flavor):
|
||||
args.flavor = args.flavor.lower()
|
||||
if not args.flavor in args.flavors:
|
||||
six.print_("--flavor must be one of", args.flavors, file=sys.stderr)
|
||||
sys.exit(1)
|
||||
args.flavors = [args.flavor]
|
||||
|
||||
args.buildSKUs = ["cpu", "gpu", "1bitsgd"]
|
||||
if (args.build_sku):
|
||||
args.build_sku = args.build_sku.lower()
|
||||
if not args.build_sku in args.buildSKUs:
|
||||
six.print_("--build-sku must be one of", args.buildSKUs, file=sys.stderr)
|
||||
sys.exit(1)
|
||||
args.buildSKUs = [args.build_sku]
|
||||
if args.build_sku == "cpu" and args.devices == ["gpu"]:
|
||||
print >>sys.stderr, "Invalid combination: --build-sku cpu and --device gpu"
|
||||
sys.exit(1)
|
||||
|
||||
if args.func == runCommand and not args.build_location:
|
||||
args.build_location = os.path.realpath(os.path.join(thisDir, "../..", "x64" if windows else "build/"))
|
||||
|
||||
if args.func == listCommand:
|
||||
args.oses = ["windows", "linux"]
|
||||
if (args.os):
|
||||
args.os = args.os.lower()
|
||||
if not args.os in args.oses:
|
||||
six.print_("--os must be one of", args.oses, file=sys.stderr)
|
||||
sys.exit(1)
|
||||
args.oses = [args.os]
|
||||
|
||||
# discover all the tests
|
||||
Test.discoverAllTests()
|
||||
|
||||
# execute the command
|
||||
args.func(args)
|
||||
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
#!/bin/bash
|
||||
|
||||
# ----------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
|
||||
# ----------------------------------------------------------
|
||||
# Helper script containing common code used by run-test scripts of E2E tests
|
||||
|
||||
BinaryPath=$TEST_CNTK_BINARY
|
||||
|
@ -26,6 +29,21 @@ MPIArgs=
|
|||
DeleteExistingModels=1
|
||||
DeleteModelsAfterTest=1
|
||||
|
||||
# Print info needed by MetricsDriver.py to extract tests metrics
|
||||
printHardwareInfo()
|
||||
{
|
||||
cpuName=$(cat /proc/cpuinfo 2> /dev/null | grep -m 1 'model name' | cut -d : -f 2- | tr -s " " | cut -c 2-)
|
||||
totalMemory=$(cat /proc/meminfo 2> /dev/null | grep 'MemTotal' | cut -d : -f 2- | tr -s " " | cut -c 2-)
|
||||
nproc=$(nproc)
|
||||
|
||||
# Note that MetricsDriver.py depends on this format
|
||||
echo "CPU info:"
|
||||
echo " CPU Model Name: $cpuName"
|
||||
echo " Hardware threads: $nproc"
|
||||
echo " Total Memory: $totalMemory"
|
||||
echo "-------------------------------------------------------------------"
|
||||
}
|
||||
|
||||
# Helper function to print and run a command
|
||||
run()
|
||||
{
|
||||
|
@ -119,3 +137,6 @@ cntkmpirun()
|
|||
cntkrun "$2" "$3"
|
||||
return $?
|
||||
}
|
||||
|
||||
# place printHardwareInfo here, so that all tests print it
|
||||
printHardwareInfo
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
|
||||
#include "stdafx.h"
|
||||
#include "EvalTestHelper.h"
|
||||
#define __STDC_FORMAT_MACROS
|
||||
#include <inttypes.h>
|
||||
|
||||
using namespace Microsoft::MSR::CNTK;
|
||||
|
||||
|
@ -21,22 +23,10 @@ BOOST_FIXTURE_TEST_SUITE(EvalTestSuite, EvalFixture)
|
|||
|
||||
IEvaluateModelExtended<float>* SetupNetworkAndGetLayouts(std::string modelDefinition, VariableSchema& inputLayouts, VariableSchema& outputLayouts)
|
||||
{
|
||||
// Load the eval library
|
||||
auto hModule = LoadLibrary(L"evaldll.dll");
|
||||
if (hModule == nullptr)
|
||||
{
|
||||
auto err = GetLastError();
|
||||
throw std::exception((boost::format("Cannot load evaldll.dll: 0x%08lx") % err).str().c_str());
|
||||
}
|
||||
|
||||
// Get the factory method to the evaluation engine
|
||||
std::string func = "GetEvalExtendedF";
|
||||
auto procAddress = GetProcAddress(hModule, func.c_str());
|
||||
auto getEvalProc = (GetEvalProc<float>)procAddress;
|
||||
|
||||
// Native model evaluation instance
|
||||
IEvaluateModelExtended<float> *eval;
|
||||
getEvalProc(&eval);
|
||||
|
||||
GetEvalExtendedF(&eval);
|
||||
|
||||
try
|
||||
{
|
||||
|
@ -44,7 +34,7 @@ IEvaluateModelExtended<float>* SetupNetworkAndGetLayouts(std::string modelDefini
|
|||
}
|
||||
catch (std::exception& ex)
|
||||
{
|
||||
fprintf(stderr, ex.what());
|
||||
fprintf(stderr, "%s\n", ex.what());
|
||||
throw;
|
||||
}
|
||||
fflush(stderr);
|
||||
|
@ -53,9 +43,9 @@ IEvaluateModelExtended<float>* SetupNetworkAndGetLayouts(std::string modelDefini
|
|||
outputLayouts = eval->GetOutputSchema();
|
||||
|
||||
for (auto vl : outputLayouts)
|
||||
{
|
||||
fprintf(stderr, "Output dimension: %d\n", vl.m_numElements);
|
||||
fprintf(stderr, "Output name: %ls\n", vl.m_name);
|
||||
{
|
||||
fprintf(stderr, "Output dimension: %" PRIu64 "\n", vl.m_numElements);
|
||||
fprintf(stderr, "Output name: %ls\n", vl.m_name.c_str());
|
||||
}
|
||||
|
||||
eval->StartForwardEvaluation({outputLayouts[0].m_name});
|
||||
|
|
|
@ -63,13 +63,13 @@ struct EvalFixture
|
|||
if (!envVariableErrorMessage.empty())
|
||||
{
|
||||
BOOST_TEST_MESSAGE(envVariableErrorMessage);
|
||||
fprintf(stderr, envVariableErrorMessage.c_str());
|
||||
fprintf(stderr, "%s\n", envVariableErrorMessage.c_str());
|
||||
}
|
||||
|
||||
newCurrentPath = m_testDataPath;
|
||||
}
|
||||
}
|
||||
else if ((subPath[0] == '/' && subPath[1] == '//') || (subPath[0] == '\\' && subPath[1] == '\\'))
|
||||
else if ((subPath[0] == '/' && subPath[1] == '/') || (subPath[0] == '\\' && subPath[1] == '\\'))
|
||||
{
|
||||
newCurrentPath = subPath;
|
||||
}
|
||||
|
|
|
@ -72,6 +72,7 @@
|
|||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalLibraryDirectories>$(OutDir)..;$(BOOST_LIB_PATH)</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>EvalDll.lib; %(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="$(DebugBuild)">
|
||||
|
@ -86,8 +87,7 @@
|
|||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
|
@ -112,8 +112,7 @@
|
|||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="$(GpuBuild)">
|
||||
|
@ -168,4 +167,4 @@
|
|||
<Output TaskParameter="DestinationFiles" ItemName="NewFileWrites" />
|
||||
</Copy>
|
||||
</Target>
|
||||
</Project>
|
||||
</Project>
|
|
@ -9,16 +9,19 @@
|
|||
#define _SCL_SECURE_NO_WARNINGS // current API of matrix does not allow safe invokations. TODO: change api to proper one.
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "targetver.h"
|
||||
#include "targetver.h"
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <windows.h>
|
||||
|
||||
|
||||
// TODO: reference additional headers your program requires here
|
||||
#include "Eval.h"
|
||||
|
||||
//Adding required boost header
|
||||
#ifndef _WIN32
|
||||
// Use dynamic library on Linux
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#endif
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <boost/format.hpp>
|
||||
|
|
|
@ -5,171 +5,20 @@
|
|||
// MathPerformanceTests.cpp : Defines the entry point for the console application.
|
||||
//
|
||||
#include "stdafx.h"
|
||||
#define NOMINMAX
|
||||
#include "Windows.h"
|
||||
//#define NOMINMAX
|
||||
//#include "Windows.h"
|
||||
#include "Matrix.h"
|
||||
#include "CPUMatrix.h"
|
||||
#include "TensorView.h"
|
||||
#include "Sequences.h"
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include "Matrix.h"
|
||||
#include "CPUMatrix.h"
|
||||
#include "Sequences.h"
|
||||
#include <algorithm>
|
||||
|
||||
using namespace Microsoft::MSR::CNTK;
|
||||
using namespace std;
|
||||
|
||||
template <class ElemType>
|
||||
void SetToInitStateValueForResetSeg(const Matrix<ElemType>& sentenceBegin,
|
||||
size_t nStream, ElemType initStateValue, Matrix<ElemType>& newprevstate)
|
||||
{
|
||||
Matrix<ElemType> colSeg(sentenceBegin.GetDeviceId());
|
||||
colSeg.Resize(nStream, nStream);
|
||||
size_t nStateRow = newprevstate.GetNumRows();
|
||||
|
||||
assert(nStream == sentenceBegin.GetNumRows());
|
||||
|
||||
// only set state to init state value for segmentation = 0, and -1
|
||||
// e.g., -1 0 1 -> 0 0 1 -> 0 0 -1 -> 1 1 0
|
||||
|
||||
Matrix<ElemType> colPos(sentenceBegin.GetDeviceId());
|
||||
colPos.SetValue(sentenceBegin); // -1 0 1
|
||||
colPos.InplaceTruncateBottom(1 << 0 /*(int)MinibatchPackingFlags::SequenceStart*/); // TODO: these flags no longer exist, this test probably no longer applies
|
||||
Matrix<ElemType>::Scale((ElemType) -1.0, colPos);
|
||||
colPos += 0; // (int)MinibatchPackingFlags::None; // TODO: these flags no longer exist, this test probably no longer applies
|
||||
colSeg.SetDiagonalValue(colPos);
|
||||
Matrix<ElemType> ones(sentenceBegin.GetDeviceId());
|
||||
ones.Resize(nStateRow, nStream);
|
||||
ones.SetValue((ElemType) 1);
|
||||
// add default state value if it is for reset
|
||||
Matrix<ElemType>::MultiplyAndWeightedAdd(initStateValue, ones, false, colSeg, false, 1.0, newprevstate); // += [0 initStateValue 0 ]
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void rnnForwardPropSRP(Matrix<ElemType>& functionValues, size_t mNbr, Matrix<ElemType>& pastActivity, Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& colBegin, const Matrix<ElemType>& needToCompute)
|
||||
{
|
||||
size_t ncol = functionValues.GetNumCols();
|
||||
size_t ntime = ncol / mNbr;
|
||||
Matrix<ElemType> out = functionValues.ColumnSlice(0, mNbr);
|
||||
Matrix<ElemType> inp((DEVICEID_TYPE) functionValues.GetDeviceId());
|
||||
|
||||
for (size_t d = 0; d < ntime; d++)
|
||||
{
|
||||
if (d == 0)
|
||||
inp = pastActivity.ColumnSlice(d, mNbr);
|
||||
else
|
||||
inp = inputFunctionValues.ColumnSlice(d, mNbr);
|
||||
|
||||
if (needToCompute.ColumnSlice(d, 1).Get00Element() == 1)
|
||||
{
|
||||
Matrix<ElemType> colSegPastActivity((DEVICEID_TYPE) functionValues.GetDeviceId());
|
||||
Matrix<ElemType> colSeg((DEVICEID_TYPE) functionValues.GetDeviceId());
|
||||
colSeg.Resize(mNbr, mNbr);
|
||||
colSeg.SetValue(0);
|
||||
colSegPastActivity.SetValue(colBegin);
|
||||
colSegPastActivity.InplaceTruncateBottom(1 << 0 /*(int)MinibatchPackingFlags::SequenceStart*/); // TODO: these flags no longer exist, this test probably no longer applies
|
||||
colSeg.SetDiagonalValue(colSegPastActivity);
|
||||
Matrix<ElemType>::Multiply(inp, false, colSeg, false, out);
|
||||
ElemType initStateValue = (ElemType) 0.1;
|
||||
SetToInitStateValueForResetSeg<ElemType>(colBegin, mNbr, initStateValue, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void oldRnnForwardPropSRP(Matrix<ElemType>& functionValues, size_t mNbr, Matrix<ElemType>& pastActivity, Matrix<ElemType>& inputFunctionValues)
|
||||
{
|
||||
size_t ncol = functionValues.GetNumCols();
|
||||
size_t ntime = ncol / mNbr;
|
||||
for (size_t timeIdxInSeq = 0; timeIdxInSeq < ntime; timeIdxInSeq++)
|
||||
{
|
||||
for (size_t i = 0; i < mNbr; i++)
|
||||
{
|
||||
bool reset = false;
|
||||
|
||||
if (timeIdxInSeq == 0)
|
||||
{
|
||||
reset = true;
|
||||
}
|
||||
oldRNNForwardPropSRP<ElemType>(timeIdxInSeq, 1, reset, (ElemType) 0.1, functionValues, pastActivity, inputFunctionValues, i, mNbr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void oldRNNForwardPropSRP(const size_t timeIdxInSeq, const int delay, const bool reset, const ElemType default_activity, Matrix<ElemType>& functionValues, const Matrix<ElemType>& pastActivity, const Matrix<ElemType>& inputFunctionValues, const size_t indexInBatch, const size_t mNbr)
|
||||
{
|
||||
assert(delay > 0);
|
||||
|
||||
if (functionValues.GetNumRows() != inputFunctionValues.GetNumRows() ||
|
||||
functionValues.GetNumCols() != inputFunctionValues.GetNumCols())
|
||||
functionValues.Resize(inputFunctionValues.GetNumRows(),
|
||||
inputFunctionValues.GetNumCols());
|
||||
|
||||
int iPastIndex = (int) ((int) timeIdxInSeq - (int) delay) * (int) mNbr;
|
||||
int d = iPastIndex;
|
||||
if (d < 0)
|
||||
d = (int) functionValues.Mod((float) iPastIndex, (float) pastActivity.GetNumCols());
|
||||
// this can point to the past activity of the previous mninibatch
|
||||
|
||||
Matrix<ElemType> out = functionValues.ColumnSlice(timeIdxInSeq * mNbr + indexInBatch, 1);
|
||||
Matrix<ElemType> inp((DEVICEID_TYPE) functionValues.GetDeviceId());
|
||||
|
||||
if (reset)
|
||||
out.SetValue(default_activity);
|
||||
else
|
||||
{
|
||||
if (iPastIndex < 0)
|
||||
inp = pastActivity.ColumnSlice(d + indexInBatch, 1);
|
||||
else
|
||||
inp = inputFunctionValues.ColumnSlice(d + indexInBatch, 1);
|
||||
out.AssignValuesOf(inp);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
The new way of resetting RNN state.
|
||||
*/
|
||||
template <class ElemType>
|
||||
void TestRnnForwardPropSRP(size_t nRow = 100, size_t nCol = 1000, size_t mNbr = 10, DEVICEID_TYPE deviceID = 0)
|
||||
{
|
||||
Matrix<ElemType> functionValues(deviceID);
|
||||
Matrix<ElemType> colBegin(deviceID);
|
||||
Matrix<ElemType> pastActivity(deviceID);
|
||||
Matrix<ElemType> inputFunctionValues(deviceID);
|
||||
Matrix<ElemType> needToCompute(deviceID);
|
||||
|
||||
functionValues.Resize(nRow, nCol);
|
||||
colBegin.Resize(mNbr, 1);
|
||||
pastActivity.Resize(nRow, nCol);
|
||||
inputFunctionValues.Resize(nRow, nCol);
|
||||
needToCompute.Resize(1, nCol / mNbr);
|
||||
needToCompute.SetValue(0);
|
||||
needToCompute.ColumnSlice(0, 1).SetValue(1);
|
||||
auto t_start = clock();
|
||||
rnnForwardPropSRP<ElemType>(functionValues, mNbr, pastActivity, inputFunctionValues, colBegin, needToCompute);
|
||||
auto t_end = clock();
|
||||
std::cout << "testRnnForwardPropSRP: " << 1.0 * (t_end - t_start) / CLOCKS_PER_SEC << " seconds" << endl;
|
||||
}
|
||||
|
||||
/**
|
||||
The old way of resetting RNN state, which used if statement. Also only supports up to two sentences within a minibatch
|
||||
*/
|
||||
template <class ElemType>
|
||||
void TestOldRnnForwardPropSRP(size_t nRow = 100, size_t nCol = 1000, size_t mNbr = 10, DEVICEID_TYPE deviceID = 0)
|
||||
{
|
||||
Matrix<ElemType> functionValues(deviceID);
|
||||
Matrix<ElemType> colBegin(deviceID);
|
||||
Matrix<ElemType> pastActivity(deviceID);
|
||||
Matrix<ElemType> inputFunctionValues(deviceID);
|
||||
|
||||
functionValues.Resize(nRow, nCol);
|
||||
colBegin.Resize(mNbr, 1);
|
||||
pastActivity.Resize(nRow, nCol);
|
||||
inputFunctionValues.Resize(nRow, nCol);
|
||||
auto t_start = clock();
|
||||
oldRnnForwardPropSRP<ElemType>(functionValues, mNbr, pastActivity, inputFunctionValues);
|
||||
auto t_end = clock();
|
||||
std::cout << "TestOldRnnForwardPropSRP: " << 1.0 * (t_end - t_start) / CLOCKS_PER_SEC << " seconds" << endl;
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void randomInitializeCPUMatrix(CPUMatrix<ElemType>& M, float min = -10, float max = 10)
|
||||
{
|
||||
|
@ -250,77 +99,6 @@ void AddMultiplyAndInplaceSigmoidTest(int n, int k, int m)
|
|||
std::cout << "Matrix in: " << 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC << " seconds" << endl;
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void ColumnSliceMultAndAddTest(int n, int k, int m, DEVICEID_TYPE deviceID)
|
||||
{
|
||||
cout << "Testing Matrix" << endl;
|
||||
|
||||
Matrix<ElemType> AG((size_t) n, (size_t) k, deviceID);
|
||||
AG.SetUniformRandomValue(-1, 1);
|
||||
|
||||
Matrix<ElemType> BG((size_t) k, (size_t) m, deviceID);
|
||||
BG.SetUniformRandomValue(-1, 1);
|
||||
|
||||
Matrix<ElemType> CG((size_t) n, (size_t) m, deviceID);
|
||||
Matrix<ElemType> DG((size_t) n, (size_t) m, deviceID);
|
||||
|
||||
auto t_startG = clock();
|
||||
Matrix<ElemType>::MultiplyAndAdd(AG, false, BG, false, CG);
|
||||
auto t_endG = clock();
|
||||
std::cout << "MultiplyAndAdd Directly: " << 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC << " seconds" << endl;
|
||||
|
||||
t_startG = clock();
|
||||
for (int i = 0; i < m; i++)
|
||||
{
|
||||
Matrix<ElemType> col_BG = BG.ColumnSlice(i, 1);
|
||||
Matrix<ElemType> col_CG = CG.ColumnSlice(i, 1);
|
||||
Matrix<ElemType>::MultiplyAndAdd(AG, false, col_BG, false, col_CG);
|
||||
}
|
||||
t_endG = clock();
|
||||
std::cout << "MultiplyAndAdd With ColumnSlice: " << 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC << " seconds" << endl;
|
||||
|
||||
t_startG = clock();
|
||||
for (int i = 0; i < m; i++)
|
||||
{
|
||||
Matrix<ElemType> col_BG = BG.ColumnSlice(i, 1);
|
||||
Matrix<ElemType> col_CG = CG.ColumnSlice(i, 1);
|
||||
Matrix<ElemType>::MultiplyAndAdd(AG, false, col_BG, false, col_CG);
|
||||
}
|
||||
t_endG = clock();
|
||||
std::cout << "MultiplyAndAdd With ColumnSlice&: " << 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC << " seconds" << endl;
|
||||
|
||||
Matrix<ElemType> col_BG1(0), col_CG1(0);
|
||||
t_startG = clock();
|
||||
for (int i = 0; i < m; i++)
|
||||
{
|
||||
col_BG1.AssignColumnSlice(BG, i, 1);
|
||||
col_CG1.AssignColumnSlice(CG, i, 1);
|
||||
Matrix<ElemType>::MultiplyAndAdd(AG, false, col_BG1, false, col_CG1);
|
||||
}
|
||||
t_endG = clock();
|
||||
std::cout << "MultiplyAndAdd With AssignColumnSlice: " << 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC << " seconds" << endl;
|
||||
|
||||
t_startG = clock();
|
||||
for (int i = 0; i < m; i++)
|
||||
{
|
||||
Matrix<ElemType> col_CG = CG.ColumnSlice(i, 1);
|
||||
Matrix<ElemType> col_DG = DG.ColumnSlice(i, 1);
|
||||
col_DG.AssignSigmoidOf(col_CG);
|
||||
}
|
||||
t_endG = clock();
|
||||
std::cout << "AssignSigmoidOf With ColumnSlice: " << 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC << " seconds" << endl;
|
||||
|
||||
t_startG = clock();
|
||||
for (int i = 0; i < m; i++)
|
||||
{
|
||||
col_BG1.AssignColumnSlice(BG, i, 1);
|
||||
col_CG1.AssignColumnSlice(CG, i, 1);
|
||||
col_BG1.AssignSigmoidOf(col_CG1);
|
||||
}
|
||||
t_endG = clock();
|
||||
std::cout << "AssignSigmoidOf With AssignColumnSlice: " << 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC << " seconds" << endl;
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void SquareMultiplyAndAdd10TimesAvgTest(int n, int count)
|
||||
{
|
||||
|
@ -437,12 +215,6 @@ void MandSTest(int count, int devId)
|
|||
|
||||
int wmain()
|
||||
{
|
||||
ColumnSliceMultAndAddTest<float>(2048, 2048, 256, 0);
|
||||
|
||||
TestRnnForwardPropSRP<float>();
|
||||
|
||||
TestOldRnnForwardPropSRP<float>();
|
||||
|
||||
// MandSTest<float>(100, 2);
|
||||
|
||||
/*cout<<endl<<"********************Matrix SquareMultiplyAndWeightedAdd10TimesAvg TEST********************"<<endl;
|
||||
|
|
|
@ -114,10 +114,11 @@
|
|||
<ClInclude Include="targetver.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\..\Source\Common\ExceptionWithCallStack.cpp" />
|
||||
<ClCompile Include="MathPerformanceTests.cpp" />
|
||||
<ClCompile Include="stdafx.cpp">
|
||||
<PrecompiledHeader>Create</PrecompiledHeader>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
</Project>
|
||||
</Project>
|
|
@ -8,15 +8,11 @@
|
|||
namespace Microsoft { namespace MSR { namespace CNTK { namespace TEST {
|
||||
|
||||
//The simplest possible matrix multiplier, used here as a check.
|
||||
template<typename ScalarAT, typename ScalarBT, typename ScalarCT, int MAXRANGE = 1 << ((8 * sizeof(ScalarAT)) - 3)> class ReferenceMultiplier
|
||||
template<typename ScalarAT, typename ScalarBT, typename ScalarCT> class ReferenceMultiplier
|
||||
{
|
||||
public:
|
||||
|
||||
typedef ScalarAT ScalarAT;
|
||||
typedef ScalarBT ScalarBT;
|
||||
typedef ScalarCT ScalarCT;
|
||||
|
||||
static const int MAXRANGE = MAXRANGE;
|
||||
static const int MAXRANGE = 1 << ((8 * sizeof(ScalarAT)) - 3);
|
||||
|
||||
ScalarBT* PrepareB(ScalarBT* oldB, int k, int n) { return oldB; }
|
||||
static ScalarAT* CreateMatrixA(int m, int n)
|
||||
|
@ -77,9 +73,17 @@ template<typename ScalarAT, typename ScalarBT, typename ScalarCT, int MAXRANGE =
|
|||
}
|
||||
};
|
||||
|
||||
template<typename ScalarAT, typename ScalarBT, typename ScalarCT, typename MultiplierT>static void TestMultiplierSub(
|
||||
int m, int k, int n, MultiplierT& testMult, int numThreads = 1, ScalarCT epsilon = ScalarCT())
|
||||
template<typename ScalarCT> void CompareMatricesAndDump(const ScalarCT* ref, const ScalarCT* test,
|
||||
int m, int /*k*/, int n)
|
||||
{
|
||||
for (int i = 0; i < m * n; ++i)
|
||||
{
|
||||
BOOST_CHECK_EQUAL(ref[i], test[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename ScalarAT, typename ScalarBT, typename ScalarCT, typename MultiplierT>static void TestMultiplierSub(
|
||||
int m, int k, int n, MultiplierT& testMult, int numThreads = 1, ScalarCT epsilon = ScalarCT())
|
||||
{
|
||||
epsilon;
|
||||
testMult.SetNumThreads(numThreads);
|
||||
|
@ -126,23 +130,13 @@ template<typename ScalarAT, typename ScalarBT, typename ScalarCT, int MAXRANGE =
|
|||
|
||||
}
|
||||
|
||||
|
||||
template<typename ScalarAT, typename ScalarBT, typename ScalarCT, typename MultiplierT>static void TestMultiplierSub(
|
||||
template<typename ScalarAT, typename ScalarBT, typename ScalarCT, typename MultiplierT>static void TestMultiplierSub(
|
||||
int m, int k, int n, int numThreads = 1, ScalarCT epsilon = ScalarCT())
|
||||
{
|
||||
MultiplierT testMult;
|
||||
TestMultiplierSub<ScalarAT, ScalarBT, ScalarCT, MultiplierT>(m, k, n, testMult, numThreads, epsilon);
|
||||
}
|
||||
|
||||
template<typename ScalarCT> void CompareMatricesAndDump(const ScalarCT* ref, const ScalarCT* test,
|
||||
int m, int /*k*/, int n)
|
||||
{
|
||||
for (int i = 0; i < m * n; ++i)
|
||||
{
|
||||
BOOST_CHECK_EQUAL(ref[i], test[i]);
|
||||
}
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_SUITE(BlockMultiplierSuite)
|
||||
|
||||
BOOST_AUTO_TEST_CASE(BlockMultiplyTest8x128x8SingleThread)
|
||||
|
|
|
@ -3,7 +3,9 @@
|
|||
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
|
||||
//
|
||||
#include "stdafx.h"
|
||||
#ifdef _WIN32
|
||||
#include <crtdefs.h>
|
||||
#endif
|
||||
#include "../../../Source/Math/CPUSparseMatrix.h"
|
||||
|
||||
using namespace Microsoft::MSR::CNTK;
|
||||
|
|
|
@ -6,7 +6,9 @@
|
|||
//
|
||||
#include "stdafx.h"
|
||||
#include <math.h>
|
||||
#ifdef _WIN32
|
||||
#include <crtdefs.h>
|
||||
#endif
|
||||
#include "../../../Source/Math/GPUSparseMatrix.h"
|
||||
|
||||
using namespace Microsoft::MSR::CNTK;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" InitialTargets="CheckDependencies" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
|
@ -66,7 +66,7 @@
|
|||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<AdditionalIncludeDirectories>$(BOOST_INCLUDE_PATH);$(SolutionDir)Source\Common\Include</AdditionalIncludeDirectories>
|
||||
<AdditionalIncludeDirectories>$(BOOST_INCLUDE_PATH);$(SolutionDir)Source\Math;$(SolutionDir)Source\Common\Include</AdditionalIncludeDirectories>
|
||||
<DisableSpecificWarnings>4819</DisableSpecificWarnings>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
|
@ -135,6 +135,7 @@
|
|||
<ClInclude Include="fixtures.h" />
|
||||
<ClInclude Include="stdafx.h" />
|
||||
<ClInclude Include="targetver.h" />
|
||||
<ClInclude Include="TensorTestsHelper.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="BatchNormalizationEngineTests.cpp" />
|
||||
|
@ -156,6 +157,7 @@
|
|||
<PrecompiledHeader>Create</PrecompiledHeader>
|
||||
</ClCompile>
|
||||
<ClCompile Include="CPUMatrixTests.cpp" />
|
||||
<ClCompile Include="TensorTests.cpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<Target Name="Build" Condition="$(HasBoost)" Outputs="$(TargetPath)" DependsOnTargets="$(BuildDependsOn)" />
|
||||
|
|
|
@ -4,7 +4,9 @@
|
|||
//
|
||||
#include "stdafx.h"
|
||||
#include <math.h>
|
||||
#ifdef _WIN32
|
||||
#include <crtdefs.h>
|
||||
#endif
|
||||
#include "../../../Source/Math/Matrix.h"
|
||||
#include "../../../Source/Math/CPUMatrix.h"
|
||||
|
||||
|
|
|
@ -5,7 +5,12 @@
|
|||
#include "stdafx.h"
|
||||
#include "File.h"
|
||||
#include <memory>
|
||||
#ifdef _WIN32
|
||||
#include <io.h>
|
||||
#else // Linux
|
||||
#define _dup2 dup2
|
||||
#define _fileno fileno
|
||||
#endif
|
||||
|
||||
#include "../../../Source/Math/MatrixQuantizerImpl.h"
|
||||
#include "../../../Source/Math/CUDAPageLockedMemAllocator.h"
|
||||
|
|
|
@ -0,0 +1,105 @@
|
|||
//
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
|
||||
//
|
||||
//
|
||||
#include "stdafx.h"
|
||||
#include "TensorView.h"
|
||||
#include "Sequences.h"
|
||||
#include "TensorTestsHelper.h"
|
||||
|
||||
using namespace Microsoft::MSR::CNTK;
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK { namespace Test {
|
||||
|
||||
BOOST_AUTO_TEST_SUITE(MathTensorTests)
|
||||
|
||||
BOOST_AUTO_TEST_CASE(ElementwiseAddition)
|
||||
{
|
||||
Test::TensorTest<float> tensorTester;
|
||||
|
||||
// --- elementwise
|
||||
|
||||
// elementwise sum
|
||||
tensorTester.OneTensorTest("elementwise addition", 1e-8, [&tensorTester](DEVICEID_TYPE deviceId)
|
||||
{
|
||||
return tensorTester.BroadcastingTest(TensorShape{ 512, 256 }, TensorShape({ 512, 256 }), deviceId);
|
||||
});
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(AdditionWithSimpleBroadcasting)
|
||||
{
|
||||
Test::TensorTest<float> tensorTester;
|
||||
|
||||
// --- broadcasting
|
||||
|
||||
// simple broadcasting
|
||||
tensorTester.OneTensorTest("addition wth simple broadcasting", 1e-8, [&tensorTester](DEVICEID_TYPE deviceId)
|
||||
{
|
||||
return tensorTester.BroadcastingTest(TensorShape{ 3, 2 }, TensorShape({ 3, 1 }), deviceId);
|
||||
});
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(BiasAddition)
|
||||
{
|
||||
Test::TensorTest<float> tensorTester;
|
||||
|
||||
// typical bias for convolutional layer
|
||||
tensorTester.OneTensorTest("bias addition (broadcasting)", 1e-8, [&tensorTester](DEVICEID_TYPE deviceId)
|
||||
{
|
||||
return tensorTester.BroadcastingTest(TensorShape{ 28, 28, 128, 32 }, TensorShape({ 1, 1, 128 }), deviceId);
|
||||
});
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(BiasAddition2)
|
||||
{
|
||||
Test::TensorTest<float> tensorTester;
|
||||
// BUGBUG: This test is strange--Print() shows different values with depth 128 instead of 64, but IsEqual() does not fail with 1e-3 tolerance.
|
||||
// Something fishy going on. Dimension overflow?
|
||||
tensorTester.OneTensorTest("bias addition (broadcasting)", 1e-8, [&tensorTester](DEVICEID_TYPE deviceId)
|
||||
{
|
||||
return tensorTester.BroadcastingTest(TensorShape{ 256, 256, 64, 32 }, TensorShape({ 1, 1, 64 }), deviceId);
|
||||
});
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(BiasGradient)
|
||||
{
|
||||
Test::TensorTest<float> tensorTester;
|
||||
// --- reduction
|
||||
|
||||
// typical bias gradient (reduction) for FF-DNN
|
||||
tensorTester.OneTensorTest("bias gradient (reduction)", 1e-4, [&tensorTester](DEVICEID_TYPE deviceId)
|
||||
{
|
||||
return tensorTester.BiasGradientTest(TensorShape{ 2048, 1024 }, TensorShape(2048), deviceId);
|
||||
});
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(BiasGradient2)
|
||||
{
|
||||
Test::TensorTest<float> tensorTester;
|
||||
|
||||
// typical bias gradient (reduction) for convolutional layer
|
||||
tensorTester.OneTensorTest("bias gradient (reduction)", 1e-1, [&tensorTester](DEVICEID_TYPE deviceId)
|
||||
{
|
||||
return tensorTester.BiasGradientTest(TensorShape{ 256, 256, 64, 32 }, TensorShape({ 1, 1, 64 }), deviceId);
|
||||
});
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(ColumnSliceMultAndAdd)
|
||||
{
|
||||
ColumnSliceMultAndAddTest<float>(2048, 2048, 256, 0);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(RnnForwardProp)
|
||||
{
|
||||
TestRnnForwardPropSRP<float>();
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(OldRnnForwardProp)
|
||||
{
|
||||
TestOldRnnForwardPropSRP<float>();
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
||||
|
||||
} } } }
|
|
@ -0,0 +1,310 @@
|
|||
//
|
||||
// Copyright (c) Microsoft. All rights reserved.
|
||||
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Matrix.h"
|
||||
#include "CPUMatrix.h"
|
||||
#include "TensorView.h"
|
||||
#include "Sequences.h"
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK { namespace Test {
|
||||
|
||||
template <class ElemType>
|
||||
struct TensorTest
|
||||
{
|
||||
// run one test for both GPU and CPU and verify they are the same
|
||||
template<typename FN>
|
||||
void OneTensorTest(const char* what, double tolerance, const FN& fn)
|
||||
{
|
||||
fprintf(stderr, "===== Tensor test '%s'\n", what);
|
||||
|
||||
// run on GPU and CPU
|
||||
let resultGPU = fn(0);
|
||||
let resultCPU = fn(-1);
|
||||
|
||||
// dump top corner of the result to get a feel for the error
|
||||
resultGPU.GetSOB().Print("GPU result", 0, 7, 0, 9);
|
||||
resultGPU.GetSOB().TransferToDeviceIfNotThere(-1, true, false, true);
|
||||
resultCPU.GetSOB().Print("CPU result", 0, 7, 0, 9);
|
||||
|
||||
BOOST_CHECK(resultGPU.GetSOB().IsEqualTo(resultCPU.GetSOB(), (ElemType)tolerance));
|
||||
}
|
||||
|
||||
// helper to create a randomly initialized tensor object
|
||||
TensorView<ElemType> CreateTensor(TensorShape shape, int randomSeed, DEVICEID_TYPE deviceId, bool isResult = false)
|
||||
{
|
||||
let numElements = shape.GetNumElements();
|
||||
|
||||
if (isResult)
|
||||
cout << " ->";
|
||||
cout << " [" << string(shape) << "]";
|
||||
if (isResult)
|
||||
cout << " \t// " << (deviceId < 0 ? "C" : "G") << "PU\n " << flush;
|
||||
|
||||
// random init
|
||||
mt19937 rng(randomSeed);
|
||||
uniform_real_distribution<float> nd(-1, 1);
|
||||
vector<ElemType> init(numElements);
|
||||
generate(begin(init), end(init), [&] { return nd(rng); });
|
||||
|
||||
// create storage object (one-column matrix)
|
||||
let sob = make_shared<Matrix<ElemType>>(numElements/*rows*/, 1/*cols*/, init.data(), deviceId);
|
||||
|
||||
// create TensorView
|
||||
return TensorView<ElemType>(sob, shape);
|
||||
}
|
||||
|
||||
// test bias gradient (reduction)
|
||||
TensorView<ElemType> BiasGradientTest(TensorShape layerShape, TensorShape biasShape, DEVICEID_TYPE deviceId)
|
||||
{
|
||||
int randomSeed = 1;
|
||||
let gradient = CreateTensor(layerShape, randomSeed++, deviceId);
|
||||
auto bias = CreateTensor(biasShape, randomSeed++, deviceId, true);
|
||||
//gradient.GetSOB().Print("incoming gradient", 0, 9, 0, 9);
|
||||
//bias.GetSOB().Print("bias gradient", 0, 9, 0, 9);
|
||||
bias.DoCopyOf(1, gradient, 1);
|
||||
//bias.GetSOB().Print("updated bias gradient", 0, 9, 0, 9);
|
||||
return bias;
|
||||
}
|
||||
|
||||
// test broadcast summation gradient
|
||||
TensorView<ElemType> BroadcastingTest(TensorShape layerShape, TensorShape biasShape, DEVICEID_TYPE deviceId)
|
||||
{
|
||||
int randomSeed = 1;
|
||||
let input = CreateTensor(layerShape, randomSeed++, deviceId);
|
||||
auto bias = CreateTensor(biasShape, randomSeed++, deviceId);
|
||||
//input.GetSOB().Print("input data", 0, 9, 0, 9);
|
||||
//bias.GetSOB().Print("bias", 0, 9, 0, 9);
|
||||
auto result = CreateTensor(layerShape, randomSeed++, deviceId, true);
|
||||
result.AssignSumOf(input, bias);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
template <class ElemType>
|
||||
void SetToInitStateValueForResetSeg(const Matrix<ElemType>& sentenceBegin, size_t nStream, ElemType initStateValue, Matrix<ElemType>& newprevstate)
|
||||
{
|
||||
Matrix<ElemType> colSeg(sentenceBegin.GetDeviceId());
|
||||
colSeg.Resize(nStream, nStream);
|
||||
size_t nStateRow = newprevstate.GetNumRows();
|
||||
|
||||
assert(nStream == sentenceBegin.GetNumRows());
|
||||
|
||||
// only set state to init state value for segmentation = 0, and -1
|
||||
// e.g., -1 0 1 -> 0 0 1 -> 0 0 -1 -> 1 1 0
|
||||
|
||||
Matrix<ElemType> colPos(sentenceBegin.GetDeviceId());
|
||||
colPos.SetValue(sentenceBegin); // -1 0 1
|
||||
colPos.InplaceTruncateBottom(1 << 0 /*(int)MinibatchPackingFlags::SequenceStart*/); // TODO: these flags no longer exist, this test probably no longer applies
|
||||
Matrix<ElemType>::Scale((ElemType)-1.0, colPos);
|
||||
colPos += 0; // (int)MinibatchPackingFlags::None; // TODO: these flags no longer exist, this test probably no longer applies
|
||||
colSeg.SetDiagonalValue(colPos);
|
||||
Matrix<ElemType> ones(sentenceBegin.GetDeviceId());
|
||||
ones.Resize(nStateRow, nStream);
|
||||
ones.SetValue((ElemType)1);
|
||||
// add default state value if it is for reset
|
||||
Matrix<ElemType>::MultiplyAndWeightedAdd(initStateValue, ones, false, colSeg, false, 1.0, newprevstate); // += [0 initStateValue 0 ]
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void rnnForwardPropSRP(Matrix<ElemType>& functionValues, size_t mNbr, Matrix<ElemType>& pastActivity, Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& colBegin, const Matrix<ElemType>& needToCompute)
|
||||
{
|
||||
size_t ncol = functionValues.GetNumCols();
|
||||
size_t ntime = ncol / mNbr;
|
||||
Matrix<ElemType> out = functionValues.ColumnSlice(0, mNbr);
|
||||
Matrix<ElemType> inp((DEVICEID_TYPE)functionValues.GetDeviceId());
|
||||
|
||||
for (size_t d = 0; d < ntime; d++)
|
||||
{
|
||||
if (d == 0)
|
||||
inp = pastActivity.ColumnSlice(d, mNbr);
|
||||
else
|
||||
inp = inputFunctionValues.ColumnSlice(d, mNbr);
|
||||
|
||||
if (needToCompute.ColumnSlice(d, 1).Get00Element() == 1)
|
||||
{
|
||||
Matrix<ElemType> colSegPastActivity((DEVICEID_TYPE)functionValues.GetDeviceId());
|
||||
Matrix<ElemType> colSeg((DEVICEID_TYPE)functionValues.GetDeviceId());
|
||||
colSeg.Resize(mNbr, mNbr);
|
||||
colSeg.SetValue(0);
|
||||
colSegPastActivity.SetValue(colBegin);
|
||||
colSegPastActivity.InplaceTruncateBottom(1 << 0 /*(int)MinibatchPackingFlags::SequenceStart*/); // TODO: these flags no longer exist, this test probably no longer applies
|
||||
colSeg.SetDiagonalValue(colSegPastActivity);
|
||||
Matrix<ElemType>::Multiply(inp, false, colSeg, false, out);
|
||||
ElemType initStateValue = (ElemType) 0.1;
|
||||
SetToInitStateValueForResetSeg<ElemType>(colBegin, mNbr, initStateValue, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void oldRNNForwardPropSRP(const size_t timeIdxInSeq, const int delay, const bool reset, const ElemType default_activity, Matrix<ElemType>& functionValues, const Matrix<ElemType>& pastActivity, const Matrix<ElemType>& inputFunctionValues, const size_t indexInBatch, const size_t mNbr)
|
||||
{
|
||||
assert(delay > 0);
|
||||
|
||||
if (functionValues.GetNumRows() != inputFunctionValues.GetNumRows() ||
|
||||
functionValues.GetNumCols() != inputFunctionValues.GetNumCols())
|
||||
functionValues.Resize(inputFunctionValues.GetNumRows(),
|
||||
inputFunctionValues.GetNumCols());
|
||||
|
||||
int iPastIndex = (int)((int)timeIdxInSeq - (int)delay) * (int)mNbr;
|
||||
int d = iPastIndex;
|
||||
if (d < 0)
|
||||
d = (int)functionValues.Mod((float)iPastIndex, (float)pastActivity.GetNumCols());
|
||||
// this can point to the past activity of the previous mninibatch
|
||||
|
||||
Matrix<ElemType> out = functionValues.ColumnSlice(timeIdxInSeq * mNbr + indexInBatch, 1);
|
||||
Matrix<ElemType> inp((DEVICEID_TYPE)functionValues.GetDeviceId());
|
||||
|
||||
if (reset)
|
||||
out.SetValue(default_activity);
|
||||
else
|
||||
{
|
||||
if (iPastIndex < 0)
|
||||
inp = pastActivity.ColumnSlice(d + indexInBatch, 1);
|
||||
else
|
||||
inp = inputFunctionValues.ColumnSlice(d + indexInBatch, 1);
|
||||
out.AssignValuesOf(inp);
|
||||
}
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void oldRnnForwardPropSRP(Matrix<ElemType>& functionValues, size_t mNbr, Matrix<ElemType>& pastActivity, Matrix<ElemType>& inputFunctionValues)
|
||||
{
|
||||
size_t ncol = functionValues.GetNumCols();
|
||||
size_t ntime = ncol / mNbr;
|
||||
for (size_t timeIdxInSeq = 0; timeIdxInSeq < ntime; timeIdxInSeq++)
|
||||
{
|
||||
for (size_t i = 0; i < mNbr; i++)
|
||||
{
|
||||
bool reset = false;
|
||||
|
||||
if (timeIdxInSeq == 0)
|
||||
{
|
||||
reset = true;
|
||||
}
|
||||
oldRNNForwardPropSRP<ElemType>(timeIdxInSeq, 1, reset, (ElemType) 0.1, functionValues, pastActivity, inputFunctionValues, i, mNbr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void ColumnSliceMultAndAddTest(int n, int k, int m, DEVICEID_TYPE deviceID)
|
||||
{
|
||||
Matrix<ElemType> AG((size_t)n, (size_t)k, deviceID);
|
||||
AG.SetUniformRandomValue(-1, 1);
|
||||
|
||||
Matrix<ElemType> BG((size_t)k, (size_t)m, deviceID);
|
||||
BG.SetUniformRandomValue(-1, 1);
|
||||
|
||||
Matrix<ElemType> CG((size_t)n, (size_t)m, deviceID);
|
||||
Matrix<ElemType> DG((size_t)n, (size_t)m, deviceID);
|
||||
|
||||
auto t_startG = clock();
|
||||
Matrix<ElemType>::MultiplyAndAdd(AG, false, BG, false, CG);
|
||||
auto t_endG = clock();
|
||||
|
||||
fprintf(stderr, "MultiplyAndAdd Directly: %f seconds\n", 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC);
|
||||
|
||||
t_startG = clock();
|
||||
for (int i = 0; i < m; i++)
|
||||
{
|
||||
Matrix<ElemType> col_BG = BG.ColumnSlice(i, 1);
|
||||
Matrix<ElemType> col_CG = CG.ColumnSlice(i, 1);
|
||||
Matrix<ElemType>::MultiplyAndAdd(AG, false, col_BG, false, col_CG);
|
||||
}
|
||||
t_endG = clock();
|
||||
fprintf(stderr, "MultiplyAndAdd With ColumnSlice: %f seconds\n", 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC);
|
||||
|
||||
t_startG = clock();
|
||||
for (int i = 0; i < m; i++)
|
||||
{
|
||||
Matrix<ElemType> col_BG = BG.ColumnSlice(i, 1);
|
||||
Matrix<ElemType> col_CG = CG.ColumnSlice(i, 1);
|
||||
Matrix<ElemType>::MultiplyAndAdd(AG, false, col_BG, false, col_CG);
|
||||
}
|
||||
t_endG = clock();
|
||||
fprintf(stderr, "MultiplyAndAdd With ColumnSlice&: %f seconds\n", 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC);
|
||||
|
||||
Matrix<ElemType> col_BG1(0), col_CG1(0);
|
||||
t_startG = clock();
|
||||
for (int i = 0; i < m; i++)
|
||||
{
|
||||
col_BG1.AssignColumnSlice(BG, i, 1);
|
||||
col_CG1.AssignColumnSlice(CG, i, 1);
|
||||
Matrix<ElemType>::MultiplyAndAdd(AG, false, col_BG1, false, col_CG1);
|
||||
}
|
||||
t_endG = clock();
|
||||
fprintf(stderr, "MultiplyAndAdd With AssignColumnSlice: %f seconds\n", 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC);
|
||||
|
||||
t_startG = clock();
|
||||
for (int i = 0; i < m; i++)
|
||||
{
|
||||
Matrix<ElemType> col_CG = CG.ColumnSlice(i, 1);
|
||||
Matrix<ElemType> col_DG = DG.ColumnSlice(i, 1);
|
||||
col_DG.AssignSigmoidOf(col_CG);
|
||||
}
|
||||
t_endG = clock();
|
||||
fprintf(stderr, "AssignSigmoidOf With ColumnSlice: %f seconds\n", 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC);
|
||||
|
||||
t_startG = clock();
|
||||
for (int i = 0; i < m; i++)
|
||||
{
|
||||
col_BG1.AssignColumnSlice(BG, i, 1);
|
||||
col_CG1.AssignColumnSlice(CG, i, 1);
|
||||
col_BG1.AssignSigmoidOf(col_CG1);
|
||||
}
|
||||
t_endG = clock();
|
||||
fprintf(stderr, "AssignSigmoidOf With AssignColumnSlice: %f seconds\n", 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC);
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void TestRnnForwardPropSRP(size_t nRow = 100, size_t nCol = 1000, size_t mNbr = 10, DEVICEID_TYPE deviceID = 0)
|
||||
{
|
||||
Matrix<ElemType> functionValues(deviceID);
|
||||
Matrix<ElemType> colBegin(deviceID);
|
||||
Matrix<ElemType> pastActivity(deviceID);
|
||||
Matrix<ElemType> inputFunctionValues(deviceID);
|
||||
Matrix<ElemType> needToCompute(deviceID);
|
||||
|
||||
functionValues.Resize(nRow, nCol);
|
||||
colBegin.Resize(mNbr, 1);
|
||||
pastActivity.Resize(nRow, nCol);
|
||||
inputFunctionValues.Resize(nRow, nCol);
|
||||
needToCompute.Resize(1, nCol / mNbr);
|
||||
needToCompute.SetValue(0);
|
||||
needToCompute.ColumnSlice(0, 1).SetValue(1);
|
||||
auto t_start = clock();
|
||||
rnnForwardPropSRP<ElemType>(functionValues, mNbr, pastActivity, inputFunctionValues, colBegin, needToCompute);
|
||||
auto t_end = clock();
|
||||
fprintf(stderr, "testRnnForwardPropSRP: %f seconds\n", 1.0 * (t_end - t_start) / CLOCKS_PER_SEC);
|
||||
}
|
||||
|
||||
/**
|
||||
The old way of resetting RNN state, which used if statement. Also only supports up to two sentences within a minibatch
|
||||
*/
|
||||
template <class ElemType>
|
||||
void TestOldRnnForwardPropSRP(size_t nRow = 100, size_t nCol = 1000, size_t mNbr = 10, DEVICEID_TYPE deviceID = 0)
|
||||
{
|
||||
Matrix<ElemType> functionValues(deviceID);
|
||||
Matrix<ElemType> colBegin(deviceID);
|
||||
Matrix<ElemType> pastActivity(deviceID);
|
||||
Matrix<ElemType> inputFunctionValues(deviceID);
|
||||
|
||||
functionValues.Resize(nRow, nCol);
|
||||
colBegin.Resize(mNbr, 1);
|
||||
pastActivity.Resize(nRow, nCol);
|
||||
inputFunctionValues.Resize(nRow, nCol);
|
||||
auto t_start = clock();
|
||||
oldRnnForwardPropSRP<ElemType>(functionValues, mNbr, pastActivity, inputFunctionValues);
|
||||
auto t_end = clock();
|
||||
fprintf(stderr, "TestOldRnnForwardPropSRP: %f seconds\n", 1.0 * (t_end - t_start) / CLOCKS_PER_SEC);
|
||||
}
|
||||
}}}}
|
|
@ -15,14 +15,6 @@ struct Err
|
|||
static const T Rel;
|
||||
static const T Abs;
|
||||
};
|
||||
template <>
|
||||
const float Err<float>::Rel = 1e-5f;
|
||||
template <>
|
||||
const double Err<double>::Rel = 1e-5f;
|
||||
template <>
|
||||
const float Err<float>::Abs = 1.192092896e-07f;
|
||||
template <>
|
||||
const double Err<double>::Abs = 2.2204460492503131e-016;
|
||||
|
||||
bool AreEqual(float a, float b, float maxRelError, float maxAbsError);
|
||||
bool AreEqual(double a, double b, double maxRelError, double maxAbsError);
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
|
||||
//
|
||||
#include "stdafx.h"
|
||||
#include "common.h"
|
||||
|
||||
const int c_deviceIdZero = 0;
|
||||
|
||||
|
@ -13,3 +14,12 @@ const float c_epsilonFloatE1 = 0.1f;
|
|||
const float c_epsilonFloat5E4 = 0.0005f;
|
||||
const float c_epsilonFloatE5 = 0.00001f;
|
||||
const double c_epsilonDoubleE11 = 0.00000000001;
|
||||
|
||||
template <>
|
||||
const float Microsoft::MSR::CNTK::Test::Err<float>::Rel = 1e-5f;
|
||||
template <>
|
||||
const double Microsoft::MSR::CNTK::Test::Err<double>::Rel = 1e-5f;
|
||||
template <>
|
||||
const float Microsoft::MSR::CNTK::Test::Err<float>::Abs = 1.192092896e-07f;
|
||||
template <>
|
||||
const double Microsoft::MSR::CNTK::Test::Err<double>::Abs = 2.2204460492503131e-016;
|
||||
|
|
|
@ -14,8 +14,16 @@
|
|||
#endif
|
||||
#define _SCL_SECURE_NO_WARNINGS // current API of matrix does not allow safe invokations. TODO: change api to proper one.
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "targetver.h"
|
||||
#endif
|
||||
|
||||
#include <array>
|
||||
|
||||
#ifndef _WIN32
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#endif
|
||||
#include <boost/test/unit_test.hpp>
|
||||
|
||||
#include "constants.h"
|
||||
#include "fixtures.h"
|
||||
|
|
|
@ -70,7 +70,7 @@ struct DataFixture
|
|||
if (!envVariableErrorMessage.empty())
|
||||
{
|
||||
BOOST_TEST_MESSAGE(envVariableErrorMessage);
|
||||
fprintf(stderr, envVariableErrorMessage.c_str());
|
||||
fprintf(stderr, "%s", envVariableErrorMessage.c_str());
|
||||
}
|
||||
|
||||
newCurrentPath = m_testDataPath;
|
||||
|
|
|
@ -8,7 +8,15 @@
|
|||
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms
|
||||
#define _SCL_SECURE_NO_WARNINGS // current API of matrix does not allow safe invokations. TODO: change api to proper one.
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "targetver.h"
|
||||
#include "basics.h"
|
||||
#endif
|
||||
|
||||
#include "Basics.h"
|
||||
#include "BrainScriptParser.h"
|
||||
|
||||
#ifndef _WIN32
|
||||
// Use dynamic library on Linux
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
#endif
|
||||
#include <boost/test/unit_test.hpp>
|
||||
|
|
|
@ -4,7 +4,14 @@
|
|||
//
|
||||
#include "stdafx.h"
|
||||
#include <algorithm>
|
||||
#ifdef _WIN32
|
||||
#include <io.h>
|
||||
#else // On Linux
|
||||
#define _dup2 dup2
|
||||
#define _dup dup
|
||||
#define _close close
|
||||
#define _fileno fileno
|
||||
#endif
|
||||
#include <cstdio>
|
||||
#include <boost/scope_exit.hpp>
|
||||
#include "Common/ReaderTestHelper.h"
|
||||
|
|
|
@ -64,13 +64,13 @@ struct ReaderFixture
|
|||
if (!envVariableErrorMessage.empty())
|
||||
{
|
||||
BOOST_TEST_MESSAGE(envVariableErrorMessage);
|
||||
fprintf(stderr, envVariableErrorMessage.c_str());
|
||||
fprintf(stderr, "%s\n", envVariableErrorMessage.c_str());
|
||||
}
|
||||
|
||||
newCurrentPath = m_testDataPath;
|
||||
}
|
||||
}
|
||||
else if ((subPath[0] == '/' && subPath[1] == '//') || (subPath[0] == '\\' && subPath[1] == '\\'))
|
||||
else if ((subPath[0] == '/' && subPath[1] == '/') || (subPath[0] == '\\' && subPath[1] == '\\'))
|
||||
{
|
||||
newCurrentPath = subPath;
|
||||
}
|
||||
|
@ -295,16 +295,22 @@ struct ReaderFixture
|
|||
// readerSectionName : the reader field name in the test section
|
||||
|
||||
shared_ptr<DataReader> GetDataReader(
|
||||
const string configFileName,
|
||||
const string testSectionName,
|
||||
const string readerSectionName)
|
||||
const std::string& configFileName,
|
||||
const std::string& testSectionName,
|
||||
const std::string& readerSectionName,
|
||||
std::vector<std::wstring> additionalConfigParameters)
|
||||
{
|
||||
std::wstring configFN(configFileName.begin(), configFileName.end());
|
||||
std::wstring configFileCommand(L"configFile=" + configFN);
|
||||
std::wstring cntk(L"CNTK");
|
||||
std::vector<wchar_t*> arg{ &cntk[0], &configFileCommand[0] };
|
||||
for(auto& p : additionalConfigParameters)
|
||||
{
|
||||
arg.push_back(&p[0]);
|
||||
}
|
||||
|
||||
wchar_t* arg[2]{L"CNTK", &configFileCommand[0]};
|
||||
ConfigParameters config;
|
||||
const std::string rawConfigString = ConfigParameters::ParseCommandLine(2, arg, config);
|
||||
const std::string rawConfigString = ConfigParameters::ParseCommandLine((int)arg.size(), &arg[0], config);
|
||||
|
||||
config.ResolveVariables(rawConfigString);
|
||||
const ConfigParameters simpleDemoConfig = config(testSectionName);
|
||||
|
@ -344,14 +350,15 @@ struct ReaderFixture
|
|||
size_t numSubsets,
|
||||
bool sparseFeatures = false,
|
||||
bool sparseLabels = false,
|
||||
bool useSharedLayout = true)
|
||||
bool useSharedLayout = true,
|
||||
std::vector<std::wstring> additionalConfigParameters = {})
|
||||
{
|
||||
shared_ptr<StreamMinibatchInputs> inputsPtr =
|
||||
CreateStreamMinibatchInputs<ElemType>(numFeatureFiles, numLabelFiles,
|
||||
sparseFeatures, sparseLabels, useSharedLayout);
|
||||
|
||||
shared_ptr<DataReader> readerPtr = GetDataReader(configFileName,
|
||||
testSectionName, readerSectionName);
|
||||
testSectionName, readerSectionName, additionalConfigParameters);
|
||||
|
||||
// Perform the data reading
|
||||
HelperWriteReaderContentToFile<ElemType>(testDataFilePath, *readerPtr, *inputsPtr,
|
||||
|
@ -391,11 +398,12 @@ struct ReaderFixture
|
|||
size_t numSubsets,
|
||||
bool sparseFeatures = false,
|
||||
bool sparseLabels = false,
|
||||
bool useSharedLayout = true)
|
||||
bool useSharedLayout = true,
|
||||
std::vector<std::wstring> additionalConfigParameters = {})
|
||||
{
|
||||
HelperReadInAndWriteOut<ElemType>(configFileName, testDataFilePath, testSectionName, readerSectionName,
|
||||
epochSize, mbSize, epochs, numFeatureFiles, numLabelFiles, subsetNum,numSubsets,
|
||||
sparseFeatures, sparseLabels, useSharedLayout);
|
||||
sparseFeatures, sparseLabels, useSharedLayout, additionalConfigParameters);
|
||||
|
||||
CheckFilesEquivalent(controlDataFilePath, testDataFilePath);
|
||||
}
|
||||
|
@ -408,10 +416,11 @@ struct ReaderFixture
|
|||
void HelperRunReaderTestWithException(
|
||||
string configFileName,
|
||||
string testSectionName,
|
||||
string readerSectionName)
|
||||
string readerSectionName,
|
||||
std::vector<std::wstring> additionalConfigParameters = {})
|
||||
{
|
||||
BOOST_CHECK_THROW(
|
||||
GetDataReader(configFileName,testSectionName, readerSectionName),
|
||||
GetDataReader(configFileName, testSectionName, readerSectionName, additionalConfigParameters),
|
||||
ExceptionType);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
RootDir = .
|
||||
DataDir = $RootDir$
|
||||
|
||||
# deviceId = -1 for CPU, >= 0 for GPU devices
|
||||
deviceId = -1
|
||||
|
||||
precision = "double"
|
||||
|
||||
Simple_Test = [
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = 450000
|
||||
verbosity = 0
|
||||
|
||||
features1 = [
|
||||
dim = 40
|
||||
contextWindow=1
|
||||
type = "real"
|
||||
scpFile = "$DataDir$/features.rscp"
|
||||
]
|
||||
|
||||
features2 = [
|
||||
dim = 100
|
||||
scpFile = "$DataDir$/ivector.rscp"
|
||||
type = "real"
|
||||
expandToUtterance = true
|
||||
]
|
||||
|
||||
labels = [
|
||||
mlfFile = "$DataDir$/labels.smlf"
|
||||
labelMappingFile = "$DataDir$/labels.statelist"
|
||||
labelDim = 9000
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
]
|
|
@ -0,0 +1,32 @@
|
|||
RootDir = .
|
||||
DataDir = $RootDir$
|
||||
|
||||
# deviceId = -1 for CPU, >= 0 for GPU devices
|
||||
deviceId = -1
|
||||
|
||||
precision = "double"
|
||||
|
||||
Simple_Test = [
|
||||
reader = [
|
||||
minimizeReaderMemoryFootprint=false
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = 450000
|
||||
verbosity = 0
|
||||
|
||||
features = [
|
||||
dim = 40
|
||||
contextWindow=1
|
||||
type = "real"
|
||||
scpFile = "$DataDir$/features.rscp"
|
||||
]
|
||||
|
||||
labels = [
|
||||
mlfFile = "$DataDir$/labels.smlf"
|
||||
labelMappingFile = "$DataDir$/labels.statelist"
|
||||
labelDim = 9000
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
]
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче