Merge branch 'master' into qiwye/asgd-dev

Conflicts:
	Source/1BitSGD
	Source/SGDLib/SGD.cpp
This commit is contained in:
Qiwei Ye 2016-07-25 20:13:27 +08:00
Родитель fb9cded35b f3dec438d6
Коммит 5a33a35eac
132 изменённых файлов: 87000 добавлений и 2240 удалений

Просмотреть файл

@ -1133,6 +1133,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Scripts", "Scripts", "{6826
ProjectSection(SolutionItems) = preProject
Scripts\pytest.ini = Scripts\pytest.ini
Scripts\txt2ctf.py = Scripts\txt2ctf.py
Scripts\uci2ctf.py = Scripts\uci2ctf.py
EndProjectSection
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ManagedEvalTests", "Tests\UnitTests\ManagedEvalTests\ManagedEvalTests.csproj", "{CC8DDDCB-D53A-4B30-8596-AEF1C493DB31}"
@ -1142,6 +1143,9 @@ EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CSEvalClient", "Examples\Evaluation\CSEvalClient\CSEvalClient.csproj", "{1C6E6C53-1AA7-4B69-913E-B97BB5A872CF}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CPPEvalClient", "Examples\Evaluation\CPPEvalClient\CPPEvalClient.vcxproj", "{CCC07E8E-F33A-4AF7-9F60-93E2AA61C75E}"
ProjectSection(ProjectDependencies) = postProject
{482999D1-B7E2-466E-9F8D-2119F93EAFD9} = {482999D1-B7E2-466E-9F8D-2119F93EAFD9}
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution

Просмотреть файл

@ -863,38 +863,27 @@ The dimension reduced matrix consisting of the maximum value within each pooling
This function is often associated with Convolution() operations.
### Delay
### PastValue, FutureValue
Delay node used in recurrent networks, allows creation of a loop in the convolutional network that will repeat a specified number of times.
PastValue and FutureValue nodes are used in recurrent networks, allow creation of a loop in the convolutional network that will repeat a specified number of times. PastValue retrieves the value of a node several steps away in the past, while FutureValue retrieves the value of a node from future.
`Delay(rows, [cols], delayNode, delayTime=1, needGradient=true, defaultHiddenActivity=0.1)`
`PastValue(rows, [cols], node, timeStep=1, defaultHiddenActivity=0.1)`
`FutureValue(rows, [cols], node, timeStep=1, defaultHiddenActivity=0.1)`
#### Parameters
`cvweight` – convolution weight matrix, it has the dimensions of \[outputChannels, kernelWidth \* kernelHeight \* inputChannels\]
`rows` – number of rows in the node
`kernelWidth` – width of the kernel
`cols` – number of cols in the node. This value is often ommit since the length of a sequence varies
`kernelHeight` – height of the kernel
`timeStep` – \[default = 1\] number of time steps toward the past and future
`outputChannels` – number of output channels
`horizontalSubsample` – subsamples in the horizontal direction
`verticalSubsample` – subsamples in the vertical direction
#### Optional Parameters
`delayTime` – \[default = 1\] the amount of delay that will be introduced (number of times the loop will happen)
`needGradient` – \[default = true\] does the gradient need to be computed for this node
`defaultHiddenActivity` – \[default = 0.1\] the numerical amount for the defaultHiddenActivity
`defaultHiddenActivity` – \[default = 0.1\] default value to use when passing the sequence bounday or when the value is missing.
#### Returns
The results of the completed Delay loop
Eitehr the past or future value of a node
#### Notes
This node is used in recurrent networks, where a delay is introduced to examine values from a previous time, such as the prior value (t-1). This has the affect of creating a loop in the computational network that will repeat delayTime number of iterations.
This node is used in recurrent networks, where a past value is introduced to examine values from a previous time, such as the prior value (t-1). This has the affect of creating a loop in the computational network.

Просмотреть файл

@ -37,34 +37,23 @@ int main(int argc, char* argv[])
std::string app = argv[0];
std::string path;
IEvaluateModel<float> *model;
size_t pos;
#ifdef _WIN32
path = app.substr(0, app.rfind("\\"));
// Load the eval library
auto hModule = LoadLibrary(L"evaldll.dll");
if (hModule == nullptr)
{
fprintf(stderr, "Cannot find evaldll.dll library.");
return 1;
}
// Get the factory method to the evaluation engine
std::string func = "GetEvalF";
auto procAddress = GetProcAddress(hModule, func.c_str());
auto getEvalProc = (GetEvalProc<float>)procAddress;
// Native model evaluation instance
getEvalProc(&model);
pos = app.rfind("\\");
path = (pos == std::string::npos) ? "." : app.substr(0, pos);
// This relative path assumes launching from CNTK's binary folder, e.g. x64\Release
const std::string modelWorkingDirectory = path + "/../../Examples/Image/MNIST/Data/";
#else // on Linux
path = app.substr(0, app.rfind("/"));
GetEvalF(&model);
pos = app.rfind("/");
path = (pos == std::string::npos) ? "." : app.substr(0, pos);
// This relative path assumes launching from CNTK's binary folder, e.g. build/release/bin/
const std::string modelWorkingDirectory = path + "/../../../Examples/Image/MNIST/Data/";
#endif
GetEvalF(&model);
const std::string modelFilePath = modelWorkingDirectory + "../Output/Models/01_OneHidden";

Просмотреть файл

@ -69,7 +69,7 @@
<AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>EvalDLL.lib;%(AdditionalDependencies)</AdditionalDependencies>
<DelayLoadDLLs>%(DelayLoadDLLs)</DelayLoadDLLs>
<Profile>true</Profile>
</Link>

Просмотреть файл

@ -110,9 +110,15 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
public static bool Evaluate(string record)
{
var model = Models.Take();
var outcome = model.EvaluateRecord(record);
Models.Add(model);
return outcome;
try
{
var outcome = model.EvaluateRecord(record);
return outcome;
}
finally
{
Models.Add(model);
}
}
/// <summary>
@ -123,9 +129,15 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
public static List<float> Evaluate(List<float> inputs)
{
var model = Models.Take();
var outcome = model.EvaluateInput(inputs);
Models.Add(model);
return outcome;
try
{
var outcome = model.EvaluateInput(inputs);
return outcome;
}
finally
{
Models.Add(model);
}
}
/// <summary>

Просмотреть файл

@ -0,0 +1,226 @@
# The configuration file to build language understanding model with ATIS corpus.
# An LSTM model is built to tag each word in sentences with its semantic label.
WorkDir = work
DataDir = data
modelPath = $WorkDir$/ATIS.slot.lstm
parallelTrain = true
#stderr = $WorkDir$/log
command = Train:Output:Test
precision = "float"
deviceId = "-1" # change to "auto" to use GPUs
wordCount = 944 # number of words
labelCount = 127 # number of labels
# The command to train the LSTM model
Train = [
action = train
BrainScriptNetworkBuilder = [
inputDim = $wordCount$
labelDim = $labelCount$
featDim = inputDim*3 # contextual words are used as features: previous word, current word, next word.
embDim = 150
hiddenDim = 300
maxLayer = 1
initScale = 6
featuresPW = Input(inputDim) # the previous word
featuresCW = Input(inputDim) # the current word
featuresNW = Input(inputDim) # the next word
features = RowStack(featuresPW : featuresCW : featuresNW)
labels = Input(labelDim, tag = "label")
# embedding layer
emb = Parameter(embDim, featDim)
featEmbedded = emb * features
# build the LSTM stack
lstmDims[i:0..maxLayer-1] = hiddenDim
NoAuxInputHook (input, lstmState) = BS.Constants.None
lstmStack = BS.RNNs.RecurrentLSTMPStack (lstmDims,
cellDims=lstmDims,
featEmbedded,
inputDim=embDim,
previousHook=BS.RNNs.PreviousHC,
augmentInputHook=BS.RNNs.NoAuxInputHook,
augmentInputDim=0,
enableSelfStabilization=false)
lstmOutputLayer = Length (lstmStack)-1
LSTMoutput = lstmStack[lstmOutputLayer].h
W = Parameter(labelDim, hiddenDim, init = "uniform", initValueScale=initScale)
b = Parameter(labelDim, 1, init = "fixedValue", value=0)
outputs = W * LSTMoutput + b
cr = CrossEntropyWithSoftmax(labels, outputs)
criterionNodes = (cr)
evaluationNodes = (cr)
outputNodes = (outputs)
]
SGD = [
# maximum number of epochs
maxEpochs = 1 # set to 1 so this can be added to regression test. Increase to 20 get a good accuracy
# for each epoch, maximum number of input samples(words) is set below
epochSize = 36000
# minibatchSize should be larger than the maximum sentence length
minibatchSize = 70
learningRatesPerSample = 0.01*2:0.005*12:0.001
gradUpdateType = "FSAdaGrad"
gradientClippingWithTruncation = true
clippingThresholdPerSample = 15.0
# number of minibatches to report progress
numMBsToShowResult = 100
firstMBsToShowResult = 10
# if validation shows that the model has no improvement, then do back-up to the previously
# estimated model and reduce learning rate
loadBestModel = true
parallelTrain = [
parallelizationMethod = "DataParallelSGD"
parallelizationStartEpoch = 2
distributedMBReading = true
dataParallelSGD = [
gradientBits = 1
]
]
]
reader = [
readerType = "CNTKTextFormatReader"
file = "$DataDir$/ATIS.train.cntk.sparse"
randomize = true
input = [
featuresPW = [
alias = "PW" # previous word
dim = $wordCount$
format = "sparse"
]
featuresCW = [
alias = "CW" # current word
dim = $wordCount$
format = "sparse"
]
featuresNW = [
alias = "NW" # next word
dim = $wordCount$
format = "sparse"
]
labels = [
alias = "L" # label
dim = $labelCount$
format = "sparse"
]
]
]
]
# Evaluate the model to predict labels
Output = [
action = "write"
traceLevel = 1
epochSize = 0
defaultHiddenActivity = 0.1
BrainScriptNetworkBuilder = [
modelAsTrained = BS.Network.Load ("$modelPath$")
final = Hardmax(modelAsTrained.outputs)
]
outputPath = $WorkDir$/model.writeaction
outputNodeNames = final
reader = [
readerType = "CNTKTextFormatReader"
file = "$DataDir$/ATIS.test.cntk.sparse"
randomize = false
input = [
featuresPW = [
alias = "PW" # previous word
dim = $wordCount$
format = "sparse"
]
featuresCW = [
alias = "CW" # current word
dim = $wordCount$
format = "sparse"
]
featuresNW = [
alias = "NW" # next word
dim = $wordCount$
format = "sparse"
]
labels = [
alias = "L" # label
dim = $labelCount$
format = "sparse"
]
]
]
]
# Evaluate the model's accuracy
Test = [
action = "test"
traceLevel = 1
epochSize = 0
defaultHiddenActivity = 0.1
BrainScriptNetworkBuilder = [
labels = Input($labelCount$, tag = "label")
modelAsTrained = BS.Network.Load ("$modelPath$")
final = Hardmax(modelAsTrained.outputs)
errorRate = ErrorPrediction(labels, final, tag='evaluation')
]
evalNodeNames = errorRate
reader = [
readerType = "CNTKTextFormatReader"
file = "$DataDir$/ATIS.test.cntk.sparse"
randomize = false
input = [
featuresPW = [
alias = "PW" # previous word
dim = $wordCount$
format = "sparse"
]
featuresCW = [
alias = "CW" # current word
dim = $wordCount$
format = "sparse"
]
featuresNW = [
alias = "NW" # next word
dim = $wordCount$
format = "sparse"
]
labels = [
alias = "L" # label
dim = $labelCount$
format = "sparse"
]
]
]
]

Просмотреть файл

@ -0,0 +1,168 @@
# Build Language Understanding Models with CNTK
This example demonstrates how to use build language understanding model with CNTK using ATIS data set. This example is similar to
[SLU example](https://github.com/Microsoft/CNTK/tree/master/Examples/Text/Miscellaneous/SLU). They are different in that
- CNTKTextFormatReader is used here, instead of LUSequenceReader
- With CNTKTextFormatReader, the input format is much more flexible. In the example setting, sparse contextual feature vectors are explored
- Sparse label input is used.
The Air travel information system (ATIS) corpus is used for training and testing.
## Download the example
The data and configuration is checked in to github. You can get it by command:
`git clone https://github.com/Microsoft/cntk`
The example is under folder:
`<cntk_root>\Examples\Text\ATIS`
## Data File Format
There are four files under `data` sub-folder
|Files |Content |
|:----------------------|:--------|
|ATIS.train.cntk.sparse |featurized training data set
|ATIS.test.cntk.sparse |featurized test data set
|ATIS.vocab |all words extracted from training data. Vocab size: 944
|ATIS.labels |all semantic labels extracted from training data. Total labels: 127
We preprocess ATIS data by converting words into word indexes, and labels into label IDs in order to use
[CNTKTextFormatReader](https://github.com/Microsoft/CNTK/wiki/CNTKTextFormat-Reader). You can use any
script/tool to preprocess your text data files. In this example, data is already preprocessed.
The last two files ATIS.vocab and ATIS.labels are not really required to run the example. They are included for evaluation and debugging purpose.
E.g. they can be used to convert .sparse files back to original text files.
To understand the data format (two .sparse files), let's start with a sample sentence:
```
BOS i would like to find a flight from charlotte to Las Vegas that makes a stop in St. Louis EOS
```
it is converted into the following text:
```
1 |PW 1:1 |CW 1:1 |NW 12:1 |L 126:1
1 |PW 1:1 |CW 12:1 |NW 39:1 |L 126:1
1 |PW 12:1 |CW 39:1 |NW 28:1 |L 126:1
1 |PW 39:1 |CW 28:1 |NW 3:1 |L 126:1
1 |PW 28:1 |CW 3:1 |NW 86:1 |L 126:1
1 |PW 3:1 |CW 86:1 |NW 15:1 |L 126:1
1 |PW 86:1 |CW 15:1 |NW 10:1 |L 126:1
1 |PW 15:1 |CW 10:1 |NW 4:1 |L 126:1
1 |PW 10:1 |CW 4:1 |NW 101:1 |L 126:1
1 |PW 4:1 |CW 101:1 |NW 3:1 |L 48:1
1 |PW 101:1 |CW 3:1 |NW 92:1 |L 126:1
1 |PW 3:1 |CW 92:1 |NW 90:1 |L 78:1
1 |PW 92:1 |CW 90:1 |NW 33:1 |L 123:1
1 |PW 90:1 |CW 33:1 |NW 338:1 |L 126:1
1 |PW 33:1 |CW 338:1 |NW 15:1 |L 126:1
1 |PW 338:1 |CW 15:1 |NW 132:1 |L 126:1
1 |PW 15:1 |CW 132:1 |NW 17:1 |L 126:1
1 |PW 132:1 |CW 17:1 |NW 72:1 |L 126:1
1 |PW 17:1 |CW 72:1 |NW 144:1 |L 71:1
1 |PW 72:1 |CW 144:1 |NW 2:1 |L 119:1
1 |PW 144:1 |CW 2:1 |NW 2:1 |L 126:1
```
where the first column identifies the sequence (sentence) ID, which is the same for all words of the same sentence. There are four input streams: PW, CW, NW, L.
The input "PW" represents the previous word ID, "CW" for current word, and "NW" for next word. Input name "L" is for labels. The input names can be anything you
like and you can add more input as needed, e.g. words in a bigger window.
Words "BOS" and "EOS" denote beginning of sentence and end of sentences respectively.
Each line above represents one sample (word). E.g. the meaning of this line: `1 |PW 4:1 |CW 101:1 |NW 3:1 |L 48:1`:
* the sequence ID is 1
* the current word is "charlotte" whose word ID is 101
* the previous word is "from" whose ID is 4
* the next word is "to" whose ID is 3
* the semantic label is "B-fromloc.city_name" whose label Id is 48.
All word IDs, label IDs and corresponding words and labels are stored in ATIS.vocab and ATIS.labels.
## CNTK Configuration
In this example, we use BrainScript to create one-layer LSTM with embedding for slot tagging. The consolidated config file is ATIS.cntk. One can check the file (with some comments)
for details, especially how the reader is configured in ATIS.cntk.
reader=[
readerType = "CNTKTextFormatReader"
file = "$DataDir$/ATIS.train.cntk.sparse"
miniBatchMode = "partial"
randomize = true
input = [
featuresPW = [
alias = "PW" # previous word
dim = $wordCount$
format = "sparse"
]
featuresCW = [
alias = "CW" # current word
dim = $wordCount$
format = "sparse"
]
featuresNW = [
alias = "NW" # next word
dim = $wordCount$
format = "sparse"
]
labels = [
alias = "L" # label
dim = $labelCount$
format = "sparse"
]
]
]
The above section tell CNTK to use CNTKTextFormatReader to read data from the file "$DataDir/ATIS.train.cntk.sparse". The same input names (PW, CW, NW, L) are used to refer inputs (features and labels) provided in data files. The input is read into different
feature vectors: featuresPW, featuresCW, featuresNW and labels. These vectors are later used to build LSTM node with BrainScript as follows.
```
featuresPW = Input(inputDim)
featuresCW = Input(inputDim)
featuresNW = Input(inputDim)
features = RowStack(featuresPW : featuresCW : featuresNW)
labels=Input(labelDim, tag="label")
# embedding layer
emb = LearnableParameter(embDim, featDim)
featEmbedded = Times(emb, features)
# build the LSTM stack
lstmDims[i:0..maxLayer] = hiddenDim
NoAuxInputHook (input, lstmState) = BS.Constants.None
lstmStack = BS.RNNs.RecurrentLSTMPStack (lstmDims,
cellDims=lstmDims,
featEmbedded,
inputDim=embDim,
previousHook=BS.RNNs.PreviousHC,
augmentInputHook=BS.RNNs.NoAuxInputHook,
augmentInputDim=0,
enableSelfStabilization=false)
lstmOutputLayer = Length (lstmStack)-1
LSTMoutput = lstmStack[lstmOutputLayer].h
```
A few other notes about the config:
- it is important to specify the format is "sparse".
- the gradUpdateType is set FSAdaGrad. This setting reports better model accuracy comparing any other update methods.
- multiple LSTM layers can be used by changing the value of maxLayer.
Three commands are configured: Train, Output and Test. The command "Train" is used to train a model, "Output" is used to evaluate the model against a test set and store
the model output, and the command "Test" is to calculate the model's accuracy.
## Run the example
One can run the example locally or on Philly (for Microsoft internal users).
To run locally,
```sh
> mkdir work # the default work_dir
> open ATIS.cntk and update the value of deviceId: -1 for CPU, auto for GPU
> cntk.exe configFile=ATIS.cntk
```
By default, the maxEpochs is set to 1 to save training time. One can change it to larger value such as 20 in order to get a good model accuracy.
Depends on GPU, it normally takes about 20 minutes to run 20 epochs on single GPU, and slot F1 score is about 93.
**For Microsoft users only**, to run the job on Philly:
- first upload data folder to philly cloud. e.g. `\\storage.gcr.philly.selfhost.corp.microsoft.com\pnrsy\<your_alias>\ATIS `
- update the config file to philly cloud, e.g. `\\storage.gcr.philly.selfhost.corp.microsoft.com\pnrsy_scratch\<your_alias>\ATIS`
- go to http://philly/ to create a new job by specifying data folder and config file, and start the job.
More details about Philly, including how to upload data to Philly and start jobs, can be found [here](https://microsoft.sharepoint.com/teams/ATISG/SitePages/Philly%20Users%20Guide.aspx)

Просмотреть файл

@ -0,0 +1,127 @@
B-aircraft_code
B-airline_code
B-airline_name
B-airport_code
B-airport_name
B-arrive_date.date_relative
B-arrive_date.day_name
B-arrive_date.day_number
B-arrive_date.month_name
B-arrive_date.today_relative
B-arrive_time.end_time
B-arrive_time.period_mod
B-arrive_time.period_of_day
B-arrive_time.start_time
B-arrive_time.time
B-arrive_time.time_relative
B-booking_class
B-city_name
B-class_type
B-compartment
B-connect
B-cost_relative
B-day_name
B-day_number
B-days_code
B-depart_date.date_relative
B-depart_date.day_name
B-depart_date.day_number
B-depart_date.month_name
B-depart_date.today_relative
B-depart_date.year
B-depart_time.end_time
B-depart_time.period_mod
B-depart_time.period_of_day
B-depart_time.start_time
B-depart_time.time
B-depart_time.time_relative
B-economy
B-fare_amount
B-fare_basis_code
B-flight
B-flight_days
B-flight_mod
B-flight_number
B-flight_stop
B-flight_time
B-fromloc.airport_code
B-fromloc.airport_name
B-fromloc.city_name
B-fromloc.state_code
B-fromloc.state_name
B-meal
B-meal_code
B-meal_description
B-mod
B-month_name
B-or
B-period_of_day
B-restriction_code
B-return_date.date_relative
B-return_date.day_name
B-return_date.day_number
B-return_date.month_name
B-return_date.today_relative
B-return_time.period_mod
B-return_time.period_of_day
B-round_trip
B-state_code
B-state_name
B-stoploc.airport_code
B-stoploc.airport_name
B-stoploc.city_name
B-stoploc.state_code
B-time
B-time_relative
B-today_relative
B-toloc.airport_code
B-toloc.airport_name
B-toloc.city_name
B-toloc.country_name
B-toloc.state_code
B-toloc.state_name
B-transport_type
I-airline_name
I-airport_name
I-arrive_date.day_number
I-arrive_time.end_time
I-arrive_time.period_of_day
I-arrive_time.start_time
I-arrive_time.time
I-arrive_time.time_relative
I-city_name
I-class_type
I-cost_relative
I-depart_date.day_number
I-depart_date.today_relative
I-depart_time.end_time
I-depart_time.period_of_day
I-depart_time.start_time
I-depart_time.time
I-depart_time.time_relative
I-economy
I-fare_amount
I-fare_basis_code
I-flight_mod
I-flight_number
I-flight_stop
I-flight_time
I-fromloc.airport_name
I-fromloc.city_name
I-fromloc.state_name
I-meal_code
I-meal_description
I-restriction_code
I-return_date.date_relative
I-return_date.day_number
I-return_date.today_relative
I-round_trip
I-state_name
I-stoploc.city_name
I-time
I-today_relative
I-toloc.airport_name
I-toloc.city_name
I-toloc.state_name
I-transport_type
O

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,944 @@
</s>
BOS
EOS
to
from
flights
the
on
what
me
flight
show
i
boston
san
a
denver
in
and
francisco
atlanta
is
pittsburgh
dallas
all
baltimore
list
philadelphia
like
are
airlines
of
between
that
washington
pm
leaving
please
morning
would
fly
for
city
fare
wednesday
first
need
after
trip
oakland
there
ground
round
does
transportation
'd
which
cheapest
you
arriving
class
before
available
american
new
fares
milwaukee
with
give
have
afternoon
york
st.
one
dc
at
way
monday
leave
arrive
airport
thursday
how
want
tuesday
nonstop
find
am
earliest
go
vegas
miami
las
united
information
orlando
phoenix
chicago
sunday
saturday
evening
charlotte
twenty
newark
can
delta
toronto
seattle
diego
kansas
indianapolis
houston
airline
noon
any
friday
lake
salt
's
next
us
o'clock
cleveland
continental
air
angeles
los
august
worth
do
fort
july
stop
code
5
seventh
early
memphis
tell
aircraft
downtown
or
june
6
louis
montreal
cincinnati
around
tomorrow
cost
going
latest
petersburg
tampa
many
minneapolis
nashville
8
get
mean
jose
detroit
10
an
departing
stopover
tacoma
by
about
twa
much
7
leaves
may
long
type
burbank
see
expensive
ticket
international
12
travel
could
dollars
than
daily
columbus
service
beach
'm
california
9
night
least
know
economy
time
4
depart
into
meal
paul
coach
book
april
airports
northwest
la
lowest
now
december
less
westchester
day
serves
it
serve
november
okay
arrives
used
field
love
last
ontario
second
county
return
kind
september
mitchell
general
as
stops
flying
2
third
be
direct
fifth
eighth
stopping
times
breakfast
out
make
capacity
car
take
schedule
seating
sixth
1000
number
goes
cities
dinner
connecting
3
dl
fourth
airfare
possible
this
has
served
meals
ninth
looking
also
restriction
week
late
eastern
returning
back
today
interested
price
business
most
prices
1991
two
types
flies
twentieth
will
through
limousine
ua
bwi
via
tenth
using
stand
plane
ap
fifteenth
guardia
same
1
should
other
arrangements
f
only
rental
then
display
your
shortest
wednesdays
listing
canadian
classes
again
numbers
thirtieth
florida
express
midwest
tickets
where
twelfth
sixteenth
h
north
eleventh
carolina
seventeenth
under
smallest
mco
distance
lunch
either
makes
if
qx
transport
far
hp
57
october
no
my
m80
thank
arizona
jfk
colorado
jersey
q
weekday
airplane
y
planes
some
departure
use
ewr
their
ohio
thirty
nineteenth
when
fourteenth
explain
layover
alaska
march
stopovers
live
people
traveling
serving
rent
hi
offer
later
yes
january
area
logan
right
booking
sfo
midnight
yn
but
during
landings
february
dfw
abbreviation
630
both
're
230
qw
boeing
coming
passengers
arrange
hours
qo
codes
trying
tower
466
canada
each
530
over
uses
arrivals
11
southwest
281
trips
838
days
those
takeoffs
lufthansa
west
1100
arrival
757
minnesota
anywhere
america
430
thrift
let
mornings
nationair
'll
kinds
cheap
close
seats
pennsylvania
name
quebec
indiana
michigan
saturdays
different
taxi
provided
rates
utah
these
starting
sometime
costs
making
bh
eighteenth
following
another
ff
near
747
ea
1992
connect
help
choices
sa
maximum
wish
1115
six
weekdays
more
total
s
dc10
d9s
2100
snack
1245
georgia
72s
73s
f28
heading
departures
amount
825
737
813
ap57
sixteen
m
sorry
serviced
three
miles
departs
1700
requesting
718
land
nevada
100
so
tennessee
tuesdays
hello
destination
reservation
texas
rentals
co
meaning
ap80
1500
270
thursdays
philly
thirteenth
services
sundays
turboprop
stands
415
provide
cars
we
great
mondays
include
sure
't
well
2134
fn
555
ord
934
connection
296
abbreviations
755
highest
hold
720
fit
80
soon
four
ten
noontime
too
offers
options
within
difference
c
restrictions
plan
originating
describe
nw
1110
connections
dulles
21
733
say
approximately
define
852
1291
rate
who
proper
beginning
being
329
352
don
1024
such
wanted
615
mealtime
provides
prefer
1288
257
across
continent
overnight
local
route
746
off
j31
closest
19
lax
l10
be1
1994
red
eye
not
aa
dca
determine
1200
1205
dtw
airfares
capacities
200
town
lga
300
1993
database
1765
eight
up
originate
look
cp
carries
here
201
located
dinnertime
1039
lastest
1222
they
just
d
limo
3724
210
stapleton
343
1145
schedules
932
nonstops
without
landing
b
midway
217
bound
727
takeoff
324
train
along
friends
transcontinental
missouri
reservations
lives
767
269
ac
atl
month
taking
repeat
845
airplanes
buy
still
itinerary
actually
earlier
various
reaching
very
names
505
grounds
ap68
must
kennedy
operation
4400
1201
297
question
combination
basis
laying
1133
650
tonight
43
ls
sam
ap58
once
nighttime
yx
kw
212
1600
tpa
prior
good
1800
819
inform
k
dc9
305
anything
771
459
calling
designate
417
spend
hou
1220
directly
jet
reverse
staying
l1011
belong
445
515
travels
order
mci
150
110
connects
charges
minimum
intercontinental
497766
sounds
811
seat
final
phl
20
start
823
1059
271
382
able
put
locate
hartfield
scheduled
run
225
1158
equipment
begins
lands
reaches
carried
wn
bn
try
included
130
continuing
india
lester
pearson
listings
1209
everywhere
sd
whether
offered
486
1300
950
usa
1045
al
currently
enroute
visit
them
takes
55
thing
705
fridays
catch
straight
advertises
having
planning
listed
1055
405
468
equal
working
sb
hopefully
dh8
symbols
sort
cover
810
operating
320
639
seventeen
1207
608
besides
companies
've
got
somebody
else
wants
level
vicinity
1940
311
mia
instead
priced
eleven
comes
greatest
summer
economic
bay
402
gets
date
1020
730
400
doesn
toward
home
1850
1505
runs
673
723
thanks
bring
zone
yyz
afternoons
non
largest
500
come
428
98
qualify
279
137338
d10
539
fine
while
665
concerning
iah
1230
oak
preferably
twelve
3357
323
nights
229
regarding
seven
inexpensive
420
416
repeating
scenario
139
82
kindly
limousines
345
afterwards
734
place
includes
106
1026
124
fifteen
bna
supper
oh
71
thereafter
2153
year
discount
1130
1030
world
trans
including
represented
o
'hare
exceeding
815
928
163
bur
419
cvg
1017
315
842
1083
0900
longest
called
snacks
645
ever
single

131
Makefile
Просмотреть файл

@ -71,7 +71,7 @@ INCLUDEPATH:= $(addprefix $(SOURCEDIR)/, Common/Include CNTKv2LibraryDll CNTKv2L
# COMMON_FLAGS include settings that are passed both to NVCC and C++ compilers.
COMMON_FLAGS:= -D_POSIX_SOURCE -D_XOPEN_SOURCE=600 -D__USE_XOPEN2K -std=c++11
CPPFLAGS:=
CXXFLAGS:= -msse3 -mssse3 -std=c++0x -fopenmp -fpermissive -fPIC -Werror -fcheck-new
CXXFLAGS:= -msse4.1 -mssse3 -std=c++0x -fopenmp -fpermissive -fPIC -Werror -fcheck-new
LIBPATH:=
LIBS:=
LDFLAGS:=
@ -375,6 +375,8 @@ CNTKLIBRARY_SRC =\
$(SOURCEDIR)/CNTKv2LibraryDll/Utils.cpp \
$(SOURCEDIR)/CNTKv2LibraryDll/Value.cpp \
$(SOURCEDIR)/CNTKv2LibraryDll/Variable.cpp \
$(SOURCEDIR)/CNTKv2LibraryDll/Learner.cpp \
CNTKLIBRARY_SRC+=$(CNTK_COMMON_SRC)
CNTKLIBRARY_SRC+=$(COMPUTATION_NETWORK_LIB_SRC)
@ -437,7 +439,7 @@ EVAL_SRC=\
$(SOURCEDIR)/ActionsLib/NetworkFactory.cpp \
$(SOURCEDIR)/ActionsLib/NetworkDescriptionLanguage.cpp \
$(SOURCEDIR)/ActionsLib/SimpleNetworkBuilder.cpp \
$(SOURCEDIR)/ActionsLib/NDLNetworkBuilder.cpp
$(SOURCEDIR)/ActionsLib/NDLNetworkBuilder.cpp \
EVAL_SRC+=$(SGDLIB_SRC)
EVAL_SRC+=$(COMPUTATION_NETWORK_LIB_SRC)
@ -814,6 +816,127 @@ $(CNTK_CORE_BS): $(SOURCEDIR)/CNTK/BrainScript/CNTKCoreLib/CNTK.core.bs
@echo bin-placing deployable resource files
cp -f $^ $@
########################################
# Unit Tests
########################################
# use system pre-installed Boost libraries
# Todo: use our own version of boost libraries
BOOSTLIB_PATH = /usr/lib/x86_64-linux-gnu
BOOSTLIBS := boost_unit_test_framework boost_filesystem boost_system
UNITTEST_EVAL_SRC = \
$(SOURCEDIR)/../Tests/UnitTests/EvalTests/EvalExtendedTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/EvalTests/stdafx.cpp
UNITTEST_EVAL_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(UNITTEST_EVAL_SRC))
UNITTEST_EVAL := $(BINDIR)/evaltests
# Temporarily not build unit tests as the docker image does not include boost.
#ALL += $(UNITTEST_EVAL)
#SRC += $(UNITTEST_EVAL_SRC)
$(UNITTEST_EVAL) : $(UNITTEST_EVAL_OBJ) | $(EVAL_LIB) $(CNTKMATH_LIB)
@echo $(SEPARATOR)
@mkdir -p $(dir $@)
@echo building $@ for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(LIBDIR) $(BOOSTLIB_PATH)) -o $@ $^ $(patsubst %, -l%, $(BOOSTLIBS)) -l$(EVAL) -l$(CNTKMATH)
#TODO: create project specific makefile or rules to avoid adding project specific path to the global path
INCLUDEPATH += $(SOURCEDIR)/Readers/CNTKTextFormatReader
UNITTEST_READER_SRC = \
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/CNTKTextFormatReaderTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/HTKLMFReaderTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/ImageReaderTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/ReaderLibTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/UCIFastReaderTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/ReaderTests/stdafx.cpp \
$(SOURCEDIR)/Readers/CNTKTextFormatReader/Indexer.cpp \
$(SOURCEDIR)/Readers/CNTKTextFormatReader/TextParser.cpp \
UNITTEST_READER_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(UNITTEST_READER_SRC))
UNITTEST_READER := $(BINDIR)/readertests
# Temporarily not build unit tests as the docker image does not include boost.
#ALL += $(UNITTEST_READER)
#SRC += $(UNITTEST_READER_SRC)
$(UNITTEST_READER): $(UNITTEST_READER_OBJ) | $(HTKMLFREADER) $(HTKDESERIALIZERS) $(UCIFASTREADER) $(COMPOSITEDATAREADER) $(IMAGEREADER) $(CNTKMATH_LIB)
@echo $(SEPARATOR)
@mkdir -p $(dir $@)
@echo building $@ for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(LIBDIR) $(BOOSTLIB_PATH)) -o $@ $^ $(patsubst %, -l%, $(BOOSTLIBS)) -l$(CNTKMATH)
########################################
# Network unit tests: builds the NetworkTests sources together with the
# BrainScript / NDL / ActionsLib sources they exercise, then links against
# the CNTK math library and Boost; the text-format reader is needed at run time.
# NOTE(review): the last entry of the source list below ends with a '\'
# line continuation; confirm the original file terminates the list (e.g.
# with a blank line) before the '+=' lines, or they would be swallowed
# into the assignment.
########################################
UNITTEST_NETWORK_SRC = \
$(SOURCEDIR)/../Tests/UnitTests/NetworkTests/OperatorEvaluation.cpp \
$(SOURCEDIR)/../Tests/UnitTests/NetworkTests/stdafx.cpp \
$(SOURCEDIR)/CNTK/ModelEditLanguage.cpp \
$(SOURCEDIR)/ActionsLib/TrainActions.cpp \
$(SOURCEDIR)/ActionsLib/EvalActions.cpp \
$(SOURCEDIR)/ActionsLib/OtherActions.cpp \
$(SOURCEDIR)/ActionsLib/SpecialPurposeActions.cpp \
$(SOURCEDIR)/ActionsLib/NetworkFactory.cpp \
$(SOURCEDIR)/ActionsLib/NetworkDescriptionLanguage.cpp \
$(SOURCEDIR)/ActionsLib/SimpleNetworkBuilder.cpp \
$(SOURCEDIR)/ActionsLib/NDLNetworkBuilder.cpp \
$(SOURCEDIR)/CNTK/BrainScript/BrainScriptEvaluator.cpp \
$(SOURCEDIR)/CNTK/BrainScript/BrainScriptParser.cpp \
$(SOURCEDIR)/CNTK/BrainScript/BrainScriptTest.cpp \
UNITTEST_NETWORK_SRC += $(COMPUTATION_NETWORK_LIB_SRC)
UNITTEST_NETWORK_SRC += $(CNTK_COMMON_SRC)
UNITTEST_NETWORK_SRC += $(SEQUENCE_TRAINING_LIB_SRC)
UNITTEST_NETWORK_SRC += $(SGDLIB_SRC)
# Objects come from both .cpp and .cu sources, hence the nested patsubst.
UNITTEST_NETWORK_OBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(patsubst %.cpp, $(OBJDIR)/%.o, $(UNITTEST_NETWORK_SRC)))
UNITTEST_NETWORK := $(BINDIR)/networktests
# Temporarily do not build the unit tests, as the docker image does not include boost.
#ALL += $(UNITTEST_NETWORK)
#SRC += $(UNITTEST_NETWORK_SRC)
# $(CNTKMATH_LIB) and $(CNTKTEXTFORMATREADER) are order-only prerequisites
# (after '|'): required to exist, but the binary is not relinked when they change.
$(UNITTEST_NETWORK): $(UNITTEST_NETWORK_OBJ) | $(CNTKMATH_LIB) $(CNTKTEXTFORMATREADER)
@echo $(SEPARATOR)
@mkdir -p $(dir $@)
@echo building $@ for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(NVMLLIBPATH) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(LIBDIR) $(LIBPATH) $(BOOSTLIB_PATH)) -o $@ $^ $(patsubst %, -l%, $(BOOSTLIBS)) -l$(CNTKMATH) $(LIBS)
########################################
# Math unit tests: covers the CPU/GPU dense and sparse matrix
# implementations, BLAS interop, convolution / batch-normalization engines,
# quantization, and matrix file I/O.
# NOTE(review): the last source entry below ends with a '\' continuation;
# confirm the original file has a separator (e.g. blank line) before the
# UNITTEST_MATH_OBJ assignment.
########################################
UNITTEST_MATH_SRC = \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/BatchNormalizationEngineTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/BlockMultiplierTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/constants.cpp \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/ConvolutionEngineTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/CPUMatrixTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/CPUSparseMatrixTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/fixtures.cpp \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/GPUMatrixCudaBlasTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/GPUMatrixTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/GPUSparseMatrixTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/MatrixBlasTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/MatrixDataSynchronizationTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/MatrixFileWriteReadTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/MatrixQuantizerTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/MatrixSparseDenseInteractionsTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/MatrixTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/stdafx.cpp \
UNITTEST_MATH_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(UNITTEST_MATH_SRC))
UNITTEST_MATH := $(BINDIR)/mathtests
# Temporarily do not build the unit tests, as the docker image does not include boost.
#ALL += $(UNITTEST_MATH)
#SRC += $(UNITTEST_MATH_SRC)
# $(CNTKMATH_LIB) after '|' is an order-only prerequisite.
$(UNITTEST_MATH): $(UNITTEST_MATH_OBJ) | $(CNTKMATH_LIB)
@echo $(SEPARATOR)
@mkdir -p $(dir $@)
@echo building $@ for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(NVMLLIBPATH) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(LIBDIR) $(LIBPATH) $(BOOSTLIB_PATH)) -o $@ $^ $(patsubst %, -l%, $(BOOSTLIBS)) $(LIBS) -l$(CNTKMATH)
unittests: $(UNITTEST_EVAL) $(UNITTEST_READER) $(UNITTEST_NETWORK) $(UNITTEST_MATH)
########################################
# General compile and dependency rules
########################################
@ -838,13 +961,13 @@ $(OBJDIR)/%.o : %.cu $(BUILD_CONFIGURATION)
@mkdir -p $(dir $@)
$(NVCC) -c $< -o $@ $(COMMON_FLAGS) $(CUFLAGS) $(INCLUDEPATH:%=-I%) -Xcompiler "-fPIC -Werror"
$(OBJDIR)/%.o : %.cpp $(BUILD_CONFIGURATION)
$(OBJDIR)/%.o : %.cpp $(BUILD_CONFIGURATION)
@echo $(SEPARATOR)
@echo creating $@ for $(ARCH) with build type $(BUILDTYPE)
@mkdir -p $(dir $@)
$(CXX) -c $< -o $@ $(COMMON_FLAGS) $(CPPFLAGS) $(CXXFLAGS) $(INCLUDEPATH:%=-I%) -MD -MP -MF ${@:.o=.d}
.PHONY: clean buildall all
.PHONY: clean buildall all unittests
clean:
@echo $(SEPARATOR)

Просмотреть файл

@ -1,6 +1,11 @@
# CNTK
## Latest news
*2016-07-15.* V 1.6 Binary release
CNTK v.1.6 binaries are on the [CNTK Releases page](https://github.com/Microsoft/CNTK/releases)
*2016-07-12.* We have further expanded Licensing options for CNTK 1bit-SGD and related components. See the details at the [Wiki page](https://github.com/microsoft/cntk/wiki/CNTK-1bit-SGD-License). These new options are an extension of the new CNTK 1bit-SGD License that we have announced on Jun 23, 2016.
*2016-07-05.* CNTK now supports *Deconvolution* and *Unpooling*. See the usage example in the Network number 4 in [MNIST Sample](https://github.com/Microsoft/CNTK/blob/master/Examples/Image/MNIST/README.md).
*2016-06-23.* New License Terms for CNTK 1bit-SGD and related components.
@ -8,12 +13,6 @@ Effective immediately the License Terms for CNTK 1bit-SGD and related components
*2016-06-20.* A [post](http://itpeernetwork.intel.com/accelerating-the-computational-network-tool-kit-with-intel-mkl/) on Intel MKL and CNTK is published in the [Intel IT Peer Network](http://itpeernetwork.intel.com/accelerating-the-computational-network-tool-kit-with-intel-mkl/)
*2016-06-16.* V 1.5 Binary release. NuGet Package with CNTK Model Evaluation Libraries.
NuGet Package is added to CNTK v.1.5 binaries. See [CNTK Releases page](https://github.com/Microsoft/CNTK/releases) and [NuGet Package description](https://github.com/Microsoft/CNTK/wiki/Nuget-Package-for-Evaluation).
*2016-06-15.* CNTK now supports building against a custom Intel® Math Kernel Library (MKL).
See [setup instructions](https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-your-machine) on how to set this up for your platform.
See [all news](https://github.com/Microsoft/CNTK/wiki/News).
## What is CNTK

24
Scripts/README.md Normal file
Просмотреть файл

@ -0,0 +1,24 @@
This directory contains scripts that help with using different components of CNTK.
### CNTK Text format Converters
Two Python scripts for converting data to CNTK Text format, for use as input to the CNTK Text Format Reader (see https://github.com/microsoft/cntk/wiki/CNTKTextFormat-Reader).
```
txt2ctf.py
```
Converts a set of dictionary files and a plain text file to CNTK Text format. Run ```python txt2ctf.py -h``` to see usage instructions. See the comments at the beginning of the script file for a specific usage example.
```
uci2ctf.py
```
Converts data stored in a text file in UCI format to CNTK Text format. Run ```python uci2ctf.py -h``` to see usage instructions and example. Also see a usage example below:
```
python Scripts/uci2ctf.py --input_file Examples/Image/MNIST/Data/Train-28x28.txt --features_start 1 --features_dim 784 --labels_start 0 --labels_dim 1 --num_labels 10 --output_file Examples/Image/MNIST/Data/Train-28x28_cntk_text.txt
```
```input_file``` – original dataset in the (columnar) UCI format
```features_start``` – index of the first feature column (start parameter in the UCIFastReader config, see https://github.com/Microsoft/CNTK/wiki/UCI-Fast-Reader)
```features_dim``` – number of feature columns (dim parameter in the UCIFastReader config)
```labels_start``` - index of the first label column
```labels_dim``` – number of label columns
```num_labels``` – number of possible label values (labelDim parameter in the UCIFastReader config)
```output_file``` – path and filename of the resulting dataset.

Просмотреть файл

@ -191,6 +191,7 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
else if (EqualInsensitive(nodeType, OperationNameOf(KhatriRaoProductNode), L"ColumnwiseCrossProduct")) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(LearnableParameter), L"Parameter")) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(LogNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(LogPlusNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(LogSoftmaxNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(LogisticNode), L"Logistic")) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(LookupTableNode))) ret = true;

Просмотреть файл

@ -53,7 +53,6 @@ public:
__declspec_noreturn static inline void EvaluationError(const wstring &msg, TextLocation where)
{
//Microsoft::MSR::CNTK::DebugUtil::PrintCallStack();
throw EvaluationException(msg, where);
}

Просмотреть файл

@ -89,9 +89,18 @@ struct Issue
// Because it is often hard to recognize an issue only from the point where it occurred, we also report the history in compact visual form.
// Since often multiple contexts are on the same source line, we only print each source line once in a consecutive row of contexts.
// Pretty-prints an issue (error or warning) with its source-location history:
// builds the full message via CreateIssueMessage() and writes it to stderr,
// flushing immediately so the output is visible even if the process aborts.
/*static*/ void TextLocation::PrintIssue(const vector<TextLocation>& locations, const wchar_t* errorKind, const wchar_t* kind, const wchar_t* what)
{
wstring error = CreateIssueMessage(locations, errorKind, kind, what);
fprintf(stderr, "%ls", error.c_str());
fflush(stderr);
}
/*static*/ wstring TextLocation::CreateIssueMessage(const vector<TextLocation>& locations, const wchar_t* errorKind, const wchar_t* kind, const wchar_t* what)
{
vector<Issue> issues; // tracing the error backwards
size_t symbolIndex = 0;
wstring message;
for (size_t n = 0; n < locations.size(); n++)
{
let& location = locations[n];
@ -125,20 +134,23 @@ struct Issue
if (!locations.empty()) // (be resilient to some throwers not having a TextLocation; to be avoided)
{
let& firstLoc = issues.front().location;
fprintf(stderr, "[CALL STACK]\n");
message += wstrprintf(L"[CALL STACK]\n");
for (auto i = issues.rbegin(); i != issues.rend(); i++)
{
let& issue = *i;
auto& where = issue.location;
const auto& lines = where.GetSourceFile().lines;
const auto line = (where.lineNo == lines.size()) ? L"(end)" : lines[where.lineNo].c_str();
fprintf(stderr, " %ls\n %ls\n", line, issue.markup.c_str());
message += wstrprintf(L" %ls\n %ls\n", line, issue.markup.c_str());
}
fprintf(stderr, "%ls while %ls: %ls(%d)", errorKind, kind, firstLoc.GetSourceFile().path.c_str(), (int)firstLoc.lineNo + 1 /*report 1-based*/);
message += wstrprintf(L"%ls while %ls: %ls(%d)", errorKind, kind, firstLoc.GetSourceFile().path.c_str(), (int)firstLoc.lineNo + 1 /*report 1-based*/);
}
else
fprintf(stderr, "%ls while %ls", errorKind, kind);
fprintf(stderr, ": %ls\n", what), fflush(stderr);
{
message += wstrprintf(L"%ls while %ls", errorKind, kind);
}
message += wstrprintf(L": %ls\n", what);
return message;
}
/*static*/ vector<SourceFile> TextLocation::sourceFileMap;

Просмотреть файл

@ -37,6 +37,7 @@ struct TextLocation // position in the text. Lightweight value struct that we ca
// helpers for pretty-printing errors: Show source-code line with ...^ under it to mark up the point of error
static void PrintIssue(const vector<TextLocation>& locations, const wchar_t* errorKind, const wchar_t* kind, const wchar_t* what);
static std::wstring CreateIssueMessage(const vector<TextLocation>& locations, const wchar_t* errorKind, const wchar_t* kind, const wchar_t* what);
static void Trace(TextLocation, const wchar_t* traceKind, const wchar_t* op, const wchar_t* exprPath);
// construction
@ -77,8 +78,12 @@ public:
} // where the error happened
virtual const wchar_t* kind() const = 0; // e.g. "warning" or "error"
wstring GetError(const std::wstring& linePrefix) const override
{
return TextLocation::CreateIssueMessage(locations, linePrefix.c_str(), kind(), msra::strfun::utf16(what()).c_str());
}
// pretty-print this as an error message
void /*ScriptingException::*/ PrintError(const std::wstring& linePrefix) const
void /*ScriptingException::*/ PrintError(const std::wstring& linePrefix) const override
{
TextLocation::PrintIssue(locations, linePrefix.c_str(), kind(), msra::strfun::utf16(what()).c_str());
}

Просмотреть файл

@ -18,6 +18,7 @@
#include "NDLNetworkBuilder.h"
#include "ModelEditLanguage.h"
#include "CPUMatrix.h" // used for SetNumThreads()
#include "GPUMatrix.h" // used for SyncGuard::EnableSync()
#include "CommonMatrix.h"
#include "SGD.h"
#include "MPIWrapper.h"
@ -117,6 +118,23 @@ size_t GetMaxEpochs(const ConfigParameters& configParams)
return maxEpochs;
}
#ifndef CPUONLY
// Aborts execution (via InvalidArgument) if the GPU identified by deviceId is
// not usable: either its compute capability is below 3.0, or the device id is
// unknown. Any other GpuValidity value is accepted silently.
void CheckSupportForGpu(DEVICEID_TYPE deviceId)
{
auto gpuData = GetGpuData(deviceId);
if (gpuData.validity == GpuValidity::ComputeCapabilityNotSupported)
{
InvalidArgument("CNTK: The GPU (%s) has compute capability %d.%d. CNTK is only supported on GPUs with compute capability 3.0 or greater",
gpuData.name.c_str(), gpuData.versionMajor, gpuData.versionMinor);
}
else if (gpuData.validity == GpuValidity::UnknownDevice)
{
InvalidArgument("CNTK: Unknown GPU with Device ID %d.", gpuData.deviceId);
}
}
#endif
// special temporary function to guard against a now invalid usage of "truncated" which exists in some IPG production setups
static void DisableLegacyTruncationSettings(const ConfigParameters& TopLevelConfig, const ConfigParameters& commandConfig)
{
@ -373,6 +391,30 @@ void PrintUsageInfo()
LOGPRINTF(stderr, "-------------------------------------------------------------------\n");
}
// Prints one line of info per visible GPU device to stderr
// (e.g. Device[0]: cores = 2496; computeCapability = 5.2; type = "Quadro M4000"; memory = 8192 MB).
// Compiles to a no-op in CPUONLY builds.
void PrintGpuInfo()
{
#ifndef CPUONLY
std::vector<GpuData> gpusData = GetAllGpusData();
if (gpusData.empty())
{
LOGPRINTF(stderr, "No GPUs found\n");
return;
}
LOGPRINTF(stderr, "-------------------------------------------------------------------\n");
LOGPRINTF(stderr, "GPU info:\n\n");
// NOTE(review): 'data' is only read here; could be 'const GpuData&'.
for (GpuData& data : gpusData)
{
LOGPRINTF(stderr, "\t\tDevice[%d]: cores = %d; computeCapability = %d.%d; type = \"%s\"; memory = %lu MB\n",
data.deviceId, data.cudaCores, data.versionMajor, data.versionMinor, data.name.c_str(), data.totalMemory);
}
LOGPRINTF(stderr, "-------------------------------------------------------------------\n");
#endif
}
// ---------------------------------------------------------------------------
// main() for use with BrainScript
// ---------------------------------------------------------------------------
@ -464,6 +506,21 @@ int wmainWithBS(int argc, wchar_t* argv[]) // called from wmain which is a wrapp
let valp = BS::Evaluate(expr); // evaluate parse into a dictionary
let& config = valp.AsRef<ScriptableObjects::IConfigRecord>(); // this is the dictionary
#ifndef CPUONLY
auto valpp = config.Find(L"deviceId");
if (valpp)
{
auto valp = *valpp;
if (!valp.Is<ScriptableObjects::String>()) // if it's not string 'auto' or 'cpu', then it's a gpu
{
if (static_cast<int>(valp) >= 0) // gpu (id >= 0)
{
CheckSupportForGpu(valp); // throws if gpu is not supported
}
}
}
#endif
// legacy parameters that have changed spelling
if (config.Find(L"DoneFile")) // variables follow camel case (start with lower-case letters)
InvalidArgument("Legacy spelling of 'DoneFile' no longer allowed. Use 'doneFile'.");
@ -485,6 +542,10 @@ int wmainWithBS(int argc, wchar_t* argv[]) // called from wmain which is a wrapp
TracingGPUMemoryAllocator::SetTraceLevel(config(L"traceGPUMemoryAllocations", 0));
bool synchronizeCUDAKernelExecutions = config(L"synchronizeCUDAKernelExecutions", false);
if (synchronizeCUDAKernelExecutions)
SyncGuard::EnableSync();
// logging
wstring logpath = config(L"stderr", L"");
if (logpath != L"")
@ -502,6 +563,9 @@ int wmainWithBS(int argc, wchar_t* argv[]) // called from wmain which is a wrapp
// echo config info to log
PrintBuiltInfo();
// echo gpu info to log
PrintGpuInfo();
// execute the actions
// std::string type = config(L"precision", "float");
int numCPUThreads = config(L"numCPUThreads", 0);
@ -559,6 +623,18 @@ int wmainOldCNTKConfig(int argc, wchar_t* argv[])
{
ConfigParameters config;
std::string rawConfigString = ConfigParameters::ParseCommandLine(argc, argv, config); // get the command param set they want
#ifndef CPUONLY
ConfigValue val = config("deviceId", "auto");
if (!EqualCI(val, "cpu") && !EqualCI(val, "auto"))
{
if (static_cast<int>(val) >= 0) // gpu (id >= 0)
{
CheckSupportForGpu(static_cast<int>(val)); // throws if gpu is not supported
}
}
#endif
bool timestamping = config(L"timestamping", false);
if (timestamping)
{
@ -602,6 +678,8 @@ int wmainOldCNTKConfig(int argc, wchar_t* argv[])
}
PrintBuiltInfo(); // this one goes to log file
PrintGpuInfo();
std::string timestamp = TimeDateStamp();
// dump config info

Просмотреть файл

@ -144,6 +144,7 @@
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\Common\CrossProcessMutex.h" />
<ClInclude Include="..\Common\Include\basetypes.h" />
<ClInclude Include="..\Common\Include\Basics.h" />
<ClInclude Include="..\Common\Include\BestGpu.h" />
<ClInclude Include="..\Common\Include\DataReader.h" />
@ -222,4 +223,4 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets" />
</Project>
</Project>

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -47,6 +47,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template <typename ElementType>
class ComputationNode;
class File;
}}}
// TODO: The following should be reconciled with the equivalent code in the CNTK implementation
@ -135,352 +137,30 @@ namespace CNTK
// Forward declarations
class CompositeFunction;
class Function;
class Variable;
namespace Internal
// Similar to make_shared except that it associates a custom deleter with the shared_ptr to ensure
// that objects are deleted on the same side of the library DLL where they are allocated
template <typename T, typename ...CtorArgTypes>
inline std::shared_ptr<T> MakeSharedObject(CtorArgTypes&& ...ctorArgs)
{
// A reference count to be used as the base class for all reference counted types.
class CNTK_API ReferenceCount
{
public:
ReferenceCount();
virtual ~ReferenceCount();
size_t AddReference();
size_t RemoveReference();
size_t GetReferenceCount();
private:
std::atomic<size_t>* m_rc;
};
// A smart pointer to a reference counted object
// T must be a type derived from ReferenceCount
template <class T>
class CNTK_API ReferenceCountedPtr final
{
typedef void(*ReferenceCountedObjectDeleter)(ReferenceCount* obj);
public:
ReferenceCountedPtr(T* ptr = nullptr, ReferenceCountedObjectDeleter deleter = nullptr) : m_objPtr(ptr), m_deleter(deleter)
{
AddReferenceIfNeeded();
}
ReferenceCountedPtr(const ReferenceCountedPtr& other) : m_objPtr(nullptr), m_deleter(nullptr)
{
*this = other;
}
ReferenceCountedPtr(ReferenceCountedPtr&& other) : m_objPtr(nullptr), m_deleter(nullptr)
{
*this = std::move(other);
}
~ReferenceCountedPtr()
{
DeleteReferenceIfNeeded(m_objPtr, m_deleter);
}
ReferenceCountedPtr& operator=(const ReferenceCountedPtr& other)
{
if (this != &other)
{
T* oldPtr = m_objPtr;
ReferenceCountedObjectDeleter oldDeleter = m_deleter;
m_objPtr = other.m_objPtr;
m_deleter = other.m_deleter;
AddReferenceIfNeeded();
DeleteReferenceIfNeeded(oldPtr, oldDeleter);
}
return *this;
}
ReferenceCountedPtr& operator=(ReferenceCountedPtr&& other)
{
assert(this != &other);
T* oldPtr = m_objPtr;
ReferenceCountedObjectDeleter oldDeleter = m_deleter;
m_objPtr = other.m_objPtr;
m_deleter = other.m_deleter;
// No change to ref-count of the adopted pointer.
other.m_objPtr = nullptr;
other.m_deleter = nullptr;
DeleteReferenceIfNeeded(oldPtr, oldDeleter);
return *this;
}
// Conversion to a ReferenceCountedSharedPtr instance of a base type
template <typename Base, typename std::enable_if<std::is_base_of<Base, T>::value>::type* = nullptr>
operator ReferenceCountedPtr<Base>()
{
return ReferenceCountedPtr<Base>(m_objPtr, m_deleter);
}
T* operator->() const
{
return m_objPtr;
}
T& operator*() const
{
return *m_objPtr;
}
operator T*() const
{
return m_objPtr;
}
T* GetPtr() const
{
return m_objPtr;
}
private:
void AddReferenceIfNeeded()
{
static_assert(std::is_base_of<ReferenceCount, T>::value, "ReferenceCountedPtr<T> can only be used when ReferenceCount is a base type of T!");
if (m_objPtr != nullptr)
reinterpret_cast<ReferenceCount*>(m_objPtr)->AddReference();
}
static void DeleteReferenceIfNeeded(T* objPtr, ReferenceCountedObjectDeleter deleter)
{
static_assert(std::is_base_of<ReferenceCount, T>::value, "ReferenceCountedPtr<T> can only be used when ReferenceCount is a base type of T!");
if (objPtr != nullptr)
{
size_t refCountRemaining = reinterpret_cast<ReferenceCount*>(objPtr)->RemoveReference();
if (refCountRemaining == 0)
{
if (deleter != nullptr)
deleter(reinterpret_cast<ReferenceCount*>(objPtr));
else
delete objPtr;
}
}
}
private:
T* m_objPtr;
ReferenceCountedObjectDeleter m_deleter;
};
template <typename T>
bool operator==(const ReferenceCountedPtr<T>& first, const ReferenceCountedPtr<T>& second)
{
return first.GetPtr() == second.GetPtr();
}
// A wrapper around the STL vector implementation with a safe ABI to allow usage across the library DLL boundary
// as STL vectors cannot be used across the DLL boundary
template <typename T>
class CNTK_API SimpleVector final
{
template <typename ValueType>
friend CNTK_API bool operator==(const SimpleVector<ValueType>& first, const SimpleVector<ValueType>& second);
friend class CNTK::Function;
public:
SimpleVector();
template <typename ContainerType, typename std::enable_if<std::is_same<ContainerType, std::vector<T>>::value ||
std::is_same<ContainerType, std::initializer_list<T>>::value ||
std::is_same<ContainerType, std::array<T, sizeof(ContainerType) / sizeof(T)>>::value>::type* = nullptr>
SimpleVector(const ContainerType& initList)
: SimpleVector(initList.size())
{
std::copy(initList.begin(), initList.end(), Data());
}
SimpleVector(size_t numElements, const T& initVal = T());
~SimpleVector();
SimpleVector(const SimpleVector& other);
SimpleVector& operator=(const SimpleVector& other);
SimpleVector(SimpleVector&& other);
SimpleVector& operator=(SimpleVector&& other);
T& operator[](size_t idx);
const T& operator[](size_t idx) const;
size_t Size() const;
T* Data();
const T* Data() const;
void PushBack(const T& value);
void PushBack(T&& value);
operator std::vector<T>() const
{
std::vector<T> retVector(Size());
for (size_t i = 0; i < Size(); ++i)
retVector[i] = this->operator[](i);
return retVector;
}
std::unordered_set<T> GetAsUnorderedSet(bool ensureUnique = true)
{
std::unordered_set<T> retSet;
for (size_t i = 0; i < Size(); ++i)
{
auto insertRet = retSet.insert(this->operator[](i));
if (ensureUnique && !insertRet.second)
RuntimeError("A SimpleVector with duplicate elements cannot be converted to an unordered_set");
}
return retSet;
}
private:
std::vector<T>* m_vector;
};
template <typename ValueType>
CNTK_API bool operator==(const SimpleVector<ValueType>& first, const SimpleVector<ValueType>& second);
template <typename ValueType>
bool operator!=(const SimpleVector<ValueType>& first, const SimpleVector<ValueType>& second)
{
return !(first == second);
}
// A wrapper around the STL set implementation with a safe ABI to allow usage across the library DLL boundary
// as STL sets cannot be used across the DLL boundary
template <typename KeyType>
class CNTK_API SimpleSet final
{
friend class CNTK::CompositeFunction;
template <typename T>
friend CNTK_API bool operator==(const SimpleSet<T>& first, const SimpleSet<T>& second);
public:
SimpleSet();
~SimpleSet();
SimpleSet(const SimpleSet& other);
SimpleSet& operator=(const SimpleSet& other);
SimpleSet(SimpleSet&& other);
SimpleSet& operator=(SimpleSet&& other);
bool Insert(const KeyType& key);
bool Contains(const KeyType& key) const;
size_t Size() const;
operator SimpleVector<KeyType>() const;
operator std::unordered_set<KeyType>() const
{
return ((SimpleVector<KeyType>)(*this)).GetAsUnorderedSet();
}
static SimpleSet<KeyType> CreateSimpleSet(const std::unordered_set<KeyType>& initSet)
{
SimpleSet<KeyType> simpleSet;
for (auto key : initSet)
simpleSet.Insert(key);
return simpleSet;
}
private:
std::unordered_set<KeyType>* m_set;
};
template <typename KeyType>
CNTK_API bool operator==(const SimpleSet<KeyType>& first, const SimpleSet<KeyType>& second);
template <typename KeyType>
bool operator!=(const SimpleSet<KeyType>& first, const SimpleSet<KeyType>& second)
{
return !(first == second);
}
// A wrapper aroound the STL map implementation with a safe ABI to allow usage across the library DLL boundary
// as STL maps cannot be used across the DLL boundary
template <typename KeyType, typename ValueType>
class CNTK_API SimpleMap final
{
friend class CNTK::CompositeFunction;
friend class CNTK::Function;
public:
SimpleMap();
~SimpleMap();
SimpleMap(const SimpleMap& other);
SimpleMap& operator=(const SimpleMap& other);
SimpleMap(SimpleMap&& other);
SimpleMap& operator=(SimpleMap&& other);
ValueType& operator[](const KeyType& key);
const ValueType& operator[](const KeyType& key) const;
bool Insert(const KeyType& key, const ValueType& value);
bool Contains(const KeyType& key) const;
size_t Size() const;
SimpleSet<KeyType> Keys() const;
static SimpleMap<KeyType, ValueType> CreateSimpleMap(const std::unordered_map<KeyType, ValueType>& initMap)
{
SimpleMap<KeyType, ValueType> simpleMap;
for (auto keyValuePair : initMap)
simpleMap.Insert(keyValuePair.first, keyValuePair.second);
return simpleMap;
}
private:
std::unordered_map<KeyType, ValueType>* m_map;
};
auto objPtr = new T(std::forward<CtorArgTypes>(ctorArgs)...);
return std::shared_ptr<T>(objPtr, [](T* ptr) { delete ptr; });
}
// Forward declarations
class NDArrayView;
typedef Internal::ReferenceCountedPtr<NDArrayView> NDArrayViewPtr;
typedef std::shared_ptr<NDArrayView> NDArrayViewPtr;
class NDMask;
typedef Internal::ReferenceCountedPtr<NDMask> NDMaskPtr;
typedef std::shared_ptr<NDMask> NDMaskPtr;
class Value;
typedef Internal::ReferenceCountedPtr<Value> ValuePtr;
typedef std::shared_ptr<Value> ValuePtr;
class Function;
typedef Internal::ReferenceCountedPtr<Function> FunctionPtr;
typedef std::shared_ptr<Function> FunctionPtr;
namespace Internal
{
CNTK_API FunctionPtr Combine(const Internal::SimpleVector<FunctionPtr>& operands, const std::wstring& name = L"");
}
}
namespace std {
template <typename T>
struct hash<CNTK::Internal::ReferenceCountedPtr<T>>
{
size_t operator()(const CNTK::Internal::ReferenceCountedPtr<T>& x) const
{
return std::hash<const void*>()(x.GetPtr());
}
};
class Learner;
typedef std::shared_ptr<Learner> LearnerPtr;
}

Просмотреть файл

@ -128,6 +128,7 @@
<ClInclude Include="API\CNTKLibrary.h" />
<ClInclude Include="API\CNTKLibraryInternals.h" />
<ClInclude Include="Function.h" />
<ClInclude Include="Learner.h" />
<ClInclude Include="Utils.h" />
<ClInclude Include="stdafx.h" />
<ClInclude Include="targetver.h" />
@ -140,6 +141,7 @@
</PrecompiledHeader>
</ClCompile>
<ClCompile Include="Function.cpp" />
<ClCompile Include="Learner.cpp" />
<ClCompile Include="NDArrayView.cpp" />
<ClCompile Include="NDMask.cpp" />
<ClCompile Include="stdafx.cpp">

Просмотреть файл

@ -10,6 +10,7 @@
<ClCompile Include="Variable.cpp" />
<ClCompile Include="Utils.cpp" />
<ClCompile Include="NDMask.cpp" />
<ClCompile Include="Learner.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="stdafx.h" />
@ -22,6 +23,7 @@
<Filter>API</Filter>
</ClInclude>
<ClInclude Include="Function.h" />
<ClInclude Include="Learner.h" />
</ItemGroup>
<ItemGroup>
<Filter Include="API">

Просмотреть файл

@ -14,7 +14,21 @@ namespace CNTK
return GPUDevice(0);
}
/*static*/ Axis Axis::DefaultDynamicAxis = Axis(L"defaultDynamicAxis");
/*static*/ Axis Axis::BatchAxis = Axis(L"batchAxis");
/*static*/ Axis Axis::AllAxes = Axis(L"allAxes");
// The well-known axes are exposed as accessors over function-local statics:
// each Axis object is constructed on first use (rather than as a static data
// member), presumably to avoid static-initialization-order issues across
// translation units / the DLL boundary -- confirm against the library design.
/*static*/ const Axis& Axis::DefaultDynamicAxis()
{
static Axis s_defaultDynamicAxis(L"defaultDynamicAxis");
return s_defaultDynamicAxis;
}
// The implicit batch axis.
/*static*/ const Axis& Axis::BatchAxis()
{
static Axis s_batchAxis(L"batchAxis");
return s_batchAxis;
}
// Sentinel axis meaning "all axes".
/*static*/ const Axis& Axis::AllAxes()
{
static Axis s_allAxes(L"allAxes");
return s_allAxes;
}
}

Просмотреть файл

@ -17,34 +17,82 @@ bool g_shareNodeValueMatrices = true;
namespace CNTK
{
Internal::SimpleVector<Variable> Function::InputsImpl() const
std::shared_ptr<std::vector<Variable>> Function::InputsImpl() const
{
const CompositeFunction* compositeFunction = dynamic_cast<const CompositeFunction*>(this);
std::vector<Variable> inputs;
if (compositeFunction == nullptr)
return m_inputs;
inputs = m_inputs;
else
return Internal::SimpleVector<Variable>(compositeFunction->DetermineInputs());
inputs = compositeFunction->DetermineInputs();
return std::shared_ptr<std::vector<Variable>>(new std::vector<Variable>(std::move(inputs)), [](std::vector<Variable>* ptr) { delete ptr; });
}
/*virtual*/ void Function::_ReplacePlaceholders(const Internal::SimpleMap<Placeholder, Variable>& placeholderReplacements,
Internal::SimpleSet<const Function*>& visitedFunctions,
Internal::SimpleSet<Placeholder>& replacedPlaceholders)
FunctionPtr Function::ReplacePlaceholders(const std::unordered_map<Placeholder, Variable>& placeholderReplacements)
{
visitedFunctions.Insert(this);
// Cannot be called on primitive functions
if (RootFunction() == nullptr)
InvalidArgument("ReplacePlaceholders should never be called on primitive functions");
for (auto& inputVar : *(m_inputs.m_vector))
std::unordered_set<const Function*> visitedFunctions;
std::unordered_set<Placeholder> replacedPlaceholders;
ReplacePlaceholders(placeholderReplacements, visitedFunctions, replacedPlaceholders);
for (auto replacementPair : placeholderReplacements)
{
if (replacedPlaceholders.find(replacementPair.first) == replacedPlaceholders.end())
InvalidArgument("At least one of the placeholders specified for replacement was not found in the function");
}
return this->shared_from_this();
}
// Placeholders can be replaced incrementally - i.e. not all placeholders need to replaced in one go.
// The only requirement is that they must all be replaced before making any 'Forward' calls on the Function instance.
/*virtual*/ void Function::ReplacePlaceholders(const std::unordered_map<Placeholder, Variable>& placeholderReplacements,
std::unordered_set<const Function*>& visitedFunctions,
std::unordered_set<Placeholder>& replacedPlaceholders)
{
visitedFunctions.insert(this);
for (auto& inputVar : m_inputs)
{
if (inputVar.IsPlaceholder())
{
Placeholder placeholder(inputVar);
if (placeholderReplacements.Contains(placeholder))
if (placeholderReplacements.find(placeholder) != placeholderReplacements.end())
{
inputVar = placeholderReplacements[placeholder];
replacedPlaceholders.Insert(placeholder);
inputVar = placeholderReplacements.at(placeholder);
replacedPlaceholders.insert(placeholder);
}
}
else if (inputVar.IsOutput() && !visitedFunctions.Contains(inputVar.Owner()))
inputVar.Owner()->_ReplacePlaceholders(placeholderReplacements, visitedFunctions, replacedPlaceholders);
else if (inputVar.IsOutput() && (visitedFunctions.find(inputVar.Owner().get()) == visitedFunctions.end()))
inputVar.Owner()->ReplacePlaceholders(placeholderReplacements, visitedFunctions, replacedPlaceholders);
}
}
// Replace any PlaceHolder Variables in the graph of Functions underlying 'this' CompositeFunction. All PlaceHolder variables
// should have been replaced before performing any Forward compute of 'this' Function.
/*virtual*/ void CompositeFunction::ReplacePlaceholders(const std::unordered_map<Placeholder, Variable>& placeholderReplacements,
std::unordered_set<const Function*>& visitedFunctions,
std::unordered_set<Placeholder>& replacedPlaceholders)
{
RootFunction()->ReplacePlaceholders(placeholderReplacements, visitedFunctions, replacedPlaceholders);
// If any of the placeholders were replaced with Output variables, let's add the graph of function underneath each of those to 'm_allPrimitiveFunctions' set
for (auto replacedPlaceholder : replacedPlaceholders)
{
auto replacingVariable = placeholderReplacements.at(replacedPlaceholder);
if (replacingVariable.IsOutput())
{
auto ownerFunc = replacingVariable.Owner();
std::unordered_set<FunctionPtr> visitedFunctions;
DetermineInputs(ownerFunc, visitedFunctions);
// Add the newly visited functions to 'm_allPrimitiveFunctions' set
m_allPrimitiveFunctions.insert(visitedFunctions.begin(), visitedFunctions.end());
}
}
}
@ -79,7 +127,7 @@ namespace CNTK
else if (variable.IsInput())
{
// TODO: Specify dynamic axis
if (variable.IsSparseInput())
if (IsSparseInput(variable))
computationNodePtr = builder.CreateSparseInputNode(variable.Name(), AsTensorShape(variable.Shape()));
else
computationNodePtr = builder.CreateInputNode(variable.Name(), AsTensorShape(variable.Shape()));
@ -111,7 +159,7 @@ namespace CNTK
{
assert(variable.IsOutput());
Function* function = variable.Owner();
Function* function = variable.Owner().get();
ComputationNodeBasePtr computationNodePtr;
if (dynamic_cast<PrimitiveFunction*>(function))
{
@ -222,14 +270,14 @@ namespace CNTK
}
template <typename ElementType>
ComputationNetworkPtr CompositeFunction::GetComputationNetwork(const DeviceDescriptor& device, const Internal::SimpleSet<Variable>& backpropRoots)
ComputationNetworkPtr CompositeFunction::GetComputationNetwork(const DeviceDescriptor& device, const std::unordered_set<Variable>& backpropRoots)
{
if (m_computationNetwork != nullptr)
{
// TODO: We should either invalidate and readapt the network if he backpropRoots change compared to what was specified when the network
// was last constructed, to just recreate a new network.
// For now just disallow changing the backpropRoots after the network is created
if (m_currentBackpropRoots != *backpropRoots.m_set)
if (m_currentBackpropRoots != backpropRoots)
LogicError("Changing backprop roots across different Forward calls on a CNTK composite Function is currently unsupported");
// TODO: Support changing the device across different invocations of the forward method on a Function instance
@ -244,7 +292,7 @@ namespace CNTK
ComputationNetworkBuilder<ElementType> builder(*m_computationNetwork);
// TODO: We current only support one backprop root
if (backpropRoots.Size() > 1)
if (backpropRoots.size() > 1)
LogicError("More than one backprop roots is currently unsupported");
ComputationNodeBasePtr backpropRootNode;
@ -258,7 +306,7 @@ namespace CNTK
auto currentRootNode = GetNode(rootOutput, m_computationNetwork, builder, m_variableToNodeMap, m_isVariableRootMap);
forwardRootNodes.push_back(currentRootNode);
if (backpropRoots.Contains(rootOutput))
if (backpropRoots.find(rootOutput) != backpropRoots.end())
backpropRootNode = m_variableToNodeMap[rootOutput];
}
@ -281,7 +329,7 @@ namespace CNTK
if (std::find(currentComputationNodeInputs.begin(), currentComputationNodeInputs.end(), nullptr) != currentComputationNodeInputs.end())
{
// We found a null input; this variable must correspond to a PastValue or FutureValue function
const PrimitiveFunction* primitiveFunc = dynamic_cast<const PrimitiveFunction*>(varNodePair.first.Owner().GetPtr());
const PrimitiveFunction* primitiveFunc = dynamic_cast<const PrimitiveFunction*>(varNodePair.first.Owner().get());
if ((primitiveFunc == nullptr) || ((primitiveFunc->OpType() != PrimitiveOpType::PastValue) && (primitiveFunc->OpType() != PrimitiveOpType::FutureValue)))
InvalidArgument("Invalid Function graph detected; recurrence found at a Function that is not a PastValue/FutureValue function");
@ -326,10 +374,10 @@ namespace CNTK
LogicError("The specified ElementType %s does not match the DataType %s", typeid(ElementType).name(), DataTypeName(value->Data()->GetDataType()));
// TODO: Is supplying dense data for an Input variable tagged as sparse, a fatal error?
if (var.IsSparseInput() && !value->Data()->IsSparse())
if (IsSparseInput(var) && !value->Data()->IsSparse())
InvalidArgument("Dense input data supplied for a sparse input Variable");
if (var.IsSparseInput() && (value->Data()->GetStorageFormat() != StorageFormat::SparseCSC))
if (IsSparseInput(var) && (value->Data()->GetStorageFormat() != StorageFormat::SparseCSC))
InvalidArgument("Sparse Input data must be in SparseCSC format");
if (value->Data()->Shape().NumAxes() == var.Shape().NumAxes())
@ -413,7 +461,7 @@ namespace CNTK
layout->GetNumCols(),
AsCNTKImplDeviceId(value->Data()->Device()),
value->Data()->IsSparse() ? MatrixType::SPARSE : MatrixType::DENSE,
AsCNTKMatrixFormat(value->Data()->GetStorageFormat()));
AsCNTKImplMatrixFormat(value->Data()->GetStorageFormat()));
std::vector<size_t> sequencesShorterThanLongestSequence;
for (size_t i = 0; i < numSequences; ++i)
@ -458,8 +506,8 @@ namespace CNTK
{
// Just create a view over the existing matrix itself
auto tensorView = new TensorView<ElementType>(std::make_shared<Matrix<ElementType>>(matrix.AsReference()), AsTensorShape(valueDataShape));
auto data = NDArrayViewPtr(new NDArrayView(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(matrix.GetFormat()), valueDataShape, true, tensorView), [](ReferenceCount* ptr) { delete ptr; });
return ValuePtr(new Value(data), [](ReferenceCount* ptr) { delete ptr; });
auto data = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(matrix.GetFormat()), valueDataShape, true, tensorView);
return MakeSharedObject<Value>(data);
}
if (layout->GetNumCols() != matrix.GetNumCols())
@ -509,7 +557,7 @@ namespace CNTK
NDMaskPtr mask;
if (!sequencesShorterThanLongestSequence.empty())
{
mask = NDMaskPtr(new NDMask({ maxNumTimeSteps, numSequences }, AsDeviceDescriptor(matrix.GetDeviceId())), [](ReferenceCount* ptr) { delete ptr; });
mask = MakeSharedObject<NDMask>(NDShape({ maxNumTimeSteps, numSequences }), AsDeviceDescriptor(matrix.GetDeviceId()));
for (auto shortSequenceIdx : sequencesShorterThanLongestSequence)
{
mask->MaskSection({ sequenceLengths[shortSequenceIdx], shortSequenceIdx }, { NDShape::InferredDimension, 1 });
@ -517,97 +565,89 @@ namespace CNTK
}
auto tensorView = new TensorView<ElementType>(shuffledMatrixData, AsTensorShape(valueDataShape));
auto data = NDArrayViewPtr(new NDArrayView(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), StorageFormat::Dense, valueDataShape, true, tensorView), [](ReferenceCount* ptr) { delete ptr; });
return ValuePtr(new Value(data, mask), [](ReferenceCount* ptr) { delete ptr; });
auto data = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), StorageFormat::Dense, valueDataShape, true, tensorView);
return MakeSharedObject<Value>(data, mask);
}
void CompositeFunction::PopulateNetworkInputs(const Internal::SimpleMap<Variable, const ValuePtr>& arguments)
template <typename ElementType>
/*static*/ void CompositeFunction::PopulateComputationNodeValue(const std::pair<Variable, ValuePtr>& variableValue, ComputationNodeBasePtr& computationNode)
{
auto CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject<ElementType>(variableValue.first, variableValue.second);
MBLayoutPtr layout = CNTKMatrixAndMBLayout.second;
auto& nodeData = computationNode->As<ComputationNode<ElementType>>()->Value();
// Switch the node matrix to the right matrix type
nodeData.SwitchToMatrixType(CNTKMatrixAndMBLayout.first->GetMatrixType(), CNTKMatrixAndMBLayout.first->GetFormat(), false);
nodeData.AssignValuesOf(*CNTKMatrixAndMBLayout.first);
computationNode->GetMBLayout()->CopyFrom(layout);
}
void CompositeFunction::PopulateNetworkInputs(const std::unordered_map<Variable, const ValuePtr>& arguments)
{
auto functionArguments = this->Arguments();
std::vector<ComputationNodeBasePtr> inputNodes;
for (auto argument : functionArguments)
{
// Ensure we have values for all arguments of the function
if (!arguments.Contains(argument))
if (arguments.find(argument) == arguments.end())
InvalidArgument("Value not specified for required Function Argument");
auto argumentComputationNode = m_variableToNodeMap[argument];
inputNodes.push_back(argumentComputationNode);
ValuePtr argumentValue = arguments[argument];
ValuePtr argumentValue = arguments.at(argument);
MBLayoutPtr layout;
switch (argumentValue->Data()->GetDataType())
{
case DataType::Float:
{
auto CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject<float>(argument, argumentValue);
layout = CNTKMatrixAndMBLayout.second;
auto& nodeData = argumentComputationNode->As<ComputationNode<float>>()->Value();
// Switch the node matrix to the right matrix type
nodeData.SwitchToMatrixType(CNTKMatrixAndMBLayout.first->GetMatrixType(), CNTKMatrixAndMBLayout.first->GetFormat(), false);
nodeData.AssignValuesOf(*CNTKMatrixAndMBLayout.first);
PopulateComputationNodeValue<float>({ argument, argumentValue }, argumentComputationNode);
break;
}
case DataType::Double:
{
auto CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject<double>(argument, argumentValue);
layout = CNTKMatrixAndMBLayout.second;
auto& nodeData = argumentComputationNode->As<ComputationNode<double>>()->Value();
// Switch the node matrix to the right matrix type
nodeData.SwitchToMatrixType(CNTKMatrixAndMBLayout.first->GetMatrixType(), CNTKMatrixAndMBLayout.first->GetFormat(), false);
nodeData.AssignValuesOf(*CNTKMatrixAndMBLayout.first);
PopulateComputationNodeValue<double>({ argument, argumentValue }, argumentComputationNode);
break;
}
default:
LogicError("Unsupported DataType %s", DataTypeName(argumentValue->Data()->GetDataType()));
break;
}
argumentComputationNode->GetMBLayout()->CopyFrom(layout);
}
m_computationNetwork->BumpEvalTimeStamp(inputNodes);
}
void CompositeFunction::PopulateNetworkGradients(const Internal::SimpleMap<Variable, const ValuePtr>& gradients)
template <typename ElementType>
/*static*/ void CompositeFunction::PopulateComputationNodeGradient(const std::pair<Variable, ValuePtr>& variableGradient, Microsoft::MSR::CNTK::ComputationNodeBasePtr& computationNode)
{
auto CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject<ElementType>(variableGradient.first, variableGradient.second);
MBLayoutPtr layout = CNTKMatrixAndMBLayout.second;
auto nodeLayout = computationNode->GetMBLayout();
if (((layout == nullptr) != (nodeLayout == nullptr)) || ((layout != nullptr) && (*layout != *nodeLayout)))
InvalidArgument("The layout of the specified gradient Value in incompatible with the layout of the corresponding Variable computed during Forward call");
computationNode->As<ComputationNode<ElementType>>()->AssignGradient(*CNTKMatrixAndMBLayout.first);
}
// Assign the supplied gradients corresponding to the root(s) of the network to be backpropagated through the graph
void CompositeFunction::PopulateNetworkGradients(const std::unordered_map<Variable, const ValuePtr>& gradients)
{
auto functionOutputs = this->Outputs();
std::unordered_map<Variable, const ValuePtr>& gradientsValueMap = *gradients.m_map;
for (auto gradientVarValuePair : gradientsValueMap)
for (auto gradientVarValuePair : gradients)
{
// Only gradients for roots of the function can be specified
if (std::find(functionOutputs.begin(), functionOutputs.end(), gradientVarValuePair.first) == functionOutputs.end())
InvalidArgument("Gradients cannot be specified for a Variable that is not an Output of the Function");
auto outputComputationNode = m_variableToNodeMap[gradientVarValuePair.first];
auto nodeLayout = outputComputationNode->GetMBLayout();
ValuePtr gradientValue = gradientVarValuePair.second;
MBLayoutPtr layout;
switch (gradientValue->Data()->GetDataType())
{
case DataType::Float:
{
auto CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject<float>(gradientVarValuePair.first, gradientValue);
layout = CNTKMatrixAndMBLayout.second;
if (((layout == nullptr) != (nodeLayout == nullptr)) || ((layout != nullptr) && (*layout != *nodeLayout)))
InvalidArgument("The layout of the specified gradient Value in incompatible with the layout of the corresponding Variable computed during Forward call");
outputComputationNode->As<ComputationNode<float>>()->AssignGradient(*CNTKMatrixAndMBLayout.first);
PopulateComputationNodeGradient<float>(gradientVarValuePair, outputComputationNode);
break;
}
case DataType::Double:
{
auto CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject<double>(gradientVarValuePair.first, gradientValue);
layout = CNTKMatrixAndMBLayout.second;
if (((layout == nullptr) != (nodeLayout == nullptr)) || ((layout != nullptr) && (*layout != *nodeLayout)))
InvalidArgument("The layout of the specified gradient Value in incompatible with the layout of the corresponding Variable computed during Forward call");
outputComputationNode->As<ComputationNode<double>>()->AssignGradient(*CNTKMatrixAndMBLayout.first);
PopulateComputationNodeGradient<double>(gradientVarValuePair, outputComputationNode);
break;
}
default:
LogicError("Unsupported DataType %s", DataTypeName(gradientValue->Data()->GetDataType()));
break;
@ -618,6 +658,8 @@ namespace CNTK
static NDShape GetValueShape(const Variable& var, const ComputationNodeBasePtr& computationNodePtr)
{
size_t outputValueNumAxes = var.Shape().NumAxes();
// Add the batch and dynamic axes if needed
if (computationNodePtr->GetMBLayout() != nullptr)
outputValueNumAxes += 2;
@ -650,37 +692,27 @@ namespace CNTK
InvalidArgument("The shape %s of the specified Value object for output does not match the actual output shape %s", AsString(outputValuePtr->Data()->Shape()).c_str(), AsString(outputShape).c_str());
}
ValuePtr nodeValue;
switch (outputVarValuePair.first.GetDataType())
{
case DataType::Float:
{
auto nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<float>(outputVarValuePair.first, computationNodePtr->As<ComputationNode<float>>()->Value(), computationNodePtr->GetMBLayout());
if (outputValuePtr == nullptr)
{
auto data = NDArrayViewPtr(new NDArrayView(outputVarValuePair.first.GetDataType(), outputShape, AsDeviceDescriptor(computationNodePtr->ValuePtr()->GetDeviceId())), [](ReferenceCount* ptr) { delete ptr; });
auto mask = (nodeValue->Mask() != nullptr) ? NDMaskPtr(new NDMask(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()), [](ReferenceCount* ptr) { delete ptr; }) : nullptr;
outputValuePtr = ValuePtr(new Value(data, mask), [](ReferenceCount* ptr) { delete ptr; });
}
outputValuePtr->CopyFrom(*nodeValue);
nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<float>(outputVarValuePair.first, computationNodePtr->As<ComputationNode<float>>()->Value(), computationNodePtr->GetMBLayout());
break;
}
case DataType::Double:
{
auto nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<double>(outputVarValuePair.first, computationNodePtr->As<ComputationNode<double>>()->Value(), computationNodePtr->GetMBLayout());
if (outputValuePtr == nullptr)
{
auto data = NDArrayViewPtr(new NDArrayView(outputVarValuePair.first.GetDataType(), outputShape, AsDeviceDescriptor(computationNodePtr->ValuePtr()->GetDeviceId())), [](ReferenceCount* ptr) { delete ptr; });
auto mask = (nodeValue->Mask() != nullptr) ? NDMaskPtr(new NDMask(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()), [](ReferenceCount* ptr) { delete ptr; }) : nullptr;
outputValuePtr = ValuePtr(new Value(data, mask), [](ReferenceCount* ptr) { delete ptr; });
}
outputValuePtr->CopyFrom(*nodeValue);
nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<double>(outputVarValuePair.first, computationNodePtr->As<ComputationNode<double>>()->Value(), computationNodePtr->GetMBLayout());
break;
}
default:
LogicError("Unsupported DataType %s", DataTypeName(outputVarValuePair.first.GetDataType()));
break;
}
if (outputValuePtr == nullptr)
{
auto data = MakeSharedObject<NDArrayView>(outputVarValuePair.first.GetDataType(), outputShape, AsDeviceDescriptor(computationNodePtr->ValuePtr()->GetDeviceId()));
auto mask = (nodeValue->Mask() != nullptr) ? MakeSharedObject<NDMask>(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()) : nullptr;
outputValuePtr = MakeSharedObject<Value>(data, mask);
}
outputValuePtr->CopyFrom(*nodeValue);
outputs[outputVarValuePair.first] = outputValuePtr;
}
}
@ -713,50 +745,40 @@ namespace CNTK
if (!computationNodePtr->NeedsGradient())
LogicError("Backpropagated gradient value cannot be read from a ComputationNode that has NeedsGradient set to false");
ValuePtr nodeValue;
switch (gradientVarValuePair.first.GetDataType())
{
case DataType::Float:
{
auto nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<float>(gradientVarValuePair.first, computationNodePtr->As<ComputationNode<float>>()->Gradient(), computationNodePtr->GetMBLayout());
if (gradientValuePtr == nullptr)
{
auto data = NDArrayViewPtr(new NDArrayView(gradientVarValuePair.first.GetDataType(), gradientShape, AsDeviceDescriptor(computationNodePtr->ValuePtr()->GetDeviceId())), [](ReferenceCount* ptr) { delete ptr; });
auto mask = NDMaskPtr((nodeValue->Mask() != nullptr) ? new NDMask(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()) : nullptr, [](ReferenceCount* ptr) { delete ptr; });
gradientValuePtr = ValuePtr(new Value(data, mask), [](ReferenceCount* ptr) { delete ptr; });
}
gradientValuePtr->CopyFrom(*nodeValue);
nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<float>(gradientVarValuePair.first, computationNodePtr->As<ComputationNode<float>>()->Gradient(), computationNodePtr->GetMBLayout());
break;
}
case DataType::Double:
{
auto nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<double>(gradientVarValuePair.first, computationNodePtr->As<ComputationNode<double>>()->Gradient(), computationNodePtr->GetMBLayout());
if (gradientValuePtr == nullptr)
{
auto data = NDArrayViewPtr(new NDArrayView(gradientVarValuePair.first.GetDataType(), gradientShape, AsDeviceDescriptor(computationNodePtr->ValuePtr()->GetDeviceId())), [](ReferenceCount* ptr) { delete ptr; });
auto mask = NDMaskPtr((nodeValue->Mask() != nullptr) ? new NDMask(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()) : nullptr, [](ReferenceCount* ptr) { delete ptr; });
gradientValuePtr = ValuePtr(new Value(data, mask), [](ReferenceCount* ptr) { delete ptr; });
}
gradientValuePtr->CopyFrom(*nodeValue);
nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<double>(gradientVarValuePair.first, computationNodePtr->As<ComputationNode<double>>()->Gradient(), computationNodePtr->GetMBLayout());
break;
}
default:
LogicError("Unsupported DataType %s", DataTypeName(gradientVarValuePair.first.GetDataType()));
break;
}
if (gradientValuePtr == nullptr)
{
auto data = MakeSharedObject<NDArrayView>(gradientVarValuePair.first.GetDataType(), gradientShape, AsDeviceDescriptor(computationNodePtr->ValuePtr()->GetDeviceId()));
auto mask = (nodeValue->Mask() != nullptr) ? MakeSharedObject<NDMask>(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()) : nullptr;
gradientValuePtr = MakeSharedObject<Value>(data, mask);
}
gradientValuePtr->CopyFrom(*nodeValue);
gradients[gradientVarValuePair.first] = gradientValuePtr;
}
}
/*virtual*/ BackPropStatePtr CompositeFunction::Forward(const Internal::SimpleMap<Variable, const ValuePtr>& arguments,
Internal::SimpleMap<Variable, ValuePtr>& outputs,
const Internal::SimpleSet<Variable>& outputsToRetainBackwardStateFor,
const DeviceDescriptor& computeDevice)
/*virtual*/ BackPropStatePtr CompositeFunction::Forward(const std::unordered_map<Variable, const ValuePtr>& arguments,
std::unordered_map<Variable, ValuePtr>& outputs,
const DeviceDescriptor& computeDevice,
const std::unordered_set<Variable>& outputsToRetainBackwardStateFor)
{
// TODO: How about zero argument functions?
// TODO: We need a better way to determine the ElementType for the network
auto dataType = arguments.m_map->begin()->second->Data()->GetDataType();
auto dataType = arguments.begin()->second->Data()->GetDataType();
if (dataType == DataType::Float)
GetComputationNetwork<float>(computeDevice, outputsToRetainBackwardStateFor);
else
@ -767,10 +789,10 @@ namespace CNTK
// Feed data into the arguments of the network
PopulateNetworkInputs(arguments);
std::unordered_set<Variable> functionOutputs = Internal::SimpleVector<Variable>(this->Outputs()).GetAsUnorderedSet();
std::unordered_set<Variable> functionOutputs(this->Outputs().begin(), this->Outputs().end());
std::vector<ComputationNodeBasePtr> outputsToEvaluate;
for (auto outputVarValuePair : *outputs.m_map)
for (auto outputVarValuePair : outputs)
{
// Ensure that only a subset of this function's outputs are being asked to be evaluated
if (functionOutputs.find(outputVarValuePair.first) == functionOutputs.end())
@ -781,128 +803,105 @@ namespace CNTK
}
// The 'outputsToRetainBackwardStateFor' nodes also need to be evaluated if not already specified in 'outputs'
for (auto rootVarForBackprop : *outputsToRetainBackwardStateFor.m_set)
for (auto rootVarForBackprop : outputsToRetainBackwardStateFor)
{
if (outputs.m_map->find(rootVarForBackprop) == outputs.m_map->end())
if (outputs.find(rootVarForBackprop) == outputs.end())
outputsToEvaluate.push_back(m_variableToNodeMap[rootVarForBackprop]);
}
m_computationNetwork->ForwardProp(outputsToEvaluate);
GetNetworkOutputs(*(outputs.m_map));
GetNetworkOutputs(outputs);
// TODO: How to deal with the specified 'computeDevice'
return (outputsToRetainBackwardStateFor.Size() > 0) ? BackPropStatePtr(new CNTKBackPropState(this, { arguments.m_map->begin()->first, m_variableToNodeMap[arguments.m_map->begin()->first]->GetEvalTimeStamp() }), [](ReferenceCount* ptr) { delete ptr; }) : nullptr;
return (outputsToRetainBackwardStateFor.size() > 0) ? MakeSharedObject<CNTKBackPropState>(this->shared_from_this(), std::make_pair(arguments.begin()->first, m_variableToNodeMap[arguments.begin()->first]->GetEvalTimeStamp())) : nullptr;
}
/*virtual*/ void CompositeFunction::Backward(const BackPropStatePtr& state,
const Internal::SimpleMap<Variable, const ValuePtr>& rootGradientValues,
Internal::SimpleMap<Variable, ValuePtr>& backPropagatedGradientValuesForInputs)
const std::unordered_map<Variable, const ValuePtr>& rootGradientValues,
std::unordered_map<Variable, ValuePtr>& backPropagatedGradientValuesForInputs)
{
if ((state == nullptr) || (dynamic_cast<const CNTKBackPropState*>(state.GetPtr()) == nullptr))
auto backpropState = dynamic_cast<const CNTKBackPropState*>(state.get());
if (backpropState == nullptr)
InvalidArgument("Invalid backprop state specified");
// TODO: Support multiple concurrent backprop states
auto backpropState = dynamic_cast<const CNTKBackPropState*>(state.GetPtr());
if (backpropState->EvalTimeStamp().second != m_variableToNodeMap[backpropState->EvalTimeStamp().first]->GetEvalTimeStamp())
LogicError("The specified backprop state specified cannot be used for backpropagation as the Function's internal state was modified by subsequent Forward calls to the function."
"This is not a user error but a shortcoming of the current implementation where multiple independent backprop states are not simultaneously supported");
if (rootGradientValues.Size() > 1)
if (rootGradientValues.size() > 1)
LogicError("Currently gradient backprop from only one of the Function Outputs is supported");
// TODO: Avoid copying the data when possible
// Zero all gradients of nodes below the root nodes
for (auto rootGradientVarValuePair : *rootGradientValues.m_map)
for (auto rootGradientVarValuePair : rootGradientValues)
m_computationNetwork->ZeroInputGradients(m_variableToNodeMap[rootGradientVarValuePair.first]);
// Feed data into the arguments of the network
PopulateNetworkGradients(rootGradientValues);
// Backpropagate through the network
auto rootComputationNodePtr = m_variableToNodeMap[rootGradientValues.m_map->begin()->first];
auto rootComputationNodePtr = m_variableToNodeMap[rootGradientValues.begin()->first];
m_computationNetwork->GetNestedNetwork(rootComputationNodePtr)->Backprop(FrameRange(nullptr), true, true);
GetNetworkGradients(*(backPropagatedGradientValuesForInputs.m_map));
GetNetworkGradients(backPropagatedGradientValuesForInputs);
// TODO: How to deal with the specified 'computeDevice'
}
/*virtual*/ void CompositeFunction::_ReplacePlaceholders(const Internal::SimpleMap<Placeholder, Variable>& placeholderReplacements, Internal::SimpleSet<const Function*>& visitedFunctions, Internal::SimpleSet<Placeholder>& replacedPlaceholders)
{
RootFunction()->_ReplacePlaceholders(placeholderReplacements, visitedFunctions, replacedPlaceholders);
// If any of the placeholders were replaced with Output variables, let's add the graph of function underneath each of those to 'm_allPrimitiveFunctions' set
for (auto replacedPlaceholder : *replacedPlaceholders.m_set)
{
auto replacingVariable = placeholderReplacements[replacedPlaceholder];
if (replacingVariable.IsOutput())
{
auto ownerFunc = replacingVariable.Owner();
Internal::SimpleSet<FunctionPtr> visitedFunctions;
DetermineInputs(ownerFunc, visitedFunctions);
// Add the newly visited functions to 'm_allPrimitiveFunctions' set
m_allPrimitiveFunctions.m_set->insert(visitedFunctions.m_set->begin(), visitedFunctions.m_set->end());
}
}
}
FunctionPtr Times(const Variable& leftOperand, const Variable& rightOperand, const std::wstring& name/* = L""*/)
{
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::Times, { leftOperand, rightOperand }, Dictionary(), name), name);
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Times, std::vector<Variable>({ leftOperand, rightOperand }), Dictionary(), name), name);
}
FunctionPtr Plus(const Variable& leftOperand, const Variable& rightOperand, const std::wstring& name/* = L""*/)
{
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::Plus, { leftOperand, rightOperand }, Dictionary(), name), name);
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Plus, std::vector<Variable>({ leftOperand, rightOperand }), Dictionary(), name), name);
}
FunctionPtr Sigmoid(const Variable& operand, const std::wstring& name/* = L""*/)
{
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::Sigmoid, { operand }, Dictionary(), name), name);
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Sigmoid, std::vector<Variable>({ operand }), Dictionary(), name), name);
}
FunctionPtr Tanh(const Variable& operand, const std::wstring& name/* = L""*/)
{
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::Tanh, { operand }, Dictionary(), name), name);
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Tanh, std::vector<Variable>({ operand }), Dictionary(), name), name);
}
namespace Internal
FunctionPtr Combine(const std::initializer_list<FunctionPtr>& operands, const std::wstring& name/* = L""*/)
{
FunctionPtr Combine(const Internal::SimpleVector<FunctionPtr>& operands, const std::wstring& name/* = L""*/)
std::unordered_set<FunctionPtr> uniqueOperands;
std::vector<Variable> inputs;
for (auto operand : operands)
{
Internal::SimpleSet<FunctionPtr> uniqueOperands;
std::vector<Variable> inputs;
for (size_t i = 0; i < operands.Size(); ++i)
{
if (uniqueOperands.Contains(operands[i]))
LogicError("All function operands specified to Combine must be unique");
if (uniqueOperands.find(operand) != uniqueOperands.end())
LogicError("All function operands specified to Combine must be unique");
uniqueOperands.Insert(operands[i]);
auto currentFunctionOutputs = operands[i]->Outputs();
std::copy(currentFunctionOutputs.begin(), currentFunctionOutputs.end(), std::back_inserter(inputs));
}
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::Combine, inputs, Dictionary(), name), name);
uniqueOperands.insert(operand);
auto currentFunctionOutputs = operand->Outputs();
std::copy(currentFunctionOutputs.begin(), currentFunctionOutputs.end(), std::back_inserter(inputs));
}
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Combine, inputs, Dictionary(), name), name);
}
FunctionPtr CrossEntropyWithSoftmax(const Variable& output, const Variable& labels, const std::wstring& name/* = L""*/)
{
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::CrossEntropyWithSoftmax, { output, labels }, Dictionary(), name), name);
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::CrossEntropyWithSoftmax, std::vector<Variable>({ output, labels }), Dictionary(), name), name);
}
FunctionPtr ClassificationError(const Variable& prediction, const Variable& labels, const std::wstring& name/* = L""*/)
{
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::ClassificationError, { prediction, labels }, Dictionary(), name), name);
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::ClassificationError, std::vector<Variable>({ prediction, labels }), Dictionary(), name), name);
}
FunctionPtr Exp(const Variable& operand, const std::wstring& name/* = L""*/)
{
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::Exp, { operand }, Dictionary(), name), name);
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Exp, std::vector<Variable>({ operand }), Dictionary(), name), name);
}
FunctionPtr PastValue(const Variable& initialState, const Variable& operand, size_t stepSize, const std::wstring& name/* = L""*/)
@ -912,7 +911,7 @@ namespace CNTK
auto additionalProperties = Dictionary();
additionalProperties[L"stepSize"] = DictionaryValue(stepSize);
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::PastValue, { initialState, operand }, std::move(additionalProperties), name), name);
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::PastValue, std::vector<Variable>({ initialState, operand }), std::move(additionalProperties), name), name);
}
FunctionPtr FutureValue(const Variable& initialState, const Variable& operand, size_t stepSize, const std::wstring& name/* = L""*/)
@ -922,16 +921,16 @@ namespace CNTK
auto additionalProperties = Dictionary();
additionalProperties[L"stepSize"] = DictionaryValue(stepSize);
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::FutureValue, { initialState, operand }, std::move(additionalProperties), name), name);
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::FutureValue, std::vector<Variable>({ initialState, operand }), std::move(additionalProperties), name), name);
}
FunctionPtr ElementTimes(const Variable& leftOperand, const Variable& rightOperand, const std::wstring& name/* = L""*/)
{
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::ElementTimes, { leftOperand, rightOperand }, Dictionary(), name), name);
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::ElementTimes, std::vector<Variable>({ leftOperand, rightOperand }), Dictionary(), name), name);
}
FunctionPtr ReduceSum(const Variable& operand, const std::wstring& name/* = L""*/)
{
return CompositeFunction::Create(new PrimitiveFunction(PrimitiveOpType::ReduceSum, { operand }, Dictionary(), name), name);
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::ReduceSum, std::vector<Variable>({ operand }), Dictionary(), name), name);
}
}

Просмотреть файл

@ -68,17 +68,17 @@ namespace CNTK
{
}
virtual BackPropStatePtr Forward(const Internal::SimpleMap<Variable, const ValuePtr>& /*arguments*/,
Internal::SimpleMap<Variable, ValuePtr>& /*outputs*/,
const Internal::SimpleSet<Variable>& /*outputsToRetainBackwardStateFor*/,
const DeviceDescriptor& /*computeDevice*/) override
virtual BackPropStatePtr Forward(const std::unordered_map<Variable, const ValuePtr>& /*arguments*/,
std::unordered_map<Variable, ValuePtr>& /*outputs*/,
const DeviceDescriptor& /*computeDevice*/,
const std::unordered_set<Variable>& /*outputsToRetainBackwardStateFor*/) override
{
NOT_IMPLEMENTED;
}
virtual void Backward(const BackPropStatePtr& /*state*/,
const Internal::SimpleMap<Variable, const ValuePtr>& /*rootGradientValues*/,
Internal::SimpleMap<Variable, ValuePtr>& /*backPropagatedGradientValuesForInputs*/) override
const std::unordered_map<Variable, const ValuePtr>& /*rootGradientValues*/,
std::unordered_map<Variable, ValuePtr>& /*backPropagatedGradientValuesForInputs*/) override
{
NOT_IMPLEMENTED;
}
@ -280,54 +280,58 @@ namespace CNTK
private:
std::pair<Variable, int64_t> m_evalTimeStamp;
};
typedef Internal::ReferenceCountedPtr<CNTKBackPropState> CNTKBackPropStatePtr;
typedef std::shared_ptr<CNTKBackPropState> CNTKBackPropStatePtr;
class CompositeFunction;
typedef Internal::ReferenceCountedPtr<CompositeFunction> CompositeFunctionPtr;
typedef std::shared_ptr<CompositeFunction> CompositeFunctionPtr;
class CompositeFunction final : public Function
{
friend class Function;
template <typename T, typename ...CtorArgTypes>
friend inline std::shared_ptr<T> MakeSharedObject(CtorArgTypes&& ...ctorArgs);
public:
static CompositeFunctionPtr Create(const FunctionPtr& rootFunction, const std::wstring& name = L"")
{
Internal::SimpleSet<FunctionPtr> visitedFunctions;
std::unordered_set<FunctionPtr> visitedFunctions;
// Call DetermineInputs to get the set of all functions in the graph
DetermineInputs(rootFunction, visitedFunctions);
auto func = new CompositeFunction(rootFunction, std::move(visitedFunctions), name);
return CompositeFunctionPtr(func, [](ReferenceCount* ptr) { delete ptr; });
return MakeSharedObject<CompositeFunction>(rootFunction, std::move(visitedFunctions), name);
}
virtual BackPropStatePtr Forward(const Internal::SimpleMap<Variable, const ValuePtr>& arguments,
Internal::SimpleMap<Variable, ValuePtr>& outputs,
const Internal::SimpleSet<Variable>& outputsToRetainBackwardStateFor,
const DeviceDescriptor& computeDevice) override;
virtual BackPropStatePtr Forward(const std::unordered_map<Variable, const ValuePtr>& arguments,
std::unordered_map<Variable, ValuePtr>& outputs,
const DeviceDescriptor& computeDevice,
const std::unordered_set<Variable>& outputsToRetainBackwardStateFor) override;
virtual void Backward(const BackPropStatePtr& state,
const Internal::SimpleMap<Variable, const ValuePtr>& rootGradientValues,
Internal::SimpleMap<Variable, ValuePtr>& backPropagatedGradientValuesForInputs) override;
const std::unordered_map<Variable, const ValuePtr>& rootGradientValues,
std::unordered_map<Variable, ValuePtr>& backPropagatedGradientValuesForInputs) override;
private:
virtual void _ReplacePlaceholders(const Internal::SimpleMap<Placeholder, Variable>& placeholderReplacements, Internal::SimpleSet<const Function*>& visitedFunctions, Internal::SimpleSet<Placeholder>& replacedPlaceholders) override;
virtual void ReplacePlaceholders(const std::unordered_map<Placeholder, Variable>& placeholderReplacements,
std::unordered_set<const Function*>& visitedFunctions,
std::unordered_set<Placeholder>& replacedPlaceholders) override;
CompositeFunction(const FunctionPtr& rootFunction, Internal::SimpleSet<FunctionPtr>&& allPrimitiveFunctions, const std::wstring& name)
CompositeFunction(const FunctionPtr& rootFunction, std::unordered_set<FunctionPtr>&& allPrimitiveFunctions, const std::wstring& name)
: Function({}, rootFunction->Outputs(), rootFunction, name), m_allPrimitiveFunctions(std::move(allPrimitiveFunctions))
{
}
std::vector<Variable> DetermineInputs() const
{
Internal::SimpleSet<FunctionPtr> visitedFunctions;
std::unordered_set<FunctionPtr> visitedFunctions;
return DetermineInputs(RootFunction(), visitedFunctions);
}
// Recursively traverses the Function graph underlying the 'rootFunction' to determine all the leaves (aka inputs) of the graph
static std::vector<Variable> DetermineInputs(const FunctionPtr& rootFunction, Internal::SimpleSet<FunctionPtr>& visitedFunctions)
static std::vector<Variable> DetermineInputs(const FunctionPtr& rootFunction, std::unordered_set<FunctionPtr>& visitedFunctions)
{
visitedFunctions.Insert(rootFunction);
visitedFunctions.insert(rootFunction);
std::vector<Variable> inputs;
std::vector<Variable> rootFunctionInputs = rootFunction->Inputs();
@ -335,7 +339,7 @@ namespace CNTK
{
if (!rootInput.IsOutput())
inputs.push_back(rootInput);
else if (!visitedFunctions.Contains(rootInput.Owner()))
else if (visitedFunctions.find(rootInput.Owner()) == visitedFunctions.end())
{
FunctionPtr function = rootInput.Owner();
std::vector<Variable> functionInputs = DetermineInputs(function, visitedFunctions);
@ -347,7 +351,7 @@ namespace CNTK
}
template <typename ElementType>
Microsoft::MSR::CNTK::ComputationNetworkPtr GetComputationNetwork(const DeviceDescriptor& device, const Internal::SimpleSet<Variable>& backpropRoots);
Microsoft::MSR::CNTK::ComputationNetworkPtr GetComputationNetwork(const DeviceDescriptor& device, const std::unordered_set<Variable>& backpropRoots);
template <typename ElementType>
static Microsoft::MSR::CNTK::ComputationNodeBasePtr GetOutputVariableNode(const Variable& variable, Microsoft::MSR::CNTK::ComputationNetworkPtr& network, Microsoft::MSR::CNTK::ComputationNetworkBuilder<ElementType>& builder, std::unordered_map<Variable, Microsoft::MSR::CNTK::ComputationNodeBasePtr>& variableToNodeMap, std::unordered_map<Variable, bool>& isVariableRootMap);
@ -355,8 +359,13 @@ namespace CNTK
template <typename ElementType>
static Microsoft::MSR::CNTK::ComputationNodeBasePtr GetNode(const Variable& variable, Microsoft::MSR::CNTK::ComputationNetworkPtr& network, Microsoft::MSR::CNTK::ComputationNetworkBuilder<ElementType>& builder, std::unordered_map<Variable, Microsoft::MSR::CNTK::ComputationNodeBasePtr>& variableToNodeMap, std::unordered_map<Variable, bool>& isVariableRootMap);
void PopulateNetworkInputs(const Internal::SimpleMap<Variable, const ValuePtr>& arguments);
void PopulateNetworkGradients(const Internal::SimpleMap<Variable, const ValuePtr>& gradients);
template <typename ElementType>
static void PopulateComputationNodeValue(const std::pair<Variable, ValuePtr>& variableValue, Microsoft::MSR::CNTK::ComputationNodeBasePtr& computationNode);
void PopulateNetworkInputs(const std::unordered_map<Variable, const ValuePtr>& arguments);
template <typename ElementType>
static void PopulateComputationNodeGradient(const std::pair<Variable, ValuePtr>& variableGradient, Microsoft::MSR::CNTK::ComputationNodeBasePtr& computationNode);
void PopulateNetworkGradients(const std::unordered_map<Variable, const ValuePtr>& gradients);
void GetNetworkOutputs(std::unordered_map<Variable, ValuePtr>& outputs);
void GetNetworkGradients(std::unordered_map<Variable, ValuePtr>& gradients);
@ -371,7 +380,7 @@ namespace CNTK
// Set of all primitive functions in the graph underlying 'this' Function. Also keeps the primitive Function objects alive
// by holding strong references to them
Internal::SimpleSet<FunctionPtr> m_allPrimitiveFunctions;
std::unordered_set<FunctionPtr> m_allPrimitiveFunctions;
// A map from Variable objects to ComputationNode objects in the ComputationNetwork instance that implements 'this' Composite Function
std::unordered_map<Variable, Microsoft::MSR::CNTK::ComputationNodeBasePtr> m_variableToNodeMap;

Просмотреть файл

@ -0,0 +1,464 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#include "Learner.h"
#include "TensorView.h"
#include "Utils.h"
// Dispatches a parameter update to the element-type-specific Update<ElementType>
// overload based on the data type of the smoothed gradient. Expects the names
// 'parameter', 'smoothedGradientValue', 'gradientValue', 'parameterValue' and
// 'trainingSampleCount' to be in scope at the expansion site.
#define UPDATE_FUNCTION \
    switch (smoothedGradientValue->Data()->GetDataType()) \
    { \
    case DataType::Float: \
        Update<float>(parameter, smoothedGradientValue, gradientValue, parameterValue, trainingSampleCount); \
        break; \
    case DataType::Double: \
        Update<double>(parameter, smoothedGradientValue, gradientValue, parameterValue, trainingSampleCount); \
        break; \
    default: \
        NOT_IMPLEMENTED; \
    }
using namespace Microsoft::MSR::CNTK;
using namespace std;
namespace CNTK
{
// Exposes NDArrayView's private read-only matrix accessor to LearnerBase
// subclasses (NDArrayView declares LearnerBase a friend).
template <typename ElementType>
/*static*/ shared_ptr<const Matrix<ElementType>> LearnerBase::GetMatrix(const NDArrayViewPtr arrayView)
{
    return arrayView->GetMatrix<ElementType>();
}
// Exposes NDArrayView's private writable matrix accessor to LearnerBase subclasses.
template <typename ElementType>
/*static*/ shared_ptr<Matrix<ElementType>> LearnerBase::GetWritableMatrix(NDArrayViewPtr arrayView)
{
    return arrayView->GetWritableMatrix<ElementType>();
}
// Exposes NDArrayView's private read-only TensorView accessor to LearnerBase subclasses.
template <typename ElementType>
/*static*/ const TensorView<ElementType>* LearnerBase::GetTensorView(const NDArrayViewPtr arrayView)
{
    return arrayView->GetTensorView<ElementType>();
}
// Debugging helper: reports whether the matrix payload of 'value' contains a NaN;
// 'name' is forwarded to Matrix::HasNan for diagnostic output.
/*static*/ bool LearnerBase::HasNan(const ValuePtr& value, const char* name)
{
    const auto& data = value->Data();
    const auto dataType = data->GetDataType();
    if (dataType == DataType::Float)
        return data->GetMatrix<float>()->HasNan(name);
    if (dataType == DataType::Double)
        return data->GetMatrix<double>()->HasNan(name);
    LogicError("Unsupported DataType %s", DataTypeName(dataType));
}
// Debugging helper: prints the matrix payload of 'value', labeled with 'msg'.
/*static*/ void LearnerBase::Print(const ValuePtr& value, const char* msg)
{
    const auto& data = value->Data();
    const auto dataType = data->GetDataType();
    if (dataType == DataType::Float)
        data->GetMatrix<float>()->Print(msg);
    else if (dataType == DataType::Double)
        data->GetMatrix<double>()->Print(msg);
    else
        LogicError("Unsupported DataType %s", DataTypeName(dataType));
}
// Clips gradients to prevent outliers. Clipping is DISABLED only when the
// per-sample threshold is exactly +infinity; any finite threshold is active.
// Two modes: truncation (element-wise clamp) or Frobenius-norm rescaling.
template <typename ElementType>
void LearnerBase::ClipGradient(Matrix<ElementType>& gradient, size_t actualMBSize) const
{
    if (m_additionalOptions.gradientClippingThresholdPerSample != numeric_limits<double>::infinity())
    {
        // Threshold is per sample; scale by the minibatch size to get the per-MB bound.
        double maxGradientPerMB = m_additionalOptions.gradientClippingThresholdPerSample * actualMBSize;
        if (m_additionalOptions.gradientClippingWithTruncation)
            gradient.InplaceTruncate(ElementType(maxGradientPerMB));
        else
        {
            // norm2 normalized: rescale the whole gradient so its Frobenius norm
            // does not exceed the per-MB bound.
            double gradientNorm = gradient.FrobeniusNorm();
            if (gradientNorm > maxGradientPerMB)
            {
                double normFactor = maxGradientPerMB / gradientNorm;
                gradient *= ElementType(normFactor);
            }
        }
    }
}
// Performs additional preprocessing before calling the update method
// (gradient clipping and L2 regularization depending on the additional learning parameters).
// Mutates the gradient in place; the parameter value is read (and added into the
// gradient) only when L2 regularization is enabled.
template <typename ElementType>
void LearnerBase::PreProcess(const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t actualMBSize) const
{
    const auto& gradientMatrix = gradientValue->Data()->GetWritableMatrix<ElementType>();

    // clipping gradients to prevent outliers
    ClipGradient<ElementType>(*gradientMatrix, actualMBSize);

    // L2 regularizer
    if (m_additionalOptions.l2RegularizationWeight > 0)
    {
        // multiply by actualMBSize so that it's invariant to minibatch size since learning rate is per sample
        auto weight = ElementType(m_additionalOptions.l2RegularizationWeight * actualMBSize);
        const auto& parameterMatrix = parameterValue->Data()->GetWritableMatrix<ElementType>();
        Matrix<ElementType>::ScaleAndAdd(weight, *parameterMatrix, *gradientMatrix);
    }
}
// Performs additional postprocessing after the update method has been executed
// (noise injection and L1 regularization specified by the additional learning parameters).
// Both steps mutate the parameter value in place.
template <typename ElementType>
void LearnerBase::PostProcess(const Variable& parameter, const ValuePtr& gradientValue,
                              const ValuePtr& parameterValue, size_t actualMBSize) const
{
    const auto& parameterMatrix = parameterValue->Data()->GetWritableMatrix<ElementType>();
    if (m_additionalOptions.gaussianNoiseInjectionStdDev > 0)
    {
        const auto& gradientMatrix = gradientValue->Data()->GetWritableMatrix<ElementType>();

        Matrix<ElementType> sgdUpdateNoise((DEVICEID_TYPE)parameterMatrix->GetDeviceId());

        // get the gradient structure since gradient is sparse
        sgdUpdateNoise.SetValue(*gradientMatrix);

        auto noiseStdDev = ElementType(m_additionalOptions.gaussianNoiseInjectionStdDev);

        // reset its value to random
        sgdUpdateNoise.SetGaussianRandomValue(ElementType(0.0), noiseStdDev);

        Matrix<ElementType>::ScaleAndAdd(ElementType(1.0), sgdUpdateNoise, *parameterMatrix);
    }

    // L1 regularizer with proximal gradient descent method
    if (m_additionalOptions.l1RegularizationWeight > 0)
    {
        auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
        // multiply by actualMBSize so that it's invariant to minibatch size since learning rate is per sample
        auto weight = ElementType(learningRate * m_additionalOptions.l1RegularizationWeight * actualMBSize);
        parameterValue->Data()->GetWritableMatrix<ElementType>()->InplaceSoftThreshold(weight);
    }
}
// Exposes NDArrayView's private writable TensorView accessor to LearnerBase subclasses.
template <typename ElementType>
/*static*/ TensorView<ElementType>* LearnerBase::GetWritableTensorView(NDArrayViewPtr arrayView)
{
    return arrayView->GetWritableTensorView<ElementType>();
}
// Constructs the state shared by all standard learners: for every parameter,
// allocates a zero-initialized smoothed-gradient buffer matching the parameter's
// data type and shape, and installs a default learning-rate multiplier of 1.0.
// (Fix: dropped the redundant 'parameterSet' alias of 'parameters' and iterates
// the argument directly.)
LearnerBase::LearnerBase(const unordered_set<Variable>& parameters, const DeviceDescriptor& device)
    : Learner(parameters),
    m_learningRatePerSample(0.0),
    m_sampleCount(0)
{
    for (const auto& parameter : parameters)
    {
        // TODO: using the same device to allocate data for all smoothed gradients. Is this correct?
        // Should the device be specified on the per-parameter basis?
        NDArrayViewPtr view;
        if (parameter.GetDataType() == DataType::Float)
        {
            view = MakeSharedObject<NDArrayView>(0.0f, parameter.Shape(), device);
        }
        else
        {
            view = MakeSharedObject<NDArrayView>(0.0, parameter.Shape(), device);
        }

        m_smoothedGradientValues.insert(make_pair(parameter, MakeSharedObject<Value>(view)));
        m_additionalOptions.learningRateMultipliers.insert(make_pair(parameter, 1.0));
    }
}
// Zeroes every smoothed-gradient buffer; used (per the header) to reset SGD
// momentum, e.g. after BlockMomentumSGD aggregation.
void LearnerBase::ResetSmoothedGradients()
{
    for (const auto& parameter : Parameters())
    {
        const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
        const auto& data = smoothedGradientValue->Data();
        switch (data->GetDataType())
        {
        case DataType::Float:
            data->SetValue(0.0f);
            break;
        case DataType::Double:
            data->SetValue(0.0);
            break;
        default:
            LogicError("Unsupported DataType %s", ::CNTK::DataTypeName(data->GetDataType()));
        }
    }
}
// Applies one update step to every parameter of this learner, dispatching per
// element type via UPDATE_FUNCTION to the subclass's Update<ElementType>.
// In _DEBUG builds, checks smoothed gradients and updated parameters for NaNs;
// with DUMPOUTPUT, logs intermediate values. Accumulates the processed sample
// count and always returns false (no distributed-update signal here).
/*virtual*/ bool LearnerBase::Update(const unordered_map<Variable, ValuePtr>& parameterValues,
                                     const unordered_map<Variable, const ValuePtr>& gradientValues,
                                     size_t trainingSampleCount) /*override*/
{
    // make sure trainingSampleCount is a valid value
    assert(trainingSampleCount > 0);

    for (const auto& parameter : Parameters())
    {
        const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
        const auto& gradientValue = gradientValues.at(parameter);
        const auto& parameterValue = parameterValues.at(parameter);

        // TODO: make this a runtime parameter.
#if DUMPOUTPUT
        LOGPRINTF(stderr, "Update_%ls\n", parameter.Name().c_str());
#endif

#ifdef _DEBUG
        if (HasNan(smoothedGradientValue, "TrainOneEpoch/UpdateWeights/Learner::Update(): "))
            LogicError("%ls has NaNs in smoothedGradient.", parameter.Name().c_str());
#endif

#if DUMPOUTPUT
        LOGPRINTF(stderr, "learnRatePerSample=%0.8f, momentum=%0.8f, actualMBSize=%ld\n",
                  m_learningRatePerSample, m_momentumPerSample, trainingSampleCount);
        LOGPRINTF(stderr, "GradUpdateType()=%s, GradientUpdateNoiseStd()=%0.8f\n",
                  LearnerType().c_str(), m_GaussianNoiseInjectStd);
        Print(gradientValue, "Gradient Update");
        Print(smoothedGradientValue, "Smoothed Gradient Input");
#endif
        UPDATE_FUNCTION;

#if DUMPOUTPUT
        Print(parameterValue, "Parameter Update");
#endif

#ifdef _DEBUG
        if (HasNan(parameterValue, "TrainOneEpoch/UpdateWeights/Learner::Update(): "))
            LogicError("%ls has NaNs in parameter values after parameter update.", parameter.Name().c_str());
#endif
    }
    m_sampleCount += trainingSampleCount;
    return false;
}
// Templatized update pipeline: wraps the subclass's virtual Update with the
// shared pre-/post-processing (clipping + L2 before, noise + L1 after).
template <typename ElementType>
void LearnerBase::Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                         const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const
{
    PreProcess<ElementType>(gradientValue, parameterValue, trainingSampleCount);
    Update(parameter, smoothedGradientValue, gradientValue, parameterValue, trainingSampleCount);
    PostProcess<ElementType>(parameter, gradientValue, parameterValue, trainingSampleCount);
}
// Returns the dynamic type name of this learner, stripping the "class " prefix
// that MSVC's typeid(...).name() prepends on Windows.
string LearnerBase::LearnerType() const
{
    const string typeName = typeid(*this).name();
    const string msvcPrefix = "class ";
    if (typeName.compare(0, msvcPrefix.size(), msvcPrefix) == 0)
    {
        // On Windows, drop the "class " prefix and return the bare type name.
        return typeName.substr(msvcPrefix.size());
    }
    return typeName;
}
// Serializes the smoothed gradients into a Dictionary keyed by parameter name.
// Currently disabled: the leading NOT_IMPLEMENTED aborts before any of the
// code below runs (kept as a sketch of the intended implementation).
/*virtual*/ Dictionary LearnerBase::GetCheckpointState() const /*override*/
{
    NOT_IMPLEMENTED; // Until the new checkpointing is fully fleshed out, nobody should be calling this.
    Dictionary checkpoint;

    for (const auto& parameter : Parameters())
    {
        // TODO: parameter name is not guaranteed to be unique. Instead, all serializable objects
        // need to expose "UId" property -- a persistent unique internal name.
        // Switch to UId as soon as it's available.
        if (checkpoint.Contains(parameter.Name()))
        {
            LogicError("Parameter names must be unique");
        }

        const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);

        // Potentially, could store things like dimensions, element size, format, etc., but
        // that seems to be redundant, since all of that is passed in the constructor.
        checkpoint[parameter.Name()] = SerializeToVector(smoothedGradientValue->Data());
    }
    return checkpoint;
}
// Restores smoothed gradients from a checkpoint Dictionary keyed by parameter
// name. Currently disabled: the leading NOT_IMPLEMENTED aborts before any of
// the code below runs (kept as a sketch of the intended implementation).
/*virtual*/ void LearnerBase::RestoreFromCheckpoint(const Dictionary& checkpoint) /*override*/
{
    NOT_IMPLEMENTED; // Until the new checkpointing is fully fleshed out, nobody should be calling this.
    for (const auto& parameter : Parameters())
    {
        if (!checkpoint.Contains(parameter.Name()))
        {
            LogicError("Checkpoint does not contain state for parameter %ls", parameter.Name().c_str());
        }

        const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
        const DictionaryValue& state = checkpoint[parameter.Name()];

        const auto& data = smoothedGradientValue->Data();

        DeserializeFromVector(data, state.GetValue<vector<DictionaryValue>>());
    }
}
// Virtual dispatch shim: forwards to LearnerSGD::Update<float|double> based on
// the smoothed gradient's element type.
/*virtual*/ void LearnerSGD::Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                                    const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const /*override*/
{
    UPDATE_FUNCTION;
}
// Vanilla/momentum/Nesterov SGD step: delegates to Matrix::NormalGrad, which
// updates both the smoothed gradient (momentum accumulator) and the parameter.
template <typename ElementType>
void LearnerSGD::Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                        const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const
{
    UNUSED(trainingSampleCount);

    const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue->Data());
    const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue->Data());
    const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue->Data());

    const auto& learningRate = ElementType(ParameterDependentLearningRate(parameter));

    // TODO: break up the NormalGrad into 3 different functions, each with its own set of parameters
    // (one for vanilla SGD, the other for momentum SGD, and the third one for NAG).
    smoothedGradientMatrix->NormalGrad(*gradientMatrix, *parameterMatrix,
                                       learningRate, ElementType(m_momentumPerSample), m_useNesterovAcceleration);
}
// AdaGrad learner; 'needAveMultiplier' is forwarded to Matrix::Adagrad on each update.
LearnerAdaGrad::LearnerAdaGrad(const unordered_set<Variable>& parameters, bool needAveMultiplier, const DeviceDescriptor& device)
    : LearnerBase(parameters, device),
    m_needAveMultiplier(needAveMultiplier)
{
}
// Virtual dispatch shim: forwards to LearnerAdaGrad::Update<float|double>.
/*virtual*/ void LearnerAdaGrad::Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                                        const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const /*override*/
{
    UPDATE_FUNCTION;
}
// AdaGrad step: Matrix::Adagrad scales the gradient by accumulated squared
// gradients (stored in the smoothed gradient) and returns an averaging
// multiplier used to normalize the learning rate.
template <typename ElementType>
void LearnerAdaGrad::Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                            const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const
{
    UNUSED(trainingSampleCount);

    const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue->Data());
    const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue->Data());
    const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue->Data());

    auto learningRate = ElementType(ParameterDependentLearningRate(parameter));

    auto aveMultiplier = smoothedGradientMatrix->Adagrad(*gradientMatrix, m_needAveMultiplier);
    Matrix<ElementType>::ScaleAndAdd(ElementType(-learningRate / aveMultiplier), *gradientMatrix, *parameterMatrix);
}
// FSAdaGrad learner; inherits the momentum setting from LearnerMomentumSGD.
LearnerFSAdaGrad::LearnerFSAdaGrad(const unordered_set<Variable>& parameters, const DeviceDescriptor& device)
    : LearnerMomentumSGD(parameters, device)
{
}
// Virtual dispatch shim: forwards to LearnerFSAdaGrad::Update<float|double>.
/*virtual*/ void LearnerFSAdaGrad::Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                                          const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const /*override*/
{
    UPDATE_FUNCTION;
}
// FSAdaGrad step: delegates to Matrix::FSAdagrad, which combines
// momentum-smoothed gradients with AdaGrad-style normalization.
template <typename ElementType>
void LearnerFSAdaGrad::Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                              const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const
{
    UNUSED(trainingSampleCount);

    const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue->Data());
    const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue->Data());
    const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue->Data());

    //const double momentum = MomentumPerMB(m_momentumPerSample, trainingSampleCount);

    auto learningRate = ElementType(ParameterDependentLearningRate(parameter));

    smoothedGradientMatrix->FSAdagrad(trainingSampleCount, *gradientMatrix, *parameterMatrix,
                                      learningRate, ElementType(m_momentumPerSample));
}
// RMSProp learner; gamma/inc/dec/max/min are forwarded verbatim to
// Matrix::RmsProp on each update.
LearnerRMSProp::LearnerRMSProp(const unordered_set<Variable>& parameters,
                               double gamma, double inc, double dec, double max, double min,
                               bool needAveMultiplier, const DeviceDescriptor& device)
    : LearnerBase(parameters, device),
    m_gamma(gamma), m_inc(inc), m_dec(dec), m_max(max), m_min(min),
    m_needAveMultiplier(needAveMultiplier)
{
}
// Virtual dispatch shim: forwards to LearnerRMSProp::Update<float|double>.
/*virtual*/ void LearnerRMSProp::Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                                        const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const /*override*/
{
    UPDATE_FUNCTION;
}
// RMSProp step: Matrix::RmsProp rescales the gradient by a running
// root-mean-square of past gradients (state kept in the smoothed gradient)
// and returns an averaging multiplier used to normalize the learning rate.
template <typename ElementType>
void LearnerRMSProp::Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                            const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const
{
    UNUSED(trainingSampleCount);

    const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue->Data());
    const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue->Data());
    const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue->Data());

    auto learningRate = ElementType(ParameterDependentLearningRate(parameter));

    auto aveMultiplier = smoothedGradientMatrix->RmsProp(*gradientMatrix,
                                                         ElementType(m_gamma), ElementType(m_inc),
                                                         ElementType(m_max), ElementType(m_dec),
                                                         ElementType(m_min), m_needAveMultiplier);
    Matrix<ElementType>::ScaleAndAdd(ElementType(-learningRate / aveMultiplier), *gradientMatrix, *parameterMatrix);
}
// Explicit template instantiations
template shared_ptr<Matrix<float>> LearnerBase::GetWritableMatrix<float>(const NDArrayViewPtr arrayView);
template shared_ptr<Matrix<double>> LearnerBase::GetWritableMatrix<double>(const NDArrayViewPtr arrayView);

// Public factory functions: each constructs the corresponding concrete learner
// via MakeSharedObject and returns it behind the opaque LearnerPtr handle.

LearnerPtr SGDLearner(const unordered_set<Variable>& parameters, const DeviceDescriptor& device)
{
    return MakeSharedObject<LearnerSGD>(parameters, device);
}

LearnerPtr MomentumSGDLearner(const unordered_set<Variable>& parameters, const DeviceDescriptor& device)
{
    return MakeSharedObject<LearnerMomentumSGD>(parameters, device);
}

LearnerPtr NesterovLearner(const unordered_set<Variable>& parameters, const DeviceDescriptor& device)
{
    return MakeSharedObject<LearnerNesterov>(parameters, device);
}

LearnerPtr AdaGradLearner(const unordered_set<Variable>& parameters, bool needAveMultiplier, const DeviceDescriptor& device)
{
    return MakeSharedObject<LearnerAdaGrad>(parameters, needAveMultiplier, device);
}

LearnerPtr FSAdaGradLearner(const unordered_set<Variable>& parameters, const DeviceDescriptor& device)
{
    return MakeSharedObject<LearnerFSAdaGrad>(parameters, device);
}

LearnerPtr RMSPropLearner(const unordered_set<Variable>& parameters,
                          double gamma, double inc, double dec, double max, double min, bool needAveMultiplier,
                          const DeviceDescriptor& device)
{
    return MakeSharedObject<LearnerRMSProp>(parameters, gamma, inc, dec, max, min, needAveMultiplier, device);
}
}

Просмотреть файл

@ -0,0 +1,224 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#include "stdafx.h"
#include "CNTKLibrary.h"

#include <limits>
namespace CNTK
{
// A collection of additional options that are applicable for all standard learners
// (after these options are set, they retain their value for the entire lifespan of a learner).
struct AdditionalLearningOptions
{
    double l1RegularizationWeight = 0.0;
    double l2RegularizationWeight = 0.0;
    double gaussianNoiseInjectionStdDev = 0.0;
    bool gradientClippingWithTruncation = false;
    // Infinity means "clipping disabled": LearnerBase::ClipGradient treats any
    // finite value as an active per-sample threshold, so the previous default
    // of 0.0 clipped every gradient to zero.
    double gradientClippingThresholdPerSample = std::numeric_limits<double>::infinity();
    // Per-parameter scale applied to the global learning rate
    // (see LearnerBase::ParameterDependentLearningRate).
    std::unordered_map<Variable, double> learningRateMultipliers;
};
// An abstract base class at the root of the standard learners hierarchy
// It implements most of the learner functionality, except for the actual update function,
// and adds a few pre-/postprocessing methods (which are invoked before and after the update).
class LearnerBase : public Learner
{
public:
    // Applies one update step per parameter; dispatches to the subclass's
    // type-specific Update and returns false.
    CNTK_API virtual bool Update(const std::unordered_map<Variable, ValuePtr>& parameterValues,
                                 const std::unordered_map<Variable, const ValuePtr>& gradientValues,
                                 size_t trainingSampleCount) override final;

    // Checkpointing entry points (currently stubbed with NOT_IMPLEMENTED in the .cpp).
    CNTK_API virtual Dictionary GetCheckpointState() const override;

    CNTK_API virtual void RestoreFromCheckpoint(const Dictionary& checkpoint) override;

    // Replaces all additional options wholesale; options then persist for the learner's lifetime.
    CNTK_API void SetAdditionalOptions(const AdditionalLearningOptions& additionalOptions)
    {
        m_additionalOptions = additionalOptions;
    }

    // TODO: should this be called ResetMomentum?
    // needed for BlockMomemtumSGD to reset SGD momentum after aggregation.
    CNTK_API void ResetSmoothedGradients();

    // TODO: move learning rate and momentum scheduling and adjustment functionality
    // inside the learner and drop these setters.
    void SetLearningRate(double value) { m_learningRatePerSample = value; }

protected:
    LearnerBase(const std::unordered_set<Variable>& parameters,
                const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice());

    // Type-specific update implemented by each concrete learner.
    virtual void Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                        const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const = 0;

    // Global per-sample learning rate scaled by this parameter's multiplier.
    double ParameterDependentLearningRate(const Variable& parameter) const
    {
        return m_learningRatePerSample * m_additionalOptions.learningRateMultipliers.at(parameter);
    }

    std::string LearnerType() const;

    double m_learningRatePerSample;

    AdditionalLearningOptions m_additionalOptions;

    // Per-parameter optimizer state (momentum / accumulated statistics).
    std::unordered_map<Variable, ValuePtr> m_smoothedGradientValues;

    // The following four static protected methods expose private methods of NDArrayView class
    // (which declares LearnerBase as friend class), so that they are available to subclasses.
    template <typename ElementType>
    static std::shared_ptr<const Microsoft::MSR::CNTK::Matrix<ElementType>> GetMatrix(const NDArrayViewPtr arrayView);

    template <typename ElementType>
    static std::shared_ptr<Microsoft::MSR::CNTK::Matrix<ElementType>> GetWritableMatrix(NDArrayViewPtr arrayView);

    template <typename ElementType>
    static const Microsoft::MSR::CNTK::TensorView<ElementType>* GetTensorView(const NDArrayViewPtr arrayView);

    template <typename ElementType>
    static Microsoft::MSR::CNTK::TensorView<ElementType>* GetWritableTensorView(NDArrayViewPtr arrayView);

    template <typename ElementType>
    void ClipGradient(Microsoft::MSR::CNTK::Matrix<ElementType>& gradient, size_t actualMBSize) const;

    // Performs additional preprocessing before calling the update method
    // (gradient clipping and L2 regularization depending on the additional learning parameters).
    template <typename ElementType>
    void PreProcess(const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t actualMBSize) const;

    // Performs additional postprocessing after the update method has been executed
    // (noise injection and L1 regularization specified by the additional learning parameters).
    template <typename ElementType>
    void PostProcess(const Variable& parameter, const ValuePtr& gradientValue,
                     const ValuePtr& parameterValue, size_t actualMBSize) const;

private:
    // Templatized update function, it invokes preprocess and postprocess using the provided
    // template parameter and also invokes virtual Update method implemented in one of the subclasses.
    template <typename ElementType>
    void Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const;

    // TODO: make these functions friends of NDViewArray and move to Utils?
    static bool HasNan(const ValuePtr& value, const char* name);
    static void Print(const ValuePtr& value, const char* msg);

    // Total number of training samples processed by Update so far.
    size_t m_sampleCount;
};
// Vanilla gradient descent optimization algorithm.
class LearnerSGD : public LearnerBase
{
public:
    // Plain SGD: zero momentum, no Nesterov acceleration. Subclasses flip these
    // members to obtain momentum SGD and NAG.
    LearnerSGD(const std::unordered_set<Variable>& parameters,
               const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice())
        : LearnerBase(parameters, device),
        m_momentumPerSample(0.0),
        m_useNesterovAcceleration(false)
    {
    }

protected:
    virtual void Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                        const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const override;

    template <typename ElementType>
    void Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const;

    double m_momentumPerSample;
    bool m_useNesterovAcceleration;
};
// SGD optimization with momentum.
class LearnerMomentumSGD : public LearnerSGD
{
public:
    // Same update rule as LearnerSGD; differs only in exposing a momentum setter.
    LearnerMomentumSGD(const std::unordered_set<Variable>& parameters,
                       const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice())
        : LearnerSGD(parameters, device)
    {
    }

    void SetMomentum(double value) { m_momentumPerSample = value; }
};
// Nesterov's accelerated SGDLearnerBase descent.
class LearnerNesterov : public LearnerSGD
{
public:
    // Nesterov-accelerated gradient: identical to LearnerSGD except that the
    // Nesterov flag is enabled, which NormalGrad honors in the update.
    LearnerNesterov(const std::unordered_set<Variable>& parameters,
                    const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice())
        : LearnerSGD(parameters, device)
    {
        m_useNesterovAcceleration = true;
    }
};
// AdaGrad learner: per-element adaptive learning rates via Matrix::Adagrad.
class LearnerAdaGrad : public LearnerBase
{
public:
    LearnerAdaGrad(const std::unordered_set<Variable>& parameters, bool needAveMultiplier,
                   const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice());

protected:
    // Forwarded to Matrix::Adagrad; controls the averaging multiplier used to
    // normalize the learning rate.
    bool m_needAveMultiplier;

    virtual void Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                        const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const override;

    template <typename ElementType>
    void Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const;
};
// FSAdaGrad learner: combines momentum (inherited from LearnerMomentumSGD)
// with AdaGrad-style normalization via Matrix::FSAdagrad.
class LearnerFSAdaGrad : public LearnerMomentumSGD
{
public:
    LearnerFSAdaGrad(const std::unordered_set<Variable>& parameters,
                     const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice());

protected:
    virtual void Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                        const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const override;

    template <typename ElementType>
    void Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const;
};
// RMSProp learner: the five tuning constants are forwarded verbatim to
// Matrix::RmsProp on every update.
class LearnerRMSProp : public LearnerBase
{
public:
    LearnerRMSProp(const std::unordered_set<Variable>& parameters,
                   double gamma, double inc, double dec, double max, double min, bool needAveMultiplier,
                   const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice());

protected:
    double m_gamma; // smoothing factor for the running gradient statistics
    double m_inc;   // step-size increase factor
    double m_dec;   // step-size decrease factor
    double m_max;   // upper bound on the per-element scale
    double m_min;   // lower bound on the per-element scale
    bool m_needAveMultiplier;

    virtual void Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                        const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const override;

    template <typename ElementType>
    void Update(const Variable& parameter, const ValuePtr& smoothedGradientValue,
                const ValuePtr& gradientValue, const ValuePtr& parameterValue, size_t trainingSampleCount) const;
};
}

Просмотреть файл

@ -60,7 +60,7 @@ namespace CNTK
matrixDims.second,
AsCNTKImplDeviceId(device),
IsSparseStorageFormat(storageType) ? MatrixType::SPARSE : MatrixType::DENSE,
AsCNTKMatrixFormat(storageType));
AsCNTKImplMatrixFormat(storageType));
return new TensorView<ElementType>(matrix, AsTensorShape(viewShape));
}
@ -99,8 +99,22 @@ namespace CNTK
}
NDArrayView::NDArrayView(CNTK::DataType dataType, const DeviceDescriptor& device, CNTK::StorageFormat storageType, const NDShape& viewShape, bool readOnly, void* tensorView)
: m_dataType(dataType), m_device(device), m_storageFormat(storageType), m_viewShape(viewShape), m_isReadOnly(readOnly), m_tensorView(tensorView)
: m_dataType(dataType), m_device(device), m_storageFormat(storageType), m_viewShape(viewShape), m_isReadOnly(readOnly)
{
m_tensorView = std::shared_ptr<void>(tensorView, [this](void*) {
switch (m_dataType)
{
case DataType::Float:
delete GetTensorView<float>();
break;
case DataType::Double:
delete GetTensorView<double>();
break;
default:
LogicError("Unsupported DataType %s", DataTypeName(m_dataType));
break;
}
});
}
NDArrayView::NDArrayView(CNTK::DataType dataType, CNTK::StorageFormat storageType, const NDShape& viewShape, const DeviceDescriptor& device)
@ -108,6 +122,10 @@ namespace CNTK
{
}
NDArrayView::~NDArrayView()
{
}
void NDArrayView::SetValue(float value)
{
if (IsSparse())
@ -124,22 +142,6 @@ namespace CNTK
GetWritableMatrix<double>()->SetValue(value);
}
NDArrayView::~NDArrayView()
{
switch (m_dataType)
{
case DataType::Float:
delete GetTensorView<float>();
break;
case DataType::Double:
delete GetTensorView<double>();
break;
default:
LogicError("Unsupported DataType %s", DataTypeName(m_dataType));
break;
}
}
template <typename ElementType>
/*static*/ std::shared_ptr<Matrix<ElementType>> NDArrayView::GetMatrixImpl(const TensorView<ElementType>* tensorView, size_t rowColSplitPoint)
{
@ -150,7 +152,8 @@ namespace CNTK
size_t splitPoint = rowColSplitPoint;
if (splitPoint == NDArrayView::AutoSelectRowColSplitPoint)
{
// Determine the split point
// Determine the split point by determining which of the axes can be
// folded and selecting the non-foldable axis as the split point
std::vector<bool> dimsToDrop(tensorShape.GetRank(), false);
for (size_t k = 1; k < tensorShape.GetRank(); ++k)
if (tensorShape.CanFlatten(k))
@ -197,7 +200,7 @@ namespace CNTK
if (AsDataType<ElementType>() != m_dataType)
LogicError("NDArrayView::GetTensorView: The specified ElementType %s does not match the DataType %s", typeid(ElementType).name(), DataTypeName(m_dataType));
return (const TensorView<ElementType>*)(m_tensorView);
return (const TensorView<ElementType>*)(m_tensorView.get());
}
template <typename ElementType>
@ -211,7 +214,7 @@ namespace CNTK
NDArrayViewPtr NDArrayView::DeepClone(bool readOnly/* = false*/) const
{
NDArrayViewPtr newView(new NDArrayView(this->GetDataType(), this->GetStorageFormat(), this->Shape(), this->Device()), [](ReferenceCount* ptr) { delete ptr; });
NDArrayViewPtr newView = MakeSharedObject<NDArrayView>(this->GetDataType(), this->GetStorageFormat(), this->Shape(), this->Device());
switch (m_dataType)
{
case DataType::Float:
@ -234,9 +237,7 @@ namespace CNTK
}
newView->m_isReadOnly = readOnly;
return NDArrayViewPtr(newView, [](ReferenceCount* ptr) {
delete ptr;
});
return newView;
}
void NDArrayView::CopyFrom(const NDArrayView& source)
@ -285,8 +286,7 @@ namespace CNTK
break;
}
auto aliasView = new NDArrayView(GetDataType(), Device(), GetStorageFormat(), Shape(), IsReadOnly() || readOnly, tensorView);;
return NDArrayViewPtr(aliasView, [](ReferenceCount* ptr) { delete ptr; });
return MakeSharedObject<NDArrayView>(GetDataType(), Device(), GetStorageFormat(), Shape(), IsReadOnly() || readOnly, tensorView);
}
// TODO: This could actually be strided?
@ -322,8 +322,7 @@ namespace CNTK
auto randomUniformMatrix = std::make_shared<Matrix<ElementType>>(Matrix<ElementType>::RandomUniform(matrixDims.first, matrixDims.second, AsCNTKImplDeviceId(device), (ElementType)rangeBegin, (ElementType)rangeEnd, seed));
auto tensorView = new TensorView<ElementType>(randomUniformMatrix, AsTensorShape(shape));
auto view = new NDArrayView(AsDataType<ElementType>(), device, StorageFormat::Dense, shape, false, tensorView);
return NDArrayViewPtr(view, [](ReferenceCount* ptr) { delete ptr; });
return MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), device, StorageFormat::Dense, shape, false, tensorView);
}
// Explicit template instantiations
@ -339,8 +338,10 @@ namespace CNTK
template std::shared_ptr<const Matrix<float>> NDArrayView::GetMatrix(size_t rowColSplitPoint/* = AutoSelectRowColSplitPoint*/) const;
template std::shared_ptr<const Matrix<double>> NDArrayView::GetMatrix(size_t rowColSplitPoint/* = AutoSelectRowColSplitPoint*/) const;
template std::shared_ptr<Matrix<float>> NDArrayView::GetWritableMatrix(size_t rowColSplitPoint/* = AutoSelectRowColSplitPoint*/);
template std::shared_ptr<Matrix<double>> NDArrayView::GetWritableMatrix(size_t rowColSplitPoint/* = AutoSelectRowColSplitPoint*/);
template std::shared_ptr<Matrix<float>> NDArrayView::GetWritableMatrix<float>(size_t rowColSplitPoint/* = AutoSelectRowColSplitPoint*/);
template std::shared_ptr<Matrix<double>> NDArrayView::GetWritableMatrix<double>(size_t rowColSplitPoint/* = AutoSelectRowColSplitPoint*/);
template TensorView<float>* NDArrayView::GetWritableTensorView<float>();
template TensorView<double>* NDArrayView::GetWritableTensorView<double>();
template CNTK_API NDArrayView::NDArrayView(const NDShape& viewShape, const SparseIndexType* colStarts, const SparseIndexType* rowIndices, const float* nonZeroValues, size_t numNonZeroValues, const DeviceDescriptor& device, bool readOnly/* = false*/);
template CNTK_API NDArrayView::NDArrayView(const NDShape& viewShape, const SparseIndexType* colStarts, const SparseIndexType* rowIndices, const double* nonZeroValues, size_t numNonZeroValues, const DeviceDescriptor& device, bool readOnly/* = false*/);

Просмотреть файл

@ -17,15 +17,13 @@ namespace CNTK
static Matrix<char>* AllocateMatrix(const NDShape& viewShape, const DeviceDescriptor& device)
{
auto matrixDims = GetMatrixDimensions(viewShape);
auto maskMatrix = new Matrix<char>(matrixDims.first, matrixDims.second, AsCNTKImplDeviceId(device));
maskMatrix->SetValue(1);
return maskMatrix;
return new Matrix<char>(matrixDims.first, matrixDims.second, AsCNTKImplDeviceId(device));
}
NDMask::NDMask(const NDShape& shape, Matrix<char>* matrix)
: m_device(AsDeviceDescriptor(matrix->GetDeviceId())), m_maskShape(shape), m_matrixView(matrix)
: m_device(AsDeviceDescriptor(matrix->GetDeviceId())), m_maskShape(shape)
{
m_matrixView = std::shared_ptr<Matrix<char>>(matrix, [](Matrix<char>* ptr) { delete ptr; });
}
NDMask::NDMask(const NDShape& shape, const DeviceDescriptor& device/* = DeviceDescriptor::DefaultDevice()*/)
@ -33,16 +31,17 @@ namespace CNTK
{
if (shape.NumAxes() > 2)
LogicError("NDMask instances with more than 2 axes are currently unsupported");
Clear();
}
NDMask::~NDMask()
{
delete m_matrixView;
}
void NDMask::MaskSection(const std::vector<size_t>& sectionOffset, const NDShape& sectionShape)
{
// TODO: Implement batching of masking operation for masks residing on GPUs to avoid making
// TODO: Implement batching of masking operation for masks residing on GPUs to avoid making
// GPU invocations for each MaskSection call.
if (sectionOffset.size() > m_maskShape.NumAxes())
@ -78,12 +77,13 @@ namespace CNTK
void NDMask::Clear()
{
// Clear the mask by marking all samples as Valid; i.e. a value of 1
GetMatrix()->SetValue(1);
}
Matrix<char>* NDMask::GetMatrix() const
{
return m_matrixView;
return m_matrixView.get();
}
void NDMask::CopyFrom(const NDMask& source)
@ -96,14 +96,14 @@ namespace CNTK
NDMaskPtr NDMask::DeepClone() const
{
NDMaskPtr newMask = new NDMask(this->Shape(), this->Device());
NDMaskPtr newMask = MakeSharedObject<NDMask>(this->Shape(), this->Device());
newMask->CopyFrom(*this);
return NDMaskPtr(newMask, [](ReferenceCount* ptr) { delete ptr; });
return newMask;
}
NDMaskPtr NDMask::Alias() const
{
return NDMaskPtr(new NDMask(this->Shape(), new Matrix<char>(GetMatrix()->AsReference())), [](ReferenceCount* ptr) { delete ptr; });
return MakeSharedObject<NDMask>(this->Shape(), new Matrix<char>(GetMatrix()->AsReference()));
}
}

Просмотреть файл

@ -6,354 +6,138 @@
#include "stdafx.h"
#include "CNTKLibrary.h"
#include "Utils.h"
#include "File.h"
using namespace std;
namespace CNTK
{
namespace Internal
template <typename T>
void DictionaryValue::AllocateDataPtr(const T& value)
{
ReferenceCount::ReferenceCount()
: m_rc(new std::atomic<size_t>(0))
{}
static_assert(is_same<T, NDShape>::value || is_same<T, vector<DictionaryValue>>::value, "AllocateDataPtr called with invalid type");
m_data.m_ptr = new T(value);
}
/*virtual*/ ReferenceCount::~ReferenceCount()
template <typename T>
void DictionaryValue::FreePtrAsType()
{
T* typedPtr = reinterpret_cast<T*>(m_data.m_ptr);
delete typedPtr;
m_data.m_ptr = nullptr;
}
void DictionaryValue::FreeDataPtr()
{
if (m_valueType == Type::NDShape)
FreePtrAsType<NDShape>();
else if (m_valueType == Type::Vector)
FreePtrAsType<vector<DictionaryValue>>();
}
Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, DictionaryValue& us)
{
size_t version;
stream >> version;
stream >> us.m_valueType;
switch (us.ValueType())
{
delete m_rc;
}
size_t ReferenceCount::AddReference()
case DictionaryValue::Type::Bool:
stream >> us.m_data.m_boolean;
break;
case DictionaryValue::Type::SizeT:
stream >> us.m_data.m_sizeT;
break;
case DictionaryValue::Type::Float:
stream >> us.m_data.m_float;
break;
case DictionaryValue::Type::Double:
stream >> us.m_data.m_double;
break;
case DictionaryValue::Type::NDShape:
{
return ++(*m_rc);
}
size_t ReferenceCount::RemoveReference()
{
assert(m_rc->load() > 0);
return --(*m_rc);
}
size_t ReferenceCount::GetReferenceCount()
{
return m_rc->load();
}
#pragma region SimpleVector
template <typename T>
SimpleVector<T>::SimpleVector()
: m_vector(new std::vector<T>())
{
}
template <typename T>
SimpleVector<T>::SimpleVector(size_t numElements, const T& initVal/* = T()*/)
: m_vector(new std::vector<T>(numElements, initVal))
{
}
template <typename T>
SimpleVector<T>::~SimpleVector()
{
delete m_vector;
}
template <typename T>
SimpleVector<T>::SimpleVector(const SimpleVector<T>& other)
: m_vector(new std::vector<T>(*other.m_vector))
{
}
template <typename T>
SimpleVector<T>& SimpleVector<T>::operator=(const SimpleVector<T>& other)
{
if (this != &other)
size_t size;
stream >> size;
vector<size_t> dims(size);
for (auto i = 0; i < size; i++)
{
delete m_vector;
m_vector = new std::vector<T>(*other.m_vector);
stream >> dims[i];
}
return *this;
us.AllocateDataPtr(NDShape(dims));
break;
}
template <typename T>
SimpleVector<T>::SimpleVector(SimpleVector<T>&& other)
: m_vector(nullptr)
case DictionaryValue::Type::Vector:
{
*this = std::move(other);
}
template <typename T>
SimpleVector<T>& SimpleVector<T>::operator=(SimpleVector<T>&& other)
{
assert(this != &other);
delete m_vector;
m_vector = other.m_vector;
other.m_vector = nullptr;
return *this;
}
template <typename T>
T& SimpleVector<T>::operator[](size_t idx)
{
assert(idx < Size());
return (*m_vector)[idx];
}
template <typename T>
const T& SimpleVector<T>::operator[](size_t idx) const
{
assert(idx < Size());
return (*m_vector)[idx];
}
template <typename T>
size_t SimpleVector<T>::Size() const
{
return m_vector->size();
}
template <typename T>
T* SimpleVector<T>::Data()
{
return m_vector->data();
}
template <typename T>
const T* SimpleVector<T>::Data() const
{
return m_vector->data();
}
template <typename T>
void SimpleVector<T>::PushBack(const T& value)
{
m_vector->push_back(value);
}
template <typename T>
void SimpleVector<T>::PushBack(T&& value)
{
m_vector->push_back(std::move(value));
}
template <typename ValueType>
bool operator==(const SimpleVector<ValueType>& first, const SimpleVector<ValueType>& second)
{
return *first.m_vector == *second.m_vector;
}
// Explicit template instantiations
template class SimpleVector<Variable>;
template class SimpleVector<size_t>;
template class SimpleVector<Axis>;
template class SimpleVector<FunctionPtr>;
template bool operator==(const SimpleVector<size_t>& first, const SimpleVector<size_t>& second);
#pragma endregion SimpleVector
#pragma region SimpleSet
template <typename KeyType>
SimpleSet<KeyType>::SimpleSet()
: m_set(new std::unordered_set<KeyType>())
{
}
template <typename KeyType>
SimpleSet<KeyType>::~SimpleSet()
{
delete m_set;
}
template <typename KeyType>
SimpleSet<KeyType>::SimpleSet(const SimpleSet& other)
: m_set(nullptr)
{
*this = other;
}
template <typename KeyType>
SimpleSet<KeyType>& SimpleSet<KeyType>::operator=(const SimpleSet& other)
{
if (this != &other)
size_t size;
stream >> size;
vector<DictionaryValue> values(size);
for (auto i = 0; i < size; i++)
{
delete m_set;
m_set = new std::unordered_set<KeyType>(*(other.m_set));
stream >> values[i];
}
return *this;
us.AllocateDataPtr(values);
break;
}
template <typename KeyType>
SimpleSet<KeyType>::SimpleSet(SimpleSet&& other)
: m_set(nullptr)
{
*this = std::move(other);
default:
NOT_IMPLEMENTED;
}
return stream;
}
template <typename KeyType>
SimpleSet<KeyType>& SimpleSet<KeyType>::operator=(SimpleSet&& other)
Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const DictionaryValue& us)
{
stream << us.version;
stream << us.ValueType();
switch (us.ValueType())
{
assert(this != &other);
delete m_set;
m_set = other.m_set;
other.m_set = nullptr;
return *this;
}
template <typename KeyType>
bool SimpleSet<KeyType>::Insert(const KeyType& key)
case DictionaryValue::Type::Bool:
stream << us.m_data.m_boolean;
break;
case DictionaryValue::Type::SizeT:
stream << us.m_data.m_sizeT;
break;
case DictionaryValue::Type::Float:
stream << us.m_data.m_float;
break;
case DictionaryValue::Type::Double:
stream << us.m_data.m_double;
break;
case DictionaryValue::Type::NDShape:
{
return m_set->insert(key).second;
}
template <typename KeyType>
bool SimpleSet<KeyType>::Contains(const KeyType& key) const
{
return (m_set->find(key) != m_set->end());
}
template <typename KeyType>
size_t SimpleSet<KeyType>::Size() const
{
return m_set->size();
}
template <typename KeyType>
SimpleSet<KeyType>::operator SimpleVector<KeyType>() const
{
SimpleVector<KeyType> retVector;
for (auto key : *m_set)
retVector.PushBack(key);
return retVector;
}
template <typename KeyType>
bool operator==(const SimpleSet<KeyType>& first, const SimpleSet<KeyType>& second)
{
return *first.m_set == *second.m_set;
}
// Explicit template instantiations
template class SimpleSet<FunctionPtr>;
template class SimpleSet<Variable>;
template class SimpleSet<Placeholder>;
template class SimpleSet<const Function*>;
template bool operator==(const SimpleSet<Variable>& first, const SimpleSet<Variable>& second);
template bool operator==(const SimpleSet<Placeholder>& first, const SimpleSet<Placeholder>& second);
#pragma endregion SimpleSet
#pragma region SimpleMap
template <typename KeyType, typename ValueType>
SimpleMap<KeyType, ValueType>::SimpleMap()
: m_map(new std::unordered_map<KeyType, ValueType>())
{
}
template <typename KeyType, typename ValueType>
SimpleMap<KeyType, ValueType>::~SimpleMap()
{
delete m_map;
}
template <typename KeyType, typename ValueType>
SimpleMap<KeyType, ValueType>::SimpleMap(const SimpleMap& other)
: m_map(nullptr)
{
*this = other;
}
template <typename KeyType, typename ValueType>
SimpleMap<KeyType, ValueType>& SimpleMap<KeyType, ValueType>::operator=(const SimpleMap& other)
{
if (this != &other)
NDShape* shapePtr = reinterpret_cast<NDShape*>(us.m_data.m_ptr);
auto size = shapePtr->NumAxes();
stream << size;
for (auto i = 0; i < size; i++)
{
delete m_map;
m_map = new std::unordered_map<KeyType, ValueType>(*(other.m_map));
stream << shapePtr->operator[](i);
}
return *this;
break;
}
template <typename KeyType, typename ValueType>
SimpleMap<KeyType, ValueType>::SimpleMap(SimpleMap&& other)
: m_map(nullptr)
case DictionaryValue::Type::Vector:
{
*this = std::move(other);
vector<DictionaryValue>* vectorPtr =
reinterpret_cast<vector<DictionaryValue>*>(us.m_data.m_ptr);
auto size = vectorPtr->size();
stream << size;
for (auto i = 0; i < size; i++)
{
stream << vectorPtr->operator[](i);
}
break;
}
template <typename KeyType, typename ValueType>
SimpleMap<KeyType, ValueType>& SimpleMap<KeyType, ValueType>::operator=(SimpleMap&& other)
{
assert(this != &other);
delete m_map;
m_map = other.m_map;
other.m_map = nullptr;
return *this;
default:
NOT_IMPLEMENTED;
}
template <typename KeyType, typename ValueType>
ValueType& SimpleMap<KeyType, ValueType>::operator[](const KeyType& key)
{
return (*m_map)[key];
}
template <typename KeyType, typename ValueType>
const ValueType& SimpleMap<KeyType, ValueType>::operator[](const KeyType& key) const
{
return (*m_map)[key];
}
template <typename KeyType, typename ValueType>
bool SimpleMap<KeyType, ValueType>::Insert(const KeyType& key, const ValueType& value)
{
return m_map->insert({ key, value }).second;
}
template <typename KeyType, typename ValueType>
bool SimpleMap<KeyType, ValueType>::Contains(const KeyType& key) const
{
return (m_map->find(key) != m_map->end());
}
template <typename KeyType, typename ValueType>
size_t SimpleMap<KeyType, ValueType>::Size() const
{
return m_map->size();
}
template <typename KeyType, typename ValueType>
SimpleSet<KeyType> SimpleMap<KeyType, ValueType>::Keys() const
{
SimpleSet<KeyType> keys;
for (auto keyValuePair : *m_map)
keys.Insert(keyValuePair.first);
return keys;
}
// Explicit template instantiations
template class SimpleMap<Variable, ValuePtr>;
template class SimpleMap<Variable, const ValuePtr>;
template class SimpleMap<Placeholder, Variable>;
#pragma endregion SimpleMap
return stream;
}
Dictionary::Dictionary()
: m_dictionaryData(new std::unordered_map < std::wstring, DictionaryValue>)
: m_dictionaryData(new unordered_map <wstring, DictionaryValue>)
{
}
@ -365,7 +149,7 @@ namespace CNTK
Dictionary::Dictionary(Dictionary&& other)
: m_dictionaryData(nullptr)
{
*this = std::move(other);
*this = move(other);
}
Dictionary& Dictionary::operator=(Dictionary&& other)
@ -394,4 +178,130 @@ namespace CNTK
{
return (m_dictionaryData->find(key) != m_dictionaryData->end());
}
Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const Dictionary& us)
{
stream << us.version;
stream << us.m_dictionaryData->size();
for (auto it = us.m_dictionaryData->begin(); it != us.m_dictionaryData->end(); ++it)
{
stream << it->first;
stream << it->second;
}
return stream;
}
Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, Dictionary& us)
{
size_t version;
stream >> version;
size_t size;
stream >> size;
us.m_dictionaryData->reserve(size);
for (auto i = 0; i < size; i++)
{
wstring key;
stream >> key;
DictionaryValue value;
stream >> value;
us.m_dictionaryData->insert(make_pair(key, value));
}
return stream;
}
template <typename T>
vector<DictionaryValue> SerializeToVector(const NDArrayViewPtr& viewPtr)
{
if (viewPtr->IsSparse())
{
LogicError("Sparse NDArrayView cannot be serialized into a vector.");
}
auto numElements = viewPtr->Shape().TotalSize();
vector<DictionaryValue> values(numElements);
NDArrayViewPtr cpuDataViewPtr = viewPtr;
if ((viewPtr->Device().Type() != DeviceKind::CPU))
{
cpuDataViewPtr = MakeSharedObject<NDArrayView>(viewPtr->GetDataType(), viewPtr->Shape(), DeviceDescriptor::CPUDevice());
cpuDataViewPtr->CopyFrom(*viewPtr);
}
const T* buffer = cpuDataViewPtr->DataBuffer<T>();
for (auto i = 0; i < numElements; ++i)
{
T v = buffer[i];
values[i] = DictionaryValue(v);
}
return values;
}
template <typename T>
void DeserializeFromVector(const NDArrayViewPtr& viewPtr, const vector<DictionaryValue>& values)
{
if (viewPtr->IsSparse())
{
LogicError("Sparse NDArrayView cannot be deserialized from a vector.");
}
auto numElements = viewPtr->Shape().TotalSize();
if (values.size() != numElements)
{
LogicError("Number of elements (%lu) in the deserialized representation does not match the expected value (%lu)",
values.size(), numElements);
}
NDArrayViewPtr cpuDataViewPtr = viewPtr;
if ((viewPtr->Device().Type() != DeviceKind::CPU))
{
cpuDataViewPtr = MakeSharedObject<NDArrayView>(viewPtr->GetDataType(), viewPtr->Shape(), DeviceDescriptor::CPUDevice());
}
T* buffer = cpuDataViewPtr->WritableDataBuffer<T>();
for (auto i = 0; i < numElements; ++i)
{
buffer[i] = values[i].GetValue<T>();
}
if ((viewPtr->Device().Type() != DeviceKind::CPU))
{
viewPtr->CopyFrom(*cpuDataViewPtr);
}
}
// TODO: we store the type info for every element in the vector, which is extremely redundant.
// Instead, it'd be nice to introduce some sort of DictionaryValueVector.
vector<DictionaryValue> SerializeToVector(const NDArrayViewPtr& viewPtr)
{
switch (viewPtr->GetDataType())
{
case DataType::Float:
return SerializeToVector<float>(viewPtr);
case DataType::Double:
return SerializeToVector<double>(viewPtr);
default:
LogicError("Unsupported DataType %s", DataTypeName(viewPtr->GetDataType()));
}
}
void DeserializeFromVector(const NDArrayViewPtr& viewPtr, const vector<DictionaryValue>& values)
{
switch (viewPtr->GetDataType())
{
case DataType::Float:
DeserializeFromVector<float>(viewPtr, values);
break;
case DataType::Double:
DeserializeFromVector<double>(viewPtr, values);
break;
default:
LogicError("Unsupported DataType %s", DataTypeName(viewPtr->GetDataType()));
}
}
template void DictionaryValue::AllocateDataPtr<NDShape>(const NDShape& value);
template void DictionaryValue::AllocateDataPtr<vector<DictionaryValue>>(const vector<DictionaryValue>& value);
}

Просмотреть файл

@ -15,245 +15,6 @@ namespace CNTK
// Forward declarations
class Dictionary;
class DictionaryValue
{
public:
enum class Type : unsigned int
{
None,
Bool,
SizeT,
Double,
NDShape,
Vector
};
static const char* TypeName(Type type)
{
if (type == Type::None)
return "None";
else if (type == Type::Bool)
return "Bool";
else if (type == Type::SizeT)
return "SizeT";
else if (type == Type::Double)
return "Double";
else if (type == Type::NDShape)
return "NDShape";
else if (type == Type::Vector)
return "Vector";
else
LogicError("Unknown DictionaryValue::Type");
}
public:
DictionaryValue()
: m_valueType(Type::None)
{
}
DictionaryValue(bool value)
: m_valueType(GetValueType<bool>())
{
m_data.m_boolean = value;
}
DictionaryValue(size_t value)
: m_valueType(GetValueType<size_t>())
{
m_data.m_sizeT = value;
}
DictionaryValue(double value)
: m_valueType(GetValueType<double>())
{
m_data.m_double = value;
}
template <typename T>
DictionaryValue(const T& value)
: m_valueType(GetValueType<T>())
{
static_assert(std::is_same<T, NDShape>::value ||
std::is_same<T, Internal::SimpleVector<DictionaryValue>>::value,
"Unsupported ValueType");
AllocateDataPtr(value);
}
DictionaryValue(const DictionaryValue& other)
: m_valueType(Type::Bool)
{
// The m_valueType must hvae been set to a non-ptr type to prevent an attempt to interpret
// the underlying underlying uninitialized value as a ptr and free it.
*this = other;
}
DictionaryValue& operator=(const DictionaryValue& other)
{
if (this != &other)
{
FreeDataPtr();
m_valueType = other.m_valueType;
m_data = other.m_data;
if (other.m_valueType == Type::NDShape)
AllocateDataPtr(other.GetValue<NDShape>());
else if (other.m_valueType == Type::Vector)
AllocateDataPtr(other.GetValue<Internal::SimpleVector<DictionaryValue>>());
}
return *this;
}
~DictionaryValue()
{
FreeDataPtr();
}
template <typename T, typename std::enable_if<std::is_same<T, bool>::value>::type* = nullptr>
const T& GetValue() const
{
VerifyType<T>();
return m_data.m_boolean;
}
template <typename T, typename std::enable_if<std::is_same<T, size_t>::value>::type* = nullptr>
const T& GetValue() const
{
VerifyType<T>();
return m_data.m_sizeT;
}
template <typename T, typename std::enable_if<std::is_same<T, double>::value>::type* = nullptr>
const T& GetValue() const
{
VerifyType<T>();
return m_data.m_double;
}
template <typename T, typename std::enable_if<std::is_same<T, NDShape>::value || std::is_same<T, Internal::SimpleVector<DictionaryValue>>::value>::type* = nullptr>
const T& GetValue() const
{
VerifyType<T>();
return *(reinterpret_cast<T*>(m_data.m_ptr));
}
bool HasValue() const
{
return m_valueType != Type::None;
}
Type ValueType() const
{
return m_valueType;
}
private:
template <typename T>
static Type GetValueType()
{
static_assert(std::is_same<T, bool>::value ||
std::is_same<T, size_t>::value ||
std::is_same<T, double>::value ||
std::is_same<T, NDShape>::value ||
std::is_same<T, Internal::SimpleVector<DictionaryValue>>::value ||
std::is_same<T, CNTK::Dictionary>::value,
"Unsupported ValueType");
if (std::is_same<T, bool>::value)
return Type::Bool;
else if (std::is_same<T, size_t>::value)
return Type::SizeT;
else if (std::is_same<T, double>::value)
return Type::Double;
else if (std::is_same<T, NDShape>::value)
return Type::NDShape;
else if (std::is_same<T, Internal::SimpleVector<DictionaryValue>>::value)
return Type::Vector;
}
template <typename T>
void VerifyType() const
{
if (GetValueType<T>() != m_valueType)
RuntimeError("Reading a DictionaryValue as the wrong type; Reading as type %s when actual type is %s", typeid(T).name(), DictionaryValue::TypeName(m_valueType));
}
template <typename T>
void AllocateDataPtr(const T& value)
{
static_assert(std::is_same<T, NDShape>::value || std::is_same<T, Internal::SimpleVector<DictionaryValue>>::value, "AllocateDataPtr called with invalid type");
m_data.m_ptr = new T(value);
}
template <typename T>
void FreePtrAsType()
{
T* typedPtr = reinterpret_cast<T*>(m_data.m_ptr);
delete typedPtr;
m_data.m_ptr = nullptr;
}
void FreeDataPtr()
{
if (m_valueType == Type::NDShape)
FreePtrAsType<NDShape>();
else if (m_valueType == Type::Vector)
FreePtrAsType<Internal::SimpleVector<DictionaryValue>>();
}
private:
Type m_valueType;
union ValueData
{
bool m_boolean;
size_t m_sizeT;
double m_double;
void* m_ptr;
} m_data;
};
class Dictionary
{
public:
Dictionary();
~Dictionary();
// Disallow copy contruction and assignment
Dictionary(const Dictionary&) = delete; Dictionary& operator=(const Dictionary&) = delete;
Dictionary(Dictionary&& other);
Dictionary& operator=(Dictionary&& other);
DictionaryValue& operator[](const std::wstring& key)
{
return operator[](key.c_str());
}
DictionaryValue& operator[](const wchar_t* key);
DictionaryValue operator[](const std::wstring& key) const
{
return operator[](key.c_str());
}
DictionaryValue operator[](const wchar_t* key) const;
bool Contains(const std::wstring& key) const
{
return Contains(key.c_str());
}
bool Contains(const wchar_t* key) const;
private:
std::unordered_map<std::wstring, DictionaryValue>* m_dictionaryData;
};
// Helper to get the size of an element of the specified DataType
inline size_t ElementSize(DataType dataType)
{
@ -275,7 +36,7 @@ namespace CNTK
NOT_IMPLEMENTED;
}
inline Microsoft::MSR::CNTK::MatrixFormat AsCNTKMatrixFormat(StorageFormat storageFormat)
inline Microsoft::MSR::CNTK::MatrixFormat AsCNTKImplMatrixFormat(StorageFormat storageFormat)
{
if (storageFormat == StorageFormat::Dense)
return Microsoft::MSR::CNTK::MatrixFormat::matrixFormatDense;
@ -358,4 +119,13 @@ namespace CNTK
return{ matrixRowSize, matrixColSize };
}
inline bool IsSparseInput(const Variable& var)
{
return var.IsInput() && var.IsSparse();
}
std::vector<DictionaryValue> SerializeToVector(const NDArrayViewPtr& viewPtr);
void DeserializeFromVector(const NDArrayViewPtr& viewPtr, const std::vector<DictionaryValue>& values);
}

Просмотреть файл

@ -21,15 +21,15 @@ namespace CNTK
auto maskShape = mask->Shape();
if (maskShape.NumAxes() > dataShape.NumAxes())
InvalidArgument("The number of axes of the mask of a Value object cannot exceed the number of axes of the data NDArrayView object");
InvalidArgument("The number of axes (%d) of the mask of a Value object cannot exceed the number of axes (%d) of the data NDArrayView object", (int)maskShape.NumAxes(), (int)dataShape.NumAxes());
if (dataShape.SubShape(dataShape.NumAxes() - maskShape.NumAxes()) != maskShape)
InvalidArgument("Invalid Value object; the data and mask are incompatible. The trailing dimensions of the data do not match the dimensions of the mask");
InvalidArgument("Invalid Value object; the data and mask are incompatible. The trailing dimensions of the data (%S) do not match the dimensions of the mask (%S)", dataShape.AsString().c_str(), maskShape.AsString().c_str());
}
}
template <typename T>
static NDMaskPtr CreateMask(size_t sampleSize, const std::vector<std::vector<T>>& sequences, const DeviceDescriptor& device)
static NDMaskPtr CreateMask(size_t numElementsPerSample, const std::vector<std::vector<T>>& sequences, const DeviceDescriptor& device)
{
size_t numSequences = sequences.size();
std::vector<size_t> sequenceLengths(numSequences);
@ -37,7 +37,7 @@ namespace CNTK
bool needsMask = false;
for (size_t i = 0; i < numSequences; ++i)
{
sequenceLengths[i] = sequences[i].size() / sampleSize;
sequenceLengths[i] = sequences[i].size() / numElementsPerSample;
if (maxSequenceLength < sequenceLengths[i])
maxSequenceLength = sequenceLengths[i];
@ -46,11 +46,12 @@ namespace CNTK
needsMask = true;
}
// If needed, create a mask to account for variability in lengths of specified sequences
NDMaskPtr deviceValueMask;
if (needsMask)
{
NDShape valueMaskShape = { maxSequenceLength, numSequences };
deviceValueMask = NDMaskPtr(new NDMask(valueMaskShape, device), [](Internal::ReferenceCount* ptr) {delete ptr; });
deviceValueMask = MakeSharedObject<NDMask>(valueMaskShape, device);
for (size_t i = 0; i < numSequences; ++i)
deviceValueMask->MaskSection({ sequenceLengths[i], i }, { NDShape::InferredDimension, 1 });
}
@ -87,23 +88,23 @@ namespace CNTK
}
colStarts[numSequences * maxSequenceLength] = (SparseIndexType)(nonZeroValues.size());
NDArrayViewPtr deviceValueData(new NDArrayView(valueDataShape, colStarts.data(), rowIndices.data(), nonZeroValues.data(), nonZeroValues.size(), device, readOnly), [](ReferenceCount* ptr) { delete ptr; });
return ValuePtr(new Value(deviceValueData, deviceValueMask), [](ReferenceCount* ptr) { delete ptr; });
NDArrayViewPtr deviceValueData = MakeSharedObject<NDArrayView>(valueDataShape, colStarts.data(), rowIndices.data(), nonZeroValues.data(), nonZeroValues.size(), device, readOnly);
return MakeSharedObject<Value>(deviceValueData, deviceValueMask);
}
template <typename ElementType>
/*static*/ ValuePtr Value::Create(const NDShape& sampleShape, const std::vector<std::vector<ElementType>>& sequences, const DeviceDescriptor& device, bool readOnly/* = false*/)
{
size_t sampleSize = sampleShape.TotalSize();
NDMaskPtr deviceValueMask = CreateMask(sampleSize, sequences, device);
size_t numElementsPerSample = sampleShape.TotalSize();
NDMaskPtr deviceValueMask = CreateMask(numElementsPerSample, sequences, device);
size_t maxSequenceLength = (deviceValueMask == nullptr) ? sequences[0].size() : deviceValueMask->Shape()[0];
size_t numSequences = sequences.size();
NDShape valueDataShape = sampleShape.AppendShape({ maxSequenceLength, numSequences });
NDArrayViewPtr valueData(new NDArrayView(AsDataType<ElementType>(), valueDataShape, DeviceDescriptor::CPUDevice()), [](ReferenceCount* ptr) { delete ptr; });
NDArrayViewPtr valueData = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), valueDataShape, DeviceDescriptor::CPUDevice());
ElementType* dataBuffer = valueData->WritableDataBuffer<ElementType>();
for (size_t i = 0; i < numSequences; ++i)
std::copy(sequences[i].data(), sequences[i].data() + sequences[i].size(), dataBuffer + (maxSequenceLength * i * sampleSize));
std::copy(sequences[i].data(), sequences[i].data() + sequences[i].size(), dataBuffer + (maxSequenceLength * i * numElementsPerSample));
NDArrayViewPtr deviceValueData;
if (device == DeviceDescriptor::CPUDevice())
@ -115,13 +116,13 @@ namespace CNTK
}
else
{
deviceValueData = NDArrayViewPtr(new NDArrayView(AsDataType<ElementType>(), valueDataShape, device), [](ReferenceCount* ptr) { delete ptr; });
deviceValueData = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), valueDataShape, device);
deviceValueData->CopyFrom(*valueData);
if (readOnly)
deviceValueData = deviceValueData->Alias(true);
}
return ValuePtr(new Value(deviceValueData, deviceValueMask), [](ReferenceCount* ptr) { delete ptr; });
return MakeSharedObject<Value>(deviceValueData, deviceValueMask);
}
/*virtual*/ Value::~Value()
@ -143,13 +144,13 @@ namespace CNTK
/*virtual*/ ValuePtr Value::DeepClone(bool readOnly/* = false*/) const
{
// TODO: Check if this is a derived type and throw an exception in that case
return ValuePtr(new Value(Data()->DeepClone(readOnly), (Mask() != nullptr) ? Mask()->DeepClone() : nullptr), [](ReferenceCount* ptr) { delete ptr; });
return MakeSharedObject<Value>(Data()->DeepClone(readOnly), (Mask() != nullptr) ? Mask()->DeepClone() : nullptr);
}
/*virtual*/ ValuePtr Value::Alias(bool readOnly/* = false*/) const
{
// TODO: Check if this is a derived type and throw an exception in that case
return ValuePtr(new Value(Data()->Alias(readOnly), (Mask() != nullptr) ? Mask()->Alias() : nullptr), [](ReferenceCount* ptr) { delete ptr; });
return MakeSharedObject<Value>(Data()->Alias(readOnly), (Mask() != nullptr) ? Mask()->Alias() : nullptr);
}
/*virtual*/ void Value::CopyFrom(const Value& source)

Просмотреть файл

@ -11,4 +11,9 @@ namespace CNTK
: Variable(function->Output())
{
}
// Returns the Function whose output this Variable is, wrapped as a shared_ptr.
// NOTE(review): assumes m_dataFields->m_ownerFunction is non-null (i.e. this
// Variable was produced by a Function) -- confirm against callers.
FunctionPtr Variable::Owner() const
{
    return m_dataFields->m_ownerFunction->shared_from_this();
}
}

Просмотреть файл

@ -106,22 +106,18 @@ public:
~BestGpu();
void Init();
void SetAllowedDevices(const std::vector<int>& devices); // only allow certain GPUs
bool DeviceAllowed(int device);
void DisallowDevice(int device)
{
assert((device >= -1) && (device <= 31));
if (device < 0)
m_disallowCPUDevice = true;
else
m_allowedDevices &= ~(1 << device);
}
bool DeviceAllowed(int deviceId);
void DisallowUnsupportedDevices();
void DisallowDevice(int deviceId);
void AllowAll(); // reset to allow all GPUs (no allowed list)
bool UseMultiple(); // using multiple GPUs?
int GetDevice(BestGpuFlags flags = bestGpuNormal); // get a single device
static const int AllDevices = -1; // can be used to specify all GPUs in GetDevices() call
static const int RequeryDevices = -2; // Requery refreshing statistics and picking the same number as last query
static const int MininumCCMajorForGpu = 3; // cntk supports GPUs with Compute Capability > 3.0
std::vector<int> GetDevices(int number = AllDevices, BestGpuFlags flags = bestGpuNormal); // get multiple devices
std::vector<ProcessorData *> GetProcessorData();
private:
bool LockDevice(int deviceId, bool trial = true);
};
@ -156,6 +152,8 @@ static DEVICEID_TYPE SelectDevice(DEVICEID_TYPE deviceId, bool bLockGPU, const i
{
g_bestGpu->DisallowDevice(excludedDevices[i]);
}
g_bestGpu->DisallowUnsupportedDevices();
}
bestDeviceId = (DEVICEID_TYPE)g_bestGpu->GetDevice(BestGpuFlags(bLockGPU ? (bestGpuAvoidSharing | bestGpuExclusiveLock) : bestGpuAvoidSharing));
@ -345,22 +343,32 @@ int BestGpu::GetDevice(BestGpuFlags bestFlags)
void BestGpu::SetAllowedDevices(const std::vector<int>& devices)
{
m_allowedDevices = 0;
for (int device : devices)
for (int deviceId : devices)
{
m_allowedDevices |= (1 << device);
m_allowedDevices |= (1 << deviceId);
}
}
// DeviceAllowed - is a particular device allowed?
// returns: true if the device is allowed, otherwise false
bool BestGpu::DeviceAllowed(int device)
bool BestGpu::DeviceAllowed(int deviceId)
{
assert((device >= -1) && (device <= 31));
assert((deviceId >= -1) && (deviceId <= 31));
if (device < 0)
if (deviceId < 0)
return !m_disallowCPUDevice;
else
return !!(m_allowedDevices & (1 << device));
return !!(m_allowedDevices & (1 << deviceId));
}
// DisallowDevice - remove a single device from the set that auto-selection may pick.
// deviceId -- GPU device id in [0, 31]; a negative id (-1) denotes the CPU device.
void BestGpu::DisallowDevice(int deviceId)
{
    // the allowed set is kept as a 32-bit mask, hence the upper bound of 31
    assert((deviceId >= -1) && (deviceId <= 31));

    if (deviceId < 0)
        m_disallowCPUDevice = true;
    else
        m_allowedDevices &= ~(1 << deviceId);
}
// AllowAll - Reset the allowed filter to allow all GPUs
@ -527,6 +535,68 @@ std::vector<int> BestGpu::GetDevices(int number, BestGpuFlags p_bestFlags)
return best; // return the array of the best GPUs
}
// Disallow devices which don't comply with the compute capability restriction when cntk runs with deviceId = 'auto'.
// Any enumerated GPU whose CUDA compute capability major version is below MininumCCMajorForGpu
// is removed from the allowed-device mask so that auto-selection never picks it.
void BestGpu::DisallowUnsupportedDevices()
{
    for (auto pd : m_procData)
    {
        if (pd->deviceProp.major < BestGpu::MininumCCMajorForGpu)
        {
            DisallowDevice(pd->deviceId);
        }
    }
}
// GetGpuData - look up the properties record for one device id.
// Returns a GpuData with validity GpuValidity::UnknownDevice when no GPU
// with the given id is present in the machine.
GpuData GetGpuData(DEVICEID_TYPE deviceId)
{
    for (const GpuData& candidate : GetAllGpusData())
    {
        if (candidate.deviceId == deviceId)
            return candidate;
    }

    // no matching device found -- synthesize an "unknown device" record
    return GpuData(0, 0, deviceId, 0, GpuValidity::UnknownDevice, "", 0);
}
// populate a vector with data (id, major/minor version, cuda cores, name and memory) for each gpu device in the machine
std::vector<GpuData> GetAllGpusData()
{
std::vector<GpuData> data;
auto bestGpu = make_unique<BestGpu>();
std::vector<ProcessorData*> processorData = bestGpu->GetProcessorData();
for (ProcessorData* pd : processorData)
{
GpuValidity validity = GpuValidity::UnknownDevice;
if (pd->deviceProp.major < BestGpu::MininumCCMajorForGpu)
{
validity = GpuValidity::ComputeCapabilityNotSupported;
}
else
{
validity = GpuValidity::Valid;
}
size_t totalMemory = pd->deviceProp.totalGlobalMem/(1024*1024); //From bytes to MBytes
GpuData gpuData = GpuData(pd->deviceProp.major, pd->deviceProp.minor, pd->deviceId, pd->cores, validity, string(pd->deviceProp.name), totalMemory);
data.push_back(gpuData);
}
return data;
}
// Expose the raw per-device ProcessorData records gathered by this BestGpu instance.
// Returns the vector by value, but the pointed-to ProcessorData objects are presumably
// still owned by this BestGpu -- callers should not use them past its lifetime (TODO confirm).
std::vector<ProcessorData*> BestGpu::GetProcessorData()
{
    return m_procData;
}
// QueryNvmlData - Query data from the Nvidia Management Library, and accumulate counters,
// In case failure, this function simply backs out without filling in the data structure and without setting m_nvmlData.
void BestGpu::QueryNvmlData()

Просмотреть файл

@ -8,15 +8,46 @@
// #define CPUONLY // #define this to build without GPU support nor needing the SDK installed
#include "CommonMatrix.h"
#include <vector>
// define IConfigRecord and ConfigParameters as incomplete types, in order to avoid having to include "ScriptableObjects.h" and "Config.h", as that confuses some .CU code
namespace Microsoft { namespace MSR { namespace ScriptableObjects { struct IConfigRecord; }}}
namespace Microsoft { namespace MSR { namespace CNTK {
using namespace std;
#ifndef CPUONLY
// Classifies whether a GPU device can be used by CNTK.
enum class GpuValidity
{
    Valid,                        // device meets all requirements
    UnknownDevice,                // no device with the queried id exists
    ComputeCapabilityNotSupported // CUDA compute capability below the required minimum
};
// Plain data record describing one GPU device (as reported by the CUDA runtime).
struct GpuData
{
    int versionMajor;     // CUDA compute capability, major version
    int versionMinor;     // CUDA compute capability, minor version
    int deviceId;         // CUDA device id
    int cudaCores;        // number of CUDA cores
    GpuValidity validity; // whether CNTK can use this device
    string name;          // device name as reported by the driver
    size_t totalMemory;   // total global memory, in MB

    GpuData(int versionMajor, int versionMinor, int deviceId, int cudaCores, GpuValidity validity, const string& name, size_t totalMemory)
        : versionMajor(versionMajor), versionMinor(versionMinor), deviceId(deviceId), cudaCores(cudaCores), validity(validity), name(name), totalMemory(totalMemory)
    {
    }
};
std::vector<GpuData> GetAllGpusData();
GpuData GetGpuData(DEVICEID_TYPE deviceId);
class ConfigParameters;
DEVICEID_TYPE DeviceFromConfig(const ConfigParameters& config);
DEVICEID_TYPE DeviceFromConfig(const ScriptableObjects::IConfigRecord& config);
#else
template <class ConfigRecordType>
static inline DEVICEID_TYPE DeviceFromConfig(const ConfigRecordType& /*config*/)

Просмотреть файл

@ -282,7 +282,7 @@ class VariableSchema : public std::vector<VariableLayout>
Values<ElemType> CreateBuffers(const std::vector<size_t>& maxLengths)
{
if (maxLengths.size() != size())
throw std::exception("Expected max lengths for all variables.");
throw std::runtime_error("Expected max lengths for all variables.");
Values<ElemType> buffers(size());
for (size_t i = 0; i < size(); ++i)

Просмотреть файл

@ -29,7 +29,8 @@ public:
runtime_error(msg)
{
}
virtual void PrintError(const std::wstring& linePrefix) const = 0;
virtual std::wstring GetError(const std::wstring& /*linePrefix*/) const = 0;
virtual void PrintError(const std::wstring& /*linePrefix*/) const = 0;
};
// -----------------------------------------------------------------------

Просмотреть файл

@ -411,7 +411,7 @@ static inline void byteswap(V &v) throw()
// execute a block with retry
// Block must be restartable.
// Use this when writing small files to those unreliable Windows servers.
// Use this when writing/reading small files to those unreliable Windows servers.
// TODO: This will fail to compile under VS 2008--we need an #ifdef around this
template <typename FUNCTION>
static void attempt(int retries, const FUNCTION &body)

Просмотреть файл

@ -30,6 +30,7 @@
#include <assert.h>
#include <string.h> // for strerror()
#include <stdexcept> // for exception
#include <fcntl.h>
// ----------------------------------------------------------------------------
// fopenOrDie(): like fopen() but terminate with err msg in case of error.
@ -591,7 +592,8 @@ void fgetfile(const std::wstring& pathname, std::vector<char>& buffer);
void fgetfile(FILE* f, std::vector<char>& buffer);
namespace msra { namespace files {
void fgetfilelines(const std::wstring& pathname, std::vector<char>& readbuffer, std::vector<std::string>& lines);
void fgetfilelines(const std::wstring& pathname, std::vector<char>& readbuffer, std::vector<std::string>& lines, int numberOfTries = 1);
static inline std::vector<std::string> fgetfilelines(const std::wstring& pathname)
{
std::vector<char> buffer;
@ -599,7 +601,7 @@ static inline std::vector<std::string> fgetfilelines(const std::wstring& pathnam
fgetfilelines(pathname, buffer, lines);
return lines;
}
std::vector<char*> fgetfilelines(const std::wstring& pathname, std::vector<char>& readbuffer);
std::vector<char*> fgetfilelines(const std::wstring& pathname, std::vector<char>& readbuffer, int numberOfTries = 1);
}}
@ -698,8 +700,18 @@ class auto_file_ptr
{
if (f && f != stdin && f != stdout && f != stderr)
{
bool readMode = false;
#ifdef _WIN32
if ((f->_flag&_IOREAD) == _IOREAD)
readMode = true;
#else
int mode = fcntl(fileno(f), F_GETFL);
if ((mode & O_ACCMODE) == O_RDONLY)
readMode = true;
#endif
int rc = ::fclose(f);
if ((rc != 0) && !std::uncaught_exception())
if (!readMode && (rc != 0) && !std::uncaught_exception())
RuntimeError("auto_file_ptr: failed to close file: %s", strerror(errno));
f = NULL;

Просмотреть файл

@ -1251,7 +1251,7 @@ public:
// BUGBUG: we only really support one archive file at this point
// read the TOC in one swoop
std::vector<char> textbuffer;
auto toclines = msra::files::fgetfilelines(tocpath, textbuffer);
auto toclines = msra::files::fgetfilelines(tocpath, textbuffer, 3);
// parse it one by one
size_t archiveindex = SIZE_MAX; // its index

Просмотреть файл

@ -16,6 +16,7 @@
#endif
#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
#include "Basics.h"
#include "basetypes.h" // for attempt()
#include "fileutil.h"
#include "ProgressTracing.h"
@ -1632,6 +1633,11 @@ static size_t fgetfilechars(const std::wstring& path, vector<char>& buffer)
return len;
}
// Out-parameter variant of fgetfilechars() above; exists so the resulting length
// can be captured by reference from inside a retry lambda (see fgetfilelines()).
static void fgetfilechars(const std::wstring& path, vector<char>& buffer, size_t& len)
{
    len = fgetfilechars(path, buffer);
}
template <class LINES>
static void strtoklines(char* s, LINES& lines)
{
@ -1639,10 +1645,14 @@ static void strtoklines(char* s, LINES& lines)
lines.push_back(p);
}
void msra::files::fgetfilelines(const std::wstring& path, vector<char>& buffer, std::vector<std::string>& lines)
void msra::files::fgetfilelines(const std::wstring& path, vector<char>& buffer, std::vector<std::string>& lines, int numberOfTries)
{
// load it into RAM in one huge chunk
const size_t len = fgetfilechars(path, buffer);
size_t len = 0;
msra::util::attempt(numberOfTries, [&]() // (can be reading from network)
{
// load it into RAM in one huge chunk
fgetfilechars(path, buffer, len);
});
// parse into lines
lines.resize(0);
@ -1651,11 +1661,15 @@ void msra::files::fgetfilelines(const std::wstring& path, vector<char>& buffer,
}
// same as above but returning const char* (avoiding the memory allocation)
vector<char*> msra::files::fgetfilelines(const wstring& path, vector<char>& buffer)
vector<char*> msra::files::fgetfilelines(const wstring& path, vector<char>& buffer, int numberOfTries)
{
// load it into RAM in one huge chunk
const size_t len = fgetfilechars(path, buffer);
size_t len = 0;
msra::util::attempt(numberOfTries, [&]() // (can be reading from network)
{
// load it into RAM in one huge chunk
fgetfilechars(path, buffer, len);
});
// parse into lines
vector<char*> lines;
lines.reserve(len / 20);

Просмотреть файл

@ -72,6 +72,7 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
else if (nodeType == OperationNameOf(InvStdDevNode)) return New<InvStdDevNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(KhatriRaoProductNode)) return New<KhatriRaoProductNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(LogNode)) return New<LogNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(LogPlusNode)) return New<LogPlusNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(LogSoftmaxNode)) return New<LogSoftmaxNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(LookupTableNode)) return New<LookupTableNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(MatrixL1RegNode)) return New<MatrixL1RegNode<ElemType>>(forward<_Types>(_Args)...);
@ -657,6 +658,12 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Plus(
return net.AddNodeToNetAndAttachInputs(New<PlusNode<ElemType>>(net.GetDeviceId(), nodeName), { a, b });
}
// Creates a LogPlusNode -- ln(exp(a) + exp(b)), computed in an overflow-safe way --
// adds it to the network, and attaches a and b as its inputs.
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::LogPlus(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName)
{
    return net.AddNodeToNetAndAttachInputs(New<LogPlusNode<ElemType>>(net.GetDeviceId(), nodeName), { a, b });
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Less(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName)
{

Просмотреть файл

@ -134,6 +134,7 @@ public:
ComputationNodePtr InvStdDev(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr KhatriRaoProduct(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
ComputationNodePtr Log(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr LogPlus(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
ComputationNodePtr LogSoftmax(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr Logistic(const ComputationNodePtr a, const ComputationNodePtr b, const ComputationNodePtr c, const std::wstring nodeName = L"");
ComputationNodePtr Logistic(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");

Просмотреть файл

@ -76,7 +76,7 @@ void ComputationNetwork::CopySubTree(const ComputationNetwork& fromNet,
ComputationNodeBasePtr fromRoot = fromNet.GetNodeFromName(fromName);
for (const auto& fromNode : GetEvalOrder(fromRoot)) // BUGBUG: This probably will fail because the precomputed eval orders are invalid at this point.
for (const auto& fromNode : fromNet.GetEvalOrder(fromRoot)) // BUGBUG: This probably will fail because the precomputed eval orders are invalid at this point.
{
wstring fromNodeName = fromNode->NodeName();
wstring toNodeName = toNamePrefix + fromNodeName;

Просмотреть файл

@ -67,6 +67,8 @@ template class PlusNode<double>;
// -----------------------------------------------------------------------
// LogPlusNode (summand1, summand2)
// Computes ln(exp(summand1) + exp(summand2)) in an overflow safe way.
// Useful e.g. for computing softmax over sequence.
// -----------------------------------------------------------------------
template <class ElemType>
@ -105,8 +107,16 @@ public:
if (Input(inputIndex)->ReducesInTimeWrt(Input(1 - inputIndex)))
Input(1 - inputIndex)->MaskMissingValueColumnsToZero(fr);
// TODO: would be nice to state the derivative here in a comment
inputGradient.AddElementwiseProductWithLogSumDerivativeOf(gradient, input0, input1);
if (inputIndex == 0)
{
// d/dx (ln( exp(x) + (exp(y)) = exp(x) / (exp(x) + exp(y)) = 1 / (1 + exp(y-x)) = sigmoid(x-y)
inputGradient.AddElementwiseProductWithLogSumDerivativeOf(gradient, input1, input0);
}
else
{
// d/dy (ln( exp(x) + (exp(y)) = exp(y) / (exp(x) + exp(y)) = 1 / (1 + exp(x-y)) = sigmoid(y-x)
inputGradient.AddElementwiseProductWithLogSumDerivativeOf(gradient, input0, input1);
}
}
};

Просмотреть файл

@ -321,15 +321,17 @@ void CNTKEvalExtended<ElemType>::ForwardPassT(const std::vector<ValueBuffer<Elem
RuntimeError("Expected %d outputs, but got %d.", (int)m_outputNodes.size(), (int)outputs.size());
size_t i = 0;
for (auto& input : m_inputMatrices)
for (auto& inputNode : m_inputNodes)
{
// const cast: The matrix class takes this over without copying and could theoretically change the contents,
// though it doesn't in this case.
auto& buffer = const_cast<ValueBuffer<ElemType, ValueContainer>&>(inputs[i]);
shared_ptr<Matrix<ElemType>> matrix = dynamic_pointer_cast<Matrix<ElemType>>(input.second.matrix);
auto matrix = dynamic_pointer_cast<Matrix<ElemType>>(inputNode->ValuePtr());
auto type = matrix->GetMatrixType();
size_t numRows = input.second.sampleLayout.GetNumElements();
size_t numRows = inputNode->GetSampleLayout().GetNumElements();
if (buffer.m_buffer.data() == nullptr)
RuntimeError("Input %ls: Buffer is not allocated.", m_inputNodes[i]->GetName().c_str());
if (type == MatrixType::DENSE)
{
if (buffer.m_buffer.size() % numRows != 0)
@ -340,8 +342,12 @@ void CNTKEvalExtended<ElemType>::ForwardPassT(const std::vector<ValueBuffer<Elem
}
else if (type == MatrixType::SPARSE)
{
if (buffer.m_colIndices.data() == nullptr)
RuntimeError("Input %ls: Due to sparse input format, expected colIndices array, but was nullptr.", m_inputNodes[i]->GetName().c_str());
if (buffer.m_indices.data() == nullptr)
RuntimeError("Input %ls: Due to sparse input format, expected Indices array, but was nullptr.", m_inputNodes[i]->GetName().c_str());
if (buffer.m_colIndices.size() < 2)
RuntimeError("Input %ls: Expected at least one element.", m_inputNodes[i]->GetName().c_str());
RuntimeError("Input %ls: Expected at least one element (2 entries in colIndices array).", m_inputNodes[i]->GetName().c_str());
if (buffer.m_colIndices[0] != 0)
RuntimeError("Input %ls: First element of column indices must be 0", m_inputNodes[i]->GetName().c_str());
if (buffer.m_colIndices[buffer.m_colIndices.size() - 1] != buffer.m_indices.size())
@ -352,8 +358,8 @@ void CNTKEvalExtended<ElemType>::ForwardPassT(const std::vector<ValueBuffer<Elem
int numCols = type == MatrixType::DENSE ? buffer.m_buffer.size() / numRows : buffer.m_colIndices.size() - 1;
assert(numCols >= 1);
input.second.pMBLayout->Init(1, numCols);
input.second.pMBLayout->AddSequence(0, 0, 0, numCols);
inputNode->GetMBLayout()->Init(1, numCols);
inputNode->GetMBLayout()->AddSequence(0, 0, 0, numCols);
if (type == MatrixType::DENSE)
matrix->SetValue(numRows, numCols, matrix->GetDeviceId(), buffer.m_buffer.data(), matrixFlagNormal);

Просмотреть файл

@ -14,6 +14,11 @@
#include <msclr\marshal_cppstd.h>
#include "CNTKException.h"
#pragma warning(push)
#pragma warning(disable : 4793) // Function compiled as native
#include "Basics.h"
#include "ScriptableObjects.h"
#pragma warning(pop)
#include "EvalCommon.h"
#include "Eval.h"
@ -172,23 +177,10 @@ public:
/// <param name="funcName">Factory function name for retrieving the native model from the dll.</param>
ModelEvaluationExtended(String^ funcName)
{
auto dir = System::IO::Path::GetDirectoryName(System::Reflection::Assembly::GetExecutingAssembly()->Location);
auto dllFileName = System::IO::Path::Combine(dir, "evaldll.dll");
pin_ptr<const WCHAR> dllname = PtrToStringChars(dllFileName);
auto hModule = LoadLibrary(dllname);
if (hModule == nullptr)
{
throw gcnew CNTKException(System::String::Format("Cannot find library: {0}", gcnew String(dllname)));
}
try
{
msclr::interop::marshal_context context;
const std::string func = context.marshal_as<std::string>(funcName);
auto procAddress = GetProcAddress(hModule, func.c_str());
auto getEvalProc = (GetEvalProc<ElemType>)procAddress;
pin_ptr <IEvaluateModelExtended<ElemType>*> p_eval = &m_eval;
getEvalProc(p_eval);
GetEvalExtended<ElemType>(p_eval);
}
catch (const exception& ex)
{
@ -263,7 +255,14 @@ public:
outputNodeNames.push_back(context.marshal_as<std::wstring>(output));
}
m_eval->StartForwardEvaluation(outputNodeNames);
try
{
m_eval->StartForwardEvaluation(outputNodeNames);
}
catch (const exception& ex)
{
throw GetCustomException(ex);
}
}
//
@ -367,6 +366,11 @@ private:
{
return gcnew CNTKBadAllocException(gcnew System::String(ex.what()));
}
else if (dynamic_cast<const ScriptableObjects::ScriptingException*>(&ex) != nullptr) // Includes derived classes
{
const auto& err = dynamic_cast<const ScriptableObjects::ScriptingException&>(ex);
return gcnew CNTKLogicErrorException(gcnew System::String(wstrprintf(L"%ls\n%ls", utf16(err.what()).c_str(), err.GetError(L"").c_str()).c_str()), nullptr);
}
else
{
return gcnew CNTKException(gcnew System::String(ex.what()));

Просмотреть файл

@ -43,21 +43,10 @@ public:
/// <param name="funcName">Factory function name for retrieving the native model from the dll.</param>
IEvaluateModelManaged(String^ funcName)
{
pin_ptr<const WCHAR> dllname = PtrToStringChars("evaldll.dll");
auto hModule = LoadLibrary(dllname);
if (hModule == nullptr)
{
throw gcnew CNTKException(System::String::Format("Cannot find library: {0}", gcnew String(dllname)));
}
try
{
msclr::interop::marshal_context context;
const std::string func = context.marshal_as<std::string>(funcName);
auto procAddress = GetProcAddress(hModule, func.c_str());
auto getEvalProc = (GetEvalProc<ElemType>)procAddress;
pin_ptr <IEvaluateModel<ElemType>*> p_eval = &m_eval;
getEvalProc(p_eval);
GetEval<ElemType>(p_eval);
}
catch (const exception& ex)
{

Просмотреть файл

@ -56,6 +56,8 @@
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
<AdditionalDependencies>EvalDLL.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<DelayLoadDLLs>EvalDll.dll</DelayLoadDLLs>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(DebugBuild)">
@ -66,8 +68,6 @@
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<DelayLoadDLLs>
</DelayLoadDLLs>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(ReleaseBuild)">
@ -77,8 +77,6 @@
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<DelayLoadDLLs>
</DelayLoadDLLs>
</Link>
</ItemDefinitionGroup>
<ItemGroup>

Просмотреть файл

@ -9,6 +9,7 @@
#include <emmintrin.h>
#include <tmmintrin.h>
#include <immintrin.h>
#include <smmintrin.h>
#include <assert.h>
#include <cstdint>
#include <iostream>

Просмотреть файл

@ -116,6 +116,44 @@ const char* CudaErrString<curandStatus>(curandStatus)
namespace Microsoft { namespace MSR { namespace CNTK {
// Global switch: when false (the default), SyncGuard instances do nothing
// unless constructed with forceSync = true.
/*static*/ bool SyncGuard::s_isSyncEnabled = false;

// Turn on synchronization for all subsequently constructed SyncGuard instances.
/*static*/ void SyncGuard::EnableSync()
{
    s_isSyncEnabled = true;
}
// Begin a synchronized stretch of CUDA work. When syncing is requested
// (per-instance via forceSync, or globally via EnableSync()), clear any
// pending CUDA error and create the event that the destructor will record
// and wait on; otherwise leave m_done null so the destructor is a no-op.
SyncGuard::SyncGuard(bool forceSync /*= false*/)
    : m_forceSync(forceSync)
{
    m_done = nullptr;
    if (m_forceSync || s_isSyncEnabled)
    {
        // failures of earlier, unrelated launches would otherwise surface here
        CUDA_CALL(cudaGetLastError());
        CUDA_CALL(cudaEventCreate(&m_done));
    }
}
// End of the synchronized stretch: record an event and wait for it, so kernel
// errors from this stretch surface here as exceptions (via CUDA_CALL).
SyncGuard::~SyncGuard()
{
    // Guard on m_done rather than re-evaluating (m_forceSync || s_isSyncEnabled):
    // if EnableSync() is called after this guard was constructed with syncing
    // disabled, m_done is still nullptr and recording it would fail. m_done is
    // non-null exactly when the constructor decided to sync.
    if (m_done != nullptr)
    {
        // The regular use of this destructor is to synchronize the GPU, but also
        // to check for errors. So this destructor is where CUDA errors would be thrown.
        // If this destructor runs during stack unwinding, then a different error has
        // already happened that should be reported; so we only clean up the resource.
        if (std::uncaught_exception())
            cudaEventDestroy(m_done);
        else
        {
            // failures in a prior launch might be reported here
            CUDA_CALL(cudaEventRecord(m_done));
            CUDA_CALL(cudaEventSynchronize(m_done));
            CUDA_CALL(cudaEventDestroy(m_done));
        }
    }
}
template <typename AllocatedElemType>
AllocatedElemType* TracingGPUMemoryAllocator::Allocate(int deviceId, size_t numRows, size_t numCols)
{
@ -4278,11 +4316,16 @@ void GPUMatrix<ElemType>::RCRFTransGrdCompute(const GPUMatrix<ElemType>& lbls,
template <class ElemType>
static shared_ptr<GPUMatrix<ElemType>> GetOnesVector(size_t N, DEVICEID_TYPE deviceId)
{
// using an array of shared_ptrs because those are thread-safe. The objects themselves are immutable.
// And using a plain array so this will never get freed, avoiding free-after-DLL-unload issues.
static shared_ptr<GPUMatrix<ElemType>> onesCache[32]; // cache of objects
if (deviceId >= _countof(onesCache))
LogicError("GetOnesVector: onesCache[] too small (%d entries), increase (you need %d) and recompile.", (int) _countof(onesCache), (int) deviceId + 1);
// using a dynamically allocated array so this will never get freed, avoiding free-after-DLL-unload issues.
// and using shared_ptrs since we don't want to leak more than CacheSize elements
// when using a plain array we would have to control lifetime of the object and destructor would be called for every element in the array at the end
const int CacheSize = 32;
static shared_ptr<GPUMatrix<ElemType>> * onesCache = new shared_ptr<GPUMatrix<ElemType>>[CacheSize]; // cache of objects
if (deviceId >= CacheSize){
LogicError("GetOnesVector: onesCache[] too small (%d entries), increase (you need %d) and recompile.", CacheSize, (int)deviceId + 1);
}
auto p = onesCache[deviceId];
if (!p || p->GetNumRows() < N) // must (re-)allocate
{

Просмотреть файл

@ -61,6 +61,27 @@ cudaStream_t MATH_API GetStream();
namespace Microsoft { namespace MSR { namespace CNTK {
// -----------------------------------------------------------------------
// SyncGuard -- synchronize around CUDA calls
//
// While an instance is alive it brackets a stretch of CUDA work; on
// destruction it records a CUDA event and waits for it, so errors from that
// stretch surface at a deterministic point. Synchronization happens only
// when requested per-instance (forceSync) or globally via EnableSync().
// -----------------------------------------------------------------------

class SyncGuard
{
private:
    static bool s_isSyncEnabled; // global switch, set once via EnableSync()
    bool m_forceSync;            // per-instance override from the constructor
#ifndef CPUONLY
    cudaEvent_t m_done;          // event recorded/awaited in the destructor (GPU builds only)
#endif

public:
    static MATH_API void EnableSync();
    SyncGuard(bool forceSync = false);
    ~SyncGuard();
};
// -----------------------------------------------------------------------
// DeviceBoundNumber -- This class represents a number which resides on a particular device. Use it to avoid unnecessary transfers between CPU and GPU
// -----------------------------------------------------------------------
@ -623,51 +644,4 @@ static void CudaCall(ERRTYPE retCode, const char* exprString, const char* libNam
#define CURAND_CALL(expr) (CudaCall((expr), #expr, "CURAND", CURAND_STATUS_SUCCESS))
#define CUDNN_CALL(expr) (CudaCall((expr), #expr, "cuDNN", CUDNN_STATUS_SUCCESS))
// -----------------------------------------------------------------------
// SyncGuard -- synchronize around CUDA calls
// -----------------------------------------------------------------------
class SyncGuard
{
static bool DoSync()
{
#ifdef NO_SYNC // this strange way of writing it allows modifying this variable at runtime in the debugger
static bool do_sync = false;
#else
static bool do_sync = true;
#endif
return do_sync;
}
cudaEvent_t m_done;
public:
SyncGuard()
{
m_done = nullptr;
if (DoSync())
{
CUDA_CALL(cudaGetLastError());
CUDA_CALL(cudaEventCreate(&m_done));
}
}
~SyncGuard()
{
if (DoSync())
{
// The regular use of this destructor is to synchronize the GPU, but also
// to check for errors. So this destructor is where CUDA errors would be thrown.
// If this destructor runs during stack unwinding, then a different error has
// already happened that should be reported; so we only clean up the resource.
if (std::uncaught_exception())
cudaEventDestroy(m_done);
else
{
// failures in a prior launch might be reported here
CUDA_CALL(cudaEventRecord(m_done));
CUDA_CALL(cudaEventSynchronize(m_done));
CUDA_CALL(cudaEventDestroy(m_done));
}
}
}
};
#endif // CPUONLY

Просмотреть файл

@ -2276,6 +2276,9 @@ float CudaTimer::Elapsed()
return 0;
}
// CPU-only build: there is no GPU to synchronize, so enabling sync is a no-op.
/*static*/ void SyncGuard::EnableSync()
{
}
} } }
// define a dummy GPUWatcher class too

Просмотреть файл

@ -14,6 +14,10 @@
#pragma warning(push)
#pragma warning(disable : 4251) // needs to have dll-interface to be used by clients of... caused by TensorView::m_shape which is only private. We use the same compiler everywhere.
namespace Microsoft { namespace MSR { namespace CNTK { namespace Test {
template <class ElemType> struct TensorTest;
}}}}
// This class is exported from the Math.dll.
namespace Microsoft { namespace MSR { namespace CNTK {
@ -148,7 +152,8 @@ private:
// -------------------------------------------------------------------
const Matrix<ElemType>& GetSOB() const { return *m_sob; }
Matrix<ElemType>& GetSOB() { return *m_sob; }
Matrix<ElemType>& GetSOB() { return *m_sob; }
friend Test::TensorTest<ElemType>;
// -------------------------------------------------------------------
// sob members

Просмотреть файл

@ -110,9 +110,6 @@
<PrecompiledHeader>Create</PrecompiledHeader>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="uci_to_cntk_text_format_converter.py" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets" />
</Project>

Просмотреть файл

@ -47,13 +47,5 @@
<Filter Include="Common\Include">
<UniqueIdentifier>{C6F55578-121A-4D7C-8F57-4172BC5C463B}</UniqueIdentifier>
</Filter>
<Filter Include="Scripts">
<UniqueIdentifier>{cd70d891-88aa-40a4-8e47-0e31e4cac48e}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<None Include="uci_to_cntk_text_format_converter.py">
<Filter>Scripts</Filter>
</None>
</ItemGroup>
</Project>
</Project>

Просмотреть файл

@ -78,6 +78,12 @@ public:
return &m_utterances[index];
}
// Get utterance description by its index (non-const overload, so the caller
// may mutate the returned UtteranceDescription, e.g. set its expansion length).
UtteranceDescription* GetUtterance(size_t index)
{
    return &m_utterances[index];
}
// Get start frame index inside chunk.
size_t GetStartFrameIndexInsideChunk(size_t index) const
{
@ -105,7 +111,7 @@ public:
}
const size_t ts = m_firstFrames[index];
const size_t n = GetUtterance(index)->GetNumberOfFrames();
const size_t n = m_utterances[index].GetNumberOfFrames();
return msra::dbn::matrixstripe(m_frames, ts, n);
}

Просмотреть файл

@ -9,7 +9,6 @@
#include "HTKDataDeserializer.h"
#include "ConfigHelper.h"
#include "Basics.h"
#include <numeric>
// TODO: This will be removed when dependency on old code is eliminated.
// Currently this fixes the linking.
@ -46,6 +45,12 @@ HTKDataDeserializer::HTKDataDeserializer(
ConfigParameters input = inputs.front();
auto inputName = input.GetMemberIds().front();
m_expandToPrimary = cfg(L"expandToUtterance", false);
if (m_expandToPrimary && m_primary)
{
InvalidArgument("Cannot expand utterances of the primary stream %ls, please change your configuration.", inputName.c_str());
}
ConfigParameters streamConfig = input(inputName);
ConfigHelper config(streamConfig);
@ -85,6 +90,12 @@ HTKDataDeserializer::HTKDataDeserializer(
m_dimension = config.GetFeatureDimension();
m_dimension = m_dimension * (1 + context.first + context.second);
m_expandToPrimary = feature(L"expandToUtterance", false);
if (m_expandToPrimary && m_primary)
{
InvalidArgument("Cannot expand utterances of the primary stream %ls, please change your configuration.", featureName.c_str());
}
InitializeChunkDescriptions(config);
InitializeStreams(featureName);
InitializeFeatureInformation();
@ -118,6 +129,13 @@ void HTKDataDeserializer::InitializeChunkDescriptions(ConfigHelper& config)
UtteranceDescription description(move(msra::asr::htkfeatreader::parsedpath(u)));
size_t numberOfFrames = description.GetNumberOfFrames();
if (m_expandToPrimary && numberOfFrames != 1)
{
RuntimeError("Expanded stream should only contain sequences of length 1, utterance '%s' has %d",
description.GetKey().c_str(),
(int)numberOfFrames);
}
// For logging, also account for utterances and frames that we skip
allUtterances++;
allFrames += numberOfFrames;
@ -470,7 +488,8 @@ void HTKDataDeserializer::GetSequenceById(ChunkIdType chunkId, size_t id, vector
// wrapper that allows m[j].size() and m[j][i] as required by augmentneighbors()
MatrixAsVectorOfVectors utteranceFramesWrapper(utteranceFrames);
FeatureMatrix features(m_dimension, m_frameMode ? 1 : utterance->GetNumberOfFrames());
size_t utteranceLength = m_frameMode ? 1 : (m_expandToPrimary ? utterance->GetExpansionLength() : utterance->GetNumberOfFrames());
FeatureMatrix features(m_dimension, utteranceLength);
if (m_frameMode)
{
@ -479,9 +498,16 @@ void HTKDataDeserializer::GetSequenceById(ChunkIdType chunkId, size_t id, vector
auto fillIn = features.col(0);
AugmentNeighbors(utteranceFramesWrapper, frameIndex, m_augmentationWindow.first, m_augmentationWindow.second, fillIn);
}
else
else if (m_expandToPrimary) // Broadcast a single frame to the complete utterance.
{
for (size_t resultingIndex = 0; resultingIndex < utterance->GetExpansionLength(); ++resultingIndex)
{
auto fillIn = features.col(resultingIndex);
AugmentNeighbors(utteranceFramesWrapper, 0, m_augmentationWindow.first, m_augmentationWindow.second, fillIn);
}
}
else // Augment the complete utterance.
{
// Augment complete utterance.
for (size_t frameIndex = 0; frameIndex < utterance->GetNumberOfFrames(); ++frameIndex)
{
auto fillIn = features.col(frameIndex);
@ -508,10 +534,10 @@ void HTKDataDeserializer::GetSequenceById(ChunkIdType chunkId, size_t id, vector
}
// Gets sequence description by its key.
bool HTKDataDeserializer::GetSequenceDescriptionByKey(const KeyType& key, SequenceDescription& d)
bool HTKDataDeserializer::GetSequenceDescription(const SequenceDescription& primary, SequenceDescription& d)
{
assert(!m_primary);
auto iter = m_keyToChunkLocation.find(key.m_sequence);
auto iter = m_keyToChunkLocation.find(primary.m_key.m_sequence);
if (iter == m_keyToChunkLocation.end())
{
return false;
@ -519,11 +545,29 @@ bool HTKDataDeserializer::GetSequenceDescriptionByKey(const KeyType& key, Sequen
auto chunkId = iter->second.first;
auto utteranceIndexInsideChunk = iter->second.second;
const auto& chunk = m_chunks[chunkId];
const auto& sequence = chunk.GetUtterance(utteranceIndexInsideChunk);
auto& chunk = m_chunks[chunkId];
auto utterance = chunk.GetUtterance(utteranceIndexInsideChunk);
d.m_chunkId = (ChunkIdType)chunkId;
d.m_id = m_frameMode ? chunk.GetStartFrameIndexInsideChunk(utteranceIndexInsideChunk) + key.m_sample : utteranceIndexInsideChunk;
d.m_numberOfSamples = m_frameMode ? 1 : (uint32_t)sequence->GetNumberOfFrames();
// TODO: When we move frame mode from deserializer, expanding should go away and be done on the higher level.
// TODO: Currently for the frame mode the secondary deserializer does not know the size of the full utterance,
// because each frame has its own sequence description. So we get the length by the max sample we have seen.
if (m_expandToPrimary)
{
// Expanding for sequence length/or max seen frame.
size_t maxLength = max(primary.m_numberOfSamples, (uint32_t)primary.m_key.m_sample + 1);
if (utterance->GetExpansionLength() < maxLength)
{
utterance->SetExpansionLength(maxLength);
}
d.m_id = utteranceIndexInsideChunk;
}
else
{
d.m_id = m_frameMode ? chunk.GetStartFrameIndexInsideChunk(utteranceIndexInsideChunk) + primary.m_key.m_sample : utteranceIndexInsideChunk;
}
d.m_numberOfSamples = m_frameMode ? 1 : (uint32_t)utterance->GetNumberOfFrames();
return true;
}

Просмотреть файл

@ -34,8 +34,8 @@ public:
// Retrieves data for a chunk.
virtual ChunkPtr GetChunk(ChunkIdType chunkId) override;
// Gets sequence description by its key.
virtual bool GetSequenceDescriptionByKey(const KeyType&, SequenceDescription&) override;
// Gets sequence description by the primary one.
virtual bool GetSequenceDescription(const SequenceDescription& primary, SequenceDescription&) override;
private:
class HTKChunk;
@ -84,6 +84,10 @@ private:
unsigned int m_samplePeriod = 0;
size_t m_ioFeatureDimension = 0;
std::string m_featureKind;
// A flag that indicates whether the utterance should be extended to match the length of the utterance from the primary deserializer.
// TODO: This should be moved to the packers when deserializers work in sequence mode only.
bool m_expandToPrimary;
};
typedef std::shared_ptr<HTKDataDeserializer> HTKDataDeserializerPtr;

Просмотреть файл

@ -20,9 +20,12 @@ class UtteranceDescription
// Utterance id.
size_t m_id;
// Expansion length in case if utterance should be expanded.
size_t m_expansionLength;
public:
UtteranceDescription(msra::asr::htkfeatreader::parsedpath&& path)
: m_path(std::move(path))
: m_path(std::move(path)), m_expansionLength(0)
{
}
@ -48,6 +51,9 @@ public:
size_t GetId() const { return m_id; }
void SetId(size_t id) { m_id = id; }
size_t GetExpansionLength() const { return m_expansionLength; }
void SetExpansionLength(size_t length) { m_expansionLength = length; }
};
}}}

Просмотреть файл

@ -107,7 +107,6 @@
<ClInclude Include="..\..\Common\Include\DataReader.h" />
<ClInclude Include="..\..\Common\Include\DataWriter.h" />
<ClInclude Include="..\..\Common\Include\ssematrix.h" />
<ClInclude Include="basetypes.h" />
<ClInclude Include="biggrowablevectors.h" />
<ClInclude Include="chunkevalsource.h" />
<ClInclude Include="..\..\Common\Include\fileutil.h" />

Просмотреть файл

@ -32,9 +32,6 @@
<ClInclude Include="stdafx.h" />
<ClInclude Include="targetver.h" />
<ClInclude Include="utterancesourcemulti.h" />
<ClInclude Include="basetypes.h">
<Filter>Duplicates to remove</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\DataReader.h">
<Filter>Common\Include</Filter>
</ClInclude>

Просмотреть файл

@ -71,6 +71,11 @@ protected:
const unsigned char* b = (const unsigned char*) &v;
return (short) ((b[0] << 8) + b[1]);
}
// Byte-swaps a 16-bit unsigned value by reassembling it from its two in-memory
// bytes: the first byte in memory becomes the high byte of the result.
static unsigned short swapunsignedshort(unsigned short v) throw()
{
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(&v);
    const unsigned int high = bytes[0];
    const unsigned int low = bytes[1];
    return static_cast<unsigned short>((high << 8) + low);
}
static int swapint(int v) throw()
{
const unsigned char* b = (const unsigned char*) &v;
@ -81,13 +86,13 @@ protected:
{
int nsamples;
int sampperiod;
short sampsize;
unsigned short sampsize;
short sampkind;
void read(FILE* f)
{
nsamples = fgetint(f);
sampperiod = fgetint(f);
sampsize = fgetshort(f);
sampsize =(unsigned short) fgetshort(f);
sampkind = fgetshort(f);
}
@ -102,21 +107,24 @@ protected:
sampkind = (short) 9; // user type
int nRows = swapint(fgetint(f));
int nCols = swapint(fgetint(f));
sampsize = (short) (nRows * nCols); // features are stored as bytes;
int rawsampsize = nRows * nCols;
if (rawsampsize > UINT16_MAX)
RuntimeError("reading idx feature cache header: sample size overflow");
sampsize = (unsigned short)rawsampsize; // features are stored as bytes;
}
void write(FILE* f)
{
fputint(f, nsamples);
fputint(f, sampperiod);
fputshort(f, sampsize);
fputshort(f, (short) sampsize);
fputshort(f, sampkind);
}
void byteswap()
{
nsamples = swapint(nsamples);
sampperiod = swapint(sampperiod);
sampsize = swapshort(sampsize);
sampsize = swapunsignedshort(sampsize);
sampkind = swapshort(sampkind);
}
};
@ -215,7 +223,10 @@ public:
H.nsamples = 0; // unknown for now, updated in close()
H.sampperiod = period;
const int bytesPerValue = sizeof(float); // we do not support compression for now
H.sampsize = (short) featdim * bytesPerValue;
size_t rawsampsize = featdim * bytesPerValue;
if (rawsampsize > UINT16_MAX)
RuntimeError("htkfeatwriter: sample size overflow");
H.sampsize = (unsigned short)rawsampsize;
H.sampkind = parsekind(kind);
if (needbyteswapping)
H.byteswap();

Просмотреть файл

@ -60,6 +60,12 @@ void Bundler::CreateChunkDescriptions()
RuntimeError("Driving deserializer provided too many chunks.");
}
// Creating a table of weak chunks for non driving deserializers.
for (size_t i = 0; i < m_deserializers.size(); ++i)
{
m_weakChunkTable.push_back(std::vector<std::weak_ptr<Chunk>>(m_deserializers[i]->GetChunkDescriptions().size()));
}
m_chunks.reserve(chunks.size());
if (m_verbosity)
@ -105,7 +111,7 @@ void Bundler::CreateChunkDescriptions()
size_t sequenceSamples = sequence.m_numberOfSamples;
for (size_t deserializerIndex = 1; deserializerIndex < m_deserializers.size(); ++deserializerIndex)
{
isValid = m_deserializers[deserializerIndex]->GetSequenceDescriptionByKey(sequenceDescriptions[sequenceIndex].m_key, s);
isValid = m_deserializers[deserializerIndex]->GetSequenceDescription(sequenceDescriptions[sequenceIndex], s);
if (!isValid)
{
invalid.insert(sequenceIndex);
@ -193,7 +199,7 @@ void Bundler::GetSequencesForChunk(ChunkIdType chunkId, std::vector<SequenceDesc
uint32_t sequenceSamples = sequence.m_numberOfSamples;
for (size_t deserializerIndex = 1; deserializerIndex < m_deserializers.size(); ++deserializerIndex)
{
m_deserializers[deserializerIndex]->GetSequenceDescriptionByKey(sequence.m_key, s);
m_deserializers[deserializerIndex]->GetSequenceDescription(sequence, s);
sequenceSamples = std::max(sequenceSamples, s.m_numberOfSamples);
}
sequence.m_numberOfSamples = sequenceSamples;
@ -251,10 +257,9 @@ public:
// Creating sequence mapping and requiring underlying chunks.
SequenceDescription s;
for (size_t deserializerIndex = 1; deserializerIndex < m_parent->m_deserializers.size(); ++deserializerIndex)
for (size_t deserializerIndex = 1; deserializerIndex < deserializers.size(); ++deserializerIndex)
{
std::map<size_t, ChunkPtr> secondaryChunks;
auto& chunkTable = m_parent->m_weakChunkTable[deserializerIndex];
for (size_t sequenceIndex = 0; sequenceIndex < sequences.size(); ++sequenceIndex)
{
if (chunk->m_invalid.find(sequenceIndex) != chunk->m_invalid.end())
@ -263,19 +268,14 @@ public:
}
size_t currentIndex = sequenceIndex * deserializers.size() + deserializerIndex;
deserializers[deserializerIndex]->GetSequenceDescriptionByKey(sequences[sequenceIndex].m_key, s);
deserializers[deserializerIndex]->GetSequenceDescription(sequences[sequenceIndex], s);
m_sequenceToSequence[currentIndex] = s.m_id;
ChunkPtr secondaryChunk;
auto it = secondaryChunks.find(s.m_chunkId);
if (it == secondaryChunks.end())
ChunkPtr secondaryChunk = chunkTable[s.m_chunkId].lock();
if (!secondaryChunk)
{
secondaryChunk = deserializers[deserializerIndex]->GetChunk(s.m_chunkId);
secondaryChunks.insert(make_pair(s.m_chunkId, secondaryChunk));
}
else
{
secondaryChunk = it->second;
chunkTable[s.m_chunkId] = secondaryChunk;
}
m_innerChunks[currentIndex] = secondaryChunk;

Просмотреть файл

@ -59,6 +59,10 @@ private:
// (i.e. often in speech)
bool m_takePrimarySequenceLength;
// A table of loaded chunks to make sure we do not load same chunk twice.
// Inner vector is the table of chunk id into weak pointer, the outer vector has an element per deserializer.
std::vector<std::vector<std::weak_ptr<Chunk>>> m_weakChunkTable;
// General configuration
int m_verbosity;
};

Просмотреть файл

@ -37,9 +37,9 @@ public:
return m_deserializer->GetSequencesForChunk(chunkId, descriptions);
}
virtual bool GetSequenceDescriptionByKey(const KeyType& key, SequenceDescription& description) override
virtual bool GetSequenceDescription(const SequenceDescription& primary, SequenceDescription& description) override
{
return m_deserializer->GetSequenceDescriptionByKey(key, description);
return m_deserializer->GetSequenceDescription(primary, description);
}
// Gets chunk data given its id.

Просмотреть файл

@ -161,11 +161,11 @@ public:
// Gets sequence descriptions for a given a chunk.
virtual void GetSequencesForChunk(ChunkIdType chunkId, std::vector<SequenceDescription>& descriptions) = 0;
// Gets sequence description by its key.
// Used by deserializers not in driving/primary mode.
// Returns false if provided sequence is not valid.
// Gets sequence description given the sequence description of the primary deserializer.
// Used for deserializers not in driving/primary mode.
// Returns false if the corresponding secondary sequence is not valid.
// TODO: Possibly move this out into a separate interface.
virtual bool GetSequenceDescriptionByKey(const KeyType& key, SequenceDescription& description) = 0;
virtual bool GetSequenceDescription(const SequenceDescription& primary, SequenceDescription& description) = 0;
// Gets chunk data given its id.
virtual ChunkPtr GetChunk(ChunkIdType chunkId) = 0;

Просмотреть файл

@ -17,9 +17,9 @@ public:
DataDeserializerBase()
{}
virtual bool GetSequenceDescriptionByKey(const KeyType&, SequenceDescription&) override
virtual bool GetSequenceDescription(const SequenceDescription& primary, SequenceDescription& result) override
{
NOT_IMPLEMENTED;
return GetSequenceDescriptionByKey(primary.m_key, result);
}
virtual std::vector<StreamDescriptionPtr> GetStreamDescriptions() const override
@ -28,6 +28,11 @@ public:
}
protected:
virtual bool GetSequenceDescriptionByKey(const KeyType&, SequenceDescription&)
{
NOT_IMPLEMENTED;
}
// Streams this data deserializer can produce.
std::vector<StreamDescriptionPtr> m_streams;

Просмотреть файл

@ -900,6 +900,14 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
bool noMoreSamplesToProcess = false;
for (;;)
{
// Per-minibatch performance measurements; only enabled when perfTraceLevel > 0
Timer fineGrainedPerfMeasurementTimer;
double readTime = 0;
double computeTime = 0;
double parameterUpdateTime = 0;
if (m_perfTraceLevel > 0)
fineGrainedPerfMeasurementTimer.Start();
// get minibatch
// TODO: is it guaranteed that the GPU is already completed at this point, is it safe to overwrite the buffers?
size_t actualMBSize = 0;
@ -908,6 +916,13 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
if (!wasDataRead && (!useDistributedMBReading || noMoreSamplesToProcess)) // in case of distributed reading, we do a few more loops until all ranks have completed
break; // end of epoch
if (m_perfTraceLevel > 0)
{
fineGrainedPerfMeasurementTimer.Stop();
readTime = fineGrainedPerfMeasurementTimer.ElapsedSeconds();
fineGrainedPerfMeasurementTimer.Start();
}
// Note: If !wasDataRead then the data that GetMinibatchIntoNetwork() was supposed to full in are undefined.
// Must not touch them.
@ -998,6 +1013,15 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
smbDispatcher.DoneWithCurrentMinibatch();
} // if (actualMBSize > 0)
if (m_perfTraceLevel > 0)
{
std::unique_ptr<MatrixComputeStreamEvent> mainStreamSyncEvent(MatrixComputeStreamEvent::Create(net->GetDeviceId()));
mainStreamSyncEvent->SynchronizeEvent();
fineGrainedPerfMeasurementTimer.Stop();
computeTime = fineGrainedPerfMeasurementTimer.ElapsedSeconds();
fineGrainedPerfMeasurementTimer.Start();
}
// for momentum/clipping/regularization/etc., as well as for progress and statistics, we should only count frames that are not gaps
// #samples according to the default dynamic axis, for use with criterion nodes that do not have an MBLayout
size_t numSamplesWithLabelOfNetwork = wasDataRead ? net->GetNumSamplesWithLabelOfNetwork(actualMBSize) : 0;
@ -1105,6 +1129,17 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
}
}
if (m_perfTraceLevel > 0)
{
std::unique_ptr<MatrixComputeStreamEvent> mainStreamSyncEvent(MatrixComputeStreamEvent::Create(net->GetDeviceId()));
mainStreamSyncEvent->SynchronizeEvent();
fineGrainedPerfMeasurementTimer.Stop();
parameterUpdateTime = fineGrainedPerfMeasurementTimer.ElapsedSeconds();
PREPENDTS(stderr);
fprintf(stderr, "Perf trace: Read = %.5gs; Compute = %.5gs; Parameter update = %.5gs\n", readTime, computeTime, parameterUpdateTime);
}
commTimer.Start();
// aggregation by model averaging or block momentum
if (useModelAggregation)
@ -1131,7 +1166,7 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
if (useDistributedMBReading)
{
noMoreSamplesToProcess = !wasDataRead;
}
}
if (nSamplesSinceLastModelSync >= m_nFramesBetweenASGDSync[epochNumber])
{
@ -2629,6 +2664,8 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType)
// BUGBUG: these are not passed to Init()
m_doUnitTest = configSGD(L"unitTest", false);
m_perfTraceLevel = configSGD(L"perfTraceLevel", (int)0);
// parallel training
m_parallelizationMethod = ParallelizationMethod::none;
m_numGradientBits = 32;
@ -2650,27 +2687,27 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType)
else
{
size_t numMPIWorkers = pMPI->NumNodesInUse();
const ConfigRecordType& configParallelTrain(configSGD(L"ParallelTrain", ConfigRecordType::Record()));
m_parallelizationMethod = ParseParallelizationMethod(configParallelTrain(L"parallelizationMethod", L"none"));
const ConfigRecordType& configParallelTrain(configSGD(L"ParallelTrain", ConfigRecordType::Record()));
m_parallelizationMethod = ParseParallelizationMethod(configParallelTrain(L"parallelizationMethod", L"none"));
m_parallelizationStartEpochNum = configParallelTrain(L"parallelizationStartEpoch", (int)1) - 1; // Epoch numbers internally are 0 based
m_enableDistributedMBReading = configParallelTrain(L"distributedMBReading", false);
m_enableDistributedMBReading = configParallelTrain(L"distributedMBReading", false);
m_syncStatsTrace = configParallelTrain(L"syncPerfStats", (int)0);
if (configParallelTrain.Exists(L"DataParallelSGD"))
{
const ConfigRecordType& configDataParallelSGD(configParallelTrain(L"DataParallelSGD", ConfigRecordType::Record()));
size_t defaultGradientBits = 8 * sizeofElemType;
m_numGradientBits = configDataParallelSGD(L"gradientBits", defaultGradientBits);
m_zeroThresholdFor1Bit = configDataParallelSGD(L"useZeroThresholdFor1BitQuantization", true);
m_bufferedAsyncGradientAggregation = configDataParallelSGD(L"useBufferedAsyncGradientAggregation", false);
if ( m_numGradientBits < 1 || m_numGradientBits > (8 * sizeofElemType) )
if (configParallelTrain.Exists(L"DataParallelSGD"))
{
InvalidArgument("gradientBits must be in the range [1, 32] when using precision=float and in range [1, 64] when using precision=double!");
const ConfigRecordType& configDataParallelSGD(configParallelTrain(L"DataParallelSGD", ConfigRecordType::Record()));
size_t defaultGradientBits = 8 * sizeofElemType;
m_numGradientBits = configDataParallelSGD(L"gradientBits", defaultGradientBits);
m_zeroThresholdFor1Bit = configDataParallelSGD(L"useZeroThresholdFor1BitQuantization", true);
m_bufferedAsyncGradientAggregation = configDataParallelSGD(L"useBufferedAsyncGradientAggregation", false);
if ( m_numGradientBits < 1 || m_numGradientBits > (8 * sizeofElemType) )
{
InvalidArgument("gradientBits must be in the range [1, 32] when using precision=float and in range [1, 64] when using precision=double!");
}
}
}
if (configParallelTrain.Exists(L"ModelAveragingSGD"))
{
const ConfigRecordType& configMASGD(configParallelTrain(L"ModelAveragingSGD", ConfigRecordType::Record()));
if (configParallelTrain.Exists(L"ModelAveragingSGD"))
{
const ConfigRecordType& configMASGD(configParallelTrain(L"ModelAveragingSGD", ConfigRecordType::Record()));
if (configMASGD.Exists(L"blockSizePerWorker") && configMASGD.Exists(L"blockSize"))
{
InvalidArgument("It is only allowed to set blockSizePerWorker or blockSize, not both of them");
@ -2689,8 +2726,8 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType)
m_modelAggregationBlockSize = 40000 * numMPIWorkers; // default value
}
#if 1 // legacy option
if (configMASGD.Exists(L"syncFrequencyInFrames"))
{
if (configMASGD.Exists(L"syncFrequencyInFrames"))
{
if (configMASGD.Exists(L"blockSizePerWorker") || configMASGD.Exists(L"blockSize"))
InvalidArgument("syncFrequencyInFrames is a deprecated alias of blockSizePerWorker. It is not allowed to specify both of them");
m_modelAggregationBlockSize = configMASGD(L"syncFrequencyInFrames");
@ -2706,15 +2743,15 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType)
m_modelAggregationBlockSize = configMASGD(L"syncPeriod");
m_modelAggregationBlockSize *= numMPIWorkers;
fprintf(stderr, "WARNING: option syncPeroid in ModelAveragingSGD is going to be deprecated. Please use blockSizePerWorker instead in the future.\n");
}
}
#endif
}
if (configParallelTrain.Exists(L"BlockMomentumSGD"))
{
}
if (configParallelTrain.Exists(L"BlockMomentumSGD"))
{
#ifndef CNTK_PARALLEL_TRAINING_SUPPORT
InvalidArgument("BlockMomentumSGD is not enabled in this version.\n");
InvalidArgument("BlockMomentumSGD is not enabled in this version.\n");
#else
const ConfigRecordType& configBMSGD(configParallelTrain(L"BlockMomentumSGD", ConfigRecordType::Record()));
const ConfigRecordType& configBMSGD(configParallelTrain(L"BlockMomentumSGD", ConfigRecordType::Record()));
if (configBMSGD.Exists(L"blockSize") && configBMSGD.Exists(L"blockSizePerWorker"))
{
InvalidArgument("It is only allowed to set blockSizePerWorker or blockSize, not both of them");
@ -2744,33 +2781,33 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType)
fprintf(stderr, "WARNING: option syncPeroid in BlockMomentumSGD is going to be deprecated. Please use blockSizePerWorker instead in the future.\n");
}
#endif
m_resetSGDMomentum = configBMSGD(L"resetSGDMomentum", true);
m_useNesterovBlockMomentum = configBMSGD(L"useNesterovMomentum", true);
m_blockLearningRate = configBMSGD(L"blockLearningRate", 1.0);
m_resetSGDMomentum = configBMSGD(L"resetSGDMomentum", true);
m_useNesterovBlockMomentum = configBMSGD(L"useNesterovMomentum", true);
m_blockLearningRate = configBMSGD(L"blockLearningRate", 1.0);
if (configBMSGD.Exists(L"blockMomentumPerSync") && configBMSGD.Exists(L"blockMomentumAsTimeConstant"))
{
InvalidArgument("It is only allowed to set either blockMomentumPerSync or blockMomentumAsTimeConstant, not both of them");
}
else if (configBMSGD.Exists(L"blockMomentumAsTimeConstant"))
{
m_blockMomentumAsTimeConstant = configBMSGD(L"blockMomentumAsTimeConstant");
}
if (configBMSGD.Exists(L"blockMomentumPerSync") && configBMSGD.Exists(L"blockMomentumAsTimeConstant"))
{
InvalidArgument("It is only allowed to set either blockMomentumPerSync or blockMomentumAsTimeConstant, not both of them");
}
else if (configBMSGD.Exists(L"blockMomentumAsTimeConstant"))
{
m_blockMomentumAsTimeConstant = configBMSGD(L"blockMomentumAsTimeConstant");
}
#if 1 // This option "blockMomentumPerSync" is going to be deprecated in the future
else if (configBMSGD.Exists(L"blockMomentumPerSync"))
{
double blockMomentum = configBMSGD(L"blockMomentumPerSync");
else if (configBMSGD.Exists(L"blockMomentumPerSync"))
{
double blockMomentum = configBMSGD(L"blockMomentumPerSync");
m_blockMomentumAsTimeConstant = BlockMomentumSGD<double>::Momentum2TimeConstant(blockMomentum, m_modelAggregationBlockSize);
}
}
#endif
else /*if (!configBMSGD.Exists(L"blockMomentumPerSync") && !configBMSGD.Exists(L"blockMomentumAsTimeConstant"))*/
{
else /*if (!configBMSGD.Exists(L"blockMomentumPerSync") && !configBMSGD.Exists(L"blockMomentumAsTimeConstant"))*/
{
double blockMomentum = 1.0 - 1.0 / (double)numMPIWorkers; // this is a default value which ensures each block update contributes equally
m_blockMomentumAsTimeConstant = BlockMomentumSGD<double>::Momentum2TimeConstant(blockMomentum, m_modelAggregationBlockSize);
}
}
#endif
}
if (configParallelTrain.Exists(L"DataParallelASGD"))
{
const ConfigRecordType & configDataParallelASGD(configParallelTrain(L"DataParallelASGD", ConfigRecordType::Record()));
@ -2784,7 +2821,7 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType)
m_adjustcoefficient = configAdjustLearningRateAtBeginning(L"adjustCoefficient", (double)0.1);
m_adjustnbminibatch = configAdjustLearningRateAtBeginning(L"adjustNbMinibatch", (size_t)256);
}
}
}
} // if (!pMPI)
} // if (configSGD.Exists(L"ParallelTrain"))
}

Просмотреть файл

@ -253,6 +253,8 @@ protected:
bool m_useAllDataForPreComputedNode;
int m_perfTraceLevel;
// Parallel training
MPIWrapperPtr m_mpi;

Просмотреть файл

@ -0,0 +1,189 @@
#!/usr/bin/env python
# ----------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
# ---------------------------------------------------------
# This script extracts information (hardware used, final results) contained in the baselines files
# and generates a markdown file (wiki page)
import sys, os, re
import TestDriver as td
try:
import six
except ImportError:
print("Python package 'six' not installed. Please run 'pip install six'.")
sys.exit(1)
thisDir = os.path.dirname(os.path.realpath(__file__))
windows = os.getenv("OS")=="Windows_NT"
class Baseline:
    """Hardware description and final train/test results parsed from one
    baseline log file."""

    def __init__(self, fullPath, testResult = "", trainResult = ""):
        # Path of the baseline file this record was parsed from.
        self.fullPath = fullPath
        self.cpuInfo = ""
        self.gpuInfo = ""
        self.testResult = testResult
        self.trainResult = trainResult

    # Extracts the last train/test result lines. e.g.
    # Finished Epoch[ 5 of 5]: [Training] ce = 2.32253198 * 1000 err = 0.90000000 * 1000 totalSamplesSeen = 5000 learningRatePerSample = 2e-06 epochTime=0.175781
    # Final Results: Minibatch[1-1]: err = 0.90000000 * 100 ce = 2.32170486 * 100 perplexity = 10.1930372
    def extractResultsInfo(self, baselineContent):
        trainMatches = re.findall('.*(Finished Epoch\[ *\d+ of \d+\]\: \[Training\]) (.*)', baselineContent)
        if trainMatches:
            # Only the last (final-epoch) line is kept; the trailing two
            # characters are stripped as in the original formatting.
            self.trainResult = Baseline.formatLastTrainResult(trainMatches[-1])[0:-2]
        testMatches = re.findall('.*(Final Results: Minibatch\[1-\d+\]:)(\s+\* \d+)?\s+(.*)', baselineContent)
        if testMatches:
            self.testResult = Baseline.formatLastTestResult(testMatches[-1])[0:-2]

    # Extracts cpu and gpu info from the baseline header, delimited by the
    # "CPU info:"/"GPU info:" markers and the following "----------" line.
    def extractHardwareInfo(self, baselineContent):
        cpuSectionStart = baselineContent.find("CPU info:")
        cpuSectionEnd = baselineContent.find("----------", cpuSectionStart)
        cpuMatch = re.search("^CPU info:\s+"
                             "CPU Model (Name:\s*.*)\s+"
                             "(Hardware threads: \d+)\s+"
                             "Total (Memory:\s*.*)\s+", baselineContent[cpuSectionStart:cpuSectionEnd], re.MULTILINE)
        if cpuMatch is None:
            return
        self.cpuInfo = "\n".join(cpuMatch.groups())
        gpuSectionStart = baselineContent.find("GPU info:")
        gpuSectionEnd = baselineContent.find("----------", gpuSectionStart)
        gpuSection = baselineContent[gpuSectionStart:gpuSectionEnd]
        gpuDevices = re.findall("\t\t(Device\[\d+\]: cores = \d+; computeCapability = \d\.\d; type = .*; memory = \d+ MB)[\r\n]?", gpuSection)
        if not gpuDevices:
            return
        self.gpuInfo = "\n".join(gpuDevices)

    @staticmethod
    def formatLastTestResult(line):
        # line is a (header, sample-count, metrics) regex tuple.
        header = line[0] + line[1]
        metrics = line[2].replace('; ', '\n').replace(' ', '\n')
        return header + "\n" + metrics

    @staticmethod
    def formatLastTrainResult(line):
        # line is a (epoch-header, parameters) regex tuple.
        epochsInfo, parameters = line[0], line[1]
        return epochsInfo + '\n' + parameters.replace('; ', '\n')
class Example:
    """One example test (identified by suite/name) discovered under the tests
    directory, together with its parsed baseline files."""

    # Maps lower-cased "<suite>/<name>" to the corresponding Example instance.
    allExamplesIndexedByFullName = {}

    def __init__(self, suite, name, testDir):
        self.suite = suite
        self.name = name
        self.fullName = suite + "/" + name
        self.testDir = testDir
        self.baselineList = []
        self.gitHash = ""

    @staticmethod
    def discoverAllExamples():
        # Every directory containing a testcases.yml file is one example.
        rootDir = thisDir
        for currentDir, _subdirs, files in os.walk(rootDir):
            if 'testcases.yml' not in files:
                continue
            exampleName = os.path.basename(currentDir)
            suiteDir = os.path.dirname(currentDir)
            # suite name will be derived from the path components
            suiteName = os.path.relpath(suiteDir, rootDir).replace('\\', '/')
            discovered = Example(suiteName, exampleName, currentDir)
            Example.allExamplesIndexedByFullName[discovered.fullName.lower()] = discovered

    # it returns a list with all baseline files for current example
    def findBaselineFilesList(self):
        found = []
        # Probe every os/device/flavor-specific baseline name, including the
        # suffix-free fallbacks (e.g. plain "baseline.txt").
        for osSuffix in [".windows", ".linux", ""]:
            for deviceSuffix in [".cpu", ".gpu", ""]:
                for flavorSuffix in [".debug", ".release", ""]:
                    candidateName = "baseline" + osSuffix + flavorSuffix + deviceSuffix + ".txt"
                    fullPath = td.cygpath(os.path.join(self.testDir, candidateName), relative=True)
                    if os.path.isfile(fullPath):
                        found.append(Baseline(fullPath))
        return found
# extracts information for every example and stores it in Example.allExamplesIndexedByFullName
def getExamplesMetrics():
    """Reads every discovered example's baseline files and attaches the parsed
    git hash, hardware info and results to the example."""
    # NOTE: this intentionally replaces the class-level dict with a list sorted
    # by full name; the report writers below iterate that list directly.
    sortedExamples = list(sorted(Example.allExamplesIndexedByFullName.values(), key=lambda test: test.fullName))
    Example.allExamplesIndexedByFullName = sortedExamples
    print ("CNTK - Metrics collector")
    for example in sortedExamples:
        six.print_("Example: " + example.fullName)
        for baseline in example.findBaselineFilesList():
            with open(baseline.fullPath, "r") as f:
                baselineContent = f.read()
            # Baselines without a recorded 40-hex-digit build SHA1 are skipped.
            gitHash = re.search('.*Build SHA1:\s([a-z0-9]{40})[\r\n]+', baselineContent, re.MULTILINE)
            if gitHash is None:
                continue
            example.gitHash = gitHash.group(1)
            baseline.extractHardwareInfo(baselineContent)
            baseline.extractResultsInfo(baselineContent)
            example.baselineList.append(baseline)
# creates a list with links to each example result
def createAsciidocExampleList(file):
    """Writes an asciidoc cross-reference link line for every example that has
    at least one baseline, followed by a terminating blank line."""
    for example in Example.allExamplesIndexedByFullName:
        if example.baselineList:
            anchor = example.fullName.replace("/", "").lower()
            file.write("<<" + anchor + "," + example.fullName + ">> +\n")
    file.write("\n")
def writeMetricsToAsciidoc():
    """Writes the collected metrics of all examples to 'metrics.adoc' as an
    asciidoc table (one section per example, one row per baseline file)."""
    # Open in text mode ('w') and via a context manager: the original opened
    # the file in 'wb' and never closed it; writing str objects to a binary
    # file raises TypeError on Python 3.
    with open("metrics.adoc", 'w') as metricsFile:
        createAsciidocExampleList(metricsFile)
        for example in Example.allExamplesIndexedByFullName:
            if not example.baselineList:
                continue
            metricsFile.write("".join(["===== ", example.fullName, "\n"]))
            metricsFile.write("".join(["**Git Hash: **", example.gitHash, "\n\n"]))
            metricsFile.write("[cols=3, options=\"header\"]\n")
            metricsFile.write("|====\n")
            metricsFile.write("|Log file / Configuration | Train Result | Test Result\n")
            for baseline in example.baselineList:
                # Path of the baseline relative to this directory (strip the
                # leading separator left by the split).
                pathInDir=baseline.fullPath.split(thisDir)[1][1:]
                metricsFile.write("".join(["|link:../blob/", example.gitHash[:7],"/Tests/EndToEndTests/", pathInDir, "[",
                    baseline.fullPath.split("/")[-1], "] .2+|", baseline.trainResult.replace("\n", " "), " .2+|",
                    baseline.testResult.replace("\n", " "), "|\n"]))
                # Hardware info is flattened onto one line; GPU info is only
                # appended when the baseline recorded any GPU devices.
                cpuInfo = "".join(["CPU: ", re.sub("[\r]?\n", ' ', baseline.cpuInfo)])
                gpuInfo = re.sub("[\r]?\n", ' ', baseline.gpuInfo)
                if gpuInfo:
                    metricsFile.write("".join([cpuInfo, " GPU: ", gpuInfo]))
                else:
                    metricsFile.write(cpuInfo)
            metricsFile.write("\n|====\n\n")
# ======================= Entry point =======================
# Guarded so that importing this module from other tooling does not trigger a
# full metrics-collection run; matches the entry-point style of TestDriver.py.
if __name__ == "__main__":
    six.print_("==============================================================================")
    Example.discoverAllExamples()
    getExamplesMetrics()
    writeMetricsToAsciidoc()

Просмотреть файл

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python
# ----------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
@ -687,89 +687,90 @@ def runCommand(args):
sys.exit(10)
# ======================= Entry point =======================
parser = argparse.ArgumentParser(description="TestDriver - CNTK Test Driver")
subparsers = parser.add_subparsers(help="command to execute. Run TestDriver.py <command> --help for command-specific help")
runSubparser = subparsers.add_parser("run", help="run test(s)")
runSubparser.add_argument("test", nargs="*",
help="optional test name(s) to run, specified as Suite/TestName. "
"Use list command to list available tests. "
"If not specified then all tests will be run.")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="TestDriver - CNTK Test Driver")
subparsers = parser.add_subparsers(help="command to execute. Run TestDriver.py <command> --help for command-specific help")
runSubparser = subparsers.add_parser("run", help="run test(s)")
runSubparser.add_argument("test", nargs="*",
help="optional test name(s) to run, specified as Suite/TestName. "
"Use list command to list available tests. "
"If not specified then all tests will be run.")
defaultBuildSKU = "gpu"
defaultBuildSKU = "gpu"
runSubparser.add_argument("-b", "--build-location", help="location of the CNTK build to run")
runSubparser.add_argument("-t", "--tag", help="runs tests which match the specified tag")
runSubparser.add_argument("-d", "--device", help="cpu|gpu - run on a specified device")
runSubparser.add_argument("-f", "--flavor", help="release|debug - run only a specified flavor")
runSubparser.add_argument("-s", "--build-sku", default=defaultBuildSKU, help="cpu|gpu|1bitsgd - run tests only for a specified build SKU")
tmpDir = os.getenv("TEMP") if windows else "/tmp"
defaultRunDir=os.path.join(tmpDir, "cntk-test-{0}.{1}".format(time.strftime("%Y%m%d%H%M%S"), random.randint(0,1000000)))
runSubparser.add_argument("-r", "--run-dir", default=defaultRunDir, help="directory where to store test output, default: a random dir within /tmp")
runSubparser.add_argument("--update-baseline", action='store_true', help="update baseline file(s) instead of matching them")
runSubparser.add_argument("--create-baseline", action='store_true', help="create new baseline file(s) (named as baseline.<os>.<device>.txt) for tests that do not currently have baselines")
runSubparser.add_argument("-v", "--verbose", action='store_true', help="verbose output - dump all output of test script")
runSubparser.add_argument("-n", "--dry-run", action='store_true', help="do not run the tests, only print test names and configurations to be run along with full command lines")
runSubparser.add_argument("-b", "--build-location", help="location of the CNTK build to run")
runSubparser.add_argument("-t", "--tag", help="runs tests which match the specified tag")
runSubparser.add_argument("-d", "--device", help="cpu|gpu - run on a specified device")
runSubparser.add_argument("-f", "--flavor", help="release|debug - run only a specified flavor")
runSubparser.add_argument("-s", "--build-sku", default=defaultBuildSKU, help="cpu|gpu|1bitsgd - run tests only for a specified build SKU")
tmpDir = os.getenv("TEMP") if windows else "/tmp"
defaultRunDir=os.path.join(tmpDir, "cntk-test-{0}.{1}".format(time.strftime("%Y%m%d%H%M%S"), random.randint(0,1000000)))
runSubparser.add_argument("-r", "--run-dir", default=defaultRunDir, help="directory where to store test output, default: a random dir within /tmp")
runSubparser.add_argument("--update-baseline", action='store_true', help="update baseline file(s) instead of matching them")
runSubparser.add_argument("--create-baseline", action='store_true', help="create new baseline file(s) (named as baseline.<os>.<device>.txt) for tests that do not currently have baselines")
runSubparser.add_argument("-v", "--verbose", action='store_true', help="verbose output - dump all output of test script")
runSubparser.add_argument("-n", "--dry-run", action='store_true', help="do not run the tests, only print test names and configurations to be run along with full command lines")
runSubparser.set_defaults(func=runCommand)
runSubparser.set_defaults(func=runCommand)
listSubparser = subparsers.add_parser("list", help="list available tests")
listSubparser.add_argument("-t", "--tag", help="limits a resulting list to tests matching the specified tag")
listSubparser.add_argument("-d", "--device", help="cpu|gpu - tests for a specified device")
listSubparser.add_argument("-f", "--flavor", help="release|debug - tests for specified flavor")
listSubparser.add_argument("-s", "--build-sku", default=defaultBuildSKU, help="cpu|gpu|1bitsgd - list tests only for a specified build SKU")
listSubparser.add_argument("--os", help="windows|linux - tests for a specified operating system")
listSubparser = subparsers.add_parser("list", help="list available tests")
listSubparser.add_argument("-t", "--tag", help="limits a resulting list to tests matching the specified tag")
listSubparser.add_argument("-d", "--device", help="cpu|gpu - tests for a specified device")
listSubparser.add_argument("-f", "--flavor", help="release|debug - tests for specified flavor")
listSubparser.add_argument("-s", "--build-sku", default=defaultBuildSKU, help="cpu|gpu|1bitsgd - list tests only for a specified build SKU")
listSubparser.add_argument("--os", help="windows|linux - tests for a specified operating system")
listSubparser.set_defaults(func=listCommand)
listSubparser.set_defaults(func=listCommand)
if len(sys.argv)==1:
parser.print_help()
sys.exit(1)
args = parser.parse_args(sys.argv[1:])
# parsing a --device, --flavor and --os options:
args.devices = ["cpu", "gpu"]
if (args.device):
args.device = args.device.lower()
if not args.device in args.devices:
six.print_("--device must be one of", args.devices, file=sys.stderr)
sys.exit(1)
args.devices = [args.device]
args.flavors = ["debug", "release"]
if (args.flavor):
args.flavor = args.flavor.lower()
if not args.flavor in args.flavors:
six.print_("--flavor must be one of", args.flavors, file=sys.stderr)
sys.exit(1)
args.flavors = [args.flavor]
args.buildSKUs = ["cpu", "gpu", "1bitsgd"]
if (args.build_sku):
args.build_sku = args.build_sku.lower()
if not args.build_sku in args.buildSKUs:
six.print_("--build-sku must be one of", args.buildSKUs, file=sys.stderr)
sys.exit(1)
args.buildSKUs = [args.build_sku]
if args.build_sku == "cpu" and args.devices == ["gpu"]:
print >>sys.stderr, "Invalid combination: --build-sku cpu and --device gpu"
sys.exit(1)
if args.func == runCommand and not args.build_location:
args.build_location = os.path.realpath(os.path.join(thisDir, "../..", "x64" if windows else "build/"))
if args.func == listCommand:
args.oses = ["windows", "linux"]
if (args.os):
args.os = args.os.lower()
if not args.os in args.oses:
six.print_("--os must be one of", args.oses, file=sys.stderr)
if len(sys.argv)==1:
parser.print_help()
sys.exit(1)
args.oses = [args.os]
# discover all the tests
Test.discoverAllTests()
args = parser.parse_args(sys.argv[1:])
# execute the command
args.func(args)
# parsing a --device, --flavor and --os options:
args.devices = ["cpu", "gpu"]
if (args.device):
args.device = args.device.lower()
if not args.device in args.devices:
six.print_("--device must be one of", args.devices, file=sys.stderr)
sys.exit(1)
args.devices = [args.device]
args.flavors = ["debug", "release"]
if (args.flavor):
args.flavor = args.flavor.lower()
if not args.flavor in args.flavors:
six.print_("--flavor must be one of", args.flavors, file=sys.stderr)
sys.exit(1)
args.flavors = [args.flavor]
args.buildSKUs = ["cpu", "gpu", "1bitsgd"]
if (args.build_sku):
args.build_sku = args.build_sku.lower()
if not args.build_sku in args.buildSKUs:
six.print_("--build-sku must be one of", args.buildSKUs, file=sys.stderr)
sys.exit(1)
args.buildSKUs = [args.build_sku]
if args.build_sku == "cpu" and args.devices == ["gpu"]:
print >>sys.stderr, "Invalid combination: --build-sku cpu and --device gpu"
sys.exit(1)
if args.func == runCommand and not args.build_location:
args.build_location = os.path.realpath(os.path.join(thisDir, "../..", "x64" if windows else "build/"))
if args.func == listCommand:
args.oses = ["windows", "linux"]
if (args.os):
args.os = args.os.lower()
if not args.os in args.oses:
six.print_("--os must be one of", args.oses, file=sys.stderr)
sys.exit(1)
args.oses = [args.os]
# discover all the tests
Test.discoverAllTests()
# execute the command
args.func(args)

Просмотреть файл

@ -1,5 +1,8 @@
#!/bin/bash
# ----------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
# ----------------------------------------------------------
# Helper script containing common code used by run-test scripts of E2E tests
BinaryPath=$TEST_CNTK_BINARY
@ -26,6 +29,21 @@ MPIArgs=
DeleteExistingModels=1
DeleteModelsAfterTest=1
# Print info needed by MetricsDriver.py to extract tests metrics
printHardwareInfo()
{
cpuName=$(cat /proc/cpuinfo 2> /dev/null | grep -m 1 'model name' | cut -d : -f 2- | tr -s " " | cut -c 2-)
totalMemory=$(cat /proc/meminfo 2> /dev/null | grep 'MemTotal' | cut -d : -f 2- | tr -s " " | cut -c 2-)
nproc=$(nproc)
# Note that MetricsDriver.py depends on this format
echo "CPU info:"
echo " CPU Model Name: $cpuName"
echo " Hardware threads: $nproc"
echo " Total Memory: $totalMemory"
echo "-------------------------------------------------------------------"
}
# Helper function to print and run a command
run()
{
@ -119,3 +137,6 @@ cntkmpirun()
cntkrun "$2" "$3"
return $?
}
# place printHardwareInfo here, so that all tests print it
printHardwareInfo

Просмотреть файл

@ -5,6 +5,8 @@
#include "stdafx.h"
#include "EvalTestHelper.h"
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
using namespace Microsoft::MSR::CNTK;
@ -21,22 +23,10 @@ BOOST_FIXTURE_TEST_SUITE(EvalTestSuite, EvalFixture)
IEvaluateModelExtended<float>* SetupNetworkAndGetLayouts(std::string modelDefinition, VariableSchema& inputLayouts, VariableSchema& outputLayouts)
{
// Load the eval library
auto hModule = LoadLibrary(L"evaldll.dll");
if (hModule == nullptr)
{
auto err = GetLastError();
throw std::exception((boost::format("Cannot load evaldll.dll: 0x%08lx") % err).str().c_str());
}
// Get the factory method to the evaluation engine
std::string func = "GetEvalExtendedF";
auto procAddress = GetProcAddress(hModule, func.c_str());
auto getEvalProc = (GetEvalProc<float>)procAddress;
// Native model evaluation instance
IEvaluateModelExtended<float> *eval;
getEvalProc(&eval);
GetEvalExtendedF(&eval);
try
{
@ -44,7 +34,7 @@ IEvaluateModelExtended<float>* SetupNetworkAndGetLayouts(std::string modelDefini
}
catch (std::exception& ex)
{
fprintf(stderr, ex.what());
fprintf(stderr, "%s\n", ex.what());
throw;
}
fflush(stderr);
@ -53,9 +43,9 @@ IEvaluateModelExtended<float>* SetupNetworkAndGetLayouts(std::string modelDefini
outputLayouts = eval->GetOutputSchema();
for (auto vl : outputLayouts)
{
fprintf(stderr, "Output dimension: %d\n", vl.m_numElements);
fprintf(stderr, "Output name: %ls\n", vl.m_name);
{
fprintf(stderr, "Output dimension: %" PRIu64 "\n", vl.m_numElements);
fprintf(stderr, "Output name: %ls\n", vl.m_name.c_str());
}
eval->StartForwardEvaluation({outputLayouts[0].m_name});

Просмотреть файл

@ -63,13 +63,13 @@ struct EvalFixture
if (!envVariableErrorMessage.empty())
{
BOOST_TEST_MESSAGE(envVariableErrorMessage);
fprintf(stderr, envVariableErrorMessage.c_str());
fprintf(stderr, "%s\n", envVariableErrorMessage.c_str());
}
newCurrentPath = m_testDataPath;
}
}
else if ((subPath[0] == '/' && subPath[1] == '//') || (subPath[0] == '\\' && subPath[1] == '\\'))
else if ((subPath[0] == '/' && subPath[1] == '/') || (subPath[0] == '\\' && subPath[1] == '\\'))
{
newCurrentPath = subPath;
}

Просмотреть файл

@ -72,6 +72,7 @@
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(OutDir)..;$(BOOST_LIB_PATH)</AdditionalLibraryDirectories>
<AdditionalDependencies>EvalDll.lib; %(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(DebugBuild)">
@ -86,8 +87,7 @@
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
<CudaCompile>
<TargetMachinePlatform>64</TargetMachinePlatform>
@ -112,8 +112,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<PreprocessorDefinitions>WIN32;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
<PreprocessorDefinitions>WIN32;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(GpuBuild)">
@ -168,4 +167,4 @@
<Output TaskParameter="DestinationFiles" ItemName="NewFileWrites" />
</Copy>
</Target>
</Project>
</Project>

Просмотреть файл

@ -9,16 +9,19 @@
#define _SCL_SECURE_NO_WARNINGS // current API of matrix does not allow safe invokations. TODO: change api to proper one.
#ifdef _WIN32
#include "targetver.h"
#include "targetver.h"
#include <windows.h>
#endif
#include <stdio.h>
#include <windows.h>
// TODO: reference additional headers your program requires here
#include "Eval.h"
//Adding required boost header
#ifndef _WIN32
// Use dynamic library on Linux
#define BOOST_TEST_DYN_LINK
#endif
#include <boost/test/unit_test.hpp>
#include <boost/format.hpp>

Просмотреть файл

@ -5,171 +5,20 @@
// MathPerformanceTests.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#define NOMINMAX
#include "Windows.h"
//#define NOMINMAX
//#include "Windows.h"
#include "Matrix.h"
#include "CPUMatrix.h"
#include "TensorView.h"
#include "Sequences.h"
#include <chrono>
#include <iostream>
#include <vector>
#include "Matrix.h"
#include "CPUMatrix.h"
#include "Sequences.h"
#include <algorithm>
using namespace Microsoft::MSR::CNTK;
using namespace std;
template <class ElemType>
void SetToInitStateValueForResetSeg(const Matrix<ElemType>& sentenceBegin,
size_t nStream, ElemType initStateValue, Matrix<ElemType>& newprevstate)
{
Matrix<ElemType> colSeg(sentenceBegin.GetDeviceId());
colSeg.Resize(nStream, nStream);
size_t nStateRow = newprevstate.GetNumRows();
assert(nStream == sentenceBegin.GetNumRows());
// only set state to init state value for segmentation = 0, and -1
// e.g., -1 0 1 -> 0 0 1 -> 0 0 -1 -> 1 1 0
Matrix<ElemType> colPos(sentenceBegin.GetDeviceId());
colPos.SetValue(sentenceBegin); // -1 0 1
colPos.InplaceTruncateBottom(1 << 0 /*(int)MinibatchPackingFlags::SequenceStart*/); // TODO: these flags no longer exist, this test probably no longer applies
Matrix<ElemType>::Scale((ElemType) -1.0, colPos);
colPos += 0; // (int)MinibatchPackingFlags::None; // TODO: these flags no longer exist, this test probably no longer applies
colSeg.SetDiagonalValue(colPos);
Matrix<ElemType> ones(sentenceBegin.GetDeviceId());
ones.Resize(nStateRow, nStream);
ones.SetValue((ElemType) 1);
// add default state value if it is for reset
Matrix<ElemType>::MultiplyAndWeightedAdd(initStateValue, ones, false, colSeg, false, 1.0, newprevstate); // += [0 initStateValue 0 ]
}
template <class ElemType>
void rnnForwardPropSRP(Matrix<ElemType>& functionValues, size_t mNbr, Matrix<ElemType>& pastActivity, Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& colBegin, const Matrix<ElemType>& needToCompute)
{
size_t ncol = functionValues.GetNumCols();
size_t ntime = ncol / mNbr;
Matrix<ElemType> out = functionValues.ColumnSlice(0, mNbr);
Matrix<ElemType> inp((DEVICEID_TYPE) functionValues.GetDeviceId());
for (size_t d = 0; d < ntime; d++)
{
if (d == 0)
inp = pastActivity.ColumnSlice(d, mNbr);
else
inp = inputFunctionValues.ColumnSlice(d, mNbr);
if (needToCompute.ColumnSlice(d, 1).Get00Element() == 1)
{
Matrix<ElemType> colSegPastActivity((DEVICEID_TYPE) functionValues.GetDeviceId());
Matrix<ElemType> colSeg((DEVICEID_TYPE) functionValues.GetDeviceId());
colSeg.Resize(mNbr, mNbr);
colSeg.SetValue(0);
colSegPastActivity.SetValue(colBegin);
colSegPastActivity.InplaceTruncateBottom(1 << 0 /*(int)MinibatchPackingFlags::SequenceStart*/); // TODO: these flags no longer exist, this test probably no longer applies
colSeg.SetDiagonalValue(colSegPastActivity);
Matrix<ElemType>::Multiply(inp, false, colSeg, false, out);
ElemType initStateValue = (ElemType) 0.1;
SetToInitStateValueForResetSeg<ElemType>(colBegin, mNbr, initStateValue, out);
}
}
}
template <class ElemType>
void oldRnnForwardPropSRP(Matrix<ElemType>& functionValues, size_t mNbr, Matrix<ElemType>& pastActivity, Matrix<ElemType>& inputFunctionValues)
{
size_t ncol = functionValues.GetNumCols();
size_t ntime = ncol / mNbr;
for (size_t timeIdxInSeq = 0; timeIdxInSeq < ntime; timeIdxInSeq++)
{
for (size_t i = 0; i < mNbr; i++)
{
bool reset = false;
if (timeIdxInSeq == 0)
{
reset = true;
}
oldRNNForwardPropSRP<ElemType>(timeIdxInSeq, 1, reset, (ElemType) 0.1, functionValues, pastActivity, inputFunctionValues, i, mNbr);
}
}
}
template <class ElemType>
void oldRNNForwardPropSRP(const size_t timeIdxInSeq, const int delay, const bool reset, const ElemType default_activity, Matrix<ElemType>& functionValues, const Matrix<ElemType>& pastActivity, const Matrix<ElemType>& inputFunctionValues, const size_t indexInBatch, const size_t mNbr)
{
assert(delay > 0);
if (functionValues.GetNumRows() != inputFunctionValues.GetNumRows() ||
functionValues.GetNumCols() != inputFunctionValues.GetNumCols())
functionValues.Resize(inputFunctionValues.GetNumRows(),
inputFunctionValues.GetNumCols());
int iPastIndex = (int) ((int) timeIdxInSeq - (int) delay) * (int) mNbr;
int d = iPastIndex;
if (d < 0)
d = (int) functionValues.Mod((float) iPastIndex, (float) pastActivity.GetNumCols());
// this can point to the past activity of the previous mninibatch
Matrix<ElemType> out = functionValues.ColumnSlice(timeIdxInSeq * mNbr + indexInBatch, 1);
Matrix<ElemType> inp((DEVICEID_TYPE) functionValues.GetDeviceId());
if (reset)
out.SetValue(default_activity);
else
{
if (iPastIndex < 0)
inp = pastActivity.ColumnSlice(d + indexInBatch, 1);
else
inp = inputFunctionValues.ColumnSlice(d + indexInBatch, 1);
out.AssignValuesOf(inp);
}
}
/**
The new way of resetting RNN state.
*/
template <class ElemType>
void TestRnnForwardPropSRP(size_t nRow = 100, size_t nCol = 1000, size_t mNbr = 10, DEVICEID_TYPE deviceID = 0)
{
Matrix<ElemType> functionValues(deviceID);
Matrix<ElemType> colBegin(deviceID);
Matrix<ElemType> pastActivity(deviceID);
Matrix<ElemType> inputFunctionValues(deviceID);
Matrix<ElemType> needToCompute(deviceID);
functionValues.Resize(nRow, nCol);
colBegin.Resize(mNbr, 1);
pastActivity.Resize(nRow, nCol);
inputFunctionValues.Resize(nRow, nCol);
needToCompute.Resize(1, nCol / mNbr);
needToCompute.SetValue(0);
needToCompute.ColumnSlice(0, 1).SetValue(1);
auto t_start = clock();
rnnForwardPropSRP<ElemType>(functionValues, mNbr, pastActivity, inputFunctionValues, colBegin, needToCompute);
auto t_end = clock();
std::cout << "testRnnForwardPropSRP: " << 1.0 * (t_end - t_start) / CLOCKS_PER_SEC << " seconds" << endl;
}
/**
The old way of resetting RNN state, which used if statement. Also only supports up to two sentences within a minibatch
*/
template <class ElemType>
void TestOldRnnForwardPropSRP(size_t nRow = 100, size_t nCol = 1000, size_t mNbr = 10, DEVICEID_TYPE deviceID = 0)
{
Matrix<ElemType> functionValues(deviceID);
Matrix<ElemType> colBegin(deviceID);
Matrix<ElemType> pastActivity(deviceID);
Matrix<ElemType> inputFunctionValues(deviceID);
functionValues.Resize(nRow, nCol);
colBegin.Resize(mNbr, 1);
pastActivity.Resize(nRow, nCol);
inputFunctionValues.Resize(nRow, nCol);
auto t_start = clock();
oldRnnForwardPropSRP<ElemType>(functionValues, mNbr, pastActivity, inputFunctionValues);
auto t_end = clock();
std::cout << "TestOldRnnForwardPropSRP: " << 1.0 * (t_end - t_start) / CLOCKS_PER_SEC << " seconds" << endl;
}
template <class ElemType>
void randomInitializeCPUMatrix(CPUMatrix<ElemType>& M, float min = -10, float max = 10)
{
@ -250,77 +99,6 @@ void AddMultiplyAndInplaceSigmoidTest(int n, int k, int m)
std::cout << "Matrix in: " << 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC << " seconds" << endl;
}
template <class ElemType>
void ColumnSliceMultAndAddTest(int n, int k, int m, DEVICEID_TYPE deviceID)
{
cout << "Testing Matrix" << endl;
Matrix<ElemType> AG((size_t) n, (size_t) k, deviceID);
AG.SetUniformRandomValue(-1, 1);
Matrix<ElemType> BG((size_t) k, (size_t) m, deviceID);
BG.SetUniformRandomValue(-1, 1);
Matrix<ElemType> CG((size_t) n, (size_t) m, deviceID);
Matrix<ElemType> DG((size_t) n, (size_t) m, deviceID);
auto t_startG = clock();
Matrix<ElemType>::MultiplyAndAdd(AG, false, BG, false, CG);
auto t_endG = clock();
std::cout << "MultiplyAndAdd Directly: " << 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC << " seconds" << endl;
t_startG = clock();
for (int i = 0; i < m; i++)
{
Matrix<ElemType> col_BG = BG.ColumnSlice(i, 1);
Matrix<ElemType> col_CG = CG.ColumnSlice(i, 1);
Matrix<ElemType>::MultiplyAndAdd(AG, false, col_BG, false, col_CG);
}
t_endG = clock();
std::cout << "MultiplyAndAdd With ColumnSlice: " << 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC << " seconds" << endl;
t_startG = clock();
for (int i = 0; i < m; i++)
{
Matrix<ElemType> col_BG = BG.ColumnSlice(i, 1);
Matrix<ElemType> col_CG = CG.ColumnSlice(i, 1);
Matrix<ElemType>::MultiplyAndAdd(AG, false, col_BG, false, col_CG);
}
t_endG = clock();
std::cout << "MultiplyAndAdd With ColumnSlice&: " << 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC << " seconds" << endl;
Matrix<ElemType> col_BG1(0), col_CG1(0);
t_startG = clock();
for (int i = 0; i < m; i++)
{
col_BG1.AssignColumnSlice(BG, i, 1);
col_CG1.AssignColumnSlice(CG, i, 1);
Matrix<ElemType>::MultiplyAndAdd(AG, false, col_BG1, false, col_CG1);
}
t_endG = clock();
std::cout << "MultiplyAndAdd With AssignColumnSlice: " << 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC << " seconds" << endl;
t_startG = clock();
for (int i = 0; i < m; i++)
{
Matrix<ElemType> col_CG = CG.ColumnSlice(i, 1);
Matrix<ElemType> col_DG = DG.ColumnSlice(i, 1);
col_DG.AssignSigmoidOf(col_CG);
}
t_endG = clock();
std::cout << "AssignSigmoidOf With ColumnSlice: " << 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC << " seconds" << endl;
t_startG = clock();
for (int i = 0; i < m; i++)
{
col_BG1.AssignColumnSlice(BG, i, 1);
col_CG1.AssignColumnSlice(CG, i, 1);
col_BG1.AssignSigmoidOf(col_CG1);
}
t_endG = clock();
std::cout << "AssignSigmoidOf With AssignColumnSlice: " << 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC << " seconds" << endl;
}
template <class ElemType>
void SquareMultiplyAndAdd10TimesAvgTest(int n, int count)
{
@ -437,12 +215,6 @@ void MandSTest(int count, int devId)
int wmain()
{
ColumnSliceMultAndAddTest<float>(2048, 2048, 256, 0);
TestRnnForwardPropSRP<float>();
TestOldRnnForwardPropSRP<float>();
// MandSTest<float>(100, 2);
/*cout<<endl<<"********************Matrix SquareMultiplyAndWeightedAdd10TimesAvg TEST********************"<<endl;

Просмотреть файл

@ -114,10 +114,11 @@
<ClInclude Include="targetver.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\..\Source\Common\ExceptionWithCallStack.cpp" />
<ClCompile Include="MathPerformanceTests.cpp" />
<ClCompile Include="stdafx.cpp">
<PrecompiledHeader>Create</PrecompiledHeader>
</ClCompile>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
</Project>
</Project>

Просмотреть файл

@ -8,15 +8,11 @@
namespace Microsoft { namespace MSR { namespace CNTK { namespace TEST {
//The simplest possible matrix multiplier, used here as a check.
template<typename ScalarAT, typename ScalarBT, typename ScalarCT, int MAXRANGE = 1 << ((8 * sizeof(ScalarAT)) - 3)> class ReferenceMultiplier
template<typename ScalarAT, typename ScalarBT, typename ScalarCT> class ReferenceMultiplier
{
public:
typedef ScalarAT ScalarAT;
typedef ScalarBT ScalarBT;
typedef ScalarCT ScalarCT;
static const int MAXRANGE = MAXRANGE;
static const int MAXRANGE = 1 << ((8 * sizeof(ScalarAT)) - 3);
ScalarBT* PrepareB(ScalarBT* oldB, int k, int n) { return oldB; }
static ScalarAT* CreateMatrixA(int m, int n)
@ -77,9 +73,17 @@ template<typename ScalarAT, typename ScalarBT, typename ScalarCT, int MAXRANGE =
}
};
template<typename ScalarAT, typename ScalarBT, typename ScalarCT, typename MultiplierT>static void TestMultiplierSub(
int m, int k, int n, MultiplierT& testMult, int numThreads = 1, ScalarCT epsilon = ScalarCT())
template<typename ScalarCT> void CompareMatricesAndDump(const ScalarCT* ref, const ScalarCT* test,
int m, int /*k*/, int n)
{
for (int i = 0; i < m * n; ++i)
{
BOOST_CHECK_EQUAL(ref[i], test[i]);
}
}
template<typename ScalarAT, typename ScalarBT, typename ScalarCT, typename MultiplierT>static void TestMultiplierSub(
int m, int k, int n, MultiplierT& testMult, int numThreads = 1, ScalarCT epsilon = ScalarCT())
{
epsilon;
testMult.SetNumThreads(numThreads);
@ -126,23 +130,13 @@ template<typename ScalarAT, typename ScalarBT, typename ScalarCT, int MAXRANGE =
}
template<typename ScalarAT, typename ScalarBT, typename ScalarCT, typename MultiplierT>static void TestMultiplierSub(
template<typename ScalarAT, typename ScalarBT, typename ScalarCT, typename MultiplierT>static void TestMultiplierSub(
int m, int k, int n, int numThreads = 1, ScalarCT epsilon = ScalarCT())
{
MultiplierT testMult;
TestMultiplierSub<ScalarAT, ScalarBT, ScalarCT, MultiplierT>(m, k, n, testMult, numThreads, epsilon);
}
template<typename ScalarCT> void CompareMatricesAndDump(const ScalarCT* ref, const ScalarCT* test,
int m, int /*k*/, int n)
{
for (int i = 0; i < m * n; ++i)
{
BOOST_CHECK_EQUAL(ref[i], test[i]);
}
}
BOOST_AUTO_TEST_SUITE(BlockMultiplierSuite)
BOOST_AUTO_TEST_CASE(BlockMultiplyTest8x128x8SingleThread)

Просмотреть файл

@ -3,7 +3,9 @@
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#include "stdafx.h"
#ifdef _WIN32
#include <crtdefs.h>
#endif
#include "../../../Source/Math/CPUSparseMatrix.h"
using namespace Microsoft::MSR::CNTK;

Просмотреть файл

@ -6,7 +6,9 @@
//
#include "stdafx.h"
#include <math.h>
#ifdef _WIN32
#include <crtdefs.h>
#endif
#include "../../../Source/Math/GPUSparseMatrix.h"
using namespace Microsoft::MSR::CNTK;

Просмотреть файл

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" InitialTargets="CheckDependencies" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
@ -66,7 +66,7 @@
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>$(BOOST_INCLUDE_PATH);$(SolutionDir)Source\Common\Include</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(BOOST_INCLUDE_PATH);$(SolutionDir)Source\Math;$(SolutionDir)Source\Common\Include</AdditionalIncludeDirectories>
<DisableSpecificWarnings>4819</DisableSpecificWarnings>
</ClCompile>
<Link>
@ -135,6 +135,7 @@
<ClInclude Include="fixtures.h" />
<ClInclude Include="stdafx.h" />
<ClInclude Include="targetver.h" />
<ClInclude Include="TensorTestsHelper.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="BatchNormalizationEngineTests.cpp" />
@ -156,6 +157,7 @@
<PrecompiledHeader>Create</PrecompiledHeader>
</ClCompile>
<ClCompile Include="CPUMatrixTests.cpp" />
<ClCompile Include="TensorTests.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<Target Name="Build" Condition="$(HasBoost)" Outputs="$(TargetPath)" DependsOnTargets="$(BuildDependsOn)" />

Просмотреть файл

@ -4,7 +4,9 @@
//
#include "stdafx.h"
#include <math.h>
#ifdef _WIN32
#include <crtdefs.h>
#endif
#include "../../../Source/Math/Matrix.h"
#include "../../../Source/Math/CPUMatrix.h"

Просмотреть файл

@ -5,7 +5,12 @@
#include "stdafx.h"
#include "File.h"
#include <memory>
#ifdef _WIN32
#include <io.h>
#else // Linux
#define _dup2 dup2
#define _fileno fileno
#endif
#include "../../../Source/Math/MatrixQuantizerImpl.h"
#include "../../../Source/Math/CUDAPageLockedMemAllocator.h"

Просмотреть файл

@ -0,0 +1,105 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
//
#include "stdafx.h"
#include "TensorView.h"
#include "Sequences.h"
#include "TensorTestsHelper.h"
using namespace Microsoft::MSR::CNTK;
namespace Microsoft { namespace MSR { namespace CNTK { namespace Test {
BOOST_AUTO_TEST_SUITE(MathTensorTests)
BOOST_AUTO_TEST_CASE(ElementwiseAddition)
{
Test::TensorTest<float> tensorTester;
// --- elementwise
// elementwise sum
tensorTester.OneTensorTest("elementwise addition", 1e-8, [&tensorTester](DEVICEID_TYPE deviceId)
{
return tensorTester.BroadcastingTest(TensorShape{ 512, 256 }, TensorShape({ 512, 256 }), deviceId);
});
}
BOOST_AUTO_TEST_CASE(AdditionWithSimpleBroadcasting)
{
Test::TensorTest<float> tensorTester;
// --- broadcasting
// simple broadcasting
tensorTester.OneTensorTest("addition wth simple broadcasting", 1e-8, [&tensorTester](DEVICEID_TYPE deviceId)
{
return tensorTester.BroadcastingTest(TensorShape{ 3, 2 }, TensorShape({ 3, 1 }), deviceId);
});
}
BOOST_AUTO_TEST_CASE(BiasAddition)
{
Test::TensorTest<float> tensorTester;
// typical bias for convolutional layer
tensorTester.OneTensorTest("bias addition (broadcasting)", 1e-8, [&tensorTester](DEVICEID_TYPE deviceId)
{
return tensorTester.BroadcastingTest(TensorShape{ 28, 28, 128, 32 }, TensorShape({ 1, 1, 128 }), deviceId);
});
}
BOOST_AUTO_TEST_CASE(BiasAddition2)
{
Test::TensorTest<float> tensorTester;
// BUGBUG: This test is strange--Print() shows different values with depth 128 instead of 64, but IsEqual() does not fail with 1e-3 tolerance.
// Something fishy going on. Dimension overflow?
tensorTester.OneTensorTest("bias addition (broadcasting)", 1e-8, [&tensorTester](DEVICEID_TYPE deviceId)
{
return tensorTester.BroadcastingTest(TensorShape{ 256, 256, 64, 32 }, TensorShape({ 1, 1, 64 }), deviceId);
});
}
BOOST_AUTO_TEST_CASE(BiasGradient)
{
Test::TensorTest<float> tensorTester;
// --- reduction
// typical bias gradient (reduction) for FF-DNN
tensorTester.OneTensorTest("bias gradient (reduction)", 1e-4, [&tensorTester](DEVICEID_TYPE deviceId)
{
return tensorTester.BiasGradientTest(TensorShape{ 2048, 1024 }, TensorShape(2048), deviceId);
});
}
BOOST_AUTO_TEST_CASE(BiasGradient2)
{
Test::TensorTest<float> tensorTester;
// typical bias gradient (reduction) for convolutional layer
tensorTester.OneTensorTest("bias gradient (reduction)", 1e-1, [&tensorTester](DEVICEID_TYPE deviceId)
{
return tensorTester.BiasGradientTest(TensorShape{ 256, 256, 64, 32 }, TensorShape({ 1, 1, 64 }), deviceId);
});
}
BOOST_AUTO_TEST_CASE(ColumnSliceMultAndAdd)
{
ColumnSliceMultAndAddTest<float>(2048, 2048, 256, 0);
}
BOOST_AUTO_TEST_CASE(RnnForwardProp)
{
TestRnnForwardPropSRP<float>();
}
BOOST_AUTO_TEST_CASE(OldRnnForwardProp)
{
TestOldRnnForwardPropSRP<float>();
}
BOOST_AUTO_TEST_SUITE_END()
} } } }

Просмотреть файл

@ -0,0 +1,310 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include "Matrix.h"
#include "CPUMatrix.h"
#include "TensorView.h"
#include "Sequences.h"
#include <chrono>
#include <iostream>
#include <vector>
#include <algorithm>
namespace Microsoft { namespace MSR { namespace CNTK { namespace Test {
// Test harness for TensorView operations: each test is run twice, on the GPU
// (deviceId 0) and on the CPU (deviceId -1), and the two results are compared.
template <class ElemType>
struct TensorTest
{
    // run one test for both GPU and CPU and verify they are the same
    // 'what'      : human-readable test name printed to stderr
    // 'tolerance' : maximum deviation accepted by Matrix::IsEqualTo
    // 'fn'        : callable taking a DEVICEID_TYPE and returning the result TensorView
    template<typename FN>
    void OneTensorTest(const char* what, double tolerance, const FN& fn)
    {
        fprintf(stderr, "===== Tensor test '%s'\n", what);
        // run on GPU and CPU
        let resultGPU = fn(0);
        let resultCPU = fn(-1);
        // dump top corner of the result to get a feel for the error
        resultGPU.GetSOB().Print("GPU result", 0, 7, 0, 9);
        // bring the GPU result over to the CPU so the comparison below is on one device
        resultGPU.GetSOB().TransferToDeviceIfNotThere(-1, true, false, true);
        resultCPU.GetSOB().Print("CPU result", 0, 7, 0, 9);
        BOOST_CHECK(resultGPU.GetSOB().IsEqualTo(resultCPU.GetSOB(), (ElemType)tolerance));
    }

    // helper to create a randomly initialized tensor object
    // 'randomSeed' makes the init reproducible across the GPU and CPU runs;
    // 'isResult' only changes how the tensor is logged (marks it as the output).
    TensorView<ElemType> CreateTensor(TensorShape shape, int randomSeed, DEVICEID_TYPE deviceId, bool isResult = false)
    {
        let numElements = shape.GetNumElements();
        if (isResult)
            cout << " ->";
        cout << " [" << string(shape) << "]";
        if (isResult)
            cout << " \t// " << (deviceId < 0 ? "C" : "G") << "PU\n " << flush;
        // random init
        mt19937 rng(randomSeed);
        uniform_real_distribution<float> nd(-1, 1);
        vector<ElemType> init(numElements);
        generate(begin(init), end(init), [&] { return nd(rng); });
        // create storage object (one-column matrix)
        let sob = make_shared<Matrix<ElemType>>(numElements/*rows*/, 1/*cols*/, init.data(), deviceId);
        // create TensorView
        return TensorView<ElemType>(sob, shape);
    }

    // test bias gradient (reduction)
    // Reduces a 'layerShape'-sized gradient into a 'biasShape'-sized bias gradient;
    // the reduction over the non-bias dimensions is implied by the smaller target shape.
    TensorView<ElemType> BiasGradientTest(TensorShape layerShape, TensorShape biasShape, DEVICEID_TYPE deviceId)
    {
        int randomSeed = 1;
        let gradient = CreateTensor(layerShape, randomSeed++, deviceId);
        auto bias = CreateTensor(biasShape, randomSeed++, deviceId, true);
        //gradient.GetSOB().Print("incoming gradient", 0, 9, 0, 9);
        //bias.GetSOB().Print("bias gradient", 0, 9, 0, 9);
        bias.DoCopyOf(1, gradient, 1); // presumably bias = 1*bias + 1*gradient (accumulate) — TODO confirm DoCopyOf arg order
        //bias.GetSOB().Print("updated bias gradient", 0, 9, 0, 9);
        return bias;
    }

    // test broadcast summation gradient
    // Adds a 'biasShape'-sized tensor onto a 'layerShape'-sized input; the bias is
    // broadcast up to the layer shape by AssignSumOf.
    TensorView<ElemType> BroadcastingTest(TensorShape layerShape, TensorShape biasShape, DEVICEID_TYPE deviceId)
    {
        int randomSeed = 1;
        let input = CreateTensor(layerShape, randomSeed++, deviceId);
        auto bias = CreateTensor(biasShape, randomSeed++, deviceId);
        //input.GetSOB().Print("input data", 0, 9, 0, 9);
        //bias.GetSOB().Print("bias", 0, 9, 0, 9);
        auto result = CreateTensor(layerShape, randomSeed++, deviceId, true);
        result.AssignSumOf(input, bias);
        return result;
    }
};
template <class ElemType>
void SetToInitStateValueForResetSeg(const Matrix<ElemType>& sentenceBegin, size_t nStream, ElemType initStateValue, Matrix<ElemType>& newprevstate)
{
Matrix<ElemType> colSeg(sentenceBegin.GetDeviceId());
colSeg.Resize(nStream, nStream);
size_t nStateRow = newprevstate.GetNumRows();
assert(nStream == sentenceBegin.GetNumRows());
// only set state to init state value for segmentation = 0, and -1
// e.g., -1 0 1 -> 0 0 1 -> 0 0 -1 -> 1 1 0
Matrix<ElemType> colPos(sentenceBegin.GetDeviceId());
colPos.SetValue(sentenceBegin); // -1 0 1
colPos.InplaceTruncateBottom(1 << 0 /*(int)MinibatchPackingFlags::SequenceStart*/); // TODO: these flags no longer exist, this test probably no longer applies
Matrix<ElemType>::Scale((ElemType)-1.0, colPos);
colPos += 0; // (int)MinibatchPackingFlags::None; // TODO: these flags no longer exist, this test probably no longer applies
colSeg.SetDiagonalValue(colPos);
Matrix<ElemType> ones(sentenceBegin.GetDeviceId());
ones.Resize(nStateRow, nStream);
ones.SetValue((ElemType)1);
// add default state value if it is for reset
Matrix<ElemType>::MultiplyAndWeightedAdd(initStateValue, ones, false, colSeg, false, 1.0, newprevstate); // += [0 initStateValue 0 ]
}
// Reference forward propagation for the (new) RNN state-reset path, used for timing.
// For each time step flagged in 'needToCompute', gates the incoming activity through
// a diagonal reset selector and then splices in the initial state value (0.1) for
// streams that are at a sequence start.
template <class ElemType>
void rnnForwardPropSRP(Matrix<ElemType>& functionValues, size_t mNbr, Matrix<ElemType>& pastActivity, Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& colBegin, const Matrix<ElemType>& needToCompute)
{
    const size_t nTimeSteps = functionValues.GetNumCols() / mNbr;
    // NOTE(review): the output slice is always frame 0 — presumably fine for a
    // timing-only test, but confirm if this is ever used for correctness.
    Matrix<ElemType> outSlice = functionValues.ColumnSlice(0, mNbr);
    Matrix<ElemType> inSlice((DEVICEID_TYPE)functionValues.GetDeviceId());

    for (size_t t = 0; t < nTimeSteps; t++)
    {
        // step 0 reads the previous minibatch's activity; later steps read the input
        if (t == 0)
            inSlice = pastActivity.ColumnSlice(t, mNbr);
        else
            inSlice = inputFunctionValues.ColumnSlice(t, mNbr);

        if (needToCompute.ColumnSlice(t, 1).Get00Element() != 1)
            continue; // this step is not flagged for computation

        // Build the diagonal reset selector from the sequence-begin flags.
        Matrix<ElemType> beginFlags((DEVICEID_TYPE)functionValues.GetDeviceId());
        beginFlags.SetValue(colBegin);
        beginFlags.InplaceTruncateBottom(1 << 0 /*(int)MinibatchPackingFlags::SequenceStart*/); // TODO: these flags no longer exist, this test probably no longer applies

        Matrix<ElemType> diagSelector((DEVICEID_TYPE)functionValues.GetDeviceId());
        diagSelector.Resize(mNbr, mNbr);
        diagSelector.SetValue(0);
        diagSelector.SetDiagonalValue(beginFlags);

        // gate the incoming activity, then splice in the initial state for reset streams
        Matrix<ElemType>::Multiply(inSlice, false, diagSelector, false, outSlice);
        SetToInitStateValueForResetSeg<ElemType>(colBegin, mNbr, (ElemType) 0.1, outSlice);
    }
}
template <class ElemType>
void oldRNNForwardPropSRP(const size_t timeIdxInSeq, const int delay, const bool reset, const ElemType default_activity, Matrix<ElemType>& functionValues, const Matrix<ElemType>& pastActivity, const Matrix<ElemType>& inputFunctionValues, const size_t indexInBatch, const size_t mNbr)
{
assert(delay > 0);
if (functionValues.GetNumRows() != inputFunctionValues.GetNumRows() ||
functionValues.GetNumCols() != inputFunctionValues.GetNumCols())
functionValues.Resize(inputFunctionValues.GetNumRows(),
inputFunctionValues.GetNumCols());
int iPastIndex = (int)((int)timeIdxInSeq - (int)delay) * (int)mNbr;
int d = iPastIndex;
if (d < 0)
d = (int)functionValues.Mod((float)iPastIndex, (float)pastActivity.GetNumCols());
// this can point to the past activity of the previous mninibatch
Matrix<ElemType> out = functionValues.ColumnSlice(timeIdxInSeq * mNbr + indexInBatch, 1);
Matrix<ElemType> inp((DEVICEID_TYPE)functionValues.GetDeviceId());
if (reset)
out.SetValue(default_activity);
else
{
if (iPastIndex < 0)
inp = pastActivity.ColumnSlice(d + indexInBatch, 1);
else
inp = inputFunctionValues.ColumnSlice(d + indexInBatch, 1);
out.AssignValuesOf(inp);
}
}
// Drives the legacy per-column forward step over every time step and stream
// of the minibatch, with a delay of 1 and a reset on the very first frame.
template <class ElemType>
void oldRnnForwardPropSRP(Matrix<ElemType>& functionValues, size_t mNbr, Matrix<ElemType>& pastActivity, Matrix<ElemType>& inputFunctionValues)
{
    const size_t nTimeSteps = functionValues.GetNumCols() / mNbr;
    for (size_t t = 0; t < nTimeSteps; t++)
    {
        const bool reset = (t == 0); // only the first frame resets the state
        for (size_t stream = 0; stream < mNbr; stream++)
        {
            oldRNNForwardPropSRP<ElemType>(t, 1, reset, (ElemType) 0.1, functionValues, pastActivity, inputFunctionValues, stream, mNbr);
        }
    }
}
// Micro-benchmark (timings to stderr, no correctness checks). Compares:
//   (1) one whole-matrix MultiplyAndAdd,
//   (2) MultiplyAndAdd column-by-column via ColumnSlice temporaries,
//   (3) the same per-column loop measured a second time,
//   (4) MultiplyAndAdd column-by-column via AssignColumnSlice into reused slices,
// then AssignSigmoidOf per column via ColumnSlice vs. AssignColumnSlice.
// A is n x k, B is k x m, C and D are n x m, all on 'deviceID'.
template <class ElemType>
void ColumnSliceMultAndAddTest(int n, int k, int m, DEVICEID_TYPE deviceID)
{
    // random operands; CG/DG start uninitialized (only timing matters here)
    Matrix<ElemType> AG((size_t)n, (size_t)k, deviceID);
    AG.SetUniformRandomValue(-1, 1);
    Matrix<ElemType> BG((size_t)k, (size_t)m, deviceID);
    BG.SetUniformRandomValue(-1, 1);
    Matrix<ElemType> CG((size_t)n, (size_t)m, deviceID);
    Matrix<ElemType> DG((size_t)n, (size_t)m, deviceID);
    // (1) whole-matrix multiply as the baseline
    auto t_startG = clock();
    Matrix<ElemType>::MultiplyAndAdd(AG, false, BG, false, CG);
    auto t_endG = clock();
    fprintf(stderr, "MultiplyAndAdd Directly: %f seconds\n", 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC);
    // (2) per-column multiply through fresh ColumnSlice temporaries
    t_startG = clock();
    for (int i = 0; i < m; i++)
    {
        Matrix<ElemType> col_BG = BG.ColumnSlice(i, 1);
        Matrix<ElemType> col_CG = CG.ColumnSlice(i, 1);
        Matrix<ElemType>::MultiplyAndAdd(AG, false, col_BG, false, col_CG);
    }
    t_endG = clock();
    fprintf(stderr, "MultiplyAndAdd With ColumnSlice: %f seconds\n", 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC);
    // (3) NOTE(review): this loop is identical to (2) even though the label below
    // says "ColumnSlice&" — presumably it was meant to exercise a by-reference
    // slice variant; confirm the original intent.
    t_startG = clock();
    for (int i = 0; i < m; i++)
    {
        Matrix<ElemType> col_BG = BG.ColumnSlice(i, 1);
        Matrix<ElemType> col_CG = CG.ColumnSlice(i, 1);
        Matrix<ElemType>::MultiplyAndAdd(AG, false, col_BG, false, col_CG);
    }
    t_endG = clock();
    fprintf(stderr, "MultiplyAndAdd With ColumnSlice&: %f seconds\n", 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC);
    // (4) per-column multiply reusing two slice objects via AssignColumnSlice
    // (the slice objects are created on device 0)
    Matrix<ElemType> col_BG1(0), col_CG1(0);
    t_startG = clock();
    for (int i = 0; i < m; i++)
    {
        col_BG1.AssignColumnSlice(BG, i, 1);
        col_CG1.AssignColumnSlice(CG, i, 1);
        Matrix<ElemType>::MultiplyAndAdd(AG, false, col_BG1, false, col_CG1);
    }
    t_endG = clock();
    fprintf(stderr, "MultiplyAndAdd With AssignColumnSlice: %f seconds\n", 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC);
    // per-column sigmoid through fresh ColumnSlice temporaries (D = sigmoid(C))
    t_startG = clock();
    for (int i = 0; i < m; i++)
    {
        Matrix<ElemType> col_CG = CG.ColumnSlice(i, 1);
        Matrix<ElemType> col_DG = DG.ColumnSlice(i, 1);
        col_DG.AssignSigmoidOf(col_CG);
    }
    t_endG = clock();
    fprintf(stderr, "AssignSigmoidOf With ColumnSlice: %f seconds\n", 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC);
    // per-column sigmoid reusing slice objects (note: writes into the B slice here)
    t_startG = clock();
    for (int i = 0; i < m; i++)
    {
        col_BG1.AssignColumnSlice(BG, i, 1);
        col_CG1.AssignColumnSlice(CG, i, 1);
        col_BG1.AssignSigmoidOf(col_CG1);
    }
    t_endG = clock();
    fprintf(stderr, "AssignSigmoidOf With AssignColumnSlice: %f seconds\n", 1.0 * (t_endG - t_startG) / CLOCKS_PER_SEC);
}
// Times rnnForwardPropSRP on nRow x nCol (uninitialized) activations split into
// mNbr parallel streams; only the first time step is flagged for computation.
template <class ElemType>
void TestRnnForwardPropSRP(size_t nRow = 100, size_t nCol = 1000, size_t mNbr = 10, DEVICEID_TYPE deviceID = 0)
{
    // allocate activations and the per-stream begin-flag column
    Matrix<ElemType> functionValues(deviceID);
    functionValues.Resize(nRow, nCol);
    Matrix<ElemType> pastActivity(deviceID);
    pastActivity.Resize(nRow, nCol);
    Matrix<ElemType> inputFunctionValues(deviceID);
    inputFunctionValues.Resize(nRow, nCol);
    Matrix<ElemType> colBegin(deviceID);
    colBegin.Resize(mNbr, 1);

    // flag only the very first time step for computation
    Matrix<ElemType> needToCompute(deviceID);
    needToCompute.Resize(1, nCol / mNbr);
    needToCompute.SetValue(0);
    needToCompute.ColumnSlice(0, 1).SetValue(1);

    const clock_t started = clock();
    rnnForwardPropSRP<ElemType>(functionValues, mNbr, pastActivity, inputFunctionValues, colBegin, needToCompute);
    const clock_t finished = clock();
    fprintf(stderr, "testRnnForwardPropSRP: %f seconds\n", 1.0 * (finished - started) / CLOCKS_PER_SEC);
}
// Times the legacy RNN forward pass (oldRnnForwardPropSRP). The old approach
// reset state with an explicit if-statement and only supports up to two
// sentences within a minibatch.
template <class ElemType>
void TestOldRnnForwardPropSRP(size_t nRow = 100, size_t nCol = 1000, size_t mNbr = 10, DEVICEID_TYPE deviceID = 0)
{
    // allocate (uninitialized) activations and the per-stream begin-flag column
    Matrix<ElemType> functionValues(deviceID);
    functionValues.Resize(nRow, nCol);
    Matrix<ElemType> pastActivity(deviceID);
    pastActivity.Resize(nRow, nCol);
    Matrix<ElemType> inputFunctionValues(deviceID);
    inputFunctionValues.Resize(nRow, nCol);
    Matrix<ElemType> colBegin(deviceID);
    colBegin.Resize(mNbr, 1);

    const clock_t started = clock();
    oldRnnForwardPropSRP<ElemType>(functionValues, mNbr, pastActivity, inputFunctionValues);
    const clock_t finished = clock();
    fprintf(stderr, "TestOldRnnForwardPropSRP: %f seconds\n", 1.0 * (finished - started) / CLOCKS_PER_SEC);
}
}}}}

Просмотреть файл

@ -15,14 +15,6 @@ struct Err
static const T Rel;
static const T Abs;
};
template <>
const float Err<float>::Rel = 1e-5f;
template <>
const double Err<double>::Rel = 1e-5f;
template <>
const float Err<float>::Abs = 1.192092896e-07f;
template <>
const double Err<double>::Abs = 2.2204460492503131e-016;
bool AreEqual(float a, float b, float maxRelError, float maxAbsError);
bool AreEqual(double a, double b, double maxRelError, double maxAbsError);

Просмотреть файл

@ -3,6 +3,7 @@
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#include "stdafx.h"
#include "common.h"
const int c_deviceIdZero = 0;
@ -13,3 +14,12 @@ const float c_epsilonFloatE1 = 0.1f;
const float c_epsilonFloat5E4 = 0.0005f;
const float c_epsilonFloatE5 = 0.00001f;
const double c_epsilonDoubleE11 = 0.00000000001;
template <>
const float Microsoft::MSR::CNTK::Test::Err<float>::Rel = 1e-5f;
template <>
const double Microsoft::MSR::CNTK::Test::Err<double>::Rel = 1e-5f;
template <>
const float Microsoft::MSR::CNTK::Test::Err<float>::Abs = 1.192092896e-07f;
template <>
const double Microsoft::MSR::CNTK::Test::Err<double>::Abs = 2.2204460492503131e-016;

Просмотреть файл

@ -14,8 +14,16 @@
#endif
#define _SCL_SECURE_NO_WARNINGS // current API of matrix does not allow safe invokations. TODO: change api to proper one.
#ifdef _WIN32
#include "targetver.h"
#endif
#include <array>
#ifndef _WIN32
#define BOOST_TEST_DYN_LINK
#endif
#include <boost/test/unit_test.hpp>
#include "constants.h"
#include "fixtures.h"

Просмотреть файл

@ -70,7 +70,7 @@ struct DataFixture
if (!envVariableErrorMessage.empty())
{
BOOST_TEST_MESSAGE(envVariableErrorMessage);
fprintf(stderr, envVariableErrorMessage.c_str());
fprintf(stderr, "%s", envVariableErrorMessage.c_str());
}
newCurrentPath = m_testDataPath;

Просмотреть файл

@ -8,7 +8,15 @@
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms
#define _SCL_SECURE_NO_WARNINGS // current API of matrix does not allow safe invokations. TODO: change api to proper one.
#ifdef _WIN32
#include "targetver.h"
#include "basics.h"
#endif
#include "Basics.h"
#include "BrainScriptParser.h"
#ifndef _WIN32
// Use dynamic library on Linux
#define BOOST_TEST_DYN_LINK
#endif
#include <boost/test/unit_test.hpp>

Просмотреть файл

@ -4,7 +4,14 @@
//
#include "stdafx.h"
#include <algorithm>
#ifdef _WIN32
#include <io.h>
#else // On Linux
#define _dup2 dup2
#define _dup dup
#define _close close
#define _fileno fileno
#endif
#include <cstdio>
#include <boost/scope_exit.hpp>
#include "Common/ReaderTestHelper.h"

Просмотреть файл

@ -64,13 +64,13 @@ struct ReaderFixture
if (!envVariableErrorMessage.empty())
{
BOOST_TEST_MESSAGE(envVariableErrorMessage);
fprintf(stderr, envVariableErrorMessage.c_str());
fprintf(stderr, "%s\n", envVariableErrorMessage.c_str());
}
newCurrentPath = m_testDataPath;
}
}
else if ((subPath[0] == '/' && subPath[1] == '//') || (subPath[0] == '\\' && subPath[1] == '\\'))
else if ((subPath[0] == '/' && subPath[1] == '/') || (subPath[0] == '\\' && subPath[1] == '\\'))
{
newCurrentPath = subPath;
}
@ -295,16 +295,22 @@ struct ReaderFixture
// readerSectionName : the reader field name in the test section
shared_ptr<DataReader> GetDataReader(
const string configFileName,
const string testSectionName,
const string readerSectionName)
const std::string& configFileName,
const std::string& testSectionName,
const std::string& readerSectionName,
std::vector<std::wstring> additionalConfigParameters)
{
std::wstring configFN(configFileName.begin(), configFileName.end());
std::wstring configFileCommand(L"configFile=" + configFN);
std::wstring cntk(L"CNTK");
std::vector<wchar_t*> arg{ &cntk[0], &configFileCommand[0] };
for(auto& p : additionalConfigParameters)
{
arg.push_back(&p[0]);
}
wchar_t* arg[2]{L"CNTK", &configFileCommand[0]};
ConfigParameters config;
const std::string rawConfigString = ConfigParameters::ParseCommandLine(2, arg, config);
const std::string rawConfigString = ConfigParameters::ParseCommandLine((int)arg.size(), &arg[0], config);
config.ResolveVariables(rawConfigString);
const ConfigParameters simpleDemoConfig = config(testSectionName);
@ -344,14 +350,15 @@ struct ReaderFixture
size_t numSubsets,
bool sparseFeatures = false,
bool sparseLabels = false,
bool useSharedLayout = true)
bool useSharedLayout = true,
std::vector<std::wstring> additionalConfigParameters = {})
{
shared_ptr<StreamMinibatchInputs> inputsPtr =
CreateStreamMinibatchInputs<ElemType>(numFeatureFiles, numLabelFiles,
sparseFeatures, sparseLabels, useSharedLayout);
shared_ptr<DataReader> readerPtr = GetDataReader(configFileName,
testSectionName, readerSectionName);
testSectionName, readerSectionName, additionalConfigParameters);
// Perform the data reading
HelperWriteReaderContentToFile<ElemType>(testDataFilePath, *readerPtr, *inputsPtr,
@ -391,11 +398,12 @@ struct ReaderFixture
size_t numSubsets,
bool sparseFeatures = false,
bool sparseLabels = false,
bool useSharedLayout = true)
bool useSharedLayout = true,
std::vector<std::wstring> additionalConfigParameters = {})
{
HelperReadInAndWriteOut<ElemType>(configFileName, testDataFilePath, testSectionName, readerSectionName,
epochSize, mbSize, epochs, numFeatureFiles, numLabelFiles, subsetNum,numSubsets,
sparseFeatures, sparseLabels, useSharedLayout);
sparseFeatures, sparseLabels, useSharedLayout, additionalConfigParameters);
CheckFilesEquivalent(controlDataFilePath, testDataFilePath);
}
@ -408,10 +416,11 @@ struct ReaderFixture
void HelperRunReaderTestWithException(
string configFileName,
string testSectionName,
string readerSectionName)
string readerSectionName,
std::vector<std::wstring> additionalConfigParameters = {})
{
BOOST_CHECK_THROW(
GetDataReader(configFileName,testSectionName, readerSectionName),
GetDataReader(configFileName, testSectionName, readerSectionName, additionalConfigParameters),
ExceptionType);
}
};

Просмотреть файл

@ -0,0 +1,38 @@
RootDir = .
DataDir = $RootDir$
# deviceId = -1 for CPU, >= 0 for GPU devices
deviceId = -1
precision = "double"
Simple_Test = [
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = 450000
verbosity = 0
features1 = [
dim = 40
contextWindow=1
type = "real"
scpFile = "$DataDir$/features.rscp"
]
features2 = [
dim = 100
scpFile = "$DataDir$/ivector.rscp"
type = "real"
expandToUtterance = true
]
labels = [
mlfFile = "$DataDir$/labels.smlf"
labelMappingFile = "$DataDir$/labels.statelist"
labelDim = 9000
labelType = "category"
]
]
]

Просмотреть файл

@ -0,0 +1,32 @@
RootDir = .
DataDir = $RootDir$
# deviceId = -1 for CPU, >= 0 for GPU devices
deviceId = -1
precision = "double"
Simple_Test = [
reader = [
minimizeReaderMemoryFootprint=false
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = 450000
verbosity = 0
features = [
dim = 40
contextWindow=1
type = "real"
scpFile = "$DataDir$/features.rscp"
]
labels = [
mlfFile = "$DataDir$/labels.smlf"
labelMappingFile = "$DataDir$/labels.statelist"
labelDim = 9000
labelType = "category"
]
]
]

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше