Merge branch 'master' into qiwye/asgd-dev
This commit is contained in:
Commit
2b8b3047df

@@ -623,7 +623,7 @@ initValueScale
 If the model parameters are initialized using the Gaussian distribution,
 the standard deviation will be adjusted to
 \begin_inset Formula $0.2\times initValueScale/\sqrt{fanout}$
 \end_inset
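For concreteness, a worked instance of this formula (our illustrative numbers, not from the commit): a layer with fanout = 100 and the default initValueScale = 1 would use
\begin_inset Formula $\sigma=0.2\times1/\sqrt{100}=0.02$
\end_inset
as the standard deviation.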

@@ -0,0 +1,190 @@
# Note: reader configuration comes from AlexNet.cntk or AlexNetComposite.cntk, depending on the test
RootDir = "."

ConfigDir = "$RootDir$"
DataDir = "$RootDir$"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"

ndlMacros="$ConfigDir$/Macros.ndl"

precision = "float"
deviceId = "Auto"

command = Train:Test

parallelTrain = "true"
traceLevel = 1
numMBsToShowResult = 500

modelPath = "$ModelDir$/AlexNet"
stderr = "$OutputDir$/AlexNet"

################################
Train = {
    action = "train"

    BrainScriptNetworkBuilder = {
        imageShape = 227:227:3
        labelDim = 1000

        # Local Response Normalization
        # k : bias
        # n : half radius
        # alpha: scale factor
        # beta: exponent
        LRN {k, n, alpha, beta} = {
            apply (x) = {
                x2 = x .* x
                # reshape to insert a fake singleton reduction dimension after the 3rd axis
                x2s = SplitDimension(x2, 3, 1)
                # 3D convolution with a filter that has a non 1-size only in the 3rd axis, and does not reduce since the reduction dimension is fake and 1
                W = ParameterTensor{(1:1:2*n+1:1), learningRateMultiplier = 0, initValue = alpha/(2*n+1)}
                y = Convolution (W, x2s, (1:1:2*n+1), mapDims = 1, stride = 1, sharing = true, autoPadding = true, lowerPad = 0, upperPad = 0, transpose = false, maxTempMemSizeInSamples = 0)
                # reshape back to remove the fake singleton reduction dimension
                b = FlattenDimensions(y, 3, 2)
                den = Exp (beta .* Log(k + b))
                r = x .* Reciprocal(den)
            }.r
        }.apply

        # initValueScale are determined to generate Gaussians with variance of 0.01 and 0.005 (for the two DenseLayers)
        model = Sequential (
            ConvolutionalLayer {96, (11:11), stride=(4:4), pad=false, init='gaussian', initValueScale=0.95265} : ReLU :
            LRN {1.0, 2, 0.0001, 0.75} :
            MaxPoolingLayer {(3:3), stride=(2:2)} :
            ConvolutionalLayer {192, (5:5), pad = true, init='gaussian', initValueScale=2.44978, initBias=0.1} : ReLU :
            LRN {1.0, 2, 0.0001, 0.75} :
            MaxPoolingLayer {(3:3), stride=(2:2)} :
            ConvolutionalLayer {384, (3:3), pad = true, init='gaussian', initValueScale=2.07857} : ReLU :
            ConvolutionalLayer {384, (3:3), pad = true, init='gaussian', initValueScale=2.93945, initBias=0.1} : ReLU :
            ConvolutionalLayer {256, (3:3), pad = true, init='gaussian', initValueScale=2.93945, initBias=0.1} : ReLU :
            MaxPoolingLayer {(3:3), stride=(2:2)} :
            DenseLayer {4096, activation=ReLU, init='gaussian', initValueScale=2.40038, initBias=0.1} : Dropout :
            DenseLayer {4096, activation=ReLU, init='gaussian', initValueScale=1.6, initBias=0.1} : Dropout :
            LinearLayer {labelDim, init='gaussian', initValueScale=3.2}
        )

        # inputs
        features = Input {imageShape}
        featNorm = features - Constant(114)
        labels = Input {labelDim}

        # apply model to features
        z = model (featNorm)

        # loss and error computation
        ce = CrossEntropyWithSoftmax (labels, z)
        errs = ClassificationError (labels, z)
        top5Errs = ClassificationError (labels, z, topN=5) # only used in Eval action

        # declare special nodes
        featureNodes = (features)
        labelNodes = (labels)
        criterionNodes = (ce)
        evaluationNodes = (errs)
        outputNodes = (z)
    }

    SGD = {
        epochSize = 0
        minibatchSize = 256
        learningRatesPerMB = 0.01*25:0.001*25:0.0001*25:0.00001*25:0.000001
        momentumPerMB = 0.9
        maxEpochs = 110
        gradUpdateType = None
        L2RegWeight = 0.0005
        dropoutRate = 0.5

        # TODO: try less bits?
        ParallelTrain = {
            parallelizationMethod = "DataParallelSGD"
            distributedMBReading = "true"
            parallelizationStartEpoch = 3
            DataParallelSGD = {
                gradientBits = 32
            }
        }

        numMBsToShowResult = 100
    }

    # Reader
    reader = {
        verbosity = 0
        randomize = true
        randomizationWindow = 1

        deserializers = (
        {
            type = "ImageDeserializer" ; module = "ImageReader"
            file = "$DataDir$/train_map.txt"
            input = {
                features = {
                    transforms = (
                        {
                            type = "Crop"
                            cropType = "random"
                            cropRatio = 0.88671875
                            jitterType = "uniRatio"
                        }:{
                            type = "Scale"
                            width = 227
                            height = 227
                            channels = 3
                            interpolations = "linear"
                        #}:{
                        #    type = "Mean"
                        #    meanFile = "$ConfigDir$/ImageNet1K_mean.xml"
                        }:{
                            type = "Transpose"
                        }
                    )
                }
                labels = {
                    labelDim = 1000
                }
            }
        })
    }
}

################################
Test = {
    action=test
    minibatchSize=128
    evalNodeNames = errs:top5Errs # also test top-5 error rate

    # Reader
    reader = {
        verbosity = 0
        randomize = false

        deserializers = (
        {
            type = "ImageDeserializer" ; module = "ImageReader"
            file="$DataDir$/val_map.txt"
            input = {
                features = {
                    transforms = (
                        {
                            type = "Crop"
                            cropType = "center"
                            cropRatio = 0.88671875
                        }:{
                            type = "Scale"
                            width = 227
                            height = 227
                            channels = 3
                        }:{
                            type = "Transpose"
                        }
                    )
                }
                labels = {
                    labelDim = 1000
                }
            }
        })
    }
}
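For reference, the `LRN {k, n, alpha, beta}` function defined in this new config computes, per element (our reading of the BrainScript above, not text from the commit):

$$r_i = x_i \Big/ \Big(k + \tfrac{\alpha}{2n+1} \textstyle\sum_{j=i-n}^{i+n} x_j^2\Big)^{\beta}$$

The constant convolution kernel `W` carries the factor $\alpha/(2n+1)$ over a window of $2n+1$ channels along the 3rd axis, and `Exp(beta .* Log(k + b))` raises $k+b$ to the power $\beta$.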

@@ -0,0 +1,130 @@
# ConvNet applied on CIFAR-10 dataset, with data augmentation (translation and flipping).

command = TrainConvNet:Eval

precision = "float"; traceLevel = 1 ; deviceId = "auto"

rootDir = "../../.." ; dataDir = "$rootDir$/DataSets/CIFAR-10" ;
outputDir = "./Output" ;

modelPath = "$outputDir$/Models/ConvNetLRN_CIFAR10_DataAug"
#stderr = "$outputDir$/ConvNetLRN_CIFAR10_DataAug_bs_out"

TrainConvNet = {
    action = "train"

    BrainScriptNetworkBuilder = {
        imageShape = 32:32:3
        labelDim = 10

        featScale = 1/256
        Normalize{f} = x => f .* x

        # Local Response Normalization
        # k : bias
        # n : half radius
        # alpha: scale factor
        # beta: exponent
        LRN {k, n, alpha, beta} = {
            apply (x) = {
                x2 = x .* x
                # reshape to insert a fake singleton reduction dimension after the 3rd axis
                x2s = SplitDimension(x2, 3, 1)
                # 3D convolution with a filter that has a non 1-size only in the 3rd axis, and does not reduce since the reduction dimension is fake and 1
                W = ParameterTensor{(1:1:2*n+1:1), learningRateMultiplier = 0, initValue = alpha/(2*n+1)}
                y = Convolution (W, x2s, (1:1:2*n+1), mapDims = 1, stride = 1, sharing = true, autoPadding = true, lowerPad = 0, upperPad = 0, transpose = false, maxTempMemSizeInSamples = 0)
                # reshape back to remove the fake singleton reduction dimension
                b = FlattenDimensions(y, 3, 2)
                den = Exp (beta .* Log(k + b))
                r = x .* Reciprocal(den)
            }.r
        }.apply

        model = Sequential (
            Normalize {featScale} :
            ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
            ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
            LRN {1.0, 4, 0.001, 0.75} :
            MaxPoolingLayer {(3:3), stride = (2:2)} :
            ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
            ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
            LRN {1.0, 4, 0.001, 0.75} :
            MaxPoolingLayer {(3:3), stride = (2:2)} :
            DenseLayer {256} : ReLU : Dropout :
            DenseLayer {128} : ReLU : Dropout :
            LinearLayer {labelDim}
        )

        # inputs
        features = Input {imageShape}
        labels = Input {labelDim}

        # apply model to features
        z = model (features)

        # connect to system
        ce = CrossEntropyWithSoftmax (labels, z)
        errs = ClassificationError (labels, z)
        top5Errs = ClassificationError (labels, z, topN=5) # only used in Eval action

        featureNodes = (features)
        labelNodes = (labels)
        criterionNodes = (ce)
        evaluationNodes = (errs) # top5Errs only used in Eval
        outputNodes = (z)
    }

    SGD = {
        epochSize = 0
        minibatchSize = 64

        learningRatesPerSample = 0.0015625*20:0.00046875*20:0.00015625*20:0.000046875*10:0.000015625
        momentumAsTimeConstant = 0*20:600*20:1200
        maxEpochs = 80
        L2RegWeight = 0.002
        dropoutRate = 0.5

        numMBsToShowResult = 100
    }

    reader = {
        verbosity = 0 ; randomize = true
        deserializers = ({
            type = "ImageDeserializer" ; module = "ImageReader"
            file = "$dataDir$/train_map.txt"
            input = {
                features = { transforms = (
                    { type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
                    { type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
                    { type = "Mean" ; meanFile = "$dataDir$/CIFAR-10_mean.xml" } :
                    { type = "Transpose" }
                )}
                labels = { labelDim = 10 }
            }
        })
    }
}

# Eval action
Eval = {
    action = "eval"
    evalNodeNames = errs:top5Errs # also test top-5 error rate
    # Set minibatch size for testing.
    minibatchSize = 512

    reader = {
        verbosity = 0 ; randomize = false
        deserializers = ({
            type = "ImageDeserializer" ; module = "ImageReader"
            file = "$dataDir$/test_map.txt"
            input = {
                features = { transforms = (
                    { type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
                    { type = "Mean"; meanFile = "$dataDir$/CIFAR-10_mean.xml" } :
                    { type = "Transpose" }
                )}
                labels = { labelDim = 10 }
            }
        })
    }
}

@@ -28,8 +28,8 @@ TrainConvNet = {
             ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
             ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
             MaxPoolingLayer {(3:3), stride = (2:2)} :
-            DenseLayer {256} : Dropout : ReLU :
-            DenseLayer {128} : Dropout : ReLU :
+            DenseLayer {256} : ReLU : Dropout :
+            DenseLayer {128} : ReLU : Dropout :
             LinearLayer {labelDim}
         )

@@ -31,4 +31,12 @@ Run the example from the current folder using:

 `cntk configFile=ConvNet_CIFAR10_DataAug.cntk`

-As seen in the CNTK configuration file [ConvNet_CIFAR10_DataAug.cntk](./ConvNet_CIFAR10_DataAug.cntk), we use a fixed crop ratio of `0.8` and scale the image to `32x32` pixels for training. Since all training images are pre-padded to `40x40` pixels, effectively we only perform translation transform without scaling. The accuracy of the network on test data is around `14%`, which is a lot better than the previous model.
+As seen in the CNTK configuration file [ConvNet_CIFAR10_DataAug.cntk](./ConvNet_CIFAR10_DataAug.cntk), we use a fixed crop ratio of `0.8` and scale the image to `32x32` pixels for training. Since all training images are pre-padded to `40x40` pixels (and `0.8 × 40 = 32`), we effectively perform only a translation transform, without rescaling. The error rate of the network on test data is around `14.2%`, which is a lot better than the previous model.
+
+### ConvNetLRN_CIFAR10_DataAug.cntk
+
+The fourth example adds local response normalization (LRN) to the previous example. LRN is implemented as a BrainScript function using a 3D convolution with a constant kernel. You may run the example from the current folder using:
+
+`cntk configFile=ConvNetLRN_CIFAR10_DataAug.cntk`
+
+This model achieves a slightly better error rate of `13.8%`, which demonstrates the effectiveness of LRN. Nevertheless, as mentioned earlier, LRN is now rarely used by state-of-the-art deep networks.
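As a reading aid (ours, not the commit's): in the new config, the positional call `LRN {1.0, 4, 0.001, 0.75}` binds k=1.0 (bias), n=4 (a 2n+1 = 9-channel window), alpha=0.001 (scale factor), and beta=0.75 (exponent), matching the parameter comments in the BrainScript definition above.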

@@ -13,6 +13,7 @@ from cntk.layers import *
 from cntk.models import Sequential, LayerStack
 from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT, FULL_DATA_SWEEP
 from cntk.learner import momentum_sgd, learning_rate_schedule, momentum_schedule, momentum_as_time_constant_schedule, UnitType
+from cntk.learner import momentum_sgd, learning_rate_schedule, momentum_as_time_constant_schedule, UnitType
 from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error, relu, minus, element_times, constant
 from _cntk_py import set_computation_network_trace_level

@@ -71,9 +72,9 @@ def convnet_cifar10(debug_output=False):

     # Set learning parameters
     lr_per_sample = [0.0015625]*10+[0.00046875]*10+[0.00015625]
-    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
-    momentum_time_constant = [0]*20+[-minibatch_size/np.log(0.9)]
-    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant, epoch_size=epoch_size)
+    lr_schedule = learning_rate_schedule(lr_per_sample, UnitType.sample, epoch_size)
+    mm_time_constant = [0]*20+[-minibatch_size/np.log(0.9)]
+    mm_schedule = momentum_as_time_constant_schedule(mm_time_constant, epoch_size)
     l2_reg_weight = 0.002

     # Instantiate the trainer object to drive the model training
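The expression `-minibatch_size/np.log(0.9)` in the hunk above converts a per-minibatch momentum of 0.9 into CNTK's time-constant form (the number of samples over which a gradient contribution decays to 1/e). A minimal sketch of that arithmetic, assuming a minibatch size of 64 as in the corresponding BrainScript config:

```python
import numpy as np

minibatch_size = 64
momentum_per_mb = 0.9

# Momentum 0.9 applied once per 64-sample minibatch decays a gradient
# contribution to 1/e after about -64/ln(0.9) ~= 607 samples.
time_constant = -minibatch_size / np.log(momentum_per_mb)
print(round(time_constant, 1))  # 607.4
```

This is also why the config above uses `momentumAsTimeConstant = 0*20:600*20:1200`: roughly the same constant expressed in samples.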

@@ -14,6 +14,7 @@ from cntk.ops import input_variable, cross_entropy_with_softmax, classification_
 from cntk.io import MinibatchSource, ImageDeserializer, StreamDef, StreamDefs
 from cntk import Trainer, persist, cntk_py
 from cntk.learner import momentum_sgd, learning_rate_schedule, momentum_schedule, momentum_as_time_constant_schedule, UnitType
+from cntk.learner import momentum_sgd, learning_rate_schedule, momentum_as_time_constant_schedule, UnitType
 from _cntk_py import set_computation_network_trace_level

 # Paths relative to current python file.

@@ -83,8 +84,8 @@ def convnet_cifar10_dataaug(reader_train, reader_test, max_epochs = 80):
     # Set learning parameters
     lr_per_sample = [0.0015625]*20+[0.00046875]*20+[0.00015625]*20+[0.000046875]*10+[0.000015625]
     lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample, epoch_size=epoch_size)
-    momentum_time_constant = [0]*20+[600]*20+[1200]
-    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant, epoch_size=epoch_size)
+    mm_time_constant = [0]*20+[600]*20+[1200]
+    mm_schedule = momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size)
     l2_reg_weight = 0.002

     # trainer object

@@ -48,10 +48,9 @@ def create_reader(map_file, mean_file, train, distributed_after=INFINITE_SAMPLES
     ImageDeserializer(map_file, StreamDefs(
         features = StreamDef(field='image', transforms=transforms), # first column in map file is referred to as 'image'
         labels   = StreamDef(field='label', shape=num_classes))),   # and second as 'label'
-        randomize = False,
+        multithreaded_deserializer = False,  # turn off omp as CIFAR-10 is not heavy for deserializer
         distributed_after = distributed_after)


 # Train and evaluate the network.
 def convnet_cifar10_dataaug(reader_train, reader_test, distributed_trainer, max_epochs = 80):
     set_computation_network_trace_level(0)

@@ -87,8 +86,8 @@ def convnet_cifar10_dataaug(reader_train, reader_test, distributed_trainer, max_
     # Set learning parameters
     lr_per_sample = [0.0015625]*20+[0.00046875]*20+[0.00015625]*20+[0.000046875]*10+[0.000015625]
     lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample, epoch_size=epoch_size)
-    momentum_time_constant = [0]*20+[600]*20+[1200]
-    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant, epoch_size=epoch_size)
+    mm_time_constant = [0]*20+[600]*20+[1200]
+    mm_schedule = momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size)
     l2_reg_weight = 0.002

     # trainer object

@@ -11,7 +11,7 @@ from cntk import Trainer, persist
 from cntk.utils import *
 from cntk.layers import *
 from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT, FULL_DATA_SWEEP
-from cntk.learner import momentum_sgd, learning_rate_schedule, momentum_schedule, UnitType
+from cntk.learner import momentum_sgd, learning_rate_schedule, momentum_as_time_constant_schedule, UnitType
 from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error, relu, element_times, constant

 # Paths relative to current python file.

@@ -63,11 +63,11 @@ def convnet_mnist(debug_output=False):
     # Set learning parameters
     lr_per_sample = [0.001]*10+[0.0005]*10+[0.0001]
     lr_schedule = learning_rate_schedule(lr_per_sample, UnitType.sample, epoch_size)
-    momentum_time_constant = [0]*5+[1024]
-    mn_schedule = momentum_schedule(momentum_time_constant, epoch_size)
+    mm_time_constant = [0]*5+[1024]
+    mm_schedule = momentum_as_time_constant_schedule(mm_time_constant, epoch_size)

     # Instantiate the trainer object to drive the model training
-    learner = momentum_sgd(z.parameters, lr_schedule, mn_schedule)
+    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule)
     trainer = Trainer(z, ce, pe, learner)

     # define mapping from reader streams to network inputs

@@ -35,7 +35,7 @@ We use a fixed crop ratio of `0.8` and scale the image to `32x32` pixels for tra

 ### ConvNet_CIFAR10_DataAug_Distributed.py

-The fourth example uses the same CNN as ConvNet_CIFAR10_DataAug.py, but it adds support for distributed training with simple aggregation. For a reference on distributed training, please check [here](https://github.com/Microsoft/CNTK/wiki/Multiple-GPUs-and-machines)
-Note that this example supports CPU-only build.
+The fourth example uses the same CNN as ConvNet_CIFAR10_DataAug.py, but it adds support for distributed training with simple aggregation. For a reference on distributed training, please check [here](https://github.com/Microsoft/CNTK/wiki/Multiple-GPUs-and-machines).
+Note that [this example](./ConvNet_CIFAR10_DataAug_Distributed.py) supports CPU-only build.

 `mpiexec -n <#workers> python ConvNet_CIFAR10_DataAug_Distributed.py`

@@ -15,7 +15,7 @@ for ResNet20 and ResNet110, respectively. The ResNet20 network achieves an error

 ### TrainResNet_CIFAR10_Distributed.py

-This example code is similar to TrainResNet_CIFAR10.py, but it adds support for distributed training via [MPI](https://en.wikipedia.org/wiki/Message_Passing_Interface). Details can be found in [here](https://github.com/Microsoft/CNTK/wiki/Multiple-GPUs-and-machines)
+[This example](./TrainResNet_CIFAR10_Distributed.py) is similar to TrainResNet_CIFAR10.py, but it adds support for distributed training via [MPI](https://en.wikipedia.org/wiki/Message_Passing_Interface). Details can be found [here](https://github.com/Microsoft/CNTK/wiki/Multiple-GPUs-and-machines).
 Note this example requires a multi-GPU machine or mpi hosts file to distribute to multiple machines.

 Simple aggregation, ResNet20, with a 2-GPU machine:

@@ -25,3 +25,7 @@ Simple aggregation, ResNet20, with a 2-GPU machine:
 Quantized 1-bit aggregation with 50000 samples before distributed, ResNet20, with a 2-GPU machine:

 `mpiexec -n 2 python TrainResNet_CIFAR10_Distributed.py -n resnet20 -q 1 -a 50000`
+
+To run with maximum parallelization, with the minibatch size scaled according to the number of workers, for 3 epochs:
+
+`mpiexec -n 2 python TrainResNet_CIFAR10_Distributed.py -s True -e 3`
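(Our note, based on the `train_and_evaluate` change later in this commit: with `-s True`, the base minibatch of 128 is multiplied by the number of MPI workers, so the 2-worker command above trains with 128 × 2 = 256 samples per update.)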

@@ -50,12 +50,12 @@ def create_reader(map_file, mean_file, train, distributed_after=INFINITE_SAMPLES
     ImageDeserializer(map_file, StreamDefs(
         features = StreamDef(field='image', transforms=transforms), # first column in map file is referred to as 'image'
         labels   = StreamDef(field='label', shape=num_classes))),   # and second as 'label'
-        randomize = False,
+        multithreaded_deserializer = False,  # turn off omp as CIFAR-10 is not heavy for deserializer
         distributed_after = distributed_after)


 # Train and evaluate the network.
-def train_and_evaluate(reader_train, reader_test, network_name, max_epochs, distributed_trainer):
+def train_and_evaluate(reader_train, reader_test, network_name, max_epochs, distributed_trainer, scale_up=False):

     set_computation_network_trace_level(0)

@@ -79,7 +79,13 @@ def train_and_evaluate(reader_train, reader_test, network_name, max_epochs, dist

     # shared training parameters
     epoch_size = 50000                     # for now we manually specify epoch size
-    minibatch_size = 128
+
+    # NOTE: scaling up minibatch_size increases sample throughput. On an 8-GPU machine,
+    # ResNet110 samples-per-second is ~7x that of a single GPU, compared to ~3x without
+    # scaling up. However, a bigger minibatch size on the same number of samples means
+    # fewer updates, thus higher training error. This is a trade-off of speed and accuracy.
+    minibatch_size = 128 * (len(distributed_trainer.communicator().workers()) if scale_up else 1)

     momentum_time_constant = -minibatch_size/np.log(0.9)
     l2_reg_weight = 0.0001

@@ -145,15 +151,17 @@ def train_and_evaluate(reader_train, reader_test, network_name, max_epochs, dist
 if __name__=='__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('-n', '--network', help='network type, resnet20 or resnet110', required=False, default='resnet20')
-    parser.add_argument('-e', '--epochs', help='total epochs', required=False, default='160')
-    parser.add_argument('-q', '--quantize_bit', help='quantized bit', required=False, default='32')
-    parser.add_argument('-a', '--distributed_after', help='number of samples to train with before running distributed', required=False, default='0')
+    parser.add_argument('-e', '--epochs', help='total epochs', type=int, required=False, default='160')
+    parser.add_argument('-q', '--quantize_bit', help='quantized bit', type=int, required=False, default='32')
+    parser.add_argument('-s', '--scale_up', help='scale up minibatch size with #workers for better parallelism', type=bool, required=False, default='False')
+    parser.add_argument('-a', '--distributed_after', help='number of samples to train with before running distributed', type=int, required=False, default='0')

     args = vars(parser.parse_args())
     num_quantization_bits = int(args['quantize_bit'])
     epochs = int(args['epochs'])
     distributed_after_samples = int(args['distributed_after'])
     network_name = args['network']
+    scale_up = bool(args['scale_up'])

     # Create distributed trainer
     print("Start training: quantize_bit = {}, epochs = {}, distributed_after = {}".format(num_quantization_bits, epochs, distributed_after_samples))
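One caveat about the `--scale_up` option added above (our observation, not part of the commit): `argparse` applies `type=bool` to the raw string, and any non-empty string, including `'False'`, is truthy, so `-s False` still yields `True`. A small sketch of the usual workaround:

```python
import argparse

def str2bool(v):
    # bool('False') == True, so map common spellings explicitly instead.
    return str(v).lower() in ('1', 'true', 'yes', 'y')

parser = argparse.ArgumentParser()
parser.add_argument('-s', '--scale_up', type=str2bool, required=False, default=False)
print(parser.parse_args(['-s', 'False']).scale_up)  # False, as intended
```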

@@ -164,7 +172,7 @@ if __name__=='__main__':
     reader_train = create_reader(os.path.join(data_path, 'train_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), True, distributed_after_samples)
     reader_test  = create_reader(os.path.join(data_path, 'test_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), False)

-    train_and_evaluate(reader_train, reader_test, network_name, epochs, distributed_trainer)
+    train_and_evaluate(reader_train, reader_test, network_name, epochs, distributed_trainer, scale_up)

     # Must call MPI finalize when process exit
     distributed.Communicator.finalize()

README.md
@@ -1,9 +1,11 @@
+**The [CNTK Wiki](https://github.com/Microsoft/CNTK/wiki) has all information on CNTK including [setup](https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-your-machine), [examples](https://github.com/Microsoft/CNTK/wiki/Examples), etc.**
+
 # Latest news
 *2016-11-21.* V 2.0 Beta 4 Release
 Highlights of this Release:
 * New ASGD/Hogwild! training using Microsoft’s Parameter Server ([Project Multiverso](https://github.com/Microsoft/multiverso))
 * Distributed Scenarios now supported in CNTK Python API
-* New Memory compression -- ability to trade off memory usage with compute.
+* New [Memory Compression](https://github.com/Microsoft/CNTK/wiki/Top-level-configurations#hypercompressmemory) mode to reduce memory usage on GPU
 * CNTK Docker image with 1bit-SGD support
 * Stability Improvements and bug fixes

@@ -55,18 +57,6 @@ If you ARE using Model Evaluation Library we **strongly recommend** installing v

 See [Release Notes](https://github.com/Microsoft/CNTk/wiki/CNTK_1_7_2_Release_Notes) for details.

-*2016-09-28.* V 1.7.1 Binary release
-Highlights of this Release:
-* Two Breaking Changes related to Layers library default initialization and ```fsAdagrad``` gradient-normalization scheme
-* Improvements in BrainScript
-* Enabling of Deterministic Algorithm enforcement
-* Improvements in Model Evaluation including the support of Evaluation for Azure Applications
-* Different Performance improvements
-* Multiple bug fixes
-
-See more in the [Release Notes](https://github.com/Microsoft/CNTK/wiki/CNTK_1_7_1_Release_Notes) (including the full list of bugs fixed)
-Get the Release from the [CNTK Releases page](https://github.com/Microsoft/CNTK/releases)
-
 See [all news](https://github.com/Microsoft/CNTK/wiki/News).

 # What is CNTK

@@ -512,7 +512,7 @@ CNTK2 = [
     # Parameter{} can do several forms of initialization.
     #  - initValue=scalar, value=array --> initialize from this value --array form not implemented yet
     #  - initFromFilePath="..." --> read from a data file
-    #  - init="uniform|gaussian" (random init scaled by initValueScale). Warning: This has magic scaling factors. TODO: document them here
+    #  - init="uniform|gaussian" (random init scaled by initValueScale).
     #  - init="zero"
     # deprecated:
     #  - initFromLiteral="..." (deprecated) --> parse a string literal (obsolete with value=array form)

@@ -393,16 +393,11 @@ namespace CNTK
         friend class PackedValue;
         friend class MPICommunicatorImpl;
         friend class BlockMomentumDistributedTrainer;
+        friend class Internal::VariableResolver;

         template <typename T, typename ...CtorArgTypes>
         friend inline std::shared_ptr<T> MakeSharedObject(CtorArgTypes&& ...ctorArgs);

-        template <typename ElementType>
-        friend Variable Internal::GetVariable(const Microsoft::MSR::CNTK::ComputationNodeBasePtr& node,
-                                              std::unordered_map<Microsoft::MSR::CNTK::ComputationNodeBasePtr, Variable>& nodeToVariableMap,
-                                              std::unordered_map<Variable, Variable>& placeholderReplacements,
-                                              std::unordered_set<FunctionPtr>& allPrimitiveFunctions);
-
     public:
         ///
         /// Construct a NDArrayView with the specified 'dataBuffer' as the backing storage.

@@ -635,6 +630,7 @@ namespace CNTK
         static const size_t AutoSelectRowColSplitPoint = SIZE_MAX;

     private:
         CNTK_API NDArrayView(::CNTK::DataType dataType, const DeviceDescriptor& device, ::CNTK::StorageFormat storageType, const NDShape& viewShape, bool readOnly, void* tensorView);

@@ -1599,11 +1595,7 @@ namespace CNTK
         template <typename T>
         friend struct std::hash;

-        template <typename ElementType>
-        friend Variable Internal::GetVariable(const Microsoft::MSR::CNTK::ComputationNodeBasePtr& node,
-                                              std::unordered_map<Microsoft::MSR::CNTK::ComputationNodeBasePtr, Variable>& nodeToVariableMap,
-                                              std::unordered_map<Variable, Variable>& placeholderReplacements,
-                                              std::unordered_set<FunctionPtr>& allPrimitiveFunctions);
+        friend class Internal::VariableResolver;

 #ifndef SWIG
     private:

@@ -1952,11 +1944,7 @@ private:
         template <typename T>
         friend struct std::hash;

-        template <typename ElementType>
-        friend Variable Internal::GetVariable(const Microsoft::MSR::CNTK::ComputationNodeBasePtr& node,
-                                              std::unordered_map<Microsoft::MSR::CNTK::ComputationNodeBasePtr, Variable>& nodeToVariableMap,
-                                              std::unordered_map<Variable, Variable>& placeholderReplacements,
-                                              std::unordered_set<FunctionPtr>& allPrimitiveFunctions);
+        friend class Internal::VariableResolver;

     public:
         ///

@@ -2037,11 +2025,7 @@ private:
         template <typename T>
         friend struct std::hash;

-        template <typename ElementType>
-        friend Variable Internal::GetVariable(const Microsoft::MSR::CNTK::ComputationNodeBasePtr& node,
-                                              std::unordered_map<Microsoft::MSR::CNTK::ComputationNodeBasePtr, Variable>& nodeToVariableMap,
-                                              std::unordered_map<Variable, Variable>& placeholderReplacements,
-                                              std::unordered_set<FunctionPtr>& allPrimitiveFunctions);
+        friend class Internal::VariableResolver;

     public:
         ///

@@ -2175,13 +2159,17 @@ namespace CNTK
         /// Returns the Function that 'this' BackPropState belongs to
         ///
         FunctionPtr Function() const { return m_function; }
+        DeviceDescriptor Device() const { return m_forwardComputeDevice; }
         virtual ~BackPropState() {}

     protected:
-        BackPropState(const FunctionPtr& function) : m_function(function) {}
+        BackPropState(const FunctionPtr& function, const DeviceDescriptor& computeDevice)
+            : m_function(function), m_forwardComputeDevice(computeDevice)
+        {}

     protected:
         FunctionPtr m_function;
+        DeviceDescriptor m_forwardComputeDevice;
     };
     typedef std::shared_ptr<BackPropState> BackPropStatePtr;

@@ -247,10 +247,6 @@ namespace CNTK

         CNTK_API bool AreEqual(const ::CNTK::NDArrayView& view1, const ::CNTK::NDArrayView& view2, double relativeTolerance = 0.0, double absoluteTolerance = 0.0);

-        template <typename ElementType>
-        Variable GetVariable(const Microsoft::MSR::CNTK::ComputationNodeBasePtr& node,
-                             std::unordered_map<Microsoft::MSR::CNTK::ComputationNodeBasePtr, ::CNTK::Variable>& nodeToVariableMap,
-                             std::unordered_map<::CNTK::Variable, ::CNTK::Variable>& placeholderReplacements,
-                             std::unordered_set<::CNTK::FunctionPtr>& allPrimitiveFunctions);
+        class VariableResolver;
     }
 }

@@ -20,28 +20,67 @@
 #include "DeprecatedNodes.h"
 #include "RNNNodes.h"

 using namespace Microsoft::MSR::CNTK;

 namespace CNTK
 {
     namespace Internal
     {
-        template <typename ElementType>
-        Variable GetVariable(const ComputationNodeBasePtr& node,
-                             std::unordered_map<ComputationNodeBasePtr, Variable>& nodeToVariableMap,
-                             std::unordered_map<Variable, Variable>& placeholderReplacements,
-                             std::unordered_set<FunctionPtr>& allPrimitiveFunctions)
-        {
-            auto iter = nodeToVariableMap.find(node);
-            if (iter != nodeToVariableMap.end())
-                return iter->second;
-
-            Variable var;
-            NDShape varShape = AsNDShape(node->GetSampleLayout());
-
-            if (node->IsLeaf())
-            {
+        // Helper class to resolve variables in the model.
+        class VariableResolver final
+        {
+            std::unordered_map<Variable, Variable> m_placeholderReplacements;
+            std::unordered_map<ComputationNodeBasePtr, Variable> m_nodeToVariableMap;
+            std::unordered_set<FunctionPtr> m_allPrimitiveFunctions;
+
+        public:
+            const std::unordered_map<Variable, Variable>& GetPlaceHolders() const
+            {
+                return m_placeholderReplacements;
+            }
+
+            template<class ElementType>
+            Variable GetVariable(const ComputationNodeBasePtr& node)
+            {
+                auto iter = m_nodeToVariableMap.find(node);
+                if (iter != m_nodeToVariableMap.end())
+                    return iter->second;
+
+                Variable var;
+                if (node->IsLeaf())
+                {
+                    var = ResolveLeaf<ElementType>(node);
+                }
+                else
+                {
+                    // This is a non-leaf node and maps to a primitive Function
+                    NDShape varShape = AsNDShape(node->GetSampleLayout());
+                    auto placeholderVar = PlaceholderVariable(varShape);
+                    m_nodeToVariableMap[node] = placeholderVar;
+
+                    std::vector<Variable> inputVars(node->GetNumInputs());
+                    for (size_t i = 0; i < inputVars.size(); ++i)
+                    {
+                        inputVars[i] = GetVariable<ElementType>(node->Input(i));
+                        if (inputVars[i].IsPlaceholder())
+                            m_placeholderReplacements[inputVars[i]] = Variable();
+                    }
+
+                    var = ResolveFunction<ElementType>(node, inputVars);
+
+                    if (m_placeholderReplacements.find(placeholderVar) != m_placeholderReplacements.end())
+                        m_placeholderReplacements[placeholderVar] = var;
+                }
+
+                m_nodeToVariableMap[node] = var;
+                return var;
+            }
+
+        private:
+            template<class ElementType>
+            Variable ResolveLeaf(const ComputationNodeBasePtr& node)
+            {
+                NDShape variableShape = AsNDShape(node->GetSampleLayout());
                 std::wstring varUid, varName;
                 if (node->Is<InputValueBase<ElementType>>())
                 {

@@ -54,48 +93,32 @@ namespace CNTK
                     auto inputNodeInternalDynamicAxisName = node->As<InputValueBase<ElementType>>()->GetRequestedDynamicAxis();
                     std::vector<Axis> inputVarDynamicAxes = DynamicAxesFromInternalDynamicAxisName(inputNodeInternalDynamicAxisName);

-                    var = Variable(varShape, isSparse, AsDataType<ElementType>(), node->GetLearningRateMultiplier() != 0, varName, inputVarDynamicAxes, varUid);
-                }
-                else
-                {
-                    // TODO: Allow creating inputs without a dynamic axis
-                    LogicError("Found InputNode with no dynamic axes which is currently unsupported");
+                    return Variable(variableShape, isSparse, AsDataType<ElementType>(), node->GetLearningRateMultiplier() != 0, varName, inputVarDynamicAxes, varUid);
                 }
+
+                // TODO: Allow creating inputs without a dynamic axis
+                LogicError("Found InputNode with no dynamic axes which is currently unsupported");
             }
-            else if (node->Is<LearnableParameter<ElementType>>())
+
+            if (node->Is<LearnableParameter<ElementType>>())
             {
                 bool isConstant = (node->GetLearningRateMultiplier() == 0);
                 auto& matrix = node->As<ComputationNode<ElementType>>()->Value();
                 auto tensorView = new TensorView<ElementType>(std::make_shared<Matrix<ElementType>>(matrix.AsReference()), AsTensorViewShape(node->GetSampleLayout()));
-                NDArrayViewPtr value = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(matrix.GetFormat()), varShape, false, tensorView);
-                if (isConstant)
-                {
-                    std::tie(varUid, varName) = UidAndNameFromCNTKInternalNodeName(node->NodeName(), VariableKind::Constant);
-                    var = Constant(value, varName, varUid);
-                }
-                else
-                {
-                    std::tie(varUid, varName) = UidAndNameFromCNTKInternalNodeName(node->NodeName(), VariableKind::Parameter);
-                    var = Parameter(value, varName, varUid);
-                }
+                NDArrayViewPtr value = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(matrix.GetFormat()), variableShape, false, tensorView);
+                auto kind = isConstant ? VariableKind::Constant : VariableKind::Parameter;
+                std::tie(varUid, varName) = UidAndNameFromCNTKInternalNodeName(node->NodeName(), kind);
+                return isConstant ? (Variable)Constant(value, varName, varUid) : Parameter(value, varName, varUid);
             }
-            else
+
             LogicError("CNTK::LoadLegacyModel: Unsupported legacy CNTK node named '%S'", node->NodeName().c_str());
+            return Variable();// make compiler happy.
         }
-        else
+
+        template<class ElementType>
+        Variable ResolveFunction(const ComputationNodeBasePtr& node, std::vector<Variable>& inputVars)
         {
-            // This is a non-leaf node and maps to a primitive Function
-            auto placeholderVar = PlaceholderVariable(varShape);
-            nodeToVariableMap[node] = placeholderVar;
-
-            std::vector<Variable> inputVars(node->GetNumInputs());
-            for (size_t i = 0; i < inputVars.size(); ++i)
-            {
-                inputVars[i] = GetVariable<ElementType>(node->Input(i), nodeToVariableMap, placeholderReplacements, allPrimitiveFunctions);
-                if (inputVars[i].IsPlaceholder())
-                    placeholderReplacements[inputVars[i]] = Variable();
-            }
-
             PrimitiveOpType opType;
             Dictionary primitiveFunctionConfigParameters;
             if (node->OperationName() == OperationNameOf(NegateNode))

@@ -376,15 +399,10 @@ namespace CNTK
             std::tie(functionUid, functionName) = UidAndNameFromCNTKInternalNodeName(node->NodeName(), opType);

             FunctionPtr primitiveFunction = MakeSharedObject<PrimitiveFunction>(opType, inputVars, std::move(primitiveFunctionConfigParameters), functionName, functionUid);
-            allPrimitiveFunctions.insert(primitiveFunction);
-            var = primitiveFunction->Output();
-            if (placeholderReplacements.find(placeholderVar) != placeholderReplacements.end())
-                placeholderReplacements[placeholderVar] = var;
+            m_allPrimitiveFunctions.insert(primitiveFunction);
+            return primitiveFunction->Output();
         }
-
-        nodeToVariableMap[node] = var;
-        return var;
-    }
+    };

     FunctionPtr LoadLegacyModel(const std::wstring& modelFile, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/)
     {

@@ -410,8 +428,8 @@ namespace CNTK
         // Now traverse the model and construct the Function graph
         std::unordered_map<ComputationNodeBasePtr, Variable> nodeToVariableMap;
         std::unordered_map<Variable, Variable> placeholderReplacements;
-        std::unordered_set<FunctionPtr> allPrimitiveFunctions;
         std::vector<Variable> rootVariables;
+        VariableResolver resolver;
         auto& networkRoots = net->RootNodes();
         for (auto& rootNode : networkRoots)
         {

@@ -420,11 +438,11 @@ namespace CNTK

             if (ComputationNetwork::IsNodePtr<ComputationNode<float>>(rootNode))
             {
-                rootVariables.push_back(Internal::GetVariable<float>(rootNode, nodeToVariableMap, placeholderReplacements, allPrimitiveFunctions).Owner());
+                rootVariables.push_back(resolver.GetVariable<float>(rootNode).Owner());
             }
             else if (ComputationNetwork::IsNodePtr<ComputationNode<double>>(rootNode))
             {
-                rootVariables.push_back(Internal::GetVariable<double>(rootNode, nodeToVariableMap, placeholderReplacements, allPrimitiveFunctions).Owner());
+                rootVariables.push_back(resolver.GetVariable<double>(rootNode).Owner());
             }
             else
             {

@@ -433,8 +451,7 @@ namespace CNTK
         }

         auto rootComposite = Combine(rootVariables);
-        rootComposite->ReplacePlaceholders(placeholderReplacements);
+        rootComposite->ReplacePlaceholders(resolver.GetPlaceHolders());

         return rootComposite;
     }

@@ -2625,7 +2625,7 @@ namespace CNTK
         else
             evalTimeStampVariable = arguments.begin()->first;

-        return (outputsToRetainBackwardStateFor.size() > 0) ? MakeSharedObject<CNTKBackPropState>(this->shared_from_this(), std::make_pair(evalTimeStampVariable, m_variableToNodeMap[evalTimeStampVariable]->GetEvalTimeStamp())) : nullptr;
+        return (outputsToRetainBackwardStateFor.size() > 0) ? MakeSharedObject<CNTKBackPropState>(this->shared_from_this(), computeDevice, std::make_pair(evalTimeStampVariable, m_variableToNodeMap[evalTimeStampVariable]->GetEvalTimeStamp())) : nullptr;
     }

     /*virtual*/ void CompositeFunction::Backward(const BackPropStatePtr& state,

@@ -652,8 +652,8 @@ namespace CNTK
     class CNTKBackPropState final : public BackPropState
     {
     public:
-        CNTKBackPropState(const FunctionPtr& function, const std::pair<Variable, int64_t>& evalTimeStamp)
-            : BackPropState(function), m_evalTimeStamp(evalTimeStamp)
+        CNTKBackPropState(const FunctionPtr& function, const DeviceDescriptor& computeDevice, const std::pair<Variable, int64_t>& evalTimeStamp)
+            : BackPropState(function, computeDevice), m_evalTimeStamp(evalTimeStamp)
         {}

         std::pair<Variable, int64_t> EvalTimeStamp() const

@@ -103,9 +103,9 @@ private:
         bool log = GetEnvironmentPtr() && Environment().traceLevel > 0; // note: this will not log before node is part of network
         if (log)
        {
-            fprintf(stderr, "%ls: Initializing Parameter[%s] <- %ls(seed=%d, init dims=[%d x %d], range=%f*%f, onCPU=%s.\n)",
+            fprintf(stderr, "%ls: Initializing Parameter[%s] <- %ls(seed=%d, init dims=[%d x %d], range=%f(%f*%f), onCPU=%s.\n)",
                     NodeDescription().c_str(), string(GetSampleLayout()).c_str(), m_initString.c_str(),
-                    (int)randomSeed, (int)fanOut, (int)fanIn, range, initValueScale, initOnCPUOnly ? "true" : "false");
+                    (int)randomSeed, (int)fanOut, (int)fanIn, range, range/initValueScale, initValueScale, initOnCPUOnly ? "true" : "false");
         }
     }
@@ -109,67 +109,26 @@ public:
         m_originIndex = 0;
         for (int i = (int)dimCount - 1; i >= 0; i--)
         {
-            assert((m_outputShape[i] % GetMapCount(i)) == 0);
-            int outPerMap = (int)(m_outputShape[i] / GetMapCount(i));
-            // Number of cells between first and last "centers", inclusive.
-            int cells = (int)((outPerMap - 1) * GetStride(i) + 1);
-            assert(m_inputShape[i] >= cells);
-
-            // Extra cells, to the left and right of "cells".
-            int extra = (int)m_inputShape[i] - cells;
-            assert(extra >= 0);
-
-            // When LowerPad and/or UpperPad are specified, the Start[i] value is determined by those values.
-            int lo = GetAutoPad(i) ? 0 : (int)m_lowerPad[m_lowerPad.size() == 1 ? 0 : i];
-            int hi = GetAutoPad(i) ? 0 : (int)m_upperPad[m_upperPad.size() == 1 ? 0 : i];
-            if (lo != 0 || hi != 0)
-            {
-                assert(extra + lo + hi + 1 == m_kernelShape[i]);
-                // Compute the number of cells on the left and right parts of the kernel,
-                // not counting the "kernel-center" cell. If m_kernelShape[i] is even, the extra cell is
-                // placed on the right (the center is shifted to the left).
-                int right = (int)m_kernelShape[i] - 1;
-                int left = right / 2;
-                right -= left;
-                assert(left <= right);
-                assert(right <= left + 1);
-
-                assert(lo <= left);
-                assert(hi <= right);
-                m_start[i] = left - lo;
-                assert(m_start[i] + cells + right == m_inputShape[i] + hi);
-            }
+            bool padded = GetAutoPad(i);
+            if (padded)
+                m_start[i] = 0;
             else
             {
-                m_start[i] = extra / 2;
-#ifdef _DEBUG
-                // If we're padding then extra should be covered.
-                bool padded = GetAutoPad(i);
-                assert(!padded || extra + 1 <= m_kernelShape[i]);
-                // If we're not padding then, we should stay within the input dimension.
-                assert(padded || extra + 1 >= m_kernelShape[i]);
-
-                // Compute the number of cells on the left and right parts of the kernel,
-                // not counting the "kernel-center" cell. If m_kernelShape[i] is even, the extra cell is
-                // placed on the right (the center is shifted to the left).
-                int right = (int)m_kernelShape[i] - 1;
-                int left = right / 2;
-                right -= left;
-                assert(0 <= left);
-                assert(left <= right);
-                assert(right <= left + 1);
-
-                int min = m_start[i] - left;
-                int max = m_start[i] + (int)cells + right;
-                assert(!padded || min <= 0 && max >= m_inputShape[i]);
-                assert(padded || min >= 0 && max <= m_inputShape[i]);
-
-                int diff = min - ((int)m_inputShape[i] - max);
-                assert(std::abs(diff) <= 1);
-
-                UNUSED(padded);
-                UNUSED(diff);
-#endif
+                m_start[i] = ((int)m_kernelShape[i] - 1) / 2;
+                int lo = (int)m_lowerPad[m_lowerPad.size() == 1 ? 0 : i];
+                int hi = (int)m_upperPad[m_upperPad.size() == 1 ? 0 : i];
+                if (lo != 0 || hi != 0)
+                {
+                    m_start[i] -= lo;
+                    assert(m_start[i] >= 0);
+                    int outPerMap = (int)(m_outputShape[i] / GetMapCount(i));
+                    int cells = (int)((outPerMap - 1) * GetStride(i) + 1);
+                    if (cells > 0) // dummy if, just to get rid of warning
+                    {
+                        assert(m_inputShape[i] >= cells);
+                        assert(m_start[i] + cells + (int)m_kernelShape[i] - 1 == m_inputShape[i] + hi);
+                    }
+                }
             }

             m_startIndex = m_startIndex * (int)m_inputShape[i] + m_start[i];

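A minimal Python sketch (not the CNTK source) of the start-position rule the rewritten loop implements: with auto-padding the first kernel center sits at input position 0; without it, the center starts at `(kernel-1)/2`, and any explicit lower padding shifts it left:

```python
def start_position(kernel, auto_pad, lower_pad=0):
    # mirrors the branch structure above; all names are illustrative
    if auto_pad:
        return 0
    start = (kernel - 1) // 2   # kernel center of the first application
    start -= lower_pad          # explicit lower padding moves the start left
    assert start >= 0
    return start

assert start_position(kernel=5, auto_pad=True) == 0
assert start_position(kernel=5, auto_pad=False) == 2
assert start_position(kernel=5, auto_pad=False, lower_pad=1) == 1
```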
@@ -40,7 +40,6 @@
     <ConfigurationType>DynamicLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
     <PlatformToolset>v120</PlatformToolset>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />

@@ -51,7 +50,6 @@
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup>
     <!-- TODO intentional for all? -->
-    <LinkIncremental>false</LinkIncremental>
     <TargetName>Math</TargetName>
   </PropertyGroup>
   <ItemDefinitionGroup>

@@ -102,9 +100,6 @@
     <ClCompile>
       <WarningLevel>Level4</WarningLevel>
       <PrecompiledHeader>Use</PrecompiledHeader>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
      <PreprocessorDefinitions>$(MathDefine); NO_SYNC; WIN32; NDEBUG; _WINDOWS; _USRDLL; MATH_EXPORTS; %(PreprocessorDefinitions)</PreprocessorDefinitions>
      <SDLCheck>true</SDLCheck>
      <MultiProcessorCompilation>true</MultiProcessorCompilation>

@@ -113,14 +108,12 @@
      <EnableParallelCodeGeneration>true</EnableParallelCodeGeneration>
      <FloatingPointExceptions>false</FloatingPointExceptions>
      <TreatWarningAsError>true</TreatWarningAsError>
-      <AdditionalOptions>/d2Zi+ %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions>/d2Zi+ /bigobj %(AdditionalOptions)</AdditionalOptions>
      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
    </ClCompile>
    <Link>
      <SubSystem>Console</SubSystem>
      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <OptimizeReferences>true</OptimizeReferences>
      <AdditionalDependencies>$(MathLinkLibrary);Common.lib;%(AdditionalDependencies)</AdditionalDependencies>
      <DelayLoadDLLs>$(MathDelayLoad); $(CudaDlls); %(DelayLoadDLLs)</DelayLoadDLLs>
      <Profile>true</Profile>

@@ -147,6 +140,7 @@
   <ItemDefinitionGroup Condition="$(GpuBuild)">
     <ClCompile>
       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(CudaInclude)</AdditionalIncludeDirectories>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">/d2Zi+ /bigobj %(AdditionalOptions)</AdditionalOptions>
     </ClCompile>
     <Link>
       <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories);$(CudaLibPath)</AdditionalLibraryDirectories>

@@ -155,6 +149,7 @@
   <ItemDefinitionGroup Condition="$(CpuOnlyBuild)">
     <ClCompile>
       <PreprocessorDefinitions>CPUONLY;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug_CpuOnly|x64'">/d2Zi+ /bigobj %(AdditionalOptions)</AdditionalOptions>
     </ClCompile>
     <Link>
       <DelayLoadDLLs>$(MathDelayLoad)</DelayLoadDLLs>

@@ -0,0 +1,24 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+# Licensed under the MIT license. See LICENSE.md file in the project root
+# for full license information.
+# ==============================================================================
+
+import os
+import re
+
+abs_path = os.path.dirname(os.path.abspath(__file__))
+notebook = os.path.join(abs_path, "..", "..", "..", "..", "Tutorials", "CNTK_204_Sequence_To_Sequence.ipynb")
+
+def test_cntk_204_sequence_to_sequence_noErrors(nb):
+    errors = [output for cell in nb.cells if 'outputs' in cell
+              for output in cell['outputs'] if output.output_type == "error"]
+    print(errors)
+    assert errors == []
+
+expectedEvalError = 90
+
+def test_cntk_204_sequence_to_sequence_trainerror(nb):
+    testCell = [cell for cell in nb.cells
+                if cell.cell_type == 'code' and re.search('#Print the training error', cell.source)]
+    assert float((testCell[0].outputs[0])['text']) < expectedEvalError

@@ -7,55 +7,36 @@
 import numpy as np
 import os
 import sys
+import signal
+import subprocess
+import re
+import pytest
 from cntk.utils import cntk_device
 from cntk.cntk_py import DeviceKind_GPU
 from cntk.device import set_default_device
-from cntk.io import ReaderConfig, ImageDeserializer
-from cntk import distributed
-import pytest
-import platform

 abs_path = os.path.dirname(os.path.abspath(__file__))
-sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Classification", "ConvNet", "Python"))
-from ConvNet_CIFAR10_DataAug_Distributed import convnet_cifar10_dataaug, create_reader
+sys.path.append(abs_path)
+from run_cifar_convnet_distributed import run_cifar_convnet_distributed

 TOLERANCE_ABSOLUTE = 2E-1
+TIMEOUT_SECONDS = 300

-def test_cifar_convnet_error(device_id):
-    if platform.system() == 'Windows':
-        pytest.skip('test skipped on Windows')
-
-    set_default_device(cntk_device(device_id))
-
-    try:
-        base_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
-                                *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
-        # N.B. CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY has {train,test}_map.txt
-        # and CIFAR-10_mean.xml in the base_path.
-    except KeyError:
-        base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
-                                *"../../../../Examples/Image/DataSets/CIFAR-10".split("/"))
-
-    base_path = os.path.normpath(base_path)
-    os.chdir(os.path.join(base_path, '..'))
-
-    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
-    set_computation_network_trace_level(1)
-    set_fixed_random_seed(1) # BUGBUG: has no effect at present # TODO: remove debugging facilities once this all works
-    #force_deterministic_algorithms()
-    # TODO: do the above; they lead to slightly different results, so not doing it for now
-
-    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), True, 0)
-    reader_test = create_reader(os.path.join(base_path, 'test_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
-
-    distributed_after_samples = 0
-    num_quantization_bits = 32
-    distributed_trainer = distributed.data_parallel_distributed_trainer(
-        num_quantization_bits=num_quantization_bits,
-        distributed_after=distributed_after_samples)
-
-    test_error = convnet_cifar10_dataaug(reader_train, reader_test, distributed_trainer, max_epochs=1)
+def test_cifar_convnet_distributed_mpiexec(device_id):
+    if cntk_device(device_id).type() != DeviceKind_GPU:
+        pytest.skip('test only runs on GPU')
+
+    cmd = ["mpiexec", "-n", "2", "python", os.path.join(abs_path, "run_cifar_convnet_distributed.py")]
+    p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
+    try:
+        out = p.communicate(timeout=TIMEOUT_SECONDS)[0]  # in case we have a hang
+    except subprocess.TimeoutExpired:
+        os.kill(p.pid, signal.CTRL_C_EVENT)
+        raise RuntimeError('Timeout in mpiexec, possibly hang')
+    str_out = out.decode(sys.getdefaultencoding())
+    results = re.findall("Final Results: Minibatch\[.+?\]: errs = (.+?)%", str_out)
+    assert len(results) == 2
+    assert results[0] == results[1]

     expected_test_error = 0.617

-    assert np.allclose(test_error, expected_test_error,
+    assert np.allclose(float(results[0])/100, expected_test_error,
                        atol=TOLERANCE_ABSOLUTE)

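The new test treats the two MPI ranks as a contract on the log output: each rank must print one "Final Results" line and, because the trainer is data-parallel, both must report the same error. A toy check of that parsing logic, with a fabricated log sample:

```python
import re

# fabricated output; the regex is the one used by the test above
sample = ("Final Results: Minibatch[1-626]: errs = 61.7%\n"
          "Final Results: Minibatch[1-626]: errs = 61.7%\n")
results = re.findall(r"Final Results: Minibatch\[.+?\]: errs = (.+?)%", sample)
assert len(results) == 2 and results[0] == results[1]
assert abs(float(results[0]) / 100 - 0.617) < 2e-1
```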
@@ -65,7 +65,7 @@ def nb(tmpdir_factory, request, device_id):
     outPath = str(tmpdir_factory.mktemp('notebook').join('out.ipynb'))
     assert os.path.isfile(inPath)
     args = ["jupyter", "nbconvert", "--to", "notebook", "--execute",
-            "--ExecutePreprocessor.timeout=60", "--output", outPath, inPath]
+            "--ExecutePreprocessor.timeout=300", "--output", outPath, inPath]
     subprocess.check_call(args)
     nb = nbformat.read(outPath, nbformat.current_nbformat)
     return nb

@@ -1,2 +1,2 @@
 [pytest]
-python_files = *.py
+python_files = *_test.py

@@ -0,0 +1,52 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+# Licensed under the MIT license. See LICENSE.md file in the project root
+# for full license information.
+# ==============================================================================
+
+import numpy as np
+import os
+import sys
+import platform
+from cntk.io import ReaderConfig, ImageDeserializer
+from cntk import distributed
+from cntk.device import set_default_device, gpu
+
+abs_path = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Classification", "ConvNet", "Python"))
+from ConvNet_CIFAR10_DataAug_Distributed import convnet_cifar10_dataaug, create_reader
+
+def run_cifar_convnet_distributed():
+    try:
+        base_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
+                                *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
+        # N.B. CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY has {train,test}_map.txt
+        # and CIFAR-10_mean.xml in the base_path.
+    except KeyError:
+        base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                                *"../../../../Examples/Image/DataSets/CIFAR-10".split("/"))
+
+    base_path = os.path.normpath(base_path)
+    os.chdir(os.path.join(base_path, '..'))
+
+    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
+    set_computation_network_trace_level(1)
+    set_fixed_random_seed(1) # BUGBUG: has no effect at present # TODO: remove debugging facilities once this all works
+    #force_deterministic_algorithms()
+    # TODO: do the above; they lead to slightly different results, so not doing it for now
+
+    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), True, 0)
+    reader_test = create_reader(os.path.join(base_path, 'test_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
+
+    distributed_after_samples = 0
+    num_quantization_bits = 32
+    distributed_trainer = distributed.data_parallel_distributed_trainer(
+        num_quantization_bits=num_quantization_bits,
+        distributed_after=distributed_after_samples)
+
+    return convnet_cifar10_dataaug(reader_train, reader_test, distributed_trainer, max_epochs=1)
+
+if __name__=='__main__':
+    set_default_device(gpu(0)) # force using GPU-0 in test for speed
+    run_cifar_convnet_distributed()
+    distributed.Communicator.finalize()

@@ -2,6 +2,22 @@

 . $TEST_ROOT_DIR/run-test-common

+# Temporary workaround to force the default device to be always GPU 0 when
+# running the python unit tests, since data placement is currently broken, which
+# causes some of the test data to end up on the default device instead of the
+# explicitly selected GPU device 0 and makes the tests fail.
+# This should be removed when the test bugs have been addressed.
+if [ "$TEST_DEVICE" == "gpu" ]; then
+  if [ -z "$CUDA_VISIBLE_DEVICES" ]; then
+    export CUDA_VISIBLE_DEVICES=0
+  else
+    IFS=','
+    visibleDevicesArray=($CUDA_VISIBLE_DEVICES)
+    unset IFS
+    export CUDA_VISIBLE_DEVICES=${visibleDevicesArray[0]}
+  fi
+fi
+
 python -c "import sys; print('Python: %s'%sys.version)"
 python -c "import numpy; print('NumPy: %s'%numpy.version.full_version)"
 python -c "import scipy; print('SciPy: %s'%scipy.version.full_version)"

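For completeness, the same device-pinning workaround expressed in Python (a sketch only; the shell version above is what the test harness actually runs). It has to execute before CNTK initializes CUDA:

```python
import os

visible = os.environ.get("CUDA_VISIBLE_DEVICES")
if not visible:
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"      # expose only GPU 0
else:
    # keep just the first device from a comma-separated list
    os.environ["CUDA_VISIBLE_DEVICES"] = visible.split(",")[0]
```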
@@ -1,8 +1,8 @@
 dataDir: .

 tags:
-  - bvt-l (build_sku == 'gpu') and (flavor == 'release') and ((os == 'linux') or (device=='cpu'))
-  - nightly-l (build_sku == 'gpu') and (flavor == 'release') and ((os == 'linux') or (device=='cpu'))
+  - bvt-l (build_sku == 'gpu') and (flavor == 'release')
+  - nightly-l (build_sku == 'gpu') and (flavor == 'release')

 testCases:
   Run must finish with error code 0 (outputs __COMPLETED__ in that case):

@@ -763,7 +763,7 @@ INFO: rn4_1.c_proj.y.y: loading pre-CuDNNv5 model: approximated mini-batch count
 INFO: rn4_2.c1.c.y.y: loading pre-CuDNNv5 model: approximated mini-batch count of 625625 as 10010000 trained samples.
 Statistics in further training may be biased; consider re-training instead.
 INFO: rn4_2.c2.y.y: loading pre-CuDNNv5 model: approximated mini-batch count of 625625 as 10010000 trained samples.
-Statistics in further EvaluateImageInputUsingFeatureVector: Outcome = 340
+Statistics in further EvaluateImageInputUsingFeatureVector: Outcome = 118

 ====== EvaluateImageInputUsingImageApi ========
 training may be biased; consider re-training instead.

@@ -847,7 +847,7 @@ WARNING: rn2_1.c2.y.y: loading pre-CuDNNv5 model: approximately converting varia
 WARNING: rn2_1.c_proj.y.y: loading pre-CuDNNv5 model: approximately converting variance statistics format
 WARNING: rn2_2.c1.c.y.y: loading pre-CuDNNv5 model: approximately converting variance statistics format
 WARNING: rn2_2.c2.y.y: loading pre-CuDNNv5 model: approximately converting variance statistics format
-WARNING: rn3_1.c1.c.y.y: loading pre-CuDNNv5 model:EvaluateImageInputUsingImageApi: Outcome = 340
+WARNING: rn3_1.c1.c.y.y: loading pre-CuDNNv5 model:EvaluateImageInputUsingImageApi: Outcome = 118

 ====== CompareImageApiResults ========
 Both image API calls returned the same output vector.

@@ -80,7 +80,6 @@
       <SubSystem>Console</SubSystem>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <AdditionalDependencies>CNTKLibrary-2.0.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <StackReserveSize Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">100000000</StackReserveSize>
     </Link>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="$(ReleaseBuild)">

@@ -145,4 +144,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
 </Project>

(The diff for one file is not shown because of its size.)

@@ -19,6 +19,7 @@ def uniform(scale=DefaultParamInitScale, seed=None):

     Returns:
         initializer for :class:`cntk.variables.Parameter`
+        initialized to uniform distribution between `scale*[-0.05, 0.05]`
     '''
     if seed is None:
         seed = SentinelValueForAutoSelectRandomSeed

@@ -37,6 +38,7 @@ def gaussian(output_rank=SentinelValueForInferParamInitRank, filter_rank=Sentine

     Returns:
         initializer for :class:`cntk.variables.Parameter`
+        initialized to Gaussian distribution with mean `0` and standard deviation `scale*0.2/sqrt(fanIn)`.
     '''
     if seed is None:
         seed = SentinelValueForAutoSelectRandomSeed

@@ -55,6 +57,7 @@ def xavier(output_rank=SentinelValueForInferParamInitRank, filter_rank=SentinelV

     Returns:
         initializer for :class:`cntk.variables.Parameter`
+        initialized to Gaussian distribution with mean `0` and standard deviation `scale*sqrt(3.0/fanIn)`
     '''
     if seed is None:
         seed = SentinelValueForAutoSelectRandomSeed

@@ -73,6 +76,7 @@ def glorot_uniform(output_rank=SentinelValueForInferParamInitRank, filter_rank=S

     Returns:
         initializer for :class:`cntk.variables.Parameter`
+        initialized to uniform distribution between `scale*sqrt(6.0/(fanIn+fanOut))*[-1,1]`
     '''
     if seed is None:
         seed = SentinelValueForAutoSelectRandomSeed

@@ -91,6 +95,7 @@ def glorot_normal(output_rank=SentinelValueForInferParamInitRank, filter_rank=Se

     Returns:
         initializer for :class:`cntk.variables.Parameter`
+        initialized to Gaussian distribution with mean `0` and standard deviation `scale*sqrt(2.0/(fanIn+fanOut))`
     '''
     if seed is None:
         seed = SentinelValueForAutoSelectRandomSeed

@@ -109,6 +114,7 @@ def he_uniform(output_rank=SentinelValueForInferParamInitRank, filter_rank=Senti

     Returns:
         initializer for :class:`cntk.variables.Parameter`
+        initialized to uniform distribution between `scale*sqrt(6.0/fanIn)*[-1,1]`
     '''
     if seed is None:
         seed = SentinelValueForAutoSelectRandomSeed

@@ -127,6 +133,7 @@ def he_normal(output_rank=SentinelValueForInferParamInitRank, filter_rank=Sentin

     Returns:
         initializer for :class:`cntk.variables.Parameter`
+        initialized to Gaussian distribution with mean `0` and standard deviation `scale*sqrt(2.0/fanIn)`
     '''
     if seed is None:
         seed = SentinelValueForAutoSelectRandomSeed

@@ -143,6 +150,7 @@ def bilinear(kernel_width, kernel_height):

     Returns:
         initializer for :class:`cntk.variables.Parameter`
+        useful for deconvolution layer
     '''
     return cntk_py.bilinear_initializer(kernel_width, kernel_height)

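A usage sketch for the documented initializers. The import paths follow the `:class:` references in the docstrings above and may differ between releases; the shape and the `parameter` entry point are assumptions, not part of this diff:

```python
from cntk.initializer import glorot_uniform
from cntk.ops import parameter   # assumed entry point for creating a Parameter

# weights drawn from a uniform distribution scaled by sqrt(6/(fanIn+fanOut)),
# per the glorot_uniform docstring above
W = parameter(shape=(784, 128), init=glorot_uniform())
```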
@@ -81,15 +81,17 @@ class MinibatchSource(cntk_py.MinibatchSource):
        randomize (bool, default True): randomize images before every epoch
        epoch_size (int): epoch size
        distributed_after (int): sample count after which minibatch source becomes distributed
+        multithreaded_deserializer (bool): whether to use a multithreaded deserializer
    '''
-    def __init__(self, deserializers=None, randomize=True, epoch_size=INFINITELY_REPEAT, distributed_after=INFINITE_SAMPLES):
+    def __init__(self, deserializers=None, randomize=True, epoch_size=INFINITELY_REPEAT, distributed_after=INFINITE_SAMPLES, multithreaded_deserializer=None):
        if not isinstance(deserializers, (list,tuple)):
            deserializers = [deserializers] # allow passing a single item or a list
        reader_config = ReaderConfig(
            deserializers=deserializers,
            randomize=randomize,
            epoch_size=epoch_size,
-            distributed_after=distributed_after)
+            distributed_after=distributed_after,
+            multithreaded_deserializer=multithreaded_deserializer)
        source = minibatch_source(reader_config)
        # transplant into this class instance
        self.__dict__ = source.__dict__

@@ -256,8 +258,9 @@ class ReaderConfig(dict):
        randomize (bool, default True): randomize images before every epoch
        epoch_size (int): epoch size
        distributed_after (int): sample count after which reader becomes distributed
+        multithreaded_deserializer (bool): whether to use a multithreaded deserializer
    '''
-    def __init__(self, deserializers=None, randomize=True, epoch_size=INFINITELY_REPEAT, distributed_after=INFINITE_SAMPLES):
+    def __init__(self, deserializers=None, randomize=True, epoch_size=INFINITELY_REPEAT, distributed_after=INFINITE_SAMPLES, multithreaded_deserializer=None):

        self['epochSize'] = cntk_py.SizeTWrapper(epoch_size) # force to store in size_t
        if not isinstance(deserializers, (list, tuple)):

@@ -265,6 +268,8 @@ class ReaderConfig(dict):
        self['deserializers'] = self.deserializers = deserializers or []
        self['randomize'] = randomize
        self['distributedAfterSampleCount'] = cntk_py.SizeTWrapper(distributed_after)
+        if multithreaded_deserializer != None:
+            self['multiThreadedDeserialization'] = multithreaded_deserializer

    @typemap
    def minibatch_source(self):

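Sketch of the new flag in use; the constructor signature is taken from the diff above, while the deserializer setup is elided and hypothetical:

```python
from cntk.io import MinibatchSource, ImageDeserializer

deserializer = ImageDeserializer("train_map.txt")   # hypothetical map file
source = MinibatchSource(deserializer,
                         randomize=True,
                         multithreaded_deserializer=True)  # new in this change
```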
@@ -98,8 +98,8 @@ class Learner(cntk_py.Learner):
        Returns:
            `False` to indicate that learning has stopped for all of the parameters associated with this learner
        '''
-        from .utils import create_NDArrayView_from_NumPy
-        var_nd_map = { var:create_NDArrayView_from_NumPy(val) for var, val in
+        from .utils import _create_NDArrayView_from_NumPy
+        var_nd_map = { var: _create_NDArrayView_from_NumPy(val) for var, val in
            gradient_values.items() }

        return super(Learner, self).update(var_nd_map, training_sample_count)

@@ -250,7 +250,7 @@ def convolution(convolution_map, operand, strides=(1,), sharing=[True],
        >>> x = C.input_variable(img.shape)
        >>> filter = np.reshape(np.array([2, -1, -1, 2], dtype = np.float32), (1, 2, 2))
        >>> kernel = C.constant(value = filter)
-        >>> C.convolution(kernel, x, auto_padding = [False]).eval({x: [img]})
+        >>> C.convolution(kernel, x, auto_padding = [False]).eval({x: [img]}) # doctest: +SKIP
        array([[[[[  6.,   8.,  10.,  12.],
                  [ 16.,  18.,  20.,  22.],
                  [ 26.,  28.,  30.,  32.],

@@ -273,7 +273,9 @@ class Function(cntk_py.Function):
        Returns:
            dict: mapping of ``variables`` to NumPy arrays
        '''
-        root_gradients = sanitize_var_map(self.outputs, root_gradients)
+        device = state.device()
+        root_gradients = sanitize_var_map(self.outputs, root_gradients,
+                                          None, device)

        var_gradients = dict((var, None) for var in variables)

@@ -13,7 +13,7 @@ from __future__ import division
 import numpy as np
 import pytest
 from .ops_test_utils import unittest_helper, _test_unary_op, _test_binary_op, AA, I, precision, PRECISION_TO_TYPE, batch_dense_to_sparse, left_matrix_type, right_matrix_type
-from ...utils import sanitize_dtype_cntk, ones_like, eval
+from ...utils import sanitize_dtype_cntk, _ones_like, eval

 TENSOR_PAIRS = [
     ([30.], [10.]),

@@ -74,8 +74,8 @@ def test_op_plus_var_sequences_input_input(left_batch, right_batch, device_id, p
                    for i in range(len(left_batch))]

     expected_backward = {
-        'left':  ones_like(left_batch, PRECISION_TO_TYPE[precision]),
-        'right': ones_like(right_batch, PRECISION_TO_TYPE[precision])
+        'left':  _ones_like(left_batch, PRECISION_TO_TYPE[precision]),
+        'right': _ones_like(right_batch, PRECISION_TO_TYPE[precision])
     }

     left_value = [AA(sample, dtype=PRECISION_TO_TYPE[precision])

@@ -53,18 +53,11 @@ def cntk_device(device_id):
     return gpu(device_id)


-def is_string(value):
-    if sys.version_info.major < 3:
-        return isinstance(value, basestring)
-
-    return isinstance(value, str)
-
-
-def dense_to_str(data):
+def _dense_to_str(data):
     return ' '.join(data.ravel(order='C').astype(np.str))


-def sparse_to_str(data):
+def _sparse_to_str(data):
     return ' '.join('%s:%s' % (k, v) for k, v in sorted(data.items()))


@@ -96,12 +89,12 @@ def tensors_to_text_format(sample_idx, alias_tensor_map):
             # for this alias there are no more sequence elements
             continue

-        if is_tensor(tensor):
+        if _is_tensor(tensor):
             if not isinstance(tensor, np.ndarray):
                 tensor = np.asarray(tensor)
-            to_str = dense_to_str
+            to_str = _dense_to_str
         elif isinstance(tensor, list) and isinstance(tensor[0], dict):
-            to_str = sparse_to_str
+            to_str = _sparse_to_str
         else:
             raise ValueError(
                 'expected a tensor (dense) or list of dicts (sparse), but got "%s"' % type(tensor))

@@ -113,7 +106,7 @@ def tensors_to_text_format(sample_idx, alias_tensor_map):
     return '\n'.join(lines)


-def is_tensor(data):
+def _is_tensor(data):
     '''
     Checks whether the data is a tensor, i.e. whether it is a NumPy array or a
     list of NumPy arrays.

@@ -179,7 +172,7 @@ def one_hot(batch, num_classes, dtype=None, device=None):
         value = cntk_py.Value.create_one_hot_double(num_classes, batch, device, False)
     return value

-def has_seq_dim(var, data):
+def _has_seq_dim(var, data):
     '''
     Checks whether the data has a sequence dimension or not.

@@ -274,7 +267,7 @@ def sanitize_shape(shape):

 def sanitize_input(arg, fallback_dtype=np.float32, reshape=None):
     """
-    Convert to :class:`cntk.ops.variables.Variable` so that it can be passed as Variable to the
+    Convert to :class:`~cntk.ops.variables.Variable` so that it can be passed as Variable to the
     CNTK operators.

     * If ``arg`` is a NumPy array and its type is neither `np.float32` nor `np.float64`, it sets it to `np.float32`.

@@ -329,8 +322,7 @@ def get_data_type(*args):
     inputs. Placeholders are ignored in the type determination.

     Args:
-        args (number, list, NumPy array, :class:`cntk.ops.variables.Variable`,
-          or :class:`cntk.ops.functions.Function`): input
+        args (number, list, NumPy array, :class:`cntk.ops.variables.Variable`, or :class:`cntk.ops.functions.Function`): input

     Returns:
         np.float32, np.float64, or None
     """

@@ -410,9 +402,10 @@ def _pad_dense_to_max_len(var, batch, max_seq_len):
     Z = np.zeros((len(batch), max_seq_len) +
                  (data_point.shape), dtype=data_point.dtype)
     for idx, seq in enumerate(batch):
-        if seq[0].shape != data_point.shape:
+        elem_shape = seq[0].shape if hasattr(seq, 'shape') else ()
+        if elem_shape != data_point.shape:
             raise ValueError('shape mismatch: expected %s but got %s'
-                             % (str(data_point.shape), str(seq[0].shape)))
+                             % (str(data_point.shape), str(elem_shape)))
         Z[idx, :len(seq)] += seq
     return Z

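A toy version of the padding step this function performs, runnable stand-alone: ragged dense sequences are copied into a zero block of shape `(batch, max_len) + sample_shape`:

```python
import numpy as np

def pad_dense(batch, max_seq_len):
    # same zero-block-and-copy scheme as _pad_dense_to_max_len above
    data_point = np.asarray(batch[0][0])
    Z = np.zeros((len(batch), max_seq_len) + data_point.shape,
                 dtype=data_point.dtype)
    for idx, seq in enumerate(batch):
        Z[idx, :len(seq)] += seq
    return Z

print(pad_dense([[[5.], [6.], [7.]], [[8.]]], 3))   # second row zero-padded
```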
@@ -443,6 +436,11 @@ def _pad_sparse_seq_to_max_len(batch, max_seq_len):
     return Z

 def _is_dense(batch):
+    if isinstance(batch, np.ndarray):
+        return True
+    elif sparse.issparse(batch):
+        return False
+
     is_dense = True
     b = batch
     while isinstance(b, list):

@@ -452,6 +450,7 @@ def _is_dense(batch):

     return True

+@typemap
 def sanitize_batch(var, batch, seq_starts=None, dtype=None, device=None):
     '''
     Convert to :class:`Value` with ``dtype``. If the samples in

@@ -476,37 +475,31 @@ def sanitize_batch(var, batch, seq_starts=None, dtype=None, device=None):
     if isinstance(batch, cntk_py.Value):
         return batch

+    if isinstance(batch, list):
+        if len(batch) == 0:
+            raise ValueError('batch is empty')
+
     # We need to figure out whether the data has a sequence axis. Note that
     # it is not enough to check whether the variable's dynamic axes include the
     # sequence axis, because the sequence axis might be omitted in the data if
     # it is not needed (CNTK core would then take care of this).
-    batch_has_seq = has_seq_dim(var, batch)
+    batch_has_seq = _has_seq_dim(var, batch)

-    if isinstance(batch, list):
-        is_dense = _is_dense(batch)
-
-        if is_dense:
-            seq_lens = [len(seq) for seq in batch]
-
-            # If the input is a list of lists of dense values, all of the same
-            # length, then we convert it into a NumPy array without requiring a
-            # mask.
-            if len(set(seq_lens)) == 1:
-                batch = np.asarray(batch)
-        else:
-            if isinstance(batch[0], list):
-                seq_lens = [len(seq) for seq in batch]
-            else:
-                seq_lens = [seq.shape[0] for seq in batch]
-
-        if batch_has_seq:
-            max_seq_len = max(seq_lens)
-    else:
-        is_dense = isinstance(batch, np.ndarray)
-        # It is a sparse or dense NumPy array having all sequences being the
-        # same length, so we just calculate the sequence lengths
-        if batch_has_seq:
-            max_seq_len = batch.shape[1]
+    is_dense = _is_dense(batch)
+
+    if batch_has_seq or seq_starts:
+        if isinstance(batch[0], list):
+            seq_lens = [len(seq) for seq in batch]
+        else:
+            seq_lens = [seq.shape[0] for seq in batch]
+
+        max_seq_len = max(seq_lens)
+
+        # If the input is a list of lists of dense values, all of the same
+        # length, we convert it into a NumPy array.
+        if is_dense and len(set(seq_lens)) == 1:
+            batch_has_seq = False
+            batch = np.asarray(batch, dtype=var.dtype)

     if dtype is None:
         dtype = get_data_type(var)

@@ -514,25 +507,8 @@ def sanitize_batch(var, batch, seq_starts=None, dtype=None, device=None):
     if device is None:
         device = use_default_device()

-    if isinstance(batch, np.ndarray):
-        if np.issubdtype(batch.dtype, int):
-            batch = batch.astype(var.dtype)
-        elif batch.dtype not in (np.float32, np.float64):
-            raise ValueError('only float32 and float64 are supported')
-
-        ndav = create_NDArrayView_from_NumPy(batch, device)
-        return Value(data=ndav)
-
-    if isinstance(batch, list):
-        if len(batch) == 0:
-            raise ValueError('batch is empty')
-
-    if not batch_has_seq and seq_starts is not None:
-        raise ValueError('specification of individual sequence begins does not'
-                         ' make sense when not using the sequence axis')
-
     # batch is now either a dense input that requires a mask, or it is sparse
-    if batch_has_seq:
+    if batch_has_seq or seq_starts:
         mask = cntk_py.NDMask((len(batch), max_seq_len),
                               device or use_default_device())
         for idx, seq_len in enumerate(seq_lens):

@@ -550,8 +526,20 @@ def sanitize_batch(var, batch, seq_starts=None, dtype=None, device=None):
         mask = None

     if is_dense:
-        batch = _pad_dense_to_max_len(var, batch, max_seq_len)
-        ndav = create_NDArrayView_from_NumPy(batch.astype(dtype), device)
+        if batch_has_seq:
+            batch = _pad_dense_to_max_len(var, batch, max_seq_len)
+        if not isinstance(batch, np.ndarray):
+            batch = np.asarray(batch)
+        ndav = _create_NDArrayView_from_NumPy(batch.astype(dtype), device)
+        return Value(data=ndav, mask=mask)
+
+    if isinstance(batch, np.ndarray):
+        if np.issubdtype(batch.dtype, int):
+            batch = batch.astype(var.dtype)
+        elif batch.dtype not in (np.float32, np.float64):
+            raise ValueError('only float32 and float64 are supported')
+
+        ndav = _create_NDArrayView_from_NumPy(batch.astype(dtype), device)
         return Value(data=ndav, mask=mask)

     # There are three possibilities of providing sparse batches:

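End to end, the rewritten `sanitize_batch` now produces a mask whenever a sequence axis or explicit sequence starts are present; `test_mask` later in this changeset pins the exact values. A sketch of that behavior (2 = sequence start, 1 = valid, 0 = padding):

```python
import numpy as np
from cntk.ops import input_variable
from cntk.utils import sanitize_batch

var = input_variable((1,))
val = sanitize_batch(var, [[5, 6, 7], [8]], seq_starts=[True, False])
print(val.mask)   # expected per test_mask: [[2, 1, 1], [1, 0, 0]]
```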
@@ -638,7 +626,7 @@ def sanitize_value(shape, value, dtype, device):
         if shape is None:
             raise ValueError('you need to specify at least shape or value')
         cntk_dtype = sanitize_dtype_cntk(dtype)
-        ndav = create_NDArrayView(shape, cntk_dtype, device)
+        ndav = _create_NDArrayView(shape, cntk_dtype, device)
     else:
         np_dtype = sanitize_dtype_numpy(dtype)
         if not isinstance(value, np.ndarray) or value.dtype != np_dtype:

@@ -647,7 +635,7 @@ def sanitize_value(shape, value, dtype, device):
             else:
                 value = np.asarray(value, dtype=np_dtype)

-            ndav = create_NDArrayView_from_NumPy(value, device)
+            ndav = _create_NDArrayView_from_NumPy(value, device)

     return ndav

@@ -772,7 +760,7 @@ def sanitize_var_map(op_arguments, arguments, precision=None,
     return var_map


-def ones_like(batch, precision):
+def _ones_like(batch, precision):
     '''
     Returns a new batch, which has the same format as ``batch`` but all values
     set to 1.

@@ -783,7 +771,7 @@
     return [np.ones_like(sample, dtype=sanitize_precision(precision)) for sample in batch]


-def create_NDArrayView(shape, data_type=cntk_py.DataType_Float, device=None):
+def _create_NDArrayView(shape, data_type=cntk_py.DataType_Float, device=None):
     shape = sanitize_shape(shape)
     if device is None:
         device = use_default_device()

@@ -793,7 +781,7 @@ def create_NDArrayView(shape, data_type=cntk_py.DataType_Float, device=None):
     return view


-def create_NDArrayView_from_NumPy(nd, device=None):
+def _create_NDArrayView_from_NumPy(nd, device=None):
     if device is None:
         device = use_default_device()

@@ -819,11 +807,11 @@ class Value(cntk_py.Value):
             device = use_default_device()

         if shape and dtype:
-            ndav = create_NDArrayView(shape, dtype, device)
+            ndav = _create_NDArrayView(shape, dtype, device)

         elif data:
             if isinstance(data, np.ndarray):
-                ndav = create_NDArrayView_from_NumPy(data, device)
+                ndav = _create_NDArrayView_from_NumPy(data, device)
             else:
                 ndav = data

@@ -841,6 +829,27 @@ class Value(cntk_py.Value):
         '''
         return super(Value, self).shape().dimensions()

+    @property
+    def mask(self):
+        '''
+        The mask matrix of this value. Each row denotes a sequence with its
+        elements describing the mask of the element:
+
+        * 2: beginning of sequence (e.g. an LSTM would be reset)
+        * 1: valid element
+        * 0: invalid element
+
+        Example:
+          A mask of
+          ``[[2, 1, 1], [1, 1, 0]]``
+          describes a batch of two sequences. The first has three elements, of
+          which the first element signals the beginning of a sequence. The
+          second sequence has two elements, which are both continuations of the
+          first sequence.
+        '''
+        return np.asarray(super(Value, self).mask())
+
+
     def __len__(self):
         '''
         Number of samples in this value object.

@@ -939,7 +948,7 @@ def ensure_dev(ndav, dev):

     if ndav.device() != dev:

-        ndav_on_target = create_NDArrayView(
+        ndav_on_target = _create_NDArrayView(
             ndav.shape().dimensions(), data_type=ndav.get_data_type(), dev=dev)
         ndav_on_target.copy_from(ndav)
         ndav = ndav_on_target

@@ -953,7 +962,7 @@ def value_to_seq(value):
     entries removed.

     Args:
-        value (`Value`): Value as it is returned by Swig
+        value (:class:`Value`): Value as it is returned by Swig

     Returns:
         a list of NumPy arrays

@@ -1013,7 +1022,7 @@ def eval(op, arguments=None, precision=None, device=None, backward_pass=False, e
     if backward_pass:
         if expected_backward is None:
             expected_backward = arguments
-        root_gradients = {v: ones_like(o, precision) for v, o in
+        root_gradients = {v: _ones_like(o, precision) for v, o in
                           forward_output.items()}

         backward_output = op.backward(state, root_gradients, expected_backward)

@@ -13,6 +13,7 @@ from cntk.device import default
 from cntk.tests.test_utils import precision, PRECISION_TO_TYPE
 from cntk.ops import *
 from cntk.utils import *
+from cntk.utils import _has_seq_dim, _is_tensor

 # Keeping things short
 AA = np.asarray

@@ -58,7 +59,7 @@ def test_tensor_conversion_dense(idx, alias_tensor_map, expected):
     ([AA([1, 2]), AA([])], False),
 ])
 def test_is_tensor(data, expected):
-    assert is_tensor(data) == expected
+    assert _is_tensor(data) == expected


 def test_sanitize_dtype_numpy():

@@ -123,16 +124,14 @@ def test_get_data_type():
     # exception handling
     ((2,2), AA([[1,1],[2,2]]), ValueError),
     (1, [[[1,2]]], ValueError),
-    #(1, [AA([[40], [50]])], ValueError),
-    ((1,), [[[40], [50]]], ValueError),
 ])
 def test_has_seq_dim_dense(shape, batch, expected):
     i1 = input_variable(shape)
     if expected in [False, True]:
-        assert has_seq_dim(i1, batch) == expected
+        assert _has_seq_dim(i1, batch) == expected
     else:
         with pytest.raises(expected):
-            has_seq_dim(i1, batch)
+            _has_seq_dim(i1, batch)

 @pytest.mark.parametrize("shape, batch, expected", [
     ((1,2), [csr([1,0]), csr([2,3]), csr([5,6])], False),

@@ -141,10 +140,10 @@ def test_has_seq_dim_dense(shape, batch, expected):
 def test_has_seq_dim_sparse(shape, batch, expected):
     i1 = input_variable(shape, is_sparse=True)
     if expected in [False, True]:
-        assert has_seq_dim(i1, batch) == expected
+        assert _has_seq_dim(i1, batch) == expected
     else:
         with pytest.raises(expected):
-            has_seq_dim(i1, batch)
+            _has_seq_dim(i1, batch)

 def test_sanitize_batch_sparse():
     batch = [[csr([1,0,2]), csr([2,3,0])],

@@ -160,4 +159,20 @@ def test_sanitize_batch_sparse():
     # 2 sequences, with max seq len of 2 and dimension 3
     assert b.shape == (2,2,3)

+@pytest.mark.parametrize("batch, seq_starts, expected_mask", [
+    ([[5, 6, 7],
+      [8]],
+     [True, False],
+     [[2, 1, 1], [1, 0, 0]]),
+
+    ([[5],
+      [8]],
+     [True, False],
+     [[2], [1]]),
+])
+def test_mask(batch, seq_starts, expected_mask):
+    shape = (1,)
+    var = input_variable(shape)
+    s = sanitize_batch(var, batch, seq_starts)
+    assert np.allclose(s.mask, expected_mask)