Merge branch 'master' into qiwye/asgd-dev

This commit is contained in:
Qiwei Ye 2016-11-24 11:10:30 +08:00
Родитель 9b66aeec1e bb60423a48
Коммит 2b8b3047df
40 изменённых файлов: 1856 добавлений и 335 удалений

Просмотреть файл

@ -623,7 +623,7 @@ initValueScale
. .
If the model parameters are initialized using the Gaussian distribution, If the model parameters are initialized using the Gaussian distribution,
the standard deviation will be adjusted to the standard deviation will be adjusted to
\begin_inset Formula $0.2\times initValueScale/\sqrt{fanout}$ \begin_inset Formula $0.2\times initValueScale/\sqrt{fanout}$
\end_inset \end_inset

Просмотреть файл

@ -0,0 +1,190 @@
# Note: reader configuration comes from AlexNet.cntk or AlexNetComposite.cntk, depending on the test
RootDir = "."
ConfigDir = "$RootDir$"
DataDir = "$RootDir$"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
ndlMacros="$ConfigDir$/Macros.ndl"
precision = "float"
deviceId = "Auto"
command = Train:Test
parallelTrain = "true"
traceLevel = 1
numMBsToShowResult = 500
modelPath = "$ModelDir$/AlexNet"
stderr = "$OutputDir$/AlexNet"
################################
Train = {
action = "train"
BrainScriptNetworkBuilder = {
imageShape = 227:227:3
labelDim = 1000
# Local Response Normalization (LRN) along the 3rd axis of the input
# (presumably the channel axis, given imageShape = 227:227:3 -- confirm).
# For an input x this computes
#   r = x / (k + b)^beta
# where b is the sum of x^2 over a window of 2*n+1 neighbors along the 3rd axis,
# multiplied by alpha/(2*n+1).
# k : bias added to the scaled windowed sum
# n : half radius of the window (the window covers 2*n+1 entries)
# alpha: scale factor (divided by the window size 2*n+1 before being applied)
# beta: exponent applied to the denominator
LRN {k, n, alpha, beta} = {
apply (x) = {
# element-wise square of the input
x2 = x .* x
# reshape to insert a fake singleton reduction dimension after the 3rd axis
x2s = SplitDimension(x2, 3, 1)
# 3D convolution whose filter has extent 1 in every axis except the 3rd (extent 2*n+1);
# it does not reduce anything, since the reduction dimension is the fake singleton one.
# W is a constant kernel: every entry is alpha/(2*n+1), and learningRateMultiplier = 0 keeps it from being trained.
W = ParameterTensor{(1:1:2*n+1:1), learningRateMultiplier = 0, initValue = alpha/(2*n+1)}
y = Convolution (W, x2s, (1:1:2*n+1), mapDims = 1, stride = 1, sharing = true, autoPadding = true, lowerPad = 0, upperPad = 0, transpose = false, maxTempMemSizeInSamples = 0)
# reshape back to remove the fake singleton reduction dimension
b = FlattenDimensions(y, 3, 2)
# den = (k + b)^beta, written as exp(beta * log(k + b))
den = Exp (beta .* Log(k + b))
# divide the input by the normalization term
r = x .* Reciprocal(den)
}.r
}.apply
# The initValueScale values are chosen so that the Gaussian initializers have a standard deviation of 0.01 (0.005 for the two DenseLayers)
model = Sequential (
ConvolutionalLayer {96, (11:11), stride=(4:4), pad=false, init='gaussian', initValueScale=0.95265} : ReLU :
LRN {1.0, 2, 0.0001, 0.75} :
MaxPoolingLayer {(3:3), stride=(2:2)} :
ConvolutionalLayer {192, (5:5), pad = true, init='gaussian', initValueScale=2.44978, initBias=0.1} : ReLU :
LRN {1.0, 2, 0.0001, 0.75} :
MaxPoolingLayer {(3:3), stride=(2:2)} :
ConvolutionalLayer {384, (3:3), pad = true, init='gaussian', initValueScale=2.07857} : ReLU :
ConvolutionalLayer {384, (3:3), pad = true, init='gaussian', initValueScale=2.93945, initBias=0.1} : ReLU :
ConvolutionalLayer {256, (3:3), pad = true, init='gaussian', initValueScale=2.93945, initBias=0.1} : ReLU :
MaxPoolingLayer {(3:3), stride=(2:2)} :
DenseLayer {4096, activation=ReLU, init='gaussian', initValueScale=2.40038, initBias=0.1} : Dropout :
DenseLayer {4096, activation=ReLU, init='gaussian', initValueScale=1.6, initBias=0.1} : Dropout :
LinearLayer {labelDim, init='gaussian', initValueScale=3.2}
)
# inputs
features = Input {imageShape}
featNorm = features - Constant(114)
labels = Input {labelDim}
# apply model to features
z = model (featNorm)
# loss and error computation
ce = CrossEntropyWithSoftmax (labels, z)
errs = ClassificationError (labels, z)
top5Errs = ClassificationError (labels, z, topN=5) # only used in Eval action
# declare special nodes
featureNodes = (features)
labelNodes = (labels)
criterionNodes = (ce)
evaluationNodes = (errs)
outputNodes = (z)
}
SGD = {
epochSize = 0
minibatchSize = 256
learningRatesPerMB = 0.01*25:0.001*25:0.0001*25:0.00001*25:0.000001
momentumPerMB = 0.9
maxEpochs = 110
gradUpdateType = None
L2RegWeight = 0.0005
dropoutRate = 0.5
# TODO: try less bits?
ParallelTrain = {
parallelizationMethod = "DataParallelSGD"
distributedMBReading = "true"
parallelizationStartEpoch = 3
DataParallelSGD = {
gradientBits = 32
}
}
numMBsToShowResult = 100
}
# Reader
reader = {
verbosity = 0
randomize = true
randomizationWindow = 1
deserializers = (
{
type = "ImageDeserializer" ; module = "ImageReader"
file = "$DataDir$/train_map.txt"
input = {
features = {
transforms = (
{
type = "Crop"
cropType = "random"
cropRatio = 0.88671875
jitterType = "uniRatio"
}:{
type = "Scale"
width = 227
height = 227
channels = 3
interpolations = "linear"
#}:{
# type = "Mean"
# meanFile = "$ConfigDir$/ImageNet1K_mean.xml"
}:{
type = "Transpose"
}
)
}
labels = {
labelDim = 1000
}
}
})
}
}
################################
Test = {
action=test
minibatchSize=128
evalNodeNames = errs:top5Errs # also test top-5 error rate
# Reader
reader = {
verbosity = 0
randomize = false
deserializers = (
{
type = "ImageDeserializer" ; module = "ImageReader"
file="$DataDir$/val_map.txt"
input = {
features = {
transforms = (
{
type = "Crop"
cropType = "center"
cropRatio = 0.88671875
}:{
type = "Scale"
width = 227
height = 227
channels = 3
}:{
type = "Transpose"
}
)
}
labels = {
labelDim = 1000
}
}
})
}
}

Просмотреть файл

@ -0,0 +1,130 @@
# ConvNet applied on CIFAR-10 dataset, with data augmentation (translation and flipping).
command = TrainConvNet:Eval
precision = "float"; traceLevel = 1 ; deviceId = "auto"
rootDir = "../../.." ; dataDir = "$rootDir$/DataSets/CIFAR-10" ;
outputDir = "./Output" ;
modelPath = "$outputDir$/Models/ConvNetLRN_CIFAR10_DataAug"
#stderr = "$outputDir$/ConvNetLRN_CIFAR10_DataAug_bs_out"
TrainConvNet = {
action = "train"
BrainScriptNetworkBuilder = {
imageShape = 32:32:3
labelDim = 10
featScale = 1/256
Normalize{f} = x => f .* x
# Local Response Normalization (LRN) along the 3rd axis of the input
# (presumably the channel axis, given imageShape = 32:32:3 -- confirm).
# For an input x this computes
#   r = x / (k + b)^beta
# where b is the sum of x^2 over a window of 2*n+1 neighbors along the 3rd axis,
# multiplied by alpha/(2*n+1).
# k : bias added to the scaled windowed sum
# n : half radius of the window (the window covers 2*n+1 entries)
# alpha: scale factor (divided by the window size 2*n+1 before being applied)
# beta: exponent applied to the denominator
LRN {k, n, alpha, beta} = {
apply (x) = {
# element-wise square of the input
x2 = x .* x
# reshape to insert a fake singleton reduction dimension after the 3rd axis
x2s = SplitDimension(x2, 3, 1)
# 3D convolution whose filter has extent 1 in every axis except the 3rd (extent 2*n+1);
# it does not reduce anything, since the reduction dimension is the fake singleton one.
# W is a constant kernel: every entry is alpha/(2*n+1), and learningRateMultiplier = 0 keeps it from being trained.
W = ParameterTensor{(1:1:2*n+1:1), learningRateMultiplier = 0, initValue = alpha/(2*n+1)}
y = Convolution (W, x2s, (1:1:2*n+1), mapDims = 1, stride = 1, sharing = true, autoPadding = true, lowerPad = 0, upperPad = 0, transpose = false, maxTempMemSizeInSamples = 0)
# reshape back to remove the fake singleton reduction dimension
b = FlattenDimensions(y, 3, 2)
# den = (k + b)^beta, written as exp(beta * log(k + b))
den = Exp (beta .* Log(k + b))
# divide the input by the normalization term
r = x .* Reciprocal(den)
}.r
}.apply
model = Sequential (
Normalize {featScale} :
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
LRN {1.0, 4, 0.001, 0.75} :
MaxPoolingLayer {(3:3), stride = (2:2)} :
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
LRN {1.0, 4, 0.001, 0.75} :
MaxPoolingLayer {(3:3), stride = (2:2)} :
DenseLayer {256} : ReLU : Dropout :
DenseLayer {128} : ReLU : Dropout :
LinearLayer {labelDim}
)
# inputs
features = Input {imageShape}
labels = Input {labelDim}
# apply model to features
z = model (features)
# connect to system
ce = CrossEntropyWithSoftmax (labels, z)
errs = ClassificationError (labels, z)
top5Errs = ClassificationError (labels, z, topN=5) # only used in Eval action
featureNodes = (features)
labelNodes = (labels)
criterionNodes = (ce)
evaluationNodes = (errs) # top5Errs only used in Eval
outputNodes = (z)
}
SGD = {
epochSize = 0
minibatchSize = 64
learningRatesPerSample = 0.0015625*20:0.00046875*20:0.00015625*20:0.000046875*10:0.000015625
momentumAsTimeConstant = 0*20:600*20:1200
maxEpochs = 80
L2RegWeight = 0.002
dropoutRate = 0.5
numMBsToShowResult = 100
}
reader = {
verbosity = 0 ; randomize = true
deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader"
file = "$dataDir$/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Mean" ; meanFile = "$dataDir$/CIFAR-10_mean.xml" } :
{ type = "Transpose" }
)}
labels = { labelDim = 10 }
}
})
}
}
# Eval action
Eval = {
action = "eval"
evalNodeNames = errs:top5Errs # also test top-5 error rate
# Set minibatch size for testing.
minibatchSize = 512
reader = {
verbosity = 0 ; randomize = false
deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader"
file = "$dataDir$/test_map.txt"
input = {
features = { transforms = (
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Mean"; meanFile = "$dataDir$/CIFAR-10_mean.xml" } :
{ type = "Transpose" }
)}
labels = { labelDim = 10 }
}
})
}
}

Просмотреть файл

@ -28,8 +28,8 @@ TrainConvNet = {
ConvolutionalLayer {64, (3:3), pad = true} : ReLU : ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
ConvolutionalLayer {64, (3:3), pad = true} : ReLU : ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
MaxPoolingLayer {(3:3), stride = (2:2)} : MaxPoolingLayer {(3:3), stride = (2:2)} :
DenseLayer {256} : Dropout : ReLU : DenseLayer {256} : ReLU : Dropout :
DenseLayer {128} : Dropout : ReLU : DenseLayer {128} : ReLU : Dropout :
LinearLayer {labelDim} LinearLayer {labelDim}
) )

Просмотреть файл

@ -31,4 +31,12 @@ Run the example from the current folder using:
`cntk configFile=ConvNet_CIFAR10_DataAug.cntk` `cntk configFile=ConvNet_CIFAR10_DataAug.cntk`
As seen in the CNTK configuration file [ConvNet_CIFAR10_DataAug.cntk](./ConvNet_CIFAR10_DataAug.cntk), we use a fixed crop ratio of `0.8` and scale the image to `32x32` pixels for training. Since all training images are pre-padded to `40x40` pixels, effectively we only perform translation transform without scaling. The error rate of the network on test data is around `14%`, which is a lot better than the previous model. As seen in the CNTK configuration file [ConvNet_CIFAR10_DataAug.cntk](./ConvNet_CIFAR10_DataAug.cntk), we use a fixed crop ratio of `0.8` and scale the image to `32x32` pixels for training. Since all training images are pre-padded to `40x40` pixels, effectively we only perform translation transform without scaling. The error rate of the network on test data is around `14.2%`, which is a lot better than the previous model.
### ConvNetLRN_CIFAR10_DataAug.cntk
The fourth example added local response normalization (LRN) to the previous example. LRN is implemented as a BrainScript function using 3D convolution with a constant kernel. You may run the example from the current folder using:
`cntk configFile=ConvNetLRN_CIFAR10_DataAug.cntk`
This model achieves a slightly lower error rate of `13.8%`, which demonstrates the effectiveness of LRN. Nevertheless, as mentioned earlier, LRN is now rarely used by state-of-the-art deep networks.

Просмотреть файл

@ -13,6 +13,7 @@ from cntk.layers import *
from cntk.models import Sequential, LayerStack from cntk.models import Sequential, LayerStack
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT, FULL_DATA_SWEEP from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT, FULL_DATA_SWEEP
from cntk.learner import momentum_sgd, learning_rate_schedule, momentum_schedule, momentum_as_time_constant_schedule, UnitType from cntk.learner import momentum_sgd, learning_rate_schedule, momentum_schedule, momentum_as_time_constant_schedule, UnitType
from cntk.learner import momentum_sgd, learning_rate_schedule, momentum_as_time_constant_schedule, UnitType
from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error, relu, minus, element_times, constant from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error, relu, minus, element_times, constant
from _cntk_py import set_computation_network_trace_level from _cntk_py import set_computation_network_trace_level
@ -71,9 +72,9 @@ def convnet_cifar10(debug_output=False):
# Set learning parameters # Set learning parameters
lr_per_sample = [0.0015625]*10+[0.00046875]*10+[0.00015625] lr_per_sample = [0.0015625]*10+[0.00046875]*10+[0.00015625]
lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample) lr_schedule = learning_rate_schedule(lr_per_sample, UnitType.sample, epoch_size)
momentum_time_constant = [0]*20+[-minibatch_size/np.log(0.9)] mm_time_constant = [0]*20+[-minibatch_size/np.log(0.9)]
mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant, epoch_size=epoch_size) mm_schedule = momentum_as_time_constant_schedule(mm_time_constant, epoch_size)
l2_reg_weight = 0.002 l2_reg_weight = 0.002
# Instantiate the trainer object to drive the model training # Instantiate the trainer object to drive the model training

Просмотреть файл

@ -14,6 +14,7 @@ from cntk.ops import input_variable, cross_entropy_with_softmax, classification_
from cntk.io import MinibatchSource, ImageDeserializer, StreamDef, StreamDefs from cntk.io import MinibatchSource, ImageDeserializer, StreamDef, StreamDefs
from cntk import Trainer, persist, cntk_py from cntk import Trainer, persist, cntk_py
from cntk.learner import momentum_sgd, learning_rate_schedule, momentum_schedule, momentum_as_time_constant_schedule, UnitType from cntk.learner import momentum_sgd, learning_rate_schedule, momentum_schedule, momentum_as_time_constant_schedule, UnitType
from cntk.learner import momentum_sgd, learning_rate_schedule, momentum_as_time_constant_schedule, UnitType
from _cntk_py import set_computation_network_trace_level from _cntk_py import set_computation_network_trace_level
# Paths relative to current python file. # Paths relative to current python file.
@ -83,8 +84,8 @@ def convnet_cifar10_dataaug(reader_train, reader_test, max_epochs = 80):
# Set learning parameters # Set learning parameters
lr_per_sample = [0.0015625]*20+[0.00046875]*20+[0.00015625]*20+[0.000046875]*10+[0.000015625] lr_per_sample = [0.0015625]*20+[0.00046875]*20+[0.00015625]*20+[0.000046875]*10+[0.000015625]
lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample, epoch_size=epoch_size) lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample, epoch_size=epoch_size)
momentum_time_constant = [0]*20+[600]*20+[1200] mm_time_constant = [0]*20+[600]*20+[1200]
mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant, epoch_size=epoch_size) mm_schedule = momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size)
l2_reg_weight = 0.002 l2_reg_weight = 0.002
# trainer object # trainer object

Просмотреть файл

@ -48,10 +48,9 @@ def create_reader(map_file, mean_file, train, distributed_after=INFINITE_SAMPLES
ImageDeserializer(map_file, StreamDefs( ImageDeserializer(map_file, StreamDefs(
features = StreamDef(field='image', transforms=transforms), # first column in map file is referred to as 'image' features = StreamDef(field='image', transforms=transforms), # first column in map file is referred to as 'image'
labels = StreamDef(field='label', shape=num_classes))), # and second as 'label' labels = StreamDef(field='label', shape=num_classes))), # and second as 'label'
randomize = False, multithreaded_deserializer = False, # turn off omp as CIFAR-10 is not heavy for deserializer
distributed_after = distributed_after) distributed_after = distributed_after)
# Train and evaluate the network. # Train and evaluate the network.
def convnet_cifar10_dataaug(reader_train, reader_test, distributed_trainer, max_epochs = 80): def convnet_cifar10_dataaug(reader_train, reader_test, distributed_trainer, max_epochs = 80):
set_computation_network_trace_level(0) set_computation_network_trace_level(0)
@ -87,8 +86,8 @@ def convnet_cifar10_dataaug(reader_train, reader_test, distributed_trainer, max_
# Set learning parameters # Set learning parameters
lr_per_sample = [0.0015625]*20+[0.00046875]*20+[0.00015625]*20+[0.000046875]*10+[0.000015625] lr_per_sample = [0.0015625]*20+[0.00046875]*20+[0.00015625]*20+[0.000046875]*10+[0.000015625]
lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample, epoch_size=epoch_size) lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample, epoch_size=epoch_size)
momentum_time_constant = [0]*20+[600]*20+[1200] mm_time_constant = [0]*20+[600]*20+[1200]
mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant, epoch_size=epoch_size) mm_schedule = momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size)
l2_reg_weight = 0.002 l2_reg_weight = 0.002
# trainer object # trainer object

Просмотреть файл

@ -11,7 +11,7 @@ from cntk import Trainer, persist
from cntk.utils import * from cntk.utils import *
from cntk.layers import * from cntk.layers import *
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT, FULL_DATA_SWEEP from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT, FULL_DATA_SWEEP
from cntk.learner import momentum_sgd, learning_rate_schedule, momentum_schedule, UnitType from cntk.learner import momentum_sgd, learning_rate_schedule, momentum_as_time_constant_schedule, UnitType
from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error, relu, element_times, constant from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error, relu, element_times, constant
# Paths relative to current python file. # Paths relative to current python file.
@ -63,11 +63,11 @@ def convnet_mnist(debug_output=False):
# Set learning parameters # Set learning parameters
lr_per_sample = [0.001]*10+[0.0005]*10+[0.0001] lr_per_sample = [0.001]*10+[0.0005]*10+[0.0001]
lr_schedule = learning_rate_schedule(lr_per_sample, UnitType.sample, epoch_size) lr_schedule = learning_rate_schedule(lr_per_sample, UnitType.sample, epoch_size)
momentum_time_constant = [0]*5+[1024] mm_time_constant = [0]*5+[1024]
mn_schedule = momentum_schedule(momentum_time_constant, epoch_size) mm_schedule = momentum_as_time_constant_schedule(mm_time_constant, epoch_size)
# Instantiate the trainer object to drive the model training # Instantiate the trainer object to drive the model training
learner = momentum_sgd(z.parameters, lr_schedule, mn_schedule) learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule)
trainer = Trainer(z, ce, pe, learner) trainer = Trainer(z, ce, pe, learner)
# define mapping from reader streams to network inputs # define mapping from reader streams to network inputs

Просмотреть файл

@ -35,7 +35,7 @@ We use a fixed crop ratio of `0.8` and scale the image to `32x32` pixels for tra
### ConvNet_CIFAR10_DataAug_Distributed.py ### ConvNet_CIFAR10_DataAug_Distributed.py
The fourth example uses the same CNN as ConvNet_CIFAR10_DataAug.py, but it adds support for distributed training with simple aggregation. For a reference on distributed training, please check [here](https://github.com/Microsoft/CNTK/wiki/Multiple-GPUs-and-machines) The fourth example uses the same CNN as ConvNet_CIFAR10_DataAug.py, but it adds support for distributed training with simple aggregation. For a reference on distributed training, please check [here](https://github.com/Microsoft/CNTK/wiki/Multiple-GPUs-and-machines).
Note that this example supports CPU-only build. Note that [this example](./ConvNet_CIFAR10_DataAug_Distributed.py) supports CPU-only build.
`mpiexec -n <#workers> python ConvNet_CIFAR10_DataAug_Distributed.py` `mpiexec -n <#workers> python ConvNet_CIFAR10_DataAug_Distributed.py`

Просмотреть файл

@ -15,7 +15,7 @@ for ResNet20 and ResNet110, respectively. The ResNet20 network achieves an error
### TrainResNet_CIFAR10_Distributed.py ### TrainResNet_CIFAR10_Distributed.py
This example code is similar to TrainResNet_CIFAR10.py, but it adds support for distributed training via [MPI](https://en.wikipedia.org/wiki/Message_Passing_Interface). Details can be found in [here](https://github.com/Microsoft/CNTK/wiki/Multiple-GPUs-and-machines) [This example](./TrainResNet_CIFAR10_Distributed.py) is similar to TrainResNet_CIFAR10.py, but it adds support for distributed training via [MPI](https://en.wikipedia.org/wiki/Message_Passing_Interface). Details can be found in [here](https://github.com/Microsoft/CNTK/wiki/Multiple-GPUs-and-machines).
Note this example requires a multi-GPU machine or mpi hosts file to distribute to multiple machines. Note this example requires a multi-GPU machine or mpi hosts file to distribute to multiple machines.
Simple aggregation, ResNet20, with a 2-GPU machine: Simple aggregation, ResNet20, with a 2-GPU machine:
@ -25,3 +25,7 @@ Simple aggregation, ResNet20, with a 2-GPU machine:
Quantized 1-bit aggregation with 50000 samples before distributed, ResNet20, with a 2-GPU machine: Quantized 1-bit aggregation with 50000 samples before distributed, ResNet20, with a 2-GPU machine:
`mpiexec -n 2 python TrainResNet_CIFAR10_Distributed.py -n resnet20 -q 1 -a 50000` `mpiexec -n 2 python TrainResNet_CIFAR10_Distributed.py -n resnet20 -q 1 -a 50000`
To run with maximum parallelization with minibatch size scaled according to #workers for 3 epochs:
`mpiexec -n 2 python TrainResNet_CIFAR10_Distributed.py -s True -e 3`

Просмотреть файл

@ -50,12 +50,12 @@ def create_reader(map_file, mean_file, train, distributed_after=INFINITE_SAMPLES
ImageDeserializer(map_file, StreamDefs( ImageDeserializer(map_file, StreamDefs(
features = StreamDef(field='image', transforms=transforms), # first column in map file is referred to as 'image' features = StreamDef(field='image', transforms=transforms), # first column in map file is referred to as 'image'
labels = StreamDef(field='label', shape=num_classes))), # and second as 'label' labels = StreamDef(field='label', shape=num_classes))), # and second as 'label'
randomize = False, multithreaded_deserializer = False, # turn off omp as CIFAR-10 is not heavy for deserializer
distributed_after = distributed_after) distributed_after = distributed_after)
# Train and evaluate the network. # Train and evaluate the network.
def train_and_evaluate(reader_train, reader_test, network_name, max_epochs, distributed_trainer): def train_and_evaluate(reader_train, reader_test, network_name, max_epochs, distributed_trainer, scale_up=False):
set_computation_network_trace_level(0) set_computation_network_trace_level(0)
@ -79,7 +79,13 @@ def train_and_evaluate(reader_train, reader_test, network_name, max_epochs, dist
# shared training parameters # shared training parameters
epoch_size = 50000 # for now we manually specify epoch size epoch_size = 50000 # for now we manually specify epoch size
minibatch_size = 128
# NOTE: scaling up minibatch_size increases sample throughput. On an 8-GPU machine,
# ResNet110 samples-per-second is ~7x that of a single GPU, compared to ~3x without scaling
# up. However, a bigger minibatch size on the same number of samples means fewer updates,
# which leads to higher training error. This is a trade-off between speed and accuracy.
minibatch_size = 128 * (len(distributed_trainer.communicator().workers()) if scale_up else 1)
momentum_time_constant = -minibatch_size/np.log(0.9) momentum_time_constant = -minibatch_size/np.log(0.9)
l2_reg_weight = 0.0001 l2_reg_weight = 0.0001
@ -145,15 +151,17 @@ def train_and_evaluate(reader_train, reader_test, network_name, max_epochs, dist
if __name__=='__main__': if __name__=='__main__':
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('-n', '--network', help='network type, resnet20 or resnet110', required=False, default='resnet20') parser.add_argument('-n', '--network', help='network type, resnet20 or resnet110', required=False, default='resnet20')
parser.add_argument('-e', '--epochs', help='total epochs', required=False, default='160') parser.add_argument('-e', '--epochs', help='total epochs', type=int, required=False, default='160')
parser.add_argument('-q', '--quantize_bit', help='quantized bit', required=False, default='32') parser.add_argument('-q', '--quantize_bit', help='quantized bit', type=int, required=False, default='32')
parser.add_argument('-a', '--distributed_after', help='number of samples to train with before running distributed', required=False, default='0') parser.add_argument('-s', '--scale_up', help='scale up minibatch size with #workers for better parallelism', type=bool, required=False, default='False')
parser.add_argument('-a', '--distributed_after', help='number of samples to train with before running distributed', type=int, required=False, default='0')
args = vars(parser.parse_args()) args = vars(parser.parse_args())
num_quantization_bits = int(args['quantize_bit']) num_quantization_bits = int(args['quantize_bit'])
epochs = int(args['epochs']) epochs = int(args['epochs'])
distributed_after_samples = int(args['distributed_after']) distributed_after_samples = int(args['distributed_after'])
network_name = args['network'] network_name = args['network']
scale_up = bool(args['scale_up'])
# Create distributed trainer # Create distributed trainer
print("Start training: quantize_bit = {}, epochs = {}, distributed_after = {}".format(num_quantization_bits, epochs, distributed_after_samples)) print("Start training: quantize_bit = {}, epochs = {}, distributed_after = {}".format(num_quantization_bits, epochs, distributed_after_samples))
@ -164,7 +172,7 @@ if __name__=='__main__':
reader_train = create_reader(os.path.join(data_path, 'train_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), True, distributed_after_samples) reader_train = create_reader(os.path.join(data_path, 'train_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), True, distributed_after_samples)
reader_test = create_reader(os.path.join(data_path, 'test_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), False) reader_test = create_reader(os.path.join(data_path, 'test_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), False)
train_and_evaluate(reader_train, reader_test, network_name, epochs, distributed_trainer) train_and_evaluate(reader_train, reader_test, network_name, epochs, distributed_trainer, scale_up)
# Must call MPI finalize when process exit # Must call MPI finalize when process exit
distributed.Communicator.finalize() distributed.Communicator.finalize()

Просмотреть файл

@ -1,9 +1,11 @@
**The [CNTK Wiki](https://github.com/Microsoft/CNTK/wiki) has all information on CNTK including [setup](https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-your-machine ), [examples](https://github.com/Microsoft/CNTK/wiki/Examples ), etc.**
# Latest news # Latest news
*2016-11-21.* V 2.0 Beta 4 Release *2016-11-21.* V 2.0 Beta 4 Release
Highlights of this Release: Highlights of this Release:
* New ASGD/Hogwild! training using Microsoft's Parameter Server ([Project Multiverso](https://github.com/Microsoft/multiverso)) * New ASGD/Hogwild! training using Microsoft's Parameter Server ([Project Multiverso](https://github.com/Microsoft/multiverso))
* Distributed Scenarios now supported in CNTK Python API * Distributed Scenarios now supported in CNTK Python API
* New Memory compression -- ability to trade off memory usage with compute. * New [Memory Compression](https://github.com/Microsoft/CNTK/wiki/Top-level-configurations#hypercompressmemory) mode to reduce memory usage on GPU
* CNTK Docker image with 1bit-SGD support * CNTK Docker image with 1bit-SGD support
* Stability Improvements and bug fixes * Stability Improvements and bug fixes
@ -55,18 +57,6 @@ If you ARE using Model Evaluation Library we **strongly recommend** installing v
See [Release Notes](https://github.com/Microsoft/CNTk/wiki/CNTK_1_7_2_Release_Notes) for details. See [Release Notes](https://github.com/Microsoft/CNTk/wiki/CNTK_1_7_2_Release_Notes) for details.
*2016-09-28.* V 1.7.1 Binary release
Highlights of this Release:
* Two Breaking Changes related to Layers library default initialization and ```fsAdagrad``` gradient-normalization scheme
* Improvements in BrainScript
* Enabling of Deterministic Algorithm enforcement
* Improvements in Model Evaluation including the support of Evaluation for Azure Applications
* Different Performance improvements
* Multiple bug fixes
See more in the [Release Notes](https://github.com/Microsoft/CNTK/wiki/CNTK_1_7_1_Release_Notes) (including the full list of bugs fixed)
Get the Release from the [CNTK Releases page](https://github.com/Microsoft/CNTK/releases)
See [all news](https://github.com/Microsoft/CNTK/wiki/News). See [all news](https://github.com/Microsoft/CNTK/wiki/News).
# What is CNTK # What is CNTK

Просмотреть файл

@ -512,7 +512,7 @@ CNTK2 = [
# Parameter{} can do several forms of initialization. # Parameter{} can do several forms of initialization.
# - initValue=scalar, value=array --> initialize from this value --array form not implemented yet # - initValue=scalar, value=array --> initialize from this value --array form not implemented yet
# - initFromFilePath="..." --> read from a data file # - initFromFilePath="..." --> read from a data file
# - init="uniform|gaussian" (random init scaled by initValueScale). Warning: This has magic scaling factors. TODO: document them here # - init="uniform|gaussian" (random init scaled by initValueScale).
# - init="zero" # - init="zero"
# deprecated: # deprecated:
# - initFromLiteral="..." (deprecated) --> parse a string literal (obsolete with value=array form) # - initFromLiteral="..." (deprecated) --> parse a string literal (obsolete with value=array form)

Просмотреть файл

@ -393,16 +393,11 @@ namespace CNTK
friend class PackedValue; friend class PackedValue;
friend class MPICommunicatorImpl; friend class MPICommunicatorImpl;
friend class BlockMomentumDistributedTrainer; friend class BlockMomentumDistributedTrainer;
friend class Internal::VariableResolver;
template <typename T, typename ...CtorArgTypes> template <typename T, typename ...CtorArgTypes>
friend inline std::shared_ptr<T> MakeSharedObject(CtorArgTypes&& ...ctorArgs); friend inline std::shared_ptr<T> MakeSharedObject(CtorArgTypes&& ...ctorArgs);
template <typename ElementType>
friend Variable Internal::GetVariable(const Microsoft::MSR::CNTK::ComputationNodeBasePtr& node,
std::unordered_map<Microsoft::MSR::CNTK::ComputationNodeBasePtr, Variable>& nodeToVariableMap,
std::unordered_map<Variable, Variable>& placeholderReplacements,
std::unordered_set<FunctionPtr>& allPrimitiveFunctions);
public: public:
/// ///
/// Construct a NDArrayView with the specified 'dataBuffer' as the backing storage. /// Construct a NDArrayView with the specified 'dataBuffer' as the backing storage.
@ -635,6 +630,7 @@ namespace CNTK
static const size_t AutoSelectRowColSplitPoint = SIZE_MAX; static const size_t AutoSelectRowColSplitPoint = SIZE_MAX;
private: private:
CNTK_API NDArrayView(::CNTK::DataType dataType, const DeviceDescriptor& device, ::CNTK::StorageFormat storageType, const NDShape& viewShape, bool readOnly, void* tensorView); CNTK_API NDArrayView(::CNTK::DataType dataType, const DeviceDescriptor& device, ::CNTK::StorageFormat storageType, const NDShape& viewShape, bool readOnly, void* tensorView);
@ -1599,11 +1595,7 @@ namespace CNTK
template <typename T> template <typename T>
friend struct std::hash; friend struct std::hash;
template <typename ElementType> friend class Internal::VariableResolver;
friend Variable Internal::GetVariable(const Microsoft::MSR::CNTK::ComputationNodeBasePtr& node,
std::unordered_map<Microsoft::MSR::CNTK::ComputationNodeBasePtr, Variable>& nodeToVariableMap,
std::unordered_map<Variable, Variable>& placeholderReplacements,
std::unordered_set<FunctionPtr>& allPrimitiveFunctions);
#ifndef SWIG #ifndef SWIG
private: private:
@ -1952,11 +1944,7 @@ private:
template <typename T> template <typename T>
friend struct std::hash; friend struct std::hash;
template <typename ElementType> friend class Internal::VariableResolver;
friend Variable Internal::GetVariable(const Microsoft::MSR::CNTK::ComputationNodeBasePtr& node,
std::unordered_map<Microsoft::MSR::CNTK::ComputationNodeBasePtr, Variable>& nodeToVariableMap,
std::unordered_map<Variable, Variable>& placeholderReplacements,
std::unordered_set<FunctionPtr>& allPrimitiveFunctions);
public: public:
/// ///
@ -2037,11 +2025,7 @@ private:
template <typename T> template <typename T>
friend struct std::hash; friend struct std::hash;
template <typename ElementType> friend class Internal::VariableResolver;
friend Variable Internal::GetVariable(const Microsoft::MSR::CNTK::ComputationNodeBasePtr& node,
std::unordered_map<Microsoft::MSR::CNTK::ComputationNodeBasePtr, Variable>& nodeToVariableMap,
std::unordered_map<Variable, Variable>& placeholderReplacements,
std::unordered_set<FunctionPtr>& allPrimitiveFunctions);
public: public:
/// ///
@ -2175,13 +2159,17 @@ namespace CNTK
/// Returns the Function that 'this' BackPropState belongs to /// Returns the Function that 'this' BackPropState belongs to
/// ///
FunctionPtr Function() const { return m_function; } FunctionPtr Function() const { return m_function; }
DeviceDescriptor Device() const { return m_forwardComputeDevice; }
virtual ~BackPropState() {} virtual ~BackPropState() {}
protected: protected:
BackPropState(const FunctionPtr& function) : m_function(function) {} BackPropState(const FunctionPtr& function, const DeviceDescriptor& computeDevice)
: m_function(function), m_forwardComputeDevice(computeDevice)
{}
protected: protected:
FunctionPtr m_function; FunctionPtr m_function;
DeviceDescriptor m_forwardComputeDevice;
}; };
typedef std::shared_ptr<BackPropState> BackPropStatePtr; typedef std::shared_ptr<BackPropState> BackPropStatePtr;

Просмотреть файл

@ -247,10 +247,6 @@ namespace CNTK
CNTK_API bool AreEqual(const ::CNTK::NDArrayView& view1, const ::CNTK::NDArrayView& view2, double relativeTolerance = 0.0, double absoluteTolerance = 0.0); CNTK_API bool AreEqual(const ::CNTK::NDArrayView& view1, const ::CNTK::NDArrayView& view2, double relativeTolerance = 0.0, double absoluteTolerance = 0.0);
template <typename ElementType> class VariableResolver;
Variable GetVariable(const Microsoft::MSR::CNTK::ComputationNodeBasePtr& node,
std::unordered_map<Microsoft::MSR::CNTK::ComputationNodeBasePtr, ::CNTK::Variable>& nodeToVariableMap,
std::unordered_map<::CNTK::Variable, ::CNTK::Variable>& placeholderReplacements,
std::unordered_set<::CNTK::FunctionPtr>& allPrimitiveFunctions);
} }
} }

Просмотреть файл

@ -20,28 +20,67 @@
#include "DeprecatedNodes.h" #include "DeprecatedNodes.h"
#include "RNNNodes.h" #include "RNNNodes.h"
using namespace Microsoft::MSR::CNTK; using namespace Microsoft::MSR::CNTK;
namespace CNTK namespace CNTK
{ {
namespace Internal namespace Internal
{ {
template <typename ElementType> // Helper class to resolve variables in the model.
Variable GetVariable(const ComputationNodeBasePtr& node, class VariableResolver final
std::unordered_map<ComputationNodeBasePtr, Variable>& nodeToVariableMap,
std::unordered_map<Variable, Variable>& placeholderReplacements,
std::unordered_set<FunctionPtr>& allPrimitiveFunctions)
{ {
auto iter = nodeToVariableMap.find(node); std::unordered_map<Variable, Variable> m_placeholderReplacements;
if (iter != nodeToVariableMap.end()) std::unordered_map<ComputationNodeBasePtr, Variable> m_nodeToVariableMap;
return iter->second; std::unordered_set<FunctionPtr> m_allPrimitiveFunctions;
Variable var; public:
NDShape varShape = AsNDShape(node->GetSampleLayout()); const std::unordered_map<Variable, Variable>& GetPlaceHolders() const
if (node->IsLeaf())
{ {
return m_placeholderReplacements;
}
template<class ElementType>
Variable GetVariable(const ComputationNodeBasePtr& node)
{
auto iter = m_nodeToVariableMap.find(node);
if (iter != m_nodeToVariableMap.end())
return iter->second;
Variable var;
if (node->IsLeaf())
{
var = ResolveLeaf<ElementType>(node);
}
else
{
// This is a non-leaf node and maps to a primitive Function
NDShape varShape = AsNDShape(node->GetSampleLayout());
auto placeholderVar = PlaceholderVariable(varShape);
m_nodeToVariableMap[node] = placeholderVar;
std::vector<Variable> inputVars(node->GetNumInputs());
for (size_t i = 0; i < inputVars.size(); ++i)
{
inputVars[i] = GetVariable<ElementType>(node->Input(i));
if (inputVars[i].IsPlaceholder())
m_placeholderReplacements[inputVars[i]] = Variable();
}
var = ResolveFunction<ElementType>(node, inputVars);
if (m_placeholderReplacements.find(placeholderVar) != m_placeholderReplacements.end())
m_placeholderReplacements[placeholderVar] = var;
}
m_nodeToVariableMap[node] = var;
return var;
}
private:
template<class ElementType>
Variable ResolveLeaf(const ComputationNodeBasePtr& node)
{
NDShape variableShape = AsNDShape(node->GetSampleLayout());
std::wstring varUid, varName; std::wstring varUid, varName;
if (node->Is<InputValueBase<ElementType>>()) if (node->Is<InputValueBase<ElementType>>())
{ {
@ -54,48 +93,32 @@ namespace CNTK
auto inputNodeInternalDynamicAxisName = node->As<InputValueBase<ElementType>>()->GetRequestedDynamicAxis(); auto inputNodeInternalDynamicAxisName = node->As<InputValueBase<ElementType>>()->GetRequestedDynamicAxis();
std::vector<Axis> inputVarDynamicAxes = DynamicAxesFromInternalDynamicAxisName(inputNodeInternalDynamicAxisName); std::vector<Axis> inputVarDynamicAxes = DynamicAxesFromInternalDynamicAxisName(inputNodeInternalDynamicAxisName);
var = Variable(varShape, isSparse, AsDataType<ElementType>(), node->GetLearningRateMultiplier() != 0, varName, inputVarDynamicAxes, varUid); return Variable(variableShape, isSparse, AsDataType<ElementType>(), node->GetLearningRateMultiplier() != 0, varName, inputVarDynamicAxes, varUid);
}
else
{
// TODO: Allow creating inputs without a dynamic axis
LogicError("Found InputNode with no dynamic axes which is currently unsupported");
} }
// TODO: Allow creating inputs without a dynamic axis
LogicError("Found InputNode with no dynamic axes which is currently unsupported");
} }
else if (node->Is<LearnableParameter<ElementType>>())
if (node->Is<LearnableParameter<ElementType>>())
{ {
bool isConstant = (node->GetLearningRateMultiplier() == 0); bool isConstant = (node->GetLearningRateMultiplier() == 0);
auto& matrix = node->As<ComputationNode<ElementType>>()->Value(); auto& matrix = node->As<ComputationNode<ElementType>>()->Value();
auto tensorView = new TensorView<ElementType>(std::make_shared<Matrix<ElementType>>(matrix.AsReference()), AsTensorViewShape(node->GetSampleLayout())); auto tensorView = new TensorView<ElementType>(std::make_shared<Matrix<ElementType>>(matrix.AsReference()), AsTensorViewShape(node->GetSampleLayout()));
NDArrayViewPtr value = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(matrix.GetFormat()), varShape, false, tensorView); NDArrayViewPtr value = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(matrix.GetFormat()), variableShape, false, tensorView);
if (isConstant)
{ auto kind = isConstant ? VariableKind::Constant : VariableKind::Parameter;
std::tie(varUid, varName) = UidAndNameFromCNTKInternalNodeName(node->NodeName(), VariableKind::Constant); std::tie(varUid, varName) = UidAndNameFromCNTKInternalNodeName(node->NodeName(), kind);
var = Constant(value, varName, varUid); return isConstant ? (Variable)Constant(value, varName, varUid) : Parameter(value, varName, varUid);
}
else
{
std::tie(varUid, varName) = UidAndNameFromCNTKInternalNodeName(node->NodeName(), VariableKind::Parameter);
var = Parameter(value, varName, varUid);
}
} }
else
LogicError("CNTK::LoadLegacyModel: Unsupported legacy CNTK node named '%S'", node->NodeName().c_str()); LogicError("CNTK::LoadLegacyModel: Unsupported legacy CNTK node named '%S'", node->NodeName().c_str());
return Variable();// make compiler happy.
} }
else
template<class ElementType>
Variable ResolveFunction(const ComputationNodeBasePtr& node, std::vector<Variable>& inputVars)
{ {
// This is a non-leaf node and maps to a primitive Function
auto placeholderVar = PlaceholderVariable(varShape);
nodeToVariableMap[node] = placeholderVar;
std::vector<Variable> inputVars(node->GetNumInputs());
for (size_t i = 0; i < inputVars.size(); ++i)
{
inputVars[i] = GetVariable<ElementType>(node->Input(i), nodeToVariableMap, placeholderReplacements, allPrimitiveFunctions);
if (inputVars[i].IsPlaceholder())
placeholderReplacements[inputVars[i]] = Variable();
}
PrimitiveOpType opType; PrimitiveOpType opType;
Dictionary primitiveFunctionConfigParameters; Dictionary primitiveFunctionConfigParameters;
if (node->OperationName() == OperationNameOf(NegateNode)) if (node->OperationName() == OperationNameOf(NegateNode))
@ -376,15 +399,10 @@ namespace CNTK
std::tie(functionUid, functionName) = UidAndNameFromCNTKInternalNodeName(node->NodeName(), opType); std::tie(functionUid, functionName) = UidAndNameFromCNTKInternalNodeName(node->NodeName(), opType);
FunctionPtr primitiveFunction = MakeSharedObject<PrimitiveFunction>(opType, inputVars, std::move(primitiveFunctionConfigParameters), functionName, functionUid); FunctionPtr primitiveFunction = MakeSharedObject<PrimitiveFunction>(opType, inputVars, std::move(primitiveFunctionConfigParameters), functionName, functionUid);
allPrimitiveFunctions.insert(primitiveFunction); m_allPrimitiveFunctions.insert(primitiveFunction);
var = primitiveFunction->Output(); return primitiveFunction->Output();
if (placeholderReplacements.find(placeholderVar) != placeholderReplacements.end())
placeholderReplacements[placeholderVar] = var;
} }
};
nodeToVariableMap[node] = var;
return var;
}
FunctionPtr LoadLegacyModel(const std::wstring& modelFile, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/) FunctionPtr LoadLegacyModel(const std::wstring& modelFile, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/)
{ {
@ -410,8 +428,8 @@ namespace CNTK
// Now traverse the model and construct the Function graph // Now traverse the model and construct the Function graph
std::unordered_map<ComputationNodeBasePtr, Variable> nodeToVariableMap; std::unordered_map<ComputationNodeBasePtr, Variable> nodeToVariableMap;
std::unordered_map<Variable, Variable> placeholderReplacements; std::unordered_map<Variable, Variable> placeholderReplacements;
std::unordered_set<FunctionPtr> allPrimitiveFunctions;
std::vector<Variable> rootVariables; std::vector<Variable> rootVariables;
VariableResolver resolver;
auto& networkRoots = net->RootNodes(); auto& networkRoots = net->RootNodes();
for (auto& rootNode : networkRoots) for (auto& rootNode : networkRoots)
{ {
@ -420,11 +438,11 @@ namespace CNTK
if (ComputationNetwork::IsNodePtr<ComputationNode<float>>(rootNode)) if (ComputationNetwork::IsNodePtr<ComputationNode<float>>(rootNode))
{ {
rootVariables.push_back(Internal::GetVariable<float>(rootNode, nodeToVariableMap, placeholderReplacements, allPrimitiveFunctions).Owner()); rootVariables.push_back(resolver.GetVariable<float>(rootNode).Owner());
} }
else if (ComputationNetwork::IsNodePtr<ComputationNode<double>>(rootNode)) else if (ComputationNetwork::IsNodePtr<ComputationNode<double>>(rootNode))
{ {
rootVariables.push_back(Internal::GetVariable<double>(rootNode, nodeToVariableMap, placeholderReplacements, allPrimitiveFunctions).Owner()); rootVariables.push_back(resolver.GetVariable<double>(rootNode).Owner());
} }
else else
{ {
@ -433,8 +451,7 @@ namespace CNTK
} }
auto rootComposite = Combine(rootVariables); auto rootComposite = Combine(rootVariables);
rootComposite->ReplacePlaceholders(placeholderReplacements); rootComposite->ReplacePlaceholders(resolver.GetPlaceHolders());
return rootComposite; return rootComposite;
} }

Просмотреть файл

@ -2625,7 +2625,7 @@ namespace CNTK
else else
evalTimeStampVariable = arguments.begin()->first; evalTimeStampVariable = arguments.begin()->first;
return (outputsToRetainBackwardStateFor.size() > 0) ? MakeSharedObject<CNTKBackPropState>(this->shared_from_this(), std::make_pair(evalTimeStampVariable, m_variableToNodeMap[evalTimeStampVariable]->GetEvalTimeStamp())) : nullptr; return (outputsToRetainBackwardStateFor.size() > 0) ? MakeSharedObject<CNTKBackPropState>(this->shared_from_this(), computeDevice, std::make_pair(evalTimeStampVariable, m_variableToNodeMap[evalTimeStampVariable]->GetEvalTimeStamp())) : nullptr;
} }
/*virtual*/ void CompositeFunction::Backward(const BackPropStatePtr& state, /*virtual*/ void CompositeFunction::Backward(const BackPropStatePtr& state,

Просмотреть файл

@ -652,8 +652,8 @@ namespace CNTK
class CNTKBackPropState final : public BackPropState class CNTKBackPropState final : public BackPropState
{ {
public: public:
CNTKBackPropState(const FunctionPtr& function, const std::pair<Variable, int64_t>& evalTimeStamp) CNTKBackPropState(const FunctionPtr& function, const DeviceDescriptor& computeDevice, const std::pair<Variable, int64_t>& evalTimeStamp)
: BackPropState(function), m_evalTimeStamp(evalTimeStamp) : BackPropState(function, computeDevice), m_evalTimeStamp(evalTimeStamp)
{} {}
std::pair<Variable, int64_t> EvalTimeStamp() const std::pair<Variable, int64_t> EvalTimeStamp() const

Просмотреть файл

@ -103,9 +103,9 @@ private:
bool log = GetEnvironmentPtr() && Environment().traceLevel > 0; // note: this will not log before node is part of network bool log = GetEnvironmentPtr() && Environment().traceLevel > 0; // note: this will not log before node is part of network
if (log) if (log)
{ {
fprintf(stderr, "%ls: Initializing Parameter[%s] <- %ls(seed=%d, init dims=[%d x %d], range=%f*%f, onCPU=%s.\n)", fprintf(stderr, "%ls: Initializing Parameter[%s] <- %ls(seed=%d, init dims=[%d x %d], range=%f(%f*%f), onCPU=%s.\n)",
NodeDescription().c_str(), string(GetSampleLayout()).c_str(), m_initString.c_str(), NodeDescription().c_str(), string(GetSampleLayout()).c_str(), m_initString.c_str(),
(int)randomSeed, (int)fanOut, (int)fanIn, range, initValueScale, initOnCPUOnly ? "true" : "false"); (int)randomSeed, (int)fanOut, (int)fanIn, range, range/initValueScale, initValueScale, initOnCPUOnly ? "true" : "false");
} }
} }

Просмотреть файл

@ -109,67 +109,26 @@ public:
m_originIndex = 0; m_originIndex = 0;
for (int i = (int)dimCount - 1; i >= 0; i--) for (int i = (int)dimCount - 1; i >= 0; i--)
{ {
assert((m_outputShape[i] % GetMapCount(i)) == 0); bool padded = GetAutoPad(i);
int outPerMap = (int)(m_outputShape[i] / GetMapCount(i)); if (padded)
// Number of cells between first and last "centers", inclusive. m_start[i] = 0;
int cells = (int)((outPerMap - 1) * GetStride(i) + 1);
assert(m_inputShape[i] >= cells);
// Extra cells, to the left and right of "cells".
int extra = (int)m_inputShape[i] - cells;
assert(extra >= 0);
// When LowerPad and/or UpperPad are specified, the Start[i] value is determined by those values.
int lo = GetAutoPad(i) ? 0 : (int)m_lowerPad[m_lowerPad.size() == 1 ? 0 : i];
int hi = GetAutoPad(i) ? 0 : (int)m_upperPad[m_upperPad.size() == 1 ? 0 : i];
if (lo != 0 || hi != 0)
{
assert(extra + lo + hi + 1 == m_kernelShape[i]);
// Compute the number of cells on the left and right parts of the kernel,
// not counting the "kernel-center" cell. If m_kernelShape[i] is even, the extra cell is
// placed on the right (the center is shifted to the left).
int right = (int)m_kernelShape[i] - 1;
int left = right / 2;
right -= left;
assert(left <= right);
assert(right <= left + 1);
assert(lo <= left);
assert(hi <= right);
m_start[i] = left - lo;
assert(m_start[i] + cells + right == m_inputShape[i] + hi);
}
else else
{ {
m_start[i] = extra / 2; m_start[i] = ((int)m_kernelShape[i] - 1) / 2;
#ifdef _DEBUG int lo = (int)m_lowerPad[m_lowerPad.size() == 1 ? 0 : i];
// If we're padding then extra should be covered. int hi = (int)m_upperPad[m_upperPad.size() == 1 ? 0 : i];
bool padded = GetAutoPad(i); if (lo != 0 || hi != 0)
assert(!padded || extra + 1 <= m_kernelShape[i]); {
// If we're not padding then, we should stay within the input dimension. m_start[i] -= lo;
assert(padded || extra + 1 >= m_kernelShape[i]); assert(m_start[i] >= 0);
int outPerMap = (int)(m_outputShape[i] / GetMapCount(i));
// Compute the number of cells on the left and right parts of the kernel, int cells = (int)((outPerMap - 1) * GetStride(i) + 1);
// not counting the "kernel-center" cell. If m_kernelShape[i] is even, the extra cell is if (cells > 0) // dummy if, just to get rid of warning
// placed on the right (the center is shifted to the left). {
int right = (int)m_kernelShape[i] - 1; assert(m_inputShape[i] >= cells);
int left = right / 2; assert(m_start[i] + cells + (int)m_kernelShape[i] - 1 == m_inputShape[i] + hi);
right -= left; }
assert(0 <= left); }
assert(left <= right);
assert(right <= left + 1);
int min = m_start[i] - left;
int max = m_start[i] + (int)cells + right;
assert(!padded || min <= 0 && max >= m_inputShape[i]);
assert(padded || min >= 0 && max <= m_inputShape[i]);
int diff = min - ((int)m_inputShape[i] - max);
assert(std::abs(diff) <= 1);
UNUSED(padded);
UNUSED(diff);
#endif
} }
m_startIndex = m_startIndex * (int)m_inputShape[i] + m_start[i]; m_startIndex = m_startIndex * (int)m_inputShape[i] + m_start[i];

Просмотреть файл

@ -40,7 +40,6 @@
<ConfigurationType>DynamicLibrary</ConfigurationType> <ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries> <UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v120</PlatformToolset> <PlatformToolset>v120</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet> <CharacterSet>Unicode</CharacterSet>
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
@ -51,7 +50,6 @@
<PropertyGroup Label="UserMacros" /> <PropertyGroup Label="UserMacros" />
<PropertyGroup> <PropertyGroup>
<!-- TODO intentional for all? --> <!-- TODO intentional for all? -->
<LinkIncremental>false</LinkIncremental>
<TargetName>Math</TargetName> <TargetName>Math</TargetName>
</PropertyGroup> </PropertyGroup>
<ItemDefinitionGroup> <ItemDefinitionGroup>
@ -102,9 +100,6 @@
<ClCompile> <ClCompile>
<WarningLevel>Level4</WarningLevel> <WarningLevel>Level4</WarningLevel>
<PrecompiledHeader>Use</PrecompiledHeader> <PrecompiledHeader>Use</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>$(MathDefine); NO_SYNC; WIN32; NDEBUG; _WINDOWS; _USRDLL; MATH_EXPORTS; %(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>$(MathDefine); NO_SYNC; WIN32; NDEBUG; _WINDOWS; _USRDLL; MATH_EXPORTS; %(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck> <SDLCheck>true</SDLCheck>
<MultiProcessorCompilation>true</MultiProcessorCompilation> <MultiProcessorCompilation>true</MultiProcessorCompilation>
@ -113,14 +108,12 @@
<EnableParallelCodeGeneration>true</EnableParallelCodeGeneration> <EnableParallelCodeGeneration>true</EnableParallelCodeGeneration>
<FloatingPointExceptions>false</FloatingPointExceptions> <FloatingPointExceptions>false</FloatingPointExceptions>
<TreatWarningAsError>true</TreatWarningAsError> <TreatWarningAsError>true</TreatWarningAsError>
<AdditionalOptions>/d2Zi+ %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions>/d2Zi+ /bigobj %(AdditionalOptions)</AdditionalOptions>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary> <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
</ClCompile> </ClCompile>
<Link> <Link>
<SubSystem>Console</SubSystem> <SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation> <GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>$(MathLinkLibrary);Common.lib;%(AdditionalDependencies)</AdditionalDependencies> <AdditionalDependencies>$(MathLinkLibrary);Common.lib;%(AdditionalDependencies)</AdditionalDependencies>
<DelayLoadDLLs>$(MathDelayLoad); $(CudaDlls); %(DelayLoadDLLs)</DelayLoadDLLs> <DelayLoadDLLs>$(MathDelayLoad); $(CudaDlls); %(DelayLoadDLLs)</DelayLoadDLLs>
<Profile>true</Profile> <Profile>true</Profile>
@ -147,6 +140,7 @@
<ItemDefinitionGroup Condition="$(GpuBuild)"> <ItemDefinitionGroup Condition="$(GpuBuild)">
<ClCompile> <ClCompile>
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(CudaInclude)</AdditionalIncludeDirectories> <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(CudaInclude)</AdditionalIncludeDirectories>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">/d2Zi+ /bigobj %(AdditionalOptions)</AdditionalOptions>
</ClCompile> </ClCompile>
<Link> <Link>
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories);$(CudaLibPath)</AdditionalLibraryDirectories> <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories);$(CudaLibPath)</AdditionalLibraryDirectories>
@ -155,6 +149,7 @@
<ItemDefinitionGroup Condition="$(CpuOnlyBuild)"> <ItemDefinitionGroup Condition="$(CpuOnlyBuild)">
<ClCompile> <ClCompile>
<PreprocessorDefinitions>CPUONLY;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>CPUONLY;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug_CpuOnly|x64'">/d2Zi+ /bigobj %(AdditionalOptions)</AdditionalOptions>
</ClCompile> </ClCompile>
<Link> <Link>
<DelayLoadDLLs>$(MathDelayLoad)</DelayLoadDLLs> <DelayLoadDLLs>$(MathDelayLoad)</DelayLoadDLLs>

Просмотреть файл

@ -0,0 +1,24 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
import os
import re
abs_path = os.path.dirname(os.path.abspath(__file__))
notebook = os.path.join(abs_path, "..", "..", "..", "..", "Tutorials", "CNTK_204_Sequence_To_Sequence.ipynb")
def test_cntk_204_sequence_to_sequence_noErrors(nb):
    """Check that no cell of the executed notebook produced an error output.

    ``nb`` is the executed notebook supplied by the ``nb`` fixture.  Every
    cell that has an 'outputs' entry is scanned, and all outputs whose
    ``output_type`` is "error" are collected; the list must be empty.
    """
    errors = []
    for cell in nb.cells:
        # Cells without an 'outputs' entry cannot carry errors.
        if 'outputs' not in cell:
            continue
        for output in cell['outputs']:
            if output.output_type == "error":
                errors.append(output)
    # Print the collected outputs so a failing run shows what went wrong.
    print(errors)
    assert errors == []
# Upper bound that the training error printed by the notebook must stay below.
expectedEvalError = 90


def test_cntk_204_sequence_to_sequence_trainerror(nb):
    """Check the training error printed by the notebook against the bound.

    The code cells whose source matches '#Print the training error' are
    selected; the 'text' of the first output of the first such cell is parsed
    as a float and must be smaller than ``expectedEvalError``.
    """
    marker = '#Print the training error'
    matching_cells = []
    for cell in nb.cells:
        if cell.cell_type == 'code' and re.search(marker, cell.source):
            matching_cells.append(cell)
    first_output = matching_cells[0].outputs[0]
    reported_error = float(first_output['text'])
    assert reported_error < expectedEvalError

Просмотреть файл

@ -7,55 +7,36 @@
import numpy as np import numpy as np
import os import os
import sys import sys
import signal
import subprocess
import re
import pytest
from cntk.utils import cntk_device from cntk.utils import cntk_device
from cntk.cntk_py import DeviceKind_GPU from cntk.cntk_py import DeviceKind_GPU
from cntk.device import set_default_device from cntk.device import set_default_device
from cntk.io import ReaderConfig, ImageDeserializer
from cntk import distributed
import pytest
import platform
abs_path = os.path.dirname(os.path.abspath(__file__)) abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Classification", "ConvNet", "Python")) sys.path.append(abs_path)
from ConvNet_CIFAR10_DataAug_Distributed import convnet_cifar10_dataaug, create_reader from run_cifar_convnet_distributed import run_cifar_convnet_distributed
TOLERANCE_ABSOLUTE = 2E-1 TOLERANCE_ABSOLUTE = 2E-1
TIMEOUT_SECONDS = 300
def test_cifar_convnet_error(device_id): def test_cifar_convnet_distributed_mpiexec(device_id):
if platform.system() == 'Windows': if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test skipped on Windows') pytest.skip('test only runs on GPU')
set_default_device(cntk_device(device_id))
cmd = ["mpiexec", "-n", "2", "python", os.path.join(abs_path, "run_cifar_convnet_distributed.py")]
p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
try: try:
base_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'], out = p.communicate(timeout=TIMEOUT_SECONDS)[0] # in case we have a hang
*"Image/CIFAR/v0/cifar-10-batches-py".split("/")) except subprocess.TimeoutExpired:
# N.B. CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY has {train,test}_map.txt os.kill(p.pid, signal.CTRL_C_EVENT)
# and CIFAR-10_mean.xml in the base_path. raise RuntimeError('Timeout in mpiexec, possibly hang')
except KeyError: str_out = out.decode(sys.getdefaultencoding())
base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), results = re.findall("Final Results: Minibatch\[.+?\]: errs = (.+?)%", str_out)
*"../../../../Examples/Image/DataSets/CIFAR-10".split("/")) assert len(results) == 2
assert results[0] == results[1]
base_path = os.path.normpath(base_path)
os.chdir(os.path.join(base_path, '..'))
from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
set_computation_network_trace_level(1)
set_fixed_random_seed(1) # BUGBUG: has no effect at present # TODO: remove debugging facilities once this all works
#force_deterministic_algorithms()
# TODO: do the above; they lead to slightly different results, so not doing it for now
reader_train = create_reader(os.path.join(base_path, 'train_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), True, 0)
reader_test = create_reader(os.path.join(base_path, 'test_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
distributed_after_samples = 0
num_quantization_bits = 32
distributed_trainer = distributed.data_parallel_distributed_trainer(
num_quantization_bits=num_quantization_bits,
distributed_after=distributed_after_samples)
test_error = convnet_cifar10_dataaug(reader_train, reader_test, distributed_trainer, max_epochs=1)
expected_test_error = 0.617 expected_test_error = 0.617
assert np.allclose(float(results[0])/100, expected_test_error,
assert np.allclose(test_error, expected_test_error,
atol=TOLERANCE_ABSOLUTE) atol=TOLERANCE_ABSOLUTE)

Просмотреть файл

@ -65,7 +65,7 @@ def nb(tmpdir_factory, request, device_id):
outPath = str(tmpdir_factory.mktemp('notebook').join('out.ipynb')) outPath = str(tmpdir_factory.mktemp('notebook').join('out.ipynb'))
assert os.path.isfile(inPath) assert os.path.isfile(inPath)
args = ["jupyter", "nbconvert", "--to", "notebook", "--execute", args = ["jupyter", "nbconvert", "--to", "notebook", "--execute",
"--ExecutePreprocessor.timeout=60", "--output", outPath, inPath] "--ExecutePreprocessor.timeout=300", "--output", outPath, inPath]
subprocess.check_call(args) subprocess.check_call(args)
nb = nbformat.read(outPath, nbformat.current_nbformat) nb = nbformat.read(outPath, nbformat.current_nbformat)
return nb return nb

Просмотреть файл

@ -1,2 +1,2 @@
[pytest] [pytest]
python_files = *.py python_files = *_test.py

Просмотреть файл

@ -0,0 +1,52 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
import numpy as np
import os
import sys
import platform
from cntk.io import ReaderConfig, ImageDeserializer
from cntk import distributed
from cntk.device import set_default_device, gpu
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Classification", "ConvNet", "Python"))
from ConvNet_CIFAR10_DataAug_Distributed import convnet_cifar10_dataaug, create_reader
def run_cifar_convnet_distributed():
    """Run one epoch of the CIFAR-10 ConvNet example with a distributed trainer.

    The data directory is derived from the CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY
    environment variable when it is set; otherwise the CIFAR-10 data set folder
    of the Examples tree (relative to this file) is used.  The process working
    directory is changed to the parent of the data directory before the readers
    are created.

    Returns:
        Whatever ``convnet_cifar10_dataaug`` returns for the one-epoch run.
    """
    external_root = os.environ.get('CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY')
    if external_root is not None:
        # N.B. CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY has {train,test}_map.txt
        # and CIFAR-10_mean.xml in the data directory.
        data_dir = os.path.join(external_root,
                                "Image", "CIFAR", "v0", "cifar-10-batches-py")
    else:
        data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                "..", "..", "..", "..",
                                "Examples", "Image", "DataSets", "CIFAR-10")

    data_dir = os.path.normpath(data_dir)
    os.chdir(os.path.join(data_dir, '..'))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    # BUGBUG: the fixed seed has no effect at present.
    # TODO: remove debugging facilities once this all works.
    set_fixed_random_seed(1)
    # TODO: also call force_deterministic_algorithms(); it leads to slightly
    # different results, so it is not enabled for now.

    # Both readers share the same mean file.
    mean_file = os.path.join(data_dir, 'CIFAR-10_mean.xml')
    train_reader = create_reader(os.path.join(data_dir, 'train_map.txt'), mean_file, True, 0)
    test_reader = create_reader(os.path.join(data_dir, 'test_map.txt'), mean_file, False)

    # 32 quantization bits, distributed from the very first sample.
    trainer = distributed.data_parallel_distributed_trainer(
        num_quantization_bits=32,
        distributed_after=0)

    return convnet_cifar10_dataaug(train_reader, test_reader, trainer, max_epochs=1)
if __name__ == '__main__':
    # Script entry point: select the device, run the training once, then
    # finalize the distributed communicator.
    set_default_device(gpu(0))  # force using GPU-0 in test for speed
    run_cifar_convnet_distributed()
    distributed.Communicator.finalize()

Просмотреть файл

@ -2,6 +2,22 @@
. $TEST_ROOT_DIR/run-test-common . $TEST_ROOT_DIR/run-test-common
# Temporary workaround to force the default device to always be GPU 0 when
# running the python unit tests, since data placement is currently broken: some
# of the test data ends up on the default device instead of the explicitly
# selected GPU device 0, which causes the tests to fail.
# This should be removed when the test bugs have been addressed.
# For GPU test runs, expose exactly one CUDA device to the Python tests:
# device 0 when CUDA_VISIBLE_DEVICES is unset or empty, otherwise the first
# entry of the existing comma-separated list.
if [ "$TEST_DEVICE" == "gpu" ]; then
  if [ -z "$CUDA_VISIBLE_DEVICES" ]; then
    export CUDA_VISIBLE_DEVICES=0
  else
    # Keep only the text before the first comma. Quoted parameter expansion
    # is used instead of IFS-based array splitting so that IFS is left
    # untouched for the rest of the script and the value is never subject to
    # pathname expansion.
    export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES%%,*}"
  fi
fi
python -c "import sys; print('Python: %s'%sys.version)" python -c "import sys; print('Python: %s'%sys.version)"
python -c "import numpy; print('NumPy: %s'%numpy.version.full_version)" python -c "import numpy; print('NumPy: %s'%numpy.version.full_version)"
python -c "import scipy; print('SciPy: %s'%scipy.version.full_version)" python -c "import scipy; print('SciPy: %s'%scipy.version.full_version)"

Просмотреть файл

@ -1,8 +1,8 @@
dataDir: . dataDir: .
tags: tags:
- bvt-l (build_sku == 'gpu') and (flavor == 'release') and ((os == 'linux') or (device=='cpu')) - bvt-l (build_sku == 'gpu') and (flavor == 'release')
- nightly-l (build_sku == 'gpu') and (flavor == 'release') and ((os == 'linux') or (device=='cpu')) - nightly-l (build_sku == 'gpu') and (flavor == 'release')
testCases: testCases:
Run must finish with error code 0 (outputs __COMPLETED__ in that case): Run must finish with error code 0 (outputs __COMPLETED__ in that case):

Просмотреть файл

@ -763,7 +763,7 @@ INFO: rn4_1.c_proj.y.y: loading pre-CuDNNv5 model: approximated mini-batch count
INFO: rn4_2.c1.c.y.y: loading pre-CuDNNv5 model: approximated mini-batch count of 625625 as 10010000 trained samples. INFO: rn4_2.c1.c.y.y: loading pre-CuDNNv5 model: approximated mini-batch count of 625625 as 10010000 trained samples.
Statistics in further training may be biased; consider re-training instead. Statistics in further training may be biased; consider re-training instead.
INFO: rn4_2.c2.y.y: loading pre-CuDNNv5 model: approximated mini-batch count of 625625 as 10010000 trained samples. INFO: rn4_2.c2.y.y: loading pre-CuDNNv5 model: approximated mini-batch count of 625625 as 10010000 trained samples.
Statistics in further EvaluateImageInputUsingFeatureVector: Outcome = 340 Statistics in further EvaluateImageInputUsingFeatureVector: Outcome = 118
====== EvaluateImageInputUsingImageApi ======== ====== EvaluateImageInputUsingImageApi ========
training may be biased; consider re-training instead. training may be biased; consider re-training instead.
@ -847,7 +847,7 @@ WARNING: rn2_1.c2.y.y: loading pre-CuDNNv5 model: approximately converting varia
WARNING: rn2_1.c_proj.y.y: loading pre-CuDNNv5 model: approximately converting variance statistics format WARNING: rn2_1.c_proj.y.y: loading pre-CuDNNv5 model: approximately converting variance statistics format
WARNING: rn2_2.c1.c.y.y: loading pre-CuDNNv5 model: approximately converting variance statistics format WARNING: rn2_2.c1.c.y.y: loading pre-CuDNNv5 model: approximately converting variance statistics format
WARNING: rn2_2.c2.y.y: loading pre-CuDNNv5 model: approximately converting variance statistics format WARNING: rn2_2.c2.y.y: loading pre-CuDNNv5 model: approximately converting variance statistics format
WARNING: rn3_1.c1.c.y.y: loading pre-CuDNNv5 model:EvaluateImageInputUsingImageApi: Outcome = 340 WARNING: rn3_1.c1.c.y.y: loading pre-CuDNNv5 model:EvaluateImageInputUsingImageApi: Outcome = 118
====== CompareImageApiResults ======== ====== CompareImageApiResults ========
Both image API calls returned the same output vector. Both image API calls returned the same output vector.

Просмотреть файл

@ -80,7 +80,6 @@
<SubSystem>Console</SubSystem> <SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation> <GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>CNTKLibrary-2.0.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies> <AdditionalDependencies>CNTKLibrary-2.0.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<StackReserveSize Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">100000000</StackReserveSize>
</Link> </Link>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(ReleaseBuild)"> <ItemDefinitionGroup Condition="$(ReleaseBuild)">
@ -145,4 +144,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
</ImportGroup> </ImportGroup>
</Project> </Project>

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -19,6 +19,7 @@ def uniform(scale=DefaultParamInitScale, seed=None):
Returns: Returns:
initializer for :class:`cntk.variables.Parameter` initializer for :class:`cntk.variables.Parameter`
initialized to uniform distribution between `scale*[-0.05, 0.05]`
''' '''
if seed is None: if seed is None:
seed = SentinelValueForAutoSelectRandomSeed seed = SentinelValueForAutoSelectRandomSeed
@ -37,6 +38,7 @@ def gaussian(output_rank=SentinelValueForInferParamInitRank, filter_rank=Sentine
Returns: Returns:
initializer for :class:`cntk.variables.Parameter` initializer for :class:`cntk.variables.Parameter`
initialized to Gaussian distribution with mean `0` and standard deviation `scale*0.2/sqrt(fanIn)`.
''' '''
if seed is None: if seed is None:
seed = SentinelValueForAutoSelectRandomSeed seed = SentinelValueForAutoSelectRandomSeed
@ -55,6 +57,7 @@ def xavier(output_rank=SentinelValueForInferParamInitRank, filter_rank=SentinelV
Returns: Returns:
initializer for :class:`cntk.variables.Parameter` initializer for :class:`cntk.variables.Parameter`
initialized to Gaussian distribution with mean `0` and standard deviation `scale*sqrt(3.0/fanIn)`
''' '''
if seed is None: if seed is None:
seed = SentinelValueForAutoSelectRandomSeed seed = SentinelValueForAutoSelectRandomSeed
@ -73,6 +76,7 @@ def glorot_uniform(output_rank=SentinelValueForInferParamInitRank, filter_rank=S
Returns: Returns:
initializer for :class:`cntk.variables.Parameter` initializer for :class:`cntk.variables.Parameter`
initialized to uniform distribution between `scale*sqrt(6.0/(fanIn+fanOut))*[-1,1]`
''' '''
if seed is None: if seed is None:
seed = SentinelValueForAutoSelectRandomSeed seed = SentinelValueForAutoSelectRandomSeed
@ -91,6 +95,7 @@ def glorot_normal(output_rank=SentinelValueForInferParamInitRank, filter_rank=Se
Returns: Returns:
initializer for :class:`cntk.variables.Parameter` initializer for :class:`cntk.variables.Parameter`
initialized to Gaussian distribution with mean `0` and standard deviation `scale*sqrt(2.0/(fanIn+fanOut))`
''' '''
if seed is None: if seed is None:
seed = SentinelValueForAutoSelectRandomSeed seed = SentinelValueForAutoSelectRandomSeed
@ -109,6 +114,7 @@ def he_uniform(output_rank=SentinelValueForInferParamInitRank, filter_rank=Senti
Returns: Returns:
initializer for :class:`cntk.variables.Parameter` initializer for :class:`cntk.variables.Parameter`
initialized to uniform distribution between `scale*sqrt(6.0/fanIn)*[-1,1]`
''' '''
if seed is None: if seed is None:
seed = SentinelValueForAutoSelectRandomSeed seed = SentinelValueForAutoSelectRandomSeed
@ -127,6 +133,7 @@ def he_normal(output_rank=SentinelValueForInferParamInitRank, filter_rank=Sentin
Returns: Returns:
initializer for :class:`cntk.variables.Parameter` initializer for :class:`cntk.variables.Parameter`
initialized to Gaussian distribution with mean `0` and standard deviation `scale*sqrt(2.0/fanIn)`
''' '''
if seed is None: if seed is None:
seed = SentinelValueForAutoSelectRandomSeed seed = SentinelValueForAutoSelectRandomSeed
@ -143,6 +150,7 @@ def bilinear(kernel_width, kernel_height):
Returns: Returns:
initializer for :class:`cntk.variables.Parameter` initializer for :class:`cntk.variables.Parameter`
useful for deconvolution layer
''' '''
return cntk_py.bilinear_initializer(kernel_width, kernel_height) return cntk_py.bilinear_initializer(kernel_width, kernel_height)

Просмотреть файл

@ -81,15 +81,17 @@ class MinibatchSource(cntk_py.MinibatchSource):
randomize (bool, default True): randomize images before every epoch randomize (bool, default True): randomize images before every epoch
epoch_size (int): epoch size epoch_size (int): epoch size
distributed_after (int): sample count after which minibatch source becomes distributed distributed_after (int): sample count after which minibatch source becomes distributed
multithreaded_deserializer (bool, default None): whether to use a multi-threaded deserializer; the value is forwarded unchanged to :class:`ReaderConfig`
''' '''
def __init__(self, deserializers=None, randomize=True, epoch_size=INFINITELY_REPEAT, distributed_after=INFINITE_SAMPLES): def __init__(self, deserializers=None, randomize=True, epoch_size=INFINITELY_REPEAT, distributed_after=INFINITE_SAMPLES, multithreaded_deserializer=None):
if not isinstance(deserializers, (list,tuple)): if not isinstance(deserializers, (list,tuple)):
deserializers = [deserializers] # allow passing a single item or a list deserializers = [deserializers] # allow passing a single item or a list
reader_config = ReaderConfig( reader_config = ReaderConfig(
deserializers=deserializers, deserializers=deserializers,
randomize=randomize, randomize=randomize,
epoch_size=epoch_size, epoch_size=epoch_size,
distributed_after=distributed_after) distributed_after=distributed_after,
multithreaded_deserializer=multithreaded_deserializer)
source = minibatch_source(reader_config) source = minibatch_source(reader_config)
# transplant into this class instance # transplant into this class instance
self.__dict__ = source.__dict__ self.__dict__ = source.__dict__
@ -256,8 +258,9 @@ class ReaderConfig(dict):
randomize (bool, default True): randomize images before every epoch randomize (bool, default True): randomize images before every epoch
epoch_size (int): epoch size epoch_size (int): epoch size
distributed_after (int): sample count after which reader becomes distributed distributed_after (int): sample count after which reader becomes distributed
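multithreaded_deserializer (bool, default None): whether to use a multi-threaded deserializer; when left as `None`, the `multiThreadedDeserialization` entry is not written to the configuration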
''' '''
def __init__(self, deserializers=None, randomize=True, epoch_size=INFINITELY_REPEAT, distributed_after=INFINITE_SAMPLES): def __init__(self, deserializers=None, randomize=True, epoch_size=INFINITELY_REPEAT, distributed_after=INFINITE_SAMPLES, multithreaded_deserializer=None):
self['epochSize'] = cntk_py.SizeTWrapper(epoch_size) # force to store in size_t self['epochSize'] = cntk_py.SizeTWrapper(epoch_size) # force to store in size_t
if not isinstance(deserializers, (list, tuple)): if not isinstance(deserializers, (list, tuple)):
@ -265,6 +268,8 @@ class ReaderConfig(dict):
self['deserializers'] = self.deserializers = deserializers or [] self['deserializers'] = self.deserializers = deserializers or []
self['randomize'] = randomize self['randomize'] = randomize
self['distributedAfterSampleCount'] = cntk_py.SizeTWrapper(distributed_after) self['distributedAfterSampleCount'] = cntk_py.SizeTWrapper(distributed_after)
if multithreaded_deserializer != None:
self['multiThreadedDeserialization'] = multithreaded_deserializer
@typemap @typemap
def minibatch_source(self): def minibatch_source(self):

Просмотреть файл

@ -98,8 +98,8 @@ class Learner(cntk_py.Learner):
Returns: Returns:
`False` to indicate that learning has stopped for all of the parameters associated with this learner `False` to indicate that learning has stopped for all of the parameters associated with this learner
''' '''
from .utils import create_NDArrayView_from_NumPy from .utils import _create_NDArrayView_from_NumPy
var_nd_map = { var:create_NDArrayView_from_NumPy(val) for var, val in var_nd_map = { var: _create_NDArrayView_from_NumPy(val) for var, val in
gradient_values.items() } gradient_values.items() }
return super(Learner, self).update(var_nd_map, training_sample_count) return super(Learner, self).update(var_nd_map, training_sample_count)

Просмотреть файл

@ -250,7 +250,7 @@ def convolution(convolution_map, operand, strides=(1,), sharing=[True],
>>> x = C.input_variable(img.shape) >>> x = C.input_variable(img.shape)
>>> filter = np.reshape(np.array([2, -1, -1, 2], dtype = np.float32), (1, 2, 2)) >>> filter = np.reshape(np.array([2, -1, -1, 2], dtype = np.float32), (1, 2, 2))
>>> kernel = C.constant(value = filter) >>> kernel = C.constant(value = filter)
>>> C.convolution(kernel, x, auto_padding = [False]).eval({x: [img]}) >>> C.convolution(kernel, x, auto_padding = [False]).eval({x: [img]}) # doctest: +SKIP
array([[[[[ 6., 8., 10., 12.], array([[[[[ 6., 8., 10., 12.],
[ 16., 18., 20., 22.], [ 16., 18., 20., 22.],
[ 26., 28., 30., 32.], [ 26., 28., 30., 32.],

Просмотреть файл

@ -273,7 +273,9 @@ class Function(cntk_py.Function):
Returns: Returns:
dict: mapping of ``variables`` to NumPy arrays dict: mapping of ``variables`` to NumPy arrays
''' '''
root_gradients = sanitize_var_map(self.outputs, root_gradients) device = state.device()
root_gradients = sanitize_var_map(self.outputs, root_gradients,
None, device)
var_gradients = dict((var, None) for var in variables) var_gradients = dict((var, None) for var in variables)

Просмотреть файл

@ -13,7 +13,7 @@ from __future__ import division
import numpy as np import numpy as np
import pytest import pytest
from .ops_test_utils import unittest_helper, _test_unary_op, _test_binary_op, AA, I, precision, PRECISION_TO_TYPE, batch_dense_to_sparse, left_matrix_type, right_matrix_type from .ops_test_utils import unittest_helper, _test_unary_op, _test_binary_op, AA, I, precision, PRECISION_TO_TYPE, batch_dense_to_sparse, left_matrix_type, right_matrix_type
from ...utils import sanitize_dtype_cntk, ones_like, eval from ...utils import sanitize_dtype_cntk, _ones_like, eval
TENSOR_PAIRS = [ TENSOR_PAIRS = [
([30.], [10.]), ([30.], [10.]),
@ -74,8 +74,8 @@ def test_op_plus_var_sequences_input_input(left_batch, right_batch, device_id, p
for i in range(len(left_batch))] for i in range(len(left_batch))]
expected_backward = { expected_backward = {
'left': ones_like(left_batch, PRECISION_TO_TYPE[precision]), 'left': _ones_like(left_batch, PRECISION_TO_TYPE[precision]),
'right': ones_like(right_batch, PRECISION_TO_TYPE[precision]) 'right': _ones_like(right_batch, PRECISION_TO_TYPE[precision])
} }
left_value = [AA(sample, dtype=PRECISION_TO_TYPE[precision]) left_value = [AA(sample, dtype=PRECISION_TO_TYPE[precision])

Просмотреть файл

@ -53,18 +53,11 @@ def cntk_device(device_id):
return gpu(device_id) return gpu(device_id)
def is_string(value): def _dense_to_str(data):
if sys.version_info.major < 3:
return isinstance(value, basestring)
return isinstance(value, str)
def dense_to_str(data):
return ' '.join(data.ravel(order='C').astype(np.str)) return ' '.join(data.ravel(order='C').astype(np.str))
def sparse_to_str(data): def _sparse_to_str(data):
return ' '.join('%s:%s' % (k, v) for k, v in sorted(data.items())) return ' '.join('%s:%s' % (k, v) for k, v in sorted(data.items()))
@ -96,12 +89,12 @@ def tensors_to_text_format(sample_idx, alias_tensor_map):
# for this alias there no more sequence elements # for this alias there no more sequence elements
continue continue
if is_tensor(tensor): if _is_tensor(tensor):
if not isinstance(tensor, np.ndarray): if not isinstance(tensor, np.ndarray):
tensor = np.asarray(tensor) tensor = np.asarray(tensor)
to_str = dense_to_str to_str = _dense_to_str
elif isinstance(tensor, list) and isinstance(tensor[0], dict): elif isinstance(tensor, list) and isinstance(tensor[0], dict):
to_str = sparse_to_str to_str = _sparse_to_str
else: else:
raise ValueError( raise ValueError(
'expected a tensor (dense) or list of dicts (sparse), but got "%s"' % type(tensor)) 'expected a tensor (dense) or list of dicts (sparse), but got "%s"' % type(tensor))
@ -113,7 +106,7 @@ def tensors_to_text_format(sample_idx, alias_tensor_map):
return '\n'.join(lines) return '\n'.join(lines)
def is_tensor(data): def _is_tensor(data):
''' '''
Checks whether the data is a tensor, i.e. whether it is a NumPy array or a Checks whether the data is a tensor, i.e. whether it is a NumPy array or a
list of NumPy arrays. list of NumPy arrays.
@ -179,7 +172,7 @@ def one_hot(batch, num_classes, dtype=None, device=None):
value = cntk_py.Value.create_one_hot_double(num_classes, batch, device, False) value = cntk_py.Value.create_one_hot_double(num_classes, batch, device, False)
return value return value
def has_seq_dim(var, data): def _has_seq_dim(var, data):
''' '''
Checks whether the data has a sequence dimension or not.
@ -274,7 +267,7 @@ def sanitize_shape(shape):
def sanitize_input(arg, fallback_dtype=np.float32, reshape=None): def sanitize_input(arg, fallback_dtype=np.float32, reshape=None):
""" """
Convert to :class:`cntk.ops.variables.Variable` so that it can be passed as Variable to the Convert to :class:`~cntk.ops.variables.Variable` so that it can be passed as Variable to the
CNTK operators. CNTK operators.
* If ``arg`` is a NumPy array and its type is neither `np.float32` nor `np.float64`, it sets it to `np.float32`. * If ``arg`` is a NumPy array and its type is neither `np.float32` nor `np.float64`, it sets it to `np.float32`.
@ -329,8 +322,7 @@ def get_data_type(*args):
inputs. Placeholders are ignored in the type determination. inputs. Placeholders are ignored in the type determination.
Args: Args:
args (number, list, NumPy array, :class:`cntk.ops.variables.Variable`, args (number, list, NumPy array, :class:`cntk.ops.variables.Variable`, or :class:`cntk.ops.functions.Function`): input
or :class:`cntk.ops.functions.Function`): input
Returns: Returns:
np.float32, np.float64, or None np.float32, np.float64, or None
""" """
@ -410,9 +402,10 @@ def _pad_dense_to_max_len(var, batch, max_seq_len):
Z = np.zeros((len(batch), max_seq_len) + Z = np.zeros((len(batch), max_seq_len) +
(data_point.shape), dtype=data_point.dtype) (data_point.shape), dtype=data_point.dtype)
for idx, seq in enumerate(batch): for idx, seq in enumerate(batch):
if seq[0].shape != data_point.shape: elem_shape = seq[0].shape if hasattr(seq, 'shape') else ()
if elem_shape != data_point.shape:
raise ValueError('shape mismatch: expected %s but got %s' raise ValueError('shape mismatch: expected %s but got %s'
% (str(data_point.shape), str(seq[0].shape))) % (str(data_point.shape), str(elem_shape)))
Z[idx, :len(seq)] += seq Z[idx, :len(seq)] += seq
return Z return Z
@ -443,6 +436,11 @@ def _pad_sparse_seq_to_max_len(batch, max_seq_len):
return Z return Z
def _is_dense(batch): def _is_dense(batch):
if isinstance(batch, np.ndarray):
return True
elif sparse.issparse(batch):
return False
is_dense = True is_dense = True
b = batch b = batch
while isinstance(b, list): while isinstance(b, list):
@ -452,6 +450,7 @@ def _is_dense(batch):
return True return True
@typemap
def sanitize_batch(var, batch, seq_starts=None, dtype=None, device=None): def sanitize_batch(var, batch, seq_starts=None, dtype=None, device=None):
''' '''
Convert to :class:`Value` with ``dtype``. If the samples in Convert to :class:`Value` with ``dtype``. If the samples in
@ -476,37 +475,31 @@ def sanitize_batch(var, batch, seq_starts=None, dtype=None, device=None):
if isinstance(batch, cntk_py.Value): if isinstance(batch, cntk_py.Value):
return batch return batch
if isinstance(batch, list):
if len(batch) == 0:
raise ValueError('batch is empty')
# We need to figure out whether the data has a sequence axis. Note that # We need to figure out whether the data has a sequence axis. Note that
# it is not enough to check whether the variable's dynamic axes include the # it is not enough to check whether the variable's dynamic axes include the
# sequence axis, because the sequence axis might be omitted in the data if # sequence axis, because the sequence axis might be omitted in the data if
# it is not needed (CNTK core would then take care of this). # it is not needed (CNTK core would then take care of this).
batch_has_seq = has_seq_dim(var, batch) batch_has_seq = _has_seq_dim(var, batch)
if isinstance(batch, list): is_dense = _is_dense(batch)
is_dense = _is_dense(batch)
if is_dense: if batch_has_seq or seq_starts:
if isinstance(batch[0], list):
seq_lens = [len(seq) for seq in batch] seq_lens = [len(seq) for seq in batch]
# If the input is a list of lists of dense values, all of the same
# length, then we convert it into a NumPy array without requiring a
# mask.
if len(set(seq_lens)) == 1:
batch = np.asarray(batch)
else: else:
if isinstance(batch[0], list): seq_lens = [seq.shape[0] for seq in batch]
seq_lens = [len(seq) for seq in batch]
else:
seq_lens = [seq.shape[0] for seq in batch]
if batch_has_seq: max_seq_len = max(seq_lens)
max_seq_len = max(seq_lens)
else: # If the input is a list of lists of dense values, all of the same
is_dense = isinstance(batch, np.ndarray) # length, we convert it into a NumPy array.
# It is a sparse or dense NumPy array having all sequences being the if is_dense and len(set(seq_lens)) == 1:
# same length, so we just calculate the sequence lengths batch_has_seq = False
if batch_has_seq: batch = np.asarray(batch, dtype=var.dtype)
max_seq_len = batch.shape[1]
if dtype is None: if dtype is None:
dtype = get_data_type(var) dtype = get_data_type(var)
@ -514,25 +507,8 @@ def sanitize_batch(var, batch, seq_starts=None, dtype=None, device=None):
if device is None: if device is None:
device = use_default_device() device = use_default_device()
if isinstance(batch, np.ndarray):
if np.issubdtype(batch.dtype, int):
batch = batch.astype(var.dtype)
elif batch.dtype not in (np.float32, np.float64):
raise ValueError('only float32 and float64 are supported')
ndav = create_NDArrayView_from_NumPy(batch, device)
return Value(data=ndav)
if isinstance(batch, list):
if len(batch) == 0:
raise ValueError('batch is empty')
if not batch_has_seq and seq_starts is not None:
raise ValueError('specification of individual sequence begins does not'
' make sense when not using the sequence axis')
# batch is now either a dense input that requires a mask, or it is sparse # batch is now either a dense input that requires a mask, or it is sparse
if batch_has_seq: if batch_has_seq or seq_starts:
mask = cntk_py.NDMask((len(batch), max_seq_len), mask = cntk_py.NDMask((len(batch), max_seq_len),
device or use_default_device()) device or use_default_device())
for idx, seq_len in enumerate(seq_lens): for idx, seq_len in enumerate(seq_lens):
@ -550,8 +526,20 @@ def sanitize_batch(var, batch, seq_starts=None, dtype=None, device=None):
mask = None mask = None
if is_dense: if is_dense:
batch = _pad_dense_to_max_len(var, batch, max_seq_len) if batch_has_seq:
ndav = create_NDArrayView_from_NumPy(batch.astype(dtype), device) batch = _pad_dense_to_max_len(var, batch, max_seq_len)
if not isinstance(batch, np.ndarray):
batch = np.asarray(batch)
ndav = _create_NDArrayView_from_NumPy(batch.astype(dtype), device)
return Value(data=ndav, mask=mask)
if isinstance(batch, np.ndarray):
if np.issubdtype(batch.dtype, int):
batch = batch.astype(var.dtype)
elif batch.dtype not in (np.float32, np.float64):
raise ValueError('only float32 and float64 are supported')
ndav = _create_NDArrayView_from_NumPy(batch.astype(dtype), device)
return Value(data=ndav, mask=mask) return Value(data=ndav, mask=mask)
# There are three possibilities of providing sparse batches: # There are three possibilities of providing sparse batches:
@ -638,7 +626,7 @@ def sanitize_value(shape, value, dtype, device):
if shape is None: if shape is None:
raise ValueError('you need to specify at least shape or value') raise ValueError('you need to specify at least shape or value')
cntk_dtype = sanitize_dtype_cntk(dtype) cntk_dtype = sanitize_dtype_cntk(dtype)
ndav = create_NDArrayView(shape, cntk_dtype, device) ndav = _create_NDArrayView(shape, cntk_dtype, device)
else: else:
np_dtype = sanitize_dtype_numpy(dtype) np_dtype = sanitize_dtype_numpy(dtype)
if not isinstance(value, np.ndarray) or value.dtype != np_dtype: if not isinstance(value, np.ndarray) or value.dtype != np_dtype:
@ -647,7 +635,7 @@ def sanitize_value(shape, value, dtype, device):
else: else:
value = np.asarray(value, dtype=np_dtype) value = np.asarray(value, dtype=np_dtype)
ndav = create_NDArrayView_from_NumPy(value, device) ndav = _create_NDArrayView_from_NumPy(value, device)
return ndav return ndav
@ -772,7 +760,7 @@ def sanitize_var_map(op_arguments, arguments, precision=None,
return var_map return var_map
def ones_like(batch, precision): def _ones_like(batch, precision):
''' '''
Returns a new batch, which has the same format as ``batch`` but all values Returns a new batch, which has the same format as ``batch`` but all values
set to 1. set to 1.
@ -783,7 +771,7 @@ def ones_like(batch, precision):
return [np.ones_like(sample, dtype=sanitize_precision(precision)) for sample in batch] return [np.ones_like(sample, dtype=sanitize_precision(precision)) for sample in batch]
def create_NDArrayView(shape, data_type=cntk_py.DataType_Float, device=None): def _create_NDArrayView(shape, data_type=cntk_py.DataType_Float, device=None):
shape = sanitize_shape(shape) shape = sanitize_shape(shape)
if device is None: if device is None:
device = use_default_device() device = use_default_device()
@ -793,7 +781,7 @@ def create_NDArrayView(shape, data_type=cntk_py.DataType_Float, device=None):
return view return view
def create_NDArrayView_from_NumPy(nd, device=None): def _create_NDArrayView_from_NumPy(nd, device=None):
if device is None: if device is None:
device = use_default_device() device = use_default_device()
@ -819,11 +807,11 @@ class Value(cntk_py.Value):
device = use_default_device() device = use_default_device()
if shape and dtype: if shape and dtype:
ndav = create_NDArrayView(shape, dtype, device) ndav = _create_NDArrayView(shape, dtype, device)
elif data: elif data:
if isinstance(data, np.ndarray): if isinstance(data, np.ndarray):
ndav = create_NDArrayView_from_NumPy(data, device) ndav = _create_NDArrayView_from_NumPy(data, device)
else: else:
ndav = data ndav = data
@ -841,6 +829,27 @@ class Value(cntk_py.Value):
''' '''
return super(Value, self).shape().dimensions() return super(Value, self).shape().dimensions()
@property
def mask(self):
    '''
    The mask matrix of this value. Each row denotes a sequence with its
    elements describing the mask of the element:

     * 2: beginning of sequence (e.g. an LSTM would be reset)
     * 1: valid element
     * 0: invalid element

    Example:
        A mask of ``[[2, 1, 1], [1, 1, 0]]`` describes a batch of two
        sequences. The first has three elements, of which the first element
        signals the beginning of a sequence. The second sequence has two
        valid elements (its third entry is ``0``, i.e. invalid); because it
        starts with ``1`` rather than ``2``, both of its elements are
        continuations of the first sequence.

    Returns:
        the mask of the underlying value, converted with ``np.asarray``
    '''
    return np.asarray(super(Value, self).mask())
def __len__(self): def __len__(self):
''' '''
Number of samples in this value object. Number of samples in this value object.
@ -939,7 +948,7 @@ def ensure_dev(ndav, dev):
if ndav.device() != dev: if ndav.device() != dev:
ndav_on_target = create_NDArrayView( ndav_on_target = _create_NDArrayView(
ndav.shape().dimensions(), data_type=ndav.get_data_type(), dev=dev) ndav.shape().dimensions(), data_type=ndav.get_data_type(), dev=dev)
ndav_on_target.copy_from(ndav) ndav_on_target.copy_from(ndav)
ndav = ndav_on_target ndav = ndav_on_target
@ -953,7 +962,7 @@ def value_to_seq(value):
entries removed. entries removed.
Args: Args:
value (`Value`): Value as it is returned by Swig value (:class:`Value`): Value as it is returned by Swig
Returns: Returns:
a list of NumPy arrays a list of NumPy arrays
@ -1013,7 +1022,7 @@ def eval(op, arguments=None, precision=None, device=None, backward_pass=False, e
if backward_pass: if backward_pass:
if expected_backward is None: if expected_backward is None:
expected_backward = arguments expected_backward = arguments
root_gradients = {v: ones_like(o, precision) for v, o in root_gradients = {v: _ones_like(o, precision) for v, o in
forward_output.items()} forward_output.items()}
backward_output = op.backward(state, root_gradients, expected_backward) backward_output = op.backward(state, root_gradients, expected_backward)

Просмотреть файл

@ -13,6 +13,7 @@ from cntk.device import default
from cntk.tests.test_utils import precision, PRECISION_TO_TYPE from cntk.tests.test_utils import precision, PRECISION_TO_TYPE
from cntk.ops import * from cntk.ops import *
from cntk.utils import * from cntk.utils import *
from cntk.utils import _has_seq_dim, _is_tensor
# Keeping things short # Keeping things short
AA = np.asarray AA = np.asarray
@ -58,7 +59,7 @@ def test_tensor_conversion_dense(idx, alias_tensor_map, expected):
([AA([1, 2]), AA([])], False), ([AA([1, 2]), AA([])], False),
]) ])
def test_is_tensor(data, expected): def test_is_tensor(data, expected):
assert is_tensor(data) == expected assert _is_tensor(data) == expected
def test_sanitize_dtype_numpy(): def test_sanitize_dtype_numpy():
@ -123,16 +124,14 @@ def test_get_data_type():
# exception handling # exception handling
((2,2), AA([[1,1],[2,2]]), ValueError), ((2,2), AA([[1,1],[2,2]]), ValueError),
(1, [[[1,2]]], ValueError), (1, [[[1,2]]], ValueError),
#(1, [AA([[40], [50]])], ValueError),
((1,), [[[40], [50]]], ValueError),
]) ])
def test_has_seq_dim_dense(shape, batch, expected): def test_has_seq_dim_dense(shape, batch, expected):
i1 = input_variable(shape) i1 = input_variable(shape)
if expected in [False, True]: if expected in [False, True]:
assert has_seq_dim(i1, batch) == expected assert _has_seq_dim(i1, batch) == expected
else: else:
with pytest.raises(expected): with pytest.raises(expected):
has_seq_dim(i1, batch) _has_seq_dim(i1, batch)
@pytest.mark.parametrize("shape, batch, expected", [ @pytest.mark.parametrize("shape, batch, expected", [
((1,2), [csr([1,0]), csr([2,3]), csr([5,6])], False), ((1,2), [csr([1,0]), csr([2,3]), csr([5,6])], False),
@ -141,10 +140,10 @@ def test_has_seq_dim_dense(shape, batch, expected):
def test_has_seq_dim_sparse(shape, batch, expected): def test_has_seq_dim_sparse(shape, batch, expected):
i1 = input_variable(shape, is_sparse=True) i1 = input_variable(shape, is_sparse=True)
if expected in [False, True]: if expected in [False, True]:
assert has_seq_dim(i1, batch) == expected assert _has_seq_dim(i1, batch) == expected
else: else:
with pytest.raises(expected): with pytest.raises(expected):
has_seq_dim(i1, batch) _has_seq_dim(i1, batch)
def test_sanitize_batch_sparse(): def test_sanitize_batch_sparse():
batch = [[csr([1,0,2]), csr([2,3,0])], batch = [[csr([1,0,2]), csr([2,3,0])],
@ -160,4 +159,20 @@ def test_sanitize_batch_sparse():
# 2 sequences, with max seq len of 2 and dimension 3 # 2 sequences, with max seq len of 2 and dimension 3
assert b.shape == (2,2,3) assert b.shape == (2,2,3)
@pytest.mark.parametrize("batch, seq_starts, expected_mask", [
    # Two sequences of different length (3 and 1): the shorter row is
    # expected to be filled up with 0 (invalid) entries.
    ([[5, 6, 7], [8]], [True, False], [[2, 1, 1], [1, 0, 0]]),
    # Two sequences of equal length 1: only the first one is flagged as a
    # sequence start (2), the second one as a plain valid element (1).
    ([[5], [8]], [True, False], [[2], [1]]),
])
def test_mask(batch, seq_starts, expected_mask):
    '''
    Checks that the ``mask`` of the value returned by ``sanitize_batch``
    equals ``expected_mask`` for the given ``batch`` and ``seq_starts``.
    '''
    sanitized = sanitize_batch(input_variable((1,)), batch, seq_starts)
    assert np.allclose(sanitized.mask, expected_mask)