diff --git a/Examples/Image/Classification/AlexNet/BrainScript/AlexNet_ImageNet.cntk b/Examples/Image/Classification/AlexNet/BrainScript/AlexNet_ImageNet.cntk index cddc0b438..7ce5f9a69 100644 --- a/Examples/Image/Classification/AlexNet/BrainScript/AlexNet_ImageNet.cntk +++ b/Examples/Image/Classification/AlexNet/BrainScript/AlexNet_ImageNet.cntk @@ -124,9 +124,9 @@ Train = { transforms = ( { type = "Crop" - cropType = "random" - cropRatio = 0.88671875 - jitterType = "uniRatio" + cropType = "RandomSide" + sideRatio = 0.88671875 + jitterType = "UniRatio" }:{ type = "Scale" width = 227 @@ -167,7 +167,7 @@ Test = { { type = "Crop" cropType = "center" - cropRatio = 0.88671875 + sideRatio = 0.88671875 }:{ type = "Scale" width = 227 diff --git a/Examples/Image/Classification/ConvNet/BrainScript/ConvNetLRN_CIFAR10_DataAug.cntk b/Examples/Image/Classification/ConvNet/BrainScript/ConvNetLRN_CIFAR10_DataAug.cntk index 4c0cd2471..bdab5619c 100644 --- a/Examples/Image/Classification/ConvNet/BrainScript/ConvNetLRN_CIFAR10_DataAug.cntk +++ b/Examples/Image/Classification/ConvNet/BrainScript/ConvNetLRN_CIFAR10_DataAug.cntk @@ -94,7 +94,7 @@ TrainConvNet = { file = "$dataDir$/train_map.txt" input = { features = { transforms = ( - { type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } : + { type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } : { type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } : { type = "Mean" ; meanFile = "$dataDir$/CIFAR-10_mean.xml" } : { type = "Transpose" } diff --git a/Examples/Image/Classification/ConvNet/BrainScript/ConvNet_CIFAR10_DataAug.cntk b/Examples/Image/Classification/ConvNet/BrainScript/ConvNet_CIFAR10_DataAug.cntk index 46d962b1b..2eba1f2af 100644 --- a/Examples/Image/Classification/ConvNet/BrainScript/ConvNet_CIFAR10_DataAug.cntk +++ b/Examples/Image/Classification/ConvNet/BrainScript/ConvNet_CIFAR10_DataAug.cntk @@ -72,7 +72,7 @@ TrainConvNet = 
{ file = "$dataDir$/train_map.txt" input = { features = { transforms = ( - { type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } : + { type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } : { type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } : { type = "Mean" ; meanFile = "$dataDir$/CIFAR-10_mean.xml" } : { type = "Transpose" } diff --git a/Examples/Image/Classification/ConvNet/Python/ConvNet_CIFAR10_DataAug.py b/Examples/Image/Classification/ConvNet/Python/ConvNet_CIFAR10_DataAug.py index 2855f525b..dc8ae56e9 100644 --- a/Examples/Image/Classification/ConvNet/Python/ConvNet_CIFAR10_DataAug.py +++ b/Examples/Image/Classification/ConvNet/Python/ConvNet_CIFAR10_DataAug.py @@ -32,7 +32,7 @@ def create_reader(map_file, mean_file, train): transforms = [] if train: transforms += [ - cntk.io.ImageDeserializer.crop(crop_type='Random', ratio=0.8, jitter_type='uniRatio') # train uses jitter + cntk.io.ImageDeserializer.crop(crop_type='randomside', side_ratio=0.8, jitter_type='uniratio') # train uses jitter ] transforms += [ cntk.io.ImageDeserializer.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'), diff --git a/Examples/Image/Classification/ConvNet/Python/ConvNet_CIFAR10_DataAug_Distributed.py b/Examples/Image/Classification/ConvNet/Python/ConvNet_CIFAR10_DataAug_Distributed.py index 7255aaf1e..300c56873 100644 --- a/Examples/Image/Classification/ConvNet/Python/ConvNet_CIFAR10_DataAug_Distributed.py +++ b/Examples/Image/Classification/ConvNet/Python/ConvNet_CIFAR10_DataAug_Distributed.py @@ -34,7 +34,7 @@ def create_reader(map_file, mean_file, train, total_number_of_samples, distribut transforms = [] if train: transforms += [ - cntk.io.ImageDeserializer.crop(crop_type='Random', ratio=0.8, jitter_type='uniRatio') # train uses jitter + cntk.io.ImageDeserializer.crop(crop_type='randomside', side_ratio=0.8, jitter_type='uniratio') # 
train uses jitter ] transforms += [ diff --git a/Examples/Image/Classification/GoogLeNet/BrainScript/InceptionV3.cntk b/Examples/Image/Classification/GoogLeNet/BrainScript/InceptionV3.cntk index 594b88547..0d10e5e1b 100644 --- a/Examples/Image/Classification/GoogLeNet/BrainScript/InceptionV3.cntk +++ b/Examples/Image/Classification/GoogLeNet/BrainScript/InceptionV3.cntk @@ -98,7 +98,7 @@ Train = { file = "$DataDir$/val_map.txt" input = { features = { transforms = ( - { type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } : + { type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } : { type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; interpolations = "linear" } : { type = "Mean" ; meanFile = "$ConfigDir$/ImageNet1K_mean.xml" } : { type = "Transpose" } diff --git a/Examples/Image/Classification/ResNet/BrainScript/ResNet101_ImageNet1K.cntk b/Examples/Image/Classification/ResNet/BrainScript/ResNet101_ImageNet1K.cntk index ae02d5bfa..626df070c 100644 --- a/Examples/Image/Classification/ResNet/BrainScript/ResNet101_ImageNet1K.cntk +++ b/Examples/Image/Classification/ResNet/BrainScript/ResNet101_ImageNet1K.cntk @@ -111,9 +111,9 @@ TrainNetwork = { width = 224 height = 224 channels = 3 - cropType = "Random" + cropType = "RandomSide" jitterType = "UniRatio" - cropRatio = 0.46666:0.875 + sideRatio = 0.46666:0.875 hflip = true meanFile = "$meanDir$/ImageNet1K_mean.xml" } @@ -131,7 +131,7 @@ TrainNetwork = { height = 224 channels = 3 cropType = "Center" - cropRatio = 0.875 + sideRatio = 0.875 meanFile = "$meanDir$/ImageNet1K_mean.xml" } labels = { @@ -156,9 +156,9 @@ BNStatistics = { width = 224 height = 224 channels = 3 - cropType = "Random" + cropType = "RandomSide" hflip = true - cropRatio = 0.46666:0.875 + sideRatio = 0.46666:0.875 jitterType = "UniRatio" meanFile = "$meanDir$/ImageNet1K_mean.xml" } @@ -183,7 +183,7 @@ Eval = { height = 224 channels = 3 cropType = "Center" - cropRatio 
= 0.875 + sideRatio = 0.875 meanFile = "$meanDir$/ImageNet1K_mean.xml" } labels = { diff --git a/Examples/Image/Classification/ResNet/BrainScript/ResNet110_CIFAR10.cntk b/Examples/Image/Classification/ResNet/BrainScript/ResNet110_CIFAR10.cntk index f7684f664..ca0e09a84 100644 --- a/Examples/Image/Classification/ResNet/BrainScript/ResNet110_CIFAR10.cntk +++ b/Examples/Image/Classification/ResNet/BrainScript/ResNet110_CIFAR10.cntk @@ -83,7 +83,7 @@ TrainConvNet = { file = "$dataDir$/train_map.txt" input = { features = { transforms = ( - { type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } : + { type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } : { type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } : { type = "Mean" ; meanFile = "$dataDir$/CIFAR-10_mean.xml" } : { type = "Transpose" } diff --git a/Examples/Image/Classification/ResNet/BrainScript/ResNet152_ImageNet1K.cntk b/Examples/Image/Classification/ResNet/BrainScript/ResNet152_ImageNet1K.cntk index fe4bfe9d8..d92d7ec9f 100644 --- a/Examples/Image/Classification/ResNet/BrainScript/ResNet152_ImageNet1K.cntk +++ b/Examples/Image/Classification/ResNet/BrainScript/ResNet152_ImageNet1K.cntk @@ -111,9 +111,9 @@ TrainNetwork = { width = 224 height = 224 channels = 3 - cropType = "Random" + cropType = "RandomSide" jitterType = "UniRatio" - cropRatio = 0.46666:0.875 + sideRatio = 0.46666:0.875 hflip = true meanFile = "$meanDir$/ImageNet1K_mean.xml" } @@ -131,7 +131,7 @@ TrainNetwork = { height = 224 channels = 3 cropType = "Center" - cropRatio = 0.875 + sideRatio = 0.875 meanFile = "$meanDir$/ImageNet1K_mean.xml" } labels = { @@ -156,9 +156,9 @@ BNStatistics = { width = 224 height = 224 channels = 3 - cropType = "Random" + cropType = "RandomSide" hflip = true - cropRatio = 0.46666:0.875 + sideRatio = 0.46666:0.875 jitterType = "UniRatio" meanFile = "$meanDir$/ImageNet1K_mean.xml" } @@ -183,7 +183,7 @@ Eval = { height = 224 
channels = 3 cropType = "Center" - cropRatio = 0.875 + sideRatio = 0.875 meanFile = "$meanDir$/ImageNet1K_mean.xml" } labels = { diff --git a/Examples/Image/Classification/ResNet/BrainScript/ResNet20_CIFAR10.cntk b/Examples/Image/Classification/ResNet/BrainScript/ResNet20_CIFAR10.cntk index fe980dad1..c3b638e21 100644 --- a/Examples/Image/Classification/ResNet/BrainScript/ResNet20_CIFAR10.cntk +++ b/Examples/Image/Classification/ResNet/BrainScript/ResNet20_CIFAR10.cntk @@ -83,7 +83,7 @@ TrainConvNet = { file = "$dataDir$/train_map.txt" input = { features = { transforms = ( - { type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } : + { type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } : { type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } : { type = "Mean" ; meanFile = "$dataDir$/CIFAR-10_mean.xml" } : { type = "Transpose" } diff --git a/Examples/Image/Classification/ResNet/BrainScript/ResNet50_ImageNet1K.cntk b/Examples/Image/Classification/ResNet/BrainScript/ResNet50_ImageNet1K.cntk index e5799e99c..ccf3e6467 100644 --- a/Examples/Image/Classification/ResNet/BrainScript/ResNet50_ImageNet1K.cntk +++ b/Examples/Image/Classification/ResNet/BrainScript/ResNet50_ImageNet1K.cntk @@ -110,9 +110,9 @@ TrainNetwork = { width = 224 height = 224 channels = 3 - cropType = "Random" + cropType = "RandomSide" jitterType = "UniRatio" - cropRatio = 0.46666:0.875 + sideRatio = 0.46666:0.875 hflip = true meanFile = "$meanDir$/ImageNet1K_mean.xml" } @@ -130,7 +130,7 @@ TrainNetwork = { height = 224 channels = 3 cropType = "Center" - cropRatio = 0.875 + sideRatio = 0.875 meanFile = "$meanDir$/ImageNet1K_mean.xml" } labels = { @@ -156,9 +156,9 @@ BNStatistics = { width = 224 height = 224 channels = 3 - cropType = "Random" + cropType = "RandomSide" hflip = true - cropRatio = 0.46666:0.875 + sideRatio = 0.46666:0.875 jitterType = "UniRatio" meanFile = "$meanDir$/ImageNet1K_mean.xml" } @@ 
-183,7 +183,7 @@ Eval = { height = 224 channels = 3 cropType = "Center" - cropRatio = 0.875 + sideRatio = 0.875 meanFile = "$meanDir$/ImageNet1K_mean.xml" } labels = { diff --git a/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10.py b/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10.py index 8bb882aec..68dde5059 100644 --- a/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10.py +++ b/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10.py @@ -40,7 +40,7 @@ def create_reader(map_file, mean_file, train): transforms = [] if train: transforms += [ - ImageDeserializer.crop(crop_type='Random', ratio=0.8, jitter_type='uniRatio') # train uses jitter + ImageDeserializer.crop(crop_type='randomside', side_ratio=0.8, jitter_type='uniratio') # train uses jitter ] transforms += [ ImageDeserializer.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'), diff --git a/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10_Distributed.py b/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10_Distributed.py index 5b31ee19d..c6c19aed5 100644 --- a/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10_Distributed.py +++ b/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10_Distributed.py @@ -40,7 +40,7 @@ def create_reader(map_file, mean_file, train, total_data_size, distributed_after transforms = [] if train: transforms += [ - ImageDeserializer.crop(crop_type='Random', ratio=0.8, jitter_type='uniRatio') # train uses jitter + ImageDeserializer.crop(crop_type='randomside', side_ratio=0.8, jitter_type='uniratio') # train uses jitter ] transforms += [ ImageDeserializer.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'), diff --git a/Examples/Image/Classification/VGG/VGG_A_ndl_deprecated.cntk b/Examples/Image/Classification/VGG/VGG_A_ndl_deprecated.cntk index 3d64b78ba..e2775ba16 100644 --- 
a/Examples/Image/Classification/VGG/VGG_A_ndl_deprecated.cntk +++ b/Examples/Image/Classification/VGG/VGG_A_ndl_deprecated.cntk @@ -58,13 +58,13 @@ Train=[ channels=3 # Below are the optional parameters. # Possible values: Center, Random. Default: Center - cropType="Random" - # Horizontal random flip, will be enabled by default if cropType=Random + cropType="RandomSide" + # Horizontal random flip, will be enabled by default because cropType=RandomSide #hflip="true" - # Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1. - cropRatio=0.875 + # Crop scale side ratio. Examples: sideRatio=0.9, sideRatio=0.7:0.9. + sideRatio=0.875 # Crop scale ratio jitter type. - # Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio + # Possible values: None, UniRatio. Default: None jitterType="UniRatio" # Interpolation to use when scaling image to width x height size. # Possible values: nearest, linear, cubic, lanczos. Default: linear. diff --git a/Examples/Image/Classification/VGG/VGG_E_BN_ndl_deprecated.cntk b/Examples/Image/Classification/VGG/VGG_E_BN_ndl_deprecated.cntk index 78974454e..4f9a374d0 100644 --- a/Examples/Image/Classification/VGG/VGG_E_BN_ndl_deprecated.cntk +++ b/Examples/Image/Classification/VGG/VGG_E_BN_ndl_deprecated.cntk @@ -67,13 +67,13 @@ Train=[ channels=3 # Below are the optional parameters. # Possible values: Center, Random. Default: Center - cropType="Random" - # Horizontal random flip, will be enabled by default if cropType=Random + cropType="RandomSide" + # Horizontal random flip, will be enabled because cropType=RandomSide #hflip="true" - # Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1. - cropRatio=0.875 + # Crop scale side ratio. Examples: sideRatio=0.9, sideRatio=0.7:0.9. + sideRatio=0.875 # Crop scale ratio jitter type. - # Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio + # Possible values: None, UniRatio. 
Default: None jitterType="UniRatio" # Interpolation to use when scaling image to width x height size. # Possible values: nearest, linear, cubic, lanczos. Default: linear. diff --git a/Examples/Image/Classification/VGG/VGG_E_ndl_deprecated.cntk b/Examples/Image/Classification/VGG/VGG_E_ndl_deprecated.cntk index 06e79f54c..14ae0a9d2 100644 --- a/Examples/Image/Classification/VGG/VGG_E_ndl_deprecated.cntk +++ b/Examples/Image/Classification/VGG/VGG_E_ndl_deprecated.cntk @@ -67,13 +67,13 @@ Train=[ channels=3 # Below are the optional parameters. # Possible values: Center, Random. Default: Center - cropType="Random" - # Horizontal random flip, will be enabled by default if cropType=Random + cropType="RandomSide" + # Horizontal random flip, will be enabled because cropType=RandomSide #hflip="true" - # Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1. - cropRatio=0.875 + # Crop scale side ratio. Examples: sideRatio=0.9, sideRatio=0.7:0.9. + sideRatio=0.875 # Crop scale ratio jitter type. - # Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio + # Possible values: None, UniRatio. Default: None jitterType="UniRatio" # Interpolation to use when scaling image to width x height size. # Possible values: nearest, linear, cubic, lanczos. Default: linear. 
diff --git a/Source/Readers/ImageReader/ImageConfigHelper.cpp b/Source/Readers/ImageReader/ImageConfigHelper.cpp index 44d4b5506..9db79088a 100644 --- a/Source/Readers/ImageReader/ImageConfigHelper.cpp +++ b/Source/Readers/ImageReader/ImageConfigHelper.cpp @@ -126,9 +126,14 @@ CropType ImageConfigHelper::ParseCropType(const std::string &src) return CropType::Center; } - if (AreEqualIgnoreCase(src, "random")) + if (AreEqualIgnoreCase(src, "randomside")) { - return CropType::Random; + return CropType::RandomSide; + } + + if (AreEqualIgnoreCase(src, "randomarea")) + { + return CropType::RandomArea; } if (AreEqualIgnoreCase(src, "multiview10")) diff --git a/Source/Readers/ImageReader/ImageConfigHelper.h b/Source/Readers/ImageReader/ImageConfigHelper.h index db37fc07d..2433da2c9 100644 --- a/Source/Readers/ImageReader/ImageConfigHelper.h +++ b/Source/Readers/ImageReader/ImageConfigHelper.h @@ -14,9 +14,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { enum class CropType { - Center = 0, - Random = 1, - MultiView10 = 2 + Center = 0, // center crop with a given size + RandomSide = 1, // random scale resized with shorter side sampled from min and max (ResNet-style) + RandomArea = 2, // random scale resized with area size ratio between min and max (Inception-style) + MultiView10 = 3 // 10 view crop }; // A helper class for image specific parameters. 
diff --git a/Source/Readers/ImageReader/ImageTransformers.cpp b/Source/Readers/ImageReader/ImageTransformers.cpp index 8cb5e9e8e..ff69fec48 100644 --- a/Source/Readers/ImageReader/ImageTransformers.cpp +++ b/Source/Readers/ImageReader/ImageTransformers.cpp @@ -41,25 +41,60 @@ SequenceDataPtr ImageTransformerBase::Transform(SequenceDataPtr sequence) ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// CropTransformer::CropTransformer(const ConfigParameters& config) : ImageTransformerBase(config) { - floatargvector cropRatio = config(L"cropRatio", "1.0"); - m_cropRatioMin = cropRatio[0]; - m_cropRatioMax = cropRatio[1]; - - if (!(0 < m_cropRatioMin && m_cropRatioMin <= 1.0) || - !(0 < m_cropRatioMax && m_cropRatioMax <= 1.0) || - m_cropRatioMin > m_cropRatioMax) + intargvector cropSize = config(L"cropSize", "0"); + m_cropWidth = cropSize[0]; + m_cropHeight = cropSize[1]; + if (m_cropWidth < 0 || m_cropHeight < 0) { - RuntimeError("Invalid cropRatio value, must be > 0 and <= 1. cropMin must " - "<= cropMax"); + RuntimeError("Invalid cropSize value, must be >= 0"); + } + + m_useSideRatio = true; + floatargvector sideRatio = config(L"sideRatio", "0.0"); + m_sideRatioMin = sideRatio[0]; + m_sideRatioMax = sideRatio[1]; + if (m_sideRatioMin == 0.0 && m_sideRatioMax == 0.0) // taking default value means not specified + { + m_useSideRatio = false; + } + else if (!(m_sideRatioMin > 0 && m_sideRatioMax <= 1.0) || + m_sideRatioMin > m_sideRatioMax) + { + RuntimeError("Invalid sideRatio value, must be > 0 and <= 1. 
sideMin must <= sideMax"); + } + + m_useAreaRatio = true; + floatargvector areaRatio = config(L"areaRatio", "0.0"); + m_areaRatioMin = areaRatio[0]; + m_areaRatioMax = areaRatio[1]; + if (m_areaRatioMin == 0.0 && m_areaRatioMax == 0.0) // taking default value means not specified + { + m_useAreaRatio = false; + } + else if (!(m_areaRatioMin > 0 && m_areaRatioMax <= 1.0) || + m_areaRatioMin > m_areaRatioMax) + { + RuntimeError("Invalid areaRatio value, must be > 0 and <= 1. areaMin must <= areaMax"); + } + + if (m_useSideRatio && m_useAreaRatio) + RuntimeError("sideRatio and areaRatio cannot be specified simultaneously"); + + floatargvector aspectRatio = config(L"aspectRatio", "1.0"); + m_aspectRatioMin = aspectRatio[0]; + m_aspectRatioMax = aspectRatio[1]; + if (!(m_aspectRatioMin > 0 && m_aspectRatioMax <= 1.0) || + m_aspectRatioMin > m_aspectRatioMax) + { + RuntimeError("Invalid aspectRatio value, must be > 0 and <= 1. aspectMin must <= aspectMax"); } m_jitterType = ParseJitterType(config(L"jitterType", "")); - m_cropType = ImageConfigHelper::ParseCropType(config(L"cropType", "")); if (!config.ExistsCurrent(L"hflip")) { - m_hFlip = m_cropType == CropType::Random; + m_hFlip = (m_cropType == CropType::RandomSide || m_cropType == CropType::RandomArea); } else { @@ -71,47 +106,38 @@ CropTransformer::CropTransformer(const ConfigParameters& config) : ImageTransfor { m_hFlip = false; } - - m_aspectRatioRadius = config(L"aspectRatioRadius", ConfigParameters::Array(doubleargvector(vector{0.0}))); } void CropTransformer::StartEpoch(const EpochConfiguration &config) { - m_curAspectRatioRadius = m_aspectRatioRadius[config.m_epochIndex]; - if (!(0 <= m_curAspectRatioRadius && m_curAspectRatioRadius <= 1.0)) - InvalidArgument("aspectRatioRadius must be >= 0.0 and <= 1.0"); ImageTransformerBase::StartEpoch(config); } void CropTransformer::Apply(size_t id, cv::Mat &mat) { auto seed = GetSeed(); - auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique(seed); }); - - 
double ratio = 1; - switch (m_jitterType) - { - case RatioJitterType::None: - ratio = m_cropRatioMin; - break; - case RatioJitterType::UniRatio: - if (m_cropRatioMin == m_cropRatioMax) - { - ratio = m_cropRatioMin; - } - else - { - ratio = UniRealT(m_cropRatioMin, m_cropRatioMax)(*rng); - assert(m_cropRatioMin <= ratio && ratio < m_cropRatioMax); - } - break; - default: - RuntimeError("Jitter type currently not implemented."); - } - + auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique(seed); }); int viewIndex = m_cropType == CropType::MultiView10 ? (int)(id % 10) : 0; - mat = mat(GetCropRect(m_cropType, viewIndex, mat.rows, mat.cols, ratio, *rng)); + switch (m_cropType) + { + case CropType::Center: + mat = mat(GetCropRectCenter(mat.rows, mat.cols, *rng)); + break; + case CropType::RandomSide: + mat = mat(GetCropRectRandomSide(mat.rows, mat.cols, *rng)); + break; + case CropType::RandomArea: + mat = mat(GetCropRectRandomArea(mat.rows, mat.cols, *rng)); + break; + case CropType::MultiView10: + mat = mat(GetCropRectMultiView10(viewIndex, mat.rows, mat.cols, *rng)); + break; + default: + RuntimeError("Invalid crop type."); + break; + } + // for MultiView10 m_hFlip is false, hence the first 5 will be unflipped, the later 5 will be flipped if ((m_hFlip && boost::random::bernoulli_distribution<>()(*rng)) || viewIndex >= 5) @@ -135,106 +161,152 @@ CropTransformer::ParseJitterType(const std::string &src) return RatioJitterType::UniRatio; } - if (AreEqualIgnoreCase(src, "unilength")) - { - return RatioJitterType::UniLength; - } - - if (AreEqualIgnoreCase(src, "uniarea")) - { - return RatioJitterType::UniArea; - } - RuntimeError("Invalid jitter type: %s.", src.c_str()); } -cv::Rect CropTransformer::GetCropRect(CropType type, int viewIndex, int crow, int ccol, - double cropRatio, std::mt19937 &rng) +double CropTransformer::ApplyRatioJitter(const double minVal, const double maxVal, std::mt19937 &rng) +{ + assert(minVal > 0 && minVal <= maxVal); // ratio should 
always be > 0 + switch (m_jitterType) + { + case RatioJitterType::None: + return minVal; + case RatioJitterType::UniRatio: + if (minVal == maxVal) + return minVal; + else + return UniRealT(minVal, maxVal)(rng); + default: + RuntimeError("Jitter type currently not implemented."); + } + return -1; +} + +cv::Rect CropTransformer::GetCropRectCenter(int crow, int ccol, std::mt19937 &rng) { assert(crow > 0); - assert(ccol > 0); - assert(0 < cropRatio && cropRatio <= 1.0); + assert(ccol > 0); + assert(!(m_useSideRatio && m_useAreaRatio)); // cannot be applied simultaneously - // Get square crop size that preserves aspect ratio. - int cropSize = (int)(std::min(crow, ccol) * cropRatio); - int cropSizeX = cropSize; - int cropSizeY = cropSize; - // Change aspect ratio, if this option is enabled. - if (m_curAspectRatioRadius > 0) + int cropSizeX=ccol, cropSizeY=crow; + if (m_cropWidth > 0 && m_cropHeight > 0) // crop sizes are specified with meaningful values { - double factor = 1.0 + UniRealT(-m_curAspectRatioRadius, m_curAspectRatioRadius)(rng); - double area = cropSize * cropSize; - double newArea = area * factor; - if (boost::random::bernoulli_distribution<>()(rng)) - { - cropSizeX = (int)std::sqrt(newArea); - cropSizeY = (int)(area / cropSizeX); - } - else - { - cropSizeY = (int)std::sqrt(newArea); - cropSizeX = (int)(area / cropSizeY); - } - // This clamping should be ok if jittering ratio is not too big. 
- cropSizeX = std::min(cropSizeX, ccol); - cropSizeY = std::min(cropSizeY, crow); + cropSizeX = min(ccol, m_cropWidth); + cropSizeY = min(crow, m_cropHeight); + int xOff = (ccol - cropSizeX) / 2; + int yOff = (crow - cropSizeY) / 2; + return cv::Rect(xOff, yOff, cropSizeX, cropSizeY); } - - int xOff = -1; - int yOff = -1; - switch (type) + + bool bFound = false; + int nAttempt = 0; + while (!bFound && nAttempt < 10) { - case CropType::Center: - assert(viewIndex == 0); + if (m_useSideRatio) + { + double sideRatio = ApplyRatioJitter(m_sideRatioMin, m_sideRatioMax, rng); + assert(sideRatio >= m_sideRatioMin && sideRatio <= m_sideRatioMax); + cropSizeX = cropSizeY = (int)std::round(std::min(crow, ccol) * sideRatio); // we always crop square shape unless aspectRatio is not 1.0 + } + else if (m_useAreaRatio) + { + double areaRatio = ApplyRatioJitter(m_areaRatioMin, m_areaRatioMax, rng); + assert(areaRatio >= m_areaRatioMin && areaRatio <= m_sideRatioMax); + cropSizeX = cropSizeY = (int)std::round(std::sqrt(crow * ccol * areaRatio)); // we always crop square shape unless aspectRatio is not 1.0 + } + + double aspectRatio = ApplyRatioJitter(m_aspectRatioMin, m_aspectRatioMax, rng); + assert(aspectRatio >= m_aspectRatioMin && aspectRatio <= m_aspectRatioMax); + if (aspectRatio != 1.0) + { + double area = cropSizeX * cropSizeY; + if (boost::random::bernoulli_distribution<>()(rng)) + { + cropSizeX = (int)std::sqrt(area * aspectRatio); + cropSizeY = (int)std::sqrt(area / aspectRatio); + } + else + { + cropSizeY = (int)std::sqrt(area * aspectRatio); + cropSizeX = (int)std::sqrt(area / aspectRatio); + } + } + if (cropSizeX <= ccol && cropSizeY <= crow) + { + bFound = true; + break; + } + nAttempt++; + } + if (bFound) + { + int xOff = (ccol - cropSizeX) / 2; + int yOff = (crow - cropSizeY) / 2; + return cv::Rect(xOff, yOff, cropSizeX, cropSizeY); + } + else + { // fall back to return the whole image + return cv::Rect(0, 0, ccol, crow); + } +} + +cv::Rect 
CropTransformer::GetCropRectRandomSide(int crow, int ccol, std::mt19937 &rng) +{ + assert(m_useSideRatio); + cv::Rect rc = GetCropRectCenter(crow, ccol, rng); + + int xOff = UniIntT(0, ccol - rc.width)(rng); + int yOff = UniIntT(0, crow - rc.height)(rng); + return cv::Rect(xOff, yOff, rc.width, rc.height); +} + +cv::Rect CropTransformer::GetCropRectRandomArea(int crow, int ccol, std::mt19937 &rng) +{ + assert(m_useAreaRatio); + cv::Rect rc = GetCropRectCenter(crow, ccol, rng); + + int xOff = UniIntT(0, ccol - rc.width)(rng); + int yOff = UniIntT(0, crow - rc.height)(rng); + return cv::Rect(xOff, yOff, rc.width, rc.height); +} + +cv::Rect CropTransformer::GetCropRectMultiView10(int viewIndex, int crow, int ccol, std::mt19937 &rng) +{ + assert(viewIndex >= 0); + cv::Rect rc = GetCropRectCenter(crow, ccol, rng); + viewIndex = viewIndex % 10; + + // 0 - 4: 4 corners + center crop. 5 - 9: same, but with a flip in CropTransformer::Apply(). + int isubView = viewIndex % 5; + int xOff=-1, yOff=-1, cropSizeX = rc.width, cropSizeY = rc.height; + switch (isubView) + { + case 0: // top-left + xOff = 0; + yOff = 0; + break; + case 1: // top-right + xOff = ccol - cropSizeX; + yOff = 0; + break; + case 2: // bottom-left + xOff = 0; + yOff = crow - cropSizeY; + break; + case 3: // bottom-right + xOff = ccol - cropSizeX; + yOff = crow - cropSizeY; + break; + case 4: // center xOff = (ccol - cropSizeX) / 2; yOff = (crow - cropSizeY) / 2; break; - case CropType::Random: - assert(viewIndex == 0); - xOff = UniIntT(0, ccol - cropSizeX)(rng); - yOff = UniIntT(0, crow - cropSizeY)(rng); - break; - case CropType::MultiView10: - { - assert(0 <= viewIndex && viewIndex < 10); - // 0 - 4: 4 corners + center crop. 5 - 9: same, but with a flip. 
- int isubView = viewIndex % 5; - switch (isubView) - { - // top-left - case 0: - xOff = 0; - yOff = 0; - break; - // top-right - case 1: - xOff = ccol - cropSizeX; - yOff = 0; - break; - // bottom-left - case 2: - xOff = 0; - yOff = crow - cropSizeY; - break; - // bottom-right - case 3: - xOff = ccol - cropSizeX; - yOff = crow - cropSizeY; - break; - // center - case 4: - xOff = (ccol - cropSizeX) / 2; - yOff = (crow - cropSizeY) / 2; - break; - } - break; - } - default: - assert(false); + default: // should never happen + assert(false); } - assert(0 <= xOff && xOff <= ccol - cropSizeX); - assert(0 <= yOff && yOff <= crow - cropSizeY); + assert(xOff >= 0 && xOff <= ccol - cropSizeX); + assert(yOff >= 0 && yOff <= crow - cropSizeY); return cv::Rect(xOff, yOff, cropSizeX, cropSizeY); } @@ -501,7 +573,7 @@ SequenceDataPtr TransposeTransformer::TypedTranspose::Apply(ImageSeq IntensityTransformer::IntensityTransformer(const ConfigParameters &config) : ImageTransformerBase(config) { - m_stdDev = config(L"intensityStdDev", ConfigParameters::Array(doubleargvector(vector{0.0}))); + m_stdDev = config(L"intensityStdDev", "0.0"); std::wstring intFile = config(L"intensityFile", L""); if (intFile.empty()) { @@ -526,7 +598,6 @@ IntensityTransformer::IntensityTransformer(const ConfigParameters &config) : Ima void IntensityTransformer::StartEpoch(const EpochConfiguration &config) { - m_curStdDev = m_stdDev[config.m_epochIndex]; ImageTransformerBase::StartEpoch(config); } @@ -534,7 +605,7 @@ void IntensityTransformer::Apply(size_t id, cv::Mat &mat) { UNUSED(id); - if (m_eigVal.empty() || m_eigVec.empty() || m_curStdDev == 0) + if (m_eigVal.empty() || m_eigVec.empty() || m_stdDev == 0.0) return; // Have to convert to float. @@ -557,7 +628,7 @@ void IntensityTransformer::Apply(cv::Mat &mat) auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique(seed); } ); // Using single precision as EigVal and EigVec matrices are single precision. 
- boost::random::normal_distribution d(0, (float)m_curStdDev); + boost::random::normal_distribution d(0, (float)m_stdDev); cv::Mat alphas(1, 3, CV_32FC1); assert(m_eigVal.rows == 1 && m_eigVec.cols == 3); alphas.at(0) = d(*rng) * m_eigVal.at(0); @@ -587,25 +658,21 @@ void IntensityTransformer::Apply(cv::Mat &mat) ColorTransformer::ColorTransformer(const ConfigParameters &config) : ImageTransformerBase(config) { - m_brightnessRadius = config(L"brightnessRadius", ConfigParameters::Array(doubleargvector(vector{0.0}))); - m_contrastRadius = config(L"contrastRadius", ConfigParameters::Array(doubleargvector(vector{0.0}))); - m_saturationRadius = config(L"saturationRadius", ConfigParameters::Array(doubleargvector(vector{0.0}))); + m_brightnessRadius = config(L"brightnessRadius", "0.0"); + if (m_brightnessRadius < 0 || m_brightnessRadius > 1.0) + InvalidArgument("brightnessRadius must be >= 0.0 and <= 1.0"); + + m_contrastRadius = config(L"contrastRadius", "0.0"); + if (m_contrastRadius < 0 || m_contrastRadius > 1.0) + InvalidArgument("contrastRadius must be >= 0.0 and <= 1.0"); + + m_saturationRadius = config(L"saturationRadius", "0.0"); + if (m_saturationRadius < 0 || m_saturationRadius > 1.0) + InvalidArgument("saturationRadius must be >= 0.0 and <= 1.0"); } void ColorTransformer::StartEpoch(const EpochConfiguration &config) { - m_curBrightnessRadius = m_brightnessRadius[config.m_epochIndex]; - if (!(0 <= m_curBrightnessRadius && m_curBrightnessRadius <= 1.0)) - InvalidArgument("brightnessRadius must be >= 0.0 and <= 1.0"); - - m_curContrastRadius = m_contrastRadius[config.m_epochIndex]; - if (!(0 <= m_curContrastRadius && m_curContrastRadius <= 1.0)) - InvalidArgument("contrastRadius must be >= 0.0 and <= 1.0"); - - m_curSaturationRadius = m_saturationRadius[config.m_epochIndex]; - if (!(0 <= m_curSaturationRadius && m_curSaturationRadius <= 1.0)) - InvalidArgument("saturationRadius must be >= 0.0 and <= 1.0"); - ImageTransformerBase::StartEpoch(config); } @@ -613,7 
+680,7 @@ void ColorTransformer::Apply(size_t id, cv::Mat &mat) { UNUSED(id); - if (m_curBrightnessRadius == 0 && m_curContrastRadius == 0 && m_curSaturationRadius == 0) + if (m_brightnessRadius == 0.0 && m_contrastRadius == 0.0 && m_saturationRadius == 0.0) return; // Have to convert to float @@ -633,15 +700,15 @@ void ColorTransformer::Apply(cv::Mat &mat) auto seed = GetSeed(); auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique(seed); }); - if (m_curBrightnessRadius > 0 || m_curContrastRadius > 0) + if (m_brightnessRadius > 0 || m_contrastRadius > 0) { // To change brightness and/or contrast the following standard transformation is used: // Xij = alpha * Xij + beta, where // alpha is a contrast adjustment and beta - brightness adjustment. ElemType beta = 0; - if (m_curBrightnessRadius > 0) + if (m_brightnessRadius > 0) { - UniRealT d(-m_curBrightnessRadius, m_curBrightnessRadius); + UniRealT d(-m_brightnessRadius, m_brightnessRadius); // Compute mean value of the image. cv::Scalar imgMean = cv::sum(cv::sum(mat)); // Compute beta as a fraction of the mean. 
@@ -649,9 +716,9 @@ void ColorTransformer::Apply(cv::Mat &mat) } ElemType alpha = 1; - if (m_curContrastRadius > 0) + if (m_contrastRadius > 0) { - UniRealT d(-m_curContrastRadius, m_curContrastRadius); + UniRealT d(-m_contrastRadius, m_contrastRadius); alpha = (ElemType)(1 + d(*rng)); } @@ -665,9 +732,9 @@ void ColorTransformer::Apply(cv::Mat &mat) } } - if (m_curSaturationRadius > 0 && mat.channels() == 3) + if (m_saturationRadius > 0 && mat.channels() == 3) { - UniRealT d(-m_curSaturationRadius, m_curSaturationRadius); + UniRealT d(-m_saturationRadius, m_saturationRadius); double ratio = 1.0 + d(*rng); assert(0 <= ratio && ratio <= 2); diff --git a/Source/Readers/ImageReader/ImageTransformers.h b/Source/Readers/ImageReader/ImageTransformers.h index 18508d506..918b910a5 100644 --- a/Source/Readers/ImageReader/ImageTransformers.h +++ b/Source/Readers/ImageReader/ImageTransformers.h @@ -89,24 +89,37 @@ private: enum class RatioJitterType { None = 0, - UniRatio = 1, - UniLength = 2, - UniArea = 3 + UniRatio = 1 }; void StartEpoch(const EpochConfiguration &config) override; RatioJitterType ParseJitterType(const std::string &src); - cv::Rect GetCropRect(CropType type, int viewIndex, int crow, int ccol, double cropRatio, std::mt19937 &rng); + + // assistent functions for GetCropRect****(). 
+ double ApplyRatioJitter(const double minVal, const double maxVal, std::mt19937 &rng); + + cv::Rect GetCropRectCenter(int crow, int ccol, std::mt19937 &rng); + cv::Rect GetCropRectRandomSide(int crow, int ccol, std::mt19937 &rng); + cv::Rect GetCropRectRandomArea(int crow, int ccol, std::mt19937 &rng); + cv::Rect GetCropRectMultiView10(int viewIndex, int crow, int ccol, std::mt19937 &rng); conc_stack> m_rngs; - CropType m_cropType; - double m_cropRatioMin; - double m_cropRatioMax; + CropType m_cropType; + int m_cropWidth; + int m_cropHeight; + + bool m_useSideRatio; + double m_sideRatioMin; + double m_sideRatioMax; + bool m_useAreaRatio; + double m_areaRatioMin; + double m_areaRatioMax; + double m_aspectRatioMin; + double m_aspectRatioMax; + RatioJitterType m_jitterType; bool m_hFlip; - doubleargvector m_aspectRatioRadius; - double m_curAspectRatioRadius; }; // Scale transformation of the image. @@ -199,8 +212,7 @@ private: template void Apply(cv::Mat &mat); - doubleargvector m_stdDev; - double m_curStdDev; + double m_stdDev; cv::Mat m_eigVal; cv::Mat m_eigVec; @@ -222,12 +234,9 @@ private: template void Apply(cv::Mat &mat); - doubleargvector m_brightnessRadius; - double m_curBrightnessRadius; - doubleargvector m_contrastRadius; - double m_curContrastRadius; - doubleargvector m_saturationRadius; - double m_curSaturationRadius; + double m_brightnessRadius; + double m_contrastRadius; + double m_saturationRadius; conc_stack> m_rngs; conc_stack> m_hsvTemp; diff --git a/Tests/EndToEndTests/Examples/Image/Deprecated/CIFAR-10/03_ResNet/03_ResNet_ndl_deprecated.cntk b/Tests/EndToEndTests/Examples/Image/Deprecated/CIFAR-10/03_ResNet/03_ResNet_ndl_deprecated.cntk index 9886d8b39..7fc55034e 100644 --- a/Tests/EndToEndTests/Examples/Image/Deprecated/CIFAR-10/03_ResNet/03_ResNet_ndl_deprecated.cntk +++ b/Tests/EndToEndTests/Examples/Image/Deprecated/CIFAR-10/03_ResNet/03_ResNet_ndl_deprecated.cntk @@ -72,9 +72,9 @@ Train = [ width = 32 height = 32 channels = 3 - cropType = 
"random" - cropRatio = 0.8 - jitterType = "uniRatio" + cropType = "RandomSide" + sideRatio = 0.8 + jitterType = "UniRatio" interpolations = "linear" meanFile = "$DataDir$/CIFAR-10_mean.xml" ] @@ -98,9 +98,9 @@ Test = [ width = 32 height = 32 channels = 3 - cropType = "center" - cropRatio = 1 - jitterType = "uniRatio" + cropType = "Center" + sideRatio = 1 + jitterType = "UniRatio" interpolations = "linear" meanFile = "$DataDir$/CIFAR-10_mean.xml" ] diff --git a/Tests/EndToEndTests/Examples/Image/Deprecated/CIFAR-10/04_ResNet_56/04_ResNet_56_ndl_deprecated.cntk b/Tests/EndToEndTests/Examples/Image/Deprecated/CIFAR-10/04_ResNet_56/04_ResNet_56_ndl_deprecated.cntk index 9f2c3d630..0ca36c1dd 100644 --- a/Tests/EndToEndTests/Examples/Image/Deprecated/CIFAR-10/04_ResNet_56/04_ResNet_56_ndl_deprecated.cntk +++ b/Tests/EndToEndTests/Examples/Image/Deprecated/CIFAR-10/04_ResNet_56/04_ResNet_56_ndl_deprecated.cntk @@ -70,9 +70,9 @@ Train=[ width=32 height=32 channels=3 - cropType="random" - cropRatio=0.8 - jitterType="uniRatio" + cropType="RandomSide" + sideRatio=0.8 + jitterType="UniRatio" interpolations="linear" meanFile="$DataDir$/CIFAR-10_mean.xml" ] @@ -96,9 +96,9 @@ Test=[ width=32 height=32 channels=3 - cropType="center" - cropRatio=1 - jitterType="uniRatio" + cropType="Center" + sideRatio=1 + jitterType="UniRatio" interpolations="linear" meanFile="$DataDir$/CIFAR-10_mean.xml" ] diff --git a/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk b/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk index 91f7c37be..27c0a3882 100644 --- a/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk +++ b/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk @@ -15,13 +15,13 @@ Train=[ channels=3 # Below are the optional parameters. # Possible values: Center, Random. Default: Center - cropType=Random + cropType=RandomSide # Horizontal random flip, will be enabled by default if cropType=Random #hflip=0 - # Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1. 
- cropRatio=0.875 + # Crop scale side ratio. Examples: sideRatio=0.9, sideRatio=0.7:0.9. + sideRatio=0.875 # Crop scale ratio jitter type. - # Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio + # Possible values: None, UniRatio. Default: None jitterType=UniRatio # Interpolation to use when scaling image to width x height size. # Possible values: nearest, linear, cubic, lanczos. Default: linear. diff --git a/Tests/EndToEndTests/Image/AlexNet/AlexNetComposite.cntk b/Tests/EndToEndTests/Image/AlexNet/AlexNetComposite.cntk index c4e1836a1..a816eb2e5 100644 --- a/Tests/EndToEndTests/Image/AlexNet/AlexNetComposite.cntk +++ b/Tests/EndToEndTests/Image/AlexNet/AlexNetComposite.cntk @@ -26,12 +26,12 @@ Train = [ [ type = "Crop" # Possible values: Center, Random. Default: Center - cropType = "random" - # Crop scale ratio. Examples: cropRatio = 0.9, cropRatio = 0.7:0.9. Default: 1. - cropRatio = 0.875 + cropType = "RandomSide" + # Crop scale side ratio. Examples: sideRatio = 0.9, sideRatio = 0.7:0.9. + sideRatio = 0.875 # Crop scale ratio jitter type. - # Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio - jitterType = "uniRatio" + # Possible values: None, UniRatio. 
Default: None + jitterType = "UniRatio" ]:[ type = "Scale" width = 224 diff --git a/Tests/EndToEndTests/ParallelTraining/AsynchronousSGD/03_ResNet-parallel.cntk b/Tests/EndToEndTests/ParallelTraining/AsynchronousSGD/03_ResNet-parallel.cntk index bc9c4b64c..ebcdd1029 100644 --- a/Tests/EndToEndTests/ParallelTraining/AsynchronousSGD/03_ResNet-parallel.cntk +++ b/Tests/EndToEndTests/ParallelTraining/AsynchronousSGD/03_ResNet-parallel.cntk @@ -89,9 +89,9 @@ Train = [ width = 32 height = 32 channels = 3 - cropType = "random" - cropRatio = 0.8 - jitterType = "uniRatio" + cropType = "RandomSide" + sideRatio = 0.8 + jitterType = "UniRatio" interpolations = "linear" meanFile = "$DataDir$/CIFAR-10_mean.xml" ] @@ -108,9 +108,9 @@ Train = [ width = 32 height = 32 channels = 3 - cropType = "center" - cropRatio = 1 - jitterType = "uniRatio" + cropType = "Center" + sideRatio = 1 + jitterType = "UniRatio" interpolations = "linear" meanFile = "$DataDir$/CIFAR-10_mean.xml" ] @@ -134,9 +134,9 @@ Test = [ width = 32 height = 32 channels = 3 - cropType = "center" - cropRatio = 1 - jitterType = "uniRatio" + cropType = "Center" + sideRatio = 1 + jitterType = "UniRatio" interpolations = "linear" meanFile = "$DataDir$/cifar-10-batches-py/CIFAR-10_mean.xml" ] diff --git a/Tests/UnitTests/ReaderTests/Config/ImageAndTextReaderSimple_Config.cntk b/Tests/UnitTests/ReaderTests/Config/ImageAndTextReaderSimple_Config.cntk index d204fd045..1935fee29 100644 --- a/Tests/UnitTests/ReaderTests/Config/ImageAndTextReaderSimple_Config.cntk +++ b/Tests/UnitTests/ReaderTests/Config/ImageAndTextReaderSimple_Config.cntk @@ -42,9 +42,9 @@ reader = [ transforms = ( [ type = "Crop" - cropType = "center" - cropRatio = 1.0 - jitterType = "uniRatio" + cropType = "Center" + sideRatio = 1.0 + jitterType = "UniRatio" ]:[ type = "Scale" width = 4 diff --git a/Tests/UnitTests/ReaderTests/Config/ImageDeserializers.cntk b/Tests/UnitTests/ReaderTests/Config/ImageDeserializers.cntk index c6f9bf142..63d666e2b 100644 --- 
a/Tests/UnitTests/ReaderTests/Config/ImageDeserializers.cntk +++ b/Tests/UnitTests/ReaderTests/Config/ImageDeserializers.cntk @@ -21,9 +21,9 @@ reader = [ transforms = ( [ type = "Crop" - cropType = "center" - cropRatio = 1.0 - jitterType = "uniRatio" + cropType = "Center" + sideRatio = 1.0 + jitterType = "UniRatio" ]:[ type = "Scale" width = 4 diff --git a/Tests/UnitTests/ReaderTests/Config/ImageReaderBadLabel_Config.cntk b/Tests/UnitTests/ReaderTests/Config/ImageReaderBadLabel_Config.cntk index 77577d2f6..545d3e2f5 100644 --- a/Tests/UnitTests/ReaderTests/Config/ImageReaderBadLabel_Config.cntk +++ b/Tests/UnitTests/ReaderTests/Config/ImageReaderBadLabel_Config.cntk @@ -28,7 +28,7 @@ Simple_Test = [ height=8 channels=3 cropType=Center - cropRatio=1.0 + sideRatio=1.0 jitterType=UniRatio interpolations=linear #meanFile=$RootDir$/ImageReaderSimple_mean.xml diff --git a/Tests/UnitTests/ReaderTests/Config/ImageReaderBadMap_Config.cntk b/Tests/UnitTests/ReaderTests/Config/ImageReaderBadMap_Config.cntk index 42f63f500..79923f0b5 100644 --- a/Tests/UnitTests/ReaderTests/Config/ImageReaderBadMap_Config.cntk +++ b/Tests/UnitTests/ReaderTests/Config/ImageReaderBadMap_Config.cntk @@ -28,7 +28,7 @@ Simple_Test = [ height=8 channels=3 cropType=Center - cropRatio=1.0 + sideRatio=1.0 jitterType=UniRatio interpolations=linear #meanFile=$RootDir$/ImageReaderSimple_mean.xml diff --git a/Tests/UnitTests/ReaderTests/Config/ImageReaderColorTransform_Config.cntk b/Tests/UnitTests/ReaderTests/Config/ImageReaderColorTransform_Config.cntk index e1411c7b8..f4eb34bb8 100644 --- a/Tests/UnitTests/ReaderTests/Config/ImageReaderColorTransform_Config.cntk +++ b/Tests/UnitTests/ReaderTests/Config/ImageReaderColorTransform_Config.cntk @@ -27,12 +27,12 @@ ColorTransform_Test = [ width=4 height=4 channels=3 - cropType=center - cropRatio=1 + cropType=Center + sideRatio=1 jitterType=UniRatio - brightnessRadius=0:0.2 - contrastRadius=0:0.2 - saturationRadius=0:0.4 + brightnessRadius=0.2 + 
contrastRadius=0.2 + saturationRadius=0.4 interpolations=linear ] labels=[ diff --git a/Tests/UnitTests/ReaderTests/Config/ImageReaderGrayscale_Config.cntk b/Tests/UnitTests/ReaderTests/Config/ImageReaderGrayscale_Config.cntk index c84440e07..db79d5e0f 100644 --- a/Tests/UnitTests/ReaderTests/Config/ImageReaderGrayscale_Config.cntk +++ b/Tests/UnitTests/ReaderTests/Config/ImageReaderGrayscale_Config.cntk @@ -30,7 +30,7 @@ Grayscale_Test = [ height=4 channels=1 cropType=Center - cropRatio=1.0 + sideRatio=1.0 jitterType=UniRatio interpolations=linear ] diff --git a/Tests/UnitTests/ReaderTests/Config/ImageReaderIntensityTransform_Config.cntk b/Tests/UnitTests/ReaderTests/Config/ImageReaderIntensityTransform_Config.cntk index b781c0394..537a8b783 100644 --- a/Tests/UnitTests/ReaderTests/Config/ImageReaderIntensityTransform_Config.cntk +++ b/Tests/UnitTests/ReaderTests/Config/ImageReaderIntensityTransform_Config.cntk @@ -27,12 +27,12 @@ IntensityTransform_Test = [ width=4 height=4 channels=3 - cropType=center - cropRatio=1 + cropType=Center + sideRatio=1 jitterType=UniRatio interpolations=linear intensityFile="$RootDir$/ImageNet1K_intensity.xml" - intensityStdDev=0:0.1 + intensityStdDev=0.1 ] labels=[ labelDim=4 diff --git a/Tests/UnitTests/ReaderTests/Config/ImageReaderLabelOutOfRange_Config.cntk b/Tests/UnitTests/ReaderTests/Config/ImageReaderLabelOutOfRange_Config.cntk index 7a28642c3..d2747b71e 100644 --- a/Tests/UnitTests/ReaderTests/Config/ImageReaderLabelOutOfRange_Config.cntk +++ b/Tests/UnitTests/ReaderTests/Config/ImageReaderLabelOutOfRange_Config.cntk @@ -28,7 +28,7 @@ Simple_Test = [ height=8 channels=3 cropType=Center - cropRatio=1.0 + sideRatio=1.0 jitterType=UniRatio interpolations=linear #meanFile=$RootDir$/ImageReaderSimple_mean.xml diff --git a/Tests/UnitTests/ReaderTests/Config/ImageReaderMissingImage_Config.cntk b/Tests/UnitTests/ReaderTests/Config/ImageReaderMissingImage_Config.cntk index f20658fcb..d792ab7d8 100644 --- 
a/Tests/UnitTests/ReaderTests/Config/ImageReaderMissingImage_Config.cntk +++ b/Tests/UnitTests/ReaderTests/Config/ImageReaderMissingImage_Config.cntk @@ -18,7 +18,7 @@ MissingImage_Test = [ height=8 channels=3 cropType=Center - cropRatio=1.0 + sideRatio=1.0 jitterType=UniRatio interpolations=Linear #meanFile=$RootDir$/ImageReaderSimple_mean.xml diff --git a/Tests/UnitTests/ReaderTests/Config/ImageReaderMultiView_Config.cntk b/Tests/UnitTests/ReaderTests/Config/ImageReaderMultiView_Config.cntk index 976d4d9e8..b66118a6b 100644 --- a/Tests/UnitTests/ReaderTests/Config/ImageReaderMultiView_Config.cntk +++ b/Tests/UnitTests/ReaderTests/Config/ImageReaderMultiView_Config.cntk @@ -27,8 +27,8 @@ MultiView_Test = [ width=2 height=2 channels=3 - cropType=multiview10 - cropRatio=0.5 + cropType=Multiview10 + sideRatio=0.5 jitterType=UniRatio interpolations=linear #meanFile=$RootDir$/ImageReaderMultiView_mean.xml diff --git a/Tests/UnitTests/ReaderTests/Config/ImageReaderSimple_Config.cntk b/Tests/UnitTests/ReaderTests/Config/ImageReaderSimple_Config.cntk index a9df1c440..8c8f31d9f 100644 --- a/Tests/UnitTests/ReaderTests/Config/ImageReaderSimple_Config.cntk +++ b/Tests/UnitTests/ReaderTests/Config/ImageReaderSimple_Config.cntk @@ -28,7 +28,7 @@ Simple_Test = [ height=8 channels=3 cropType=Center - cropRatio=1.0 + sideRatio=1.0 jitterType=UniRatio interpolations=linear #meanFile=$RootDir$/ImageReaderSimple_mean.xml @@ -54,7 +54,7 @@ Composite_Test= { input = { features = { transforms = ( - { type = "Crop" ; cropType = "center" ; cropRatio = 1.0 ; jitterType = "uniRatio" }: + { type = "Crop" ; cropType = "Center" ; sideRatio = 1.0 ; jitterType = "UniRatio" }: { type = "Scale" ; width = 4 ; height = 8 ; channels = 3 ; interpolations = "linear" }: { type = "Mean" ; }: { type = "Transpose" } diff --git a/Tests/UnitTests/ReaderTests/Config/ImageReaderZipMissing_Config.cntk b/Tests/UnitTests/ReaderTests/Config/ImageReaderZipMissing_Config.cntk index b6954b143..89408fc3a 100644 --- 
a/Tests/UnitTests/ReaderTests/Config/ImageReaderZipMissing_Config.cntk +++ b/Tests/UnitTests/ReaderTests/Config/ImageReaderZipMissing_Config.cntk @@ -28,7 +28,7 @@ ZipMissing_Test = [ height=8 channels=3 cropType=Center - cropRatio=1.0 + sideRatio=1.0 jitterType=UniRatio interpolations=Linear ] diff --git a/Tests/UnitTests/ReaderTests/Config/ImageReaderZip_Config.cntk b/Tests/UnitTests/ReaderTests/Config/ImageReaderZip_Config.cntk index f5da962f8..6c42eb64d 100644 --- a/Tests/UnitTests/ReaderTests/Config/ImageReaderZip_Config.cntk +++ b/Tests/UnitTests/ReaderTests/Config/ImageReaderZip_Config.cntk @@ -28,7 +28,7 @@ Zip_Test = [ height=8 channels=3 cropType=Center - cropRatio=1.0 + sideRatio=1.0 jitterType=UniRatio interpolations=linear #meanFile=$RootDir$/ImageReaderZip_mean.xml diff --git a/Tests/UnitTests/ReaderTests/Control/ImageReaderColorTransform_Control.txt b/Tests/UnitTests/ReaderTests/Control/ImageReaderColorTransform_Control.txt index fc1bfc728..4e26cc370 100644 --- a/Tests/UnitTests/ReaderTests/Control/ImageReaderColorTransform_Control.txt +++ b/Tests/UnitTests/ReaderTests/Control/ImageReaderColorTransform_Control.txt @@ -1,2 +1,3 @@ -255 0 0 0 255 255 128 0 0 255 255 0 0 0 255 255 255 0 0 255 0 255 128 0 255 0 255 0 0 255 0 255 255 0 255 0 0 255 128 255 0 0 255 0 255 0 0 255 255 2.33451 0 0 255 255 135.088 0 0 255 255 2.33451 0 0 255 255 255 2.33451 0 255 0 255 135.088 0 255 0 255 2.33451 0 255 0 255 255 2.33451 255 0 0 255 135.088 255 0 0 255 2.33451 255 0 0 255 +255 16.4645 0 0 255 255 149.726 0 0 255 255 16.4645 0 0 255 255 255 16.4645 0 255 0 255 149.726 0 255 0 255 16.4645 0 255 0 255 255 16.4645 255 0 0 255 149.726 255 0 0 255 16.4645 255 0 0 255 + diff --git a/Tests/UnitTests/ReaderTests/Control/ImageReaderIntensityTransform_Control.txt b/Tests/UnitTests/ReaderTests/Control/ImageReaderIntensityTransform_Control.txt index 2222d963e..d4ca1ec74 100644 --- a/Tests/UnitTests/ReaderTests/Control/ImageReaderIntensityTransform_Control.txt +++ 
b/Tests/UnitTests/ReaderTests/Control/ImageReaderIntensityTransform_Control.txt @@ -1,2 +1,2 @@ -255 0 0 0 255 255 128 0 0 255 255 0 0 0 255 255 255 0 0 255 0 255 128 0 255 0 255 0 0 255 0 255 255 0 255 0 0 255 128 255 0 0 255 0 255 0 0 255 255 0.00711415 0.00711415 0.00711415 255 255 128.007 0.00711415 0.00711415 255 255 0.00711415 0.00711415 0.00711415 255 255 255 0.00813221 0.00813221 255 0.00813221 255 128.008 0.00813221 255 0.00813221 255 0.00813221 0.00813221 255 0.00813221 255 255 0.0103512 255 0.0103512 0.0103512 255 128.01 255 0.0103512 0.0103512 255 0.0103512 255 0.0103512 0.0103512 255 +255 0.0101945 0.0101945 0.0101945 255 255 128.01 0.0101945 0.0101945 255 255 0.0101945 0.0101945 0.0101945 255 255 255 0.0115043 0.0115043 255 0.0115043 255 128.012 0.0115043 255 0.0115043 255 0.0115043 0.0115043 255 0.0115043 255 255 0.0130829 255 0.0130829 0.0130829 255 128.013 255 0.0130829 0.0130829 255 0.0130829 255 0.0130829 0.0130829 255 diff --git a/Tests/UnitTests/V2LibraryTests/CifarResNet.cpp b/Tests/UnitTests/V2LibraryTests/CifarResNet.cpp index 8cdfbcffa..440d8e15b 100644 --- a/Tests/UnitTests/V2LibraryTests/CifarResNet.cpp +++ b/Tests/UnitTests/V2LibraryTests/CifarResNet.cpp @@ -20,8 +20,8 @@ MinibatchSourcePtr CreateCifarMinibatchSource(size_t epochSize) Dictionary cropTransformConfig; cropTransformConfig[L"type"] = L"Crop"; - cropTransformConfig[L"cropType"] = L"Random"; - cropTransformConfig[L"cropRatio"] = L"0.8"; + cropTransformConfig[L"cropType"] = L"RandomSide"; + cropTransformConfig[L"sideRatio"] = L"0.8"; cropTransformConfig[L"jitterType"] = L"uniRatio"; Dictionary scaleTransformConfig; diff --git a/Tutorials/CNTK_201B_CIFAR-10_ImageHandsOn.ipynb b/Tutorials/CNTK_201B_CIFAR-10_ImageHandsOn.ipynb index 8d5401892..a7bca8570 100644 --- a/Tutorials/CNTK_201B_CIFAR-10_ImageHandsOn.ipynb +++ b/Tutorials/CNTK_201B_CIFAR-10_ImageHandsOn.ipynb @@ -261,7 +261,7 @@ " transforms = []\n", " if train:\n", " transforms += [\n", - " 
ImageDeserializer.crop(crop_type='Random', ratio=0.8, jitter_type='uniRatio') # train uses jitter\n", + " ImageDeserializer.crop(crop_type='randomside', side_ratio=0.8) # train uses data augmentation (translation only)\n", " ]\n", " transforms += [\n", " ImageDeserializer.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'),\n", diff --git a/Tutorials/ImageHandsOn/ImageHandsOn.cntk b/Tutorials/ImageHandsOn/ImageHandsOn.cntk index 943068622..75267d09a 100644 --- a/Tutorials/ImageHandsOn/ImageHandsOn.cntk +++ b/Tutorials/ImageHandsOn/ImageHandsOn.cntk @@ -65,7 +65,7 @@ TrainConvNet = { file = "$dataDir$/cifar-10-batches-py/train_map.txt" input = { features = { transforms = ( - { type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } : + { type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } : { type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } : { type = "Transpose" } )} diff --git a/Tutorials/ImageHandsOn/ImageHandsOn_Solution1.cntk b/Tutorials/ImageHandsOn/ImageHandsOn_Solution1.cntk index 66932d503..09e677839 100644 --- a/Tutorials/ImageHandsOn/ImageHandsOn_Solution1.cntk +++ b/Tutorials/ImageHandsOn/ImageHandsOn_Solution1.cntk @@ -67,7 +67,7 @@ TrainConvNet = { file = "$dataDir$/cifar-10-batches-py/train_map.txt" input = { features = { transforms = ( - { type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } : + { type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } : { type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } : { type = "Transpose" } )} diff --git a/Tutorials/ImageHandsOn/ImageHandsOn_Solution2.cntk b/Tutorials/ImageHandsOn/ImageHandsOn_Solution2.cntk index ca2b27deb..d4f50500a 100644 --- a/Tutorials/ImageHandsOn/ImageHandsOn_Solution2.cntk +++ b/Tutorials/ImageHandsOn/ImageHandsOn_Solution2.cntk @@ -69,7 +69,7 @@ 
TrainConvNet = { file = "$dataDir$/cifar-10-batches-py/train_map.txt" input = { features = { transforms = ( - { type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } : + { type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } : { type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } : { type = "Transpose" } )} diff --git a/Tutorials/ImageHandsOn/ImageHandsOn_Solution3.cntk b/Tutorials/ImageHandsOn/ImageHandsOn_Solution3.cntk index 8578353b7..177d6e483 100644 --- a/Tutorials/ImageHandsOn/ImageHandsOn_Solution3.cntk +++ b/Tutorials/ImageHandsOn/ImageHandsOn_Solution3.cntk @@ -73,7 +73,7 @@ TrainConvNet = { file = "$dataDir$/cifar-10-batches-py/train_map.txt" input = { features = { transforms = ( - { type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } : + { type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } : { type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } : { type = "Transpose" } )} diff --git a/Tutorials/ImageHandsOn/ImageHandsOn_Solution4.cntk b/Tutorials/ImageHandsOn/ImageHandsOn_Solution4.cntk index 05d5ecac9..8ce6467c0 100644 --- a/Tutorials/ImageHandsOn/ImageHandsOn_Solution4.cntk +++ b/Tutorials/ImageHandsOn/ImageHandsOn_Solution4.cntk @@ -96,7 +96,7 @@ TrainConvNet = { file = "$dataDir$/cifar-10-batches-py/train_map.txt" input = { features = { transforms = ( - { type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } : + { type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } : { type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } : { type = "Transpose" } )} diff --git a/Tutorials/ImageHandsOn/ImageHandsOn_Solution5.cntk b/Tutorials/ImageHandsOn/ImageHandsOn_Solution5.cntk index 7023ce52d..83234aaed 100644 --- a/Tutorials/ImageHandsOn/ImageHandsOn_Solution5.cntk +++ 
b/Tutorials/ImageHandsOn/ImageHandsOn_Solution5.cntk @@ -102,7 +102,7 @@ TrainConvNet = { file = "$dataDir$/cifar-10-batches-py/train_map.txt" input = { features = { transforms = ( - { type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } : + { type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } : { type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } : { type = "Transpose" } )} diff --git a/Tutorials/ImageHandsOn/ImageHandsOn_Task4_Start.cntk b/Tutorials/ImageHandsOn/ImageHandsOn_Task4_Start.cntk index 566bb04ab..50e6c9677 100644 --- a/Tutorials/ImageHandsOn/ImageHandsOn_Task4_Start.cntk +++ b/Tutorials/ImageHandsOn/ImageHandsOn_Task4_Start.cntk @@ -93,7 +93,7 @@ TrainConvNet = { file = "$dataDir$/cifar-10-batches-py/train_map.txt" input = { features = { transforms = ( - { type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } : + { type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } : { type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } : { type = "Transpose" } )} diff --git a/Tutorials/ImageHandsOn/ImageHandsOn_Task6.cntk b/Tutorials/ImageHandsOn/ImageHandsOn_Task6.cntk index abfe7f1fc..69c2be0d2 100644 --- a/Tutorials/ImageHandsOn/ImageHandsOn_Task6.cntk +++ b/Tutorials/ImageHandsOn/ImageHandsOn_Task6.cntk @@ -115,7 +115,7 @@ TrainConvNet = { file = "$dataDir$/cifar-10-batches-py/train_map.txt" input = { features = { transforms = ( - { type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } : + { type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } : { type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } : { type = "Transpose" } )} diff --git a/bindings/python/cntk/io/__init__.py b/bindings/python/cntk/io/__init__.py index fbff145ea..5a6126e9d 100644 --- 
a/bindings/python/cntk/io/__init__.py +++ b/bindings/python/cntk/io/__init__.py @@ -372,33 +372,55 @@ class ImageDeserializer(Deserializer): self.input[node] = dict(labelDim=num_classes) # reader distinguishes labels from features by calling this 'labelDim' @staticmethod - def crop(crop_type='center', ratio=1.0, jitter_type='uniRatio'): + def crop(crop_type='center', crop_size=0, side_ratio=0.0, area_ratio=0.0, aspect_ratio=1.0, jitter_type='none'): ''' Crop transform that can be used to pass to `map_features` Args: - crop_type (str, default 'center'): 'center' or 'random'. 'random' - is usually used during training while 'center' is usually for testing. + crop_type (str, default 'center'): 'center', 'randomside', 'randomarea', + or 'multiview10'. 'randomside' and 'randomarea' are usually used during + training, while 'center' and 'multiview10' are usually used during testing. Random cropping is a popular data augmentation technique used to improve generalization of the DNN. - ratio (`float`, default 1.0): crop ratio. It specifies the ratio of - final image dimension, e.g. width , to the size of the random crop - taken from the image. For example, the ratio 224 / 256 = 0.875 means - crop of size 224 will be taken from the image rescaled to 256 (implementation - detail: ImageReader takes the crop and then rescales instead of doing - the other way around). To enable scale jitter (another popular data - augmentation technique), use colon-delimited values like cropRatio=0.875:0.466 - which means 224 crop will be taken from images randomly scaled to have - size in [256, 480] range. - jitter_type (str, default 'uniRatio'): crop scale jitter type, possible - values are 'None', 'UniRatio'. 'uniRatio' means uniform distributed jitter - scale between the minimum and maximum cropRatio values. + crop_size (`int`, default 0): crop size in pixels. Ignored if set to 0. 
+ When crop_size is non-zero, for example, crop_size=256, it means a cropping + window of size 256x256 pixels will be taken. If one wants to crop with + non-square shapes, specifying crop_size=256:224 will crop 256x224 (width x height) + pixels. `When crop_size is specified, side_ratio, area_ratio and aspect_ratio + will be ignored.` + side_ratio (`float`, default 0.0): It specifies the ratio of final image + side (width or height) with respect to the original image. Ignored if set + to 0.0. Otherwise, must be set within `(0,1]`. For example, with an input + image size of 640x480, side_ratio of 0.5 means we crop a square region + (if aspect_ratio is 1.0) of the input image, whose width and height are + equal to 0.5*min(640, 480) = 240. To enable scale jitter (a popular data + augmentation technique), use colon-delimited values like side_ratio=0.5:0.75, + which means the crop will have size between 240 (0.5*min(640, 480)) and 360 + (0.75*min(640, 480)). + area_ratio (`float`, default 0.0): It specifies the area ratio of final image + with respect to the original image. Ignored if set to 0.0. Otherwise, must be + set within `(0,1]`. For example, for an input image size of 200x150 pixels, + the area is 30,000. If area_ratio is 0.3333, we crop a square region (if + aspect_ratio is 1.0) with width and height equal to sqrt(30,000*0.3333)=100. + To enable scale jitter, use colon-delimited values such as area_ratio=0.3333:0.8, + which means the crop will have size between 100 (sqrt(30,000*0.3333)) and + 155 (sqrt(30,000*0.8)). + aspect_ratio (`float`, default 1.0): It specifies the aspect ratio (width/height + or height/width) of the crop window. Must be set within `(0,1]`. For example, + if due to side_ratio the crop size is 240x240, an aspect_ratio of 0.64 will + change the window size to non-square: 192x300 or 300x192, each having 50% + chance. Note the area of the crop window does not change.
To enable aspect + ratio jitter, use colon-delimited values such as aspect_ratio=0.64:1.0, which means + the crop will have size between 192x300 (or equally likely 300x192) and 240x240. + jitter_type (str, default 'none'): crop scale jitter type, possible + values are 'none' and 'uniratio'. 'uniratio' means uniformly distributed jitter + scale between the minimum and maximum ratio values. Returns: dict describing the crop transform ''' - return dict(type='Crop', cropType=crop_type, cropRatio=ratio, - jitterType=jitter_type) + return dict(type='Crop', cropType=crop_type, cropSize=crop_size, sideRatio=side_ratio, + areaRatio=area_ratio, aspectRatio=aspect_ratio, jitterType=jitter_type) @staticmethod def scale(width, height, channels, interpolations='linear', scale_mode="fill", pad_value=-1): @@ -438,8 +460,48 @@ class ImageDeserializer(Deserializer): ''' return dict(type='Mean', meanFile=filename) - # TODO color transpose + @staticmethod + def color(brightness_radius=0.0, contrast_radius=0.0, saturation_radius=0.0): + ''' + Color transform that can be used to pass to `map_features` for data augmentation. + Args: + brightness_radius (float, default 0.0): Radius for brightness change. Must be + set within [0.0, 1.0]. For example, assume brightness_radius = 0.2, a random + number `x` is uniformly drawn from [-0.2, 0.2], and every pixel's value is + added by `x*meanVal`, where meanVal is the mean of the image pixel intensity + combining all color channels. + contrast_radius (float, default 0.0): Radius for contrast change. Must be + set within [0.0, 1.0]. For example, assume contrast_radius = 0.2, a random + number `x` is uniformly drawn from [-0.2, 0.2], and every pixel's value is + multiplied by `1+x`. + saturation_radius (float, default 0.0): Radius for saturation change. Only for + color images and must be set within [0.0, 1.0].
For example, assume + saturation_radius = 0.2, a random number `x` is uniformly drawn from [-0.2, 0.2], + and every pixel's saturation is multiplied by `1+x`. + + Returns: + dict describing the color transform + ''' + return dict(type='Color', brightnessRadius=brightness_radius, + contrastRadius=contrast_radius, saturationRadius=saturation_radius) + + #@staticmethod + #def intensity(intensity_stddev, intensity_file): + # ''' + # Intensity transform that can be used to pass to `map_features` for data augmentation. + # Intensity jittering based on PCA transform as described in original `AlexNet paper + # `_ + + # Currently uses precomputed values from + # https://github.com/facebook/fb.resnet.torch/blob/master/datasets/imagenet.lua + + # Args: + # intensity_stddev (float): intensity standard deviation. + # intensity_file (str): intensity file. + # Returns: + # dict describing the intensity transform ''' + # return dict(type='Intensity', intensityStdDev=intensity_stddev, intensityFile=intensity_file) class CTFDeserializer(Deserializer): ''' diff --git a/bindings/python/cntk/io/tests/io_tests.py b/bindings/python/cntk/io/tests/io_tests.py index cddee4a35..04057ebf4 100644 --- a/bindings/python/cntk/io/tests/io_tests.py +++ b/bindings/python/cntk/io/tests/io_tests.py @@ -83,8 +83,8 @@ def test_image(): image = ImageDeserializer(map_file) image.map_features(feature_name, - [ImageDeserializer.crop(crop_type='Random', ratio=0.8, - jitter_type='uniRatio'), + [ImageDeserializer.crop(crop_type='randomside', side_ratio=0.8, + jitter_type='uniratio'), ImageDeserializer.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'), ImageDeserializer.mean(mean_file)]) @@ -109,9 +109,10 @@ def test_image(): assert t0['type'] == 'Crop' assert t1['type'] == 'Scale' assert t2['type'] == 'Mean' - t0['cropType'] == 'Random' - t0['cropRatio'] == 0.8 - t0['jitterType'] == 'uniRatio' + t0['cropType'] == 'randomside' + t0['sideRatio'] == 0.8 + t0['aspectRatio'] ==
0.9 + t0['jitterType'] == 'uniratio' t1['width'] == image_width t1['height'] == image_height t1['channels'] == num_channels