Integrate chazhang/reader_fix into master

This commit is contained in:
Project Philly 2017-01-13 17:58:30 -08:00
Родитель 1b0db71ad7 4c03e2c0a5
Коммит 43b30a1c41
52 изменённых файлов: 458 добавлений и 312 удалений

Просмотреть файл

@ -124,9 +124,9 @@ Train = {
transforms = (
{
type = "Crop"
cropType = "random"
cropRatio = 0.88671875
jitterType = "uniRatio"
cropType = "RandomSide"
sideRatio = 0.88671875
jitterType = "UniRatio"
}:{
type = "Scale"
width = 227
@ -167,7 +167,7 @@ Test = {
{
type = "Crop"
cropType = "center"
cropRatio = 0.88671875
sideRatio = 0.88671875
}:{
type = "Scale"
width = 227

Просмотреть файл

@ -94,7 +94,7 @@ TrainConvNet = {
file = "$dataDir$/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } :
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Mean" ; meanFile = "$dataDir$/CIFAR-10_mean.xml" } :
{ type = "Transpose" }

Просмотреть файл

@ -72,7 +72,7 @@ TrainConvNet = {
file = "$dataDir$/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } :
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Mean" ; meanFile = "$dataDir$/CIFAR-10_mean.xml" } :
{ type = "Transpose" }

Просмотреть файл

@ -32,7 +32,7 @@ def create_reader(map_file, mean_file, train):
transforms = []
if train:
transforms += [
cntk.io.ImageDeserializer.crop(crop_type='Random', ratio=0.8, jitter_type='uniRatio') # train uses jitter
cntk.io.ImageDeserializer.crop(crop_type='randomside', side_ratio=0.8, jitter_type='uniratio') # train uses jitter
]
transforms += [
cntk.io.ImageDeserializer.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'),

Просмотреть файл

@ -34,7 +34,7 @@ def create_reader(map_file, mean_file, train, total_number_of_samples, distribut
transforms = []
if train:
transforms += [
cntk.io.ImageDeserializer.crop(crop_type='Random', ratio=0.8, jitter_type='uniRatio') # train uses jitter
cntk.io.ImageDeserializer.crop(crop_type='randomside', side_ratio=0.8, jitter_type='uniratio') # train uses jitter
]
transforms += [

Просмотреть файл

@ -98,7 +98,7 @@ Train = {
file = "$DataDir$/val_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } :
{ type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; interpolations = "linear" } :
{ type = "Mean" ; meanFile = "$ConfigDir$/ImageNet1K_mean.xml" } :
{ type = "Transpose" }

Просмотреть файл

@ -111,9 +111,9 @@ TrainNetwork = {
width = 224
height = 224
channels = 3
cropType = "Random"
cropType = "RandomSide"
jitterType = "UniRatio"
cropRatio = 0.46666:0.875
sideRatio = 0.46666:0.875
hflip = true
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
@ -131,7 +131,7 @@ TrainNetwork = {
height = 224
channels = 3
cropType = "Center"
cropRatio = 0.875
sideRatio = 0.875
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
labels = {
@ -156,9 +156,9 @@ BNStatistics = {
width = 224
height = 224
channels = 3
cropType = "Random"
cropType = "RandomSide"
hflip = true
cropRatio = 0.46666:0.875
sideRatio = 0.46666:0.875
jitterType = "UniRatio"
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
@ -183,7 +183,7 @@ Eval = {
height = 224
channels = 3
cropType = "Center"
cropRatio = 0.875
sideRatio = 0.875
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
labels = {

Просмотреть файл

@ -83,7 +83,7 @@ TrainConvNet = {
file = "$dataDir$/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } :
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Mean" ; meanFile = "$dataDir$/CIFAR-10_mean.xml" } :
{ type = "Transpose" }

Просмотреть файл

@ -111,9 +111,9 @@ TrainNetwork = {
width = 224
height = 224
channels = 3
cropType = "Random"
cropType = "RandomSide"
jitterType = "UniRatio"
cropRatio = 0.46666:0.875
sideRatio = 0.46666:0.875
hflip = true
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
@ -131,7 +131,7 @@ TrainNetwork = {
height = 224
channels = 3
cropType = "Center"
cropRatio = 0.875
sideRatio = 0.875
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
labels = {
@ -156,9 +156,9 @@ BNStatistics = {
width = 224
height = 224
channels = 3
cropType = "Random"
cropType = "RandomSide"
hflip = true
cropRatio = 0.46666:0.875
sideRatio = 0.46666:0.875
jitterType = "UniRatio"
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
@ -183,7 +183,7 @@ Eval = {
height = 224
channels = 3
cropType = "Center"
cropRatio = 0.875
sideRatio = 0.875
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
labels = {

Просмотреть файл

@ -83,7 +83,7 @@ TrainConvNet = {
file = "$dataDir$/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } :
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Mean" ; meanFile = "$dataDir$/CIFAR-10_mean.xml" } :
{ type = "Transpose" }

Просмотреть файл

@ -110,9 +110,9 @@ TrainNetwork = {
width = 224
height = 224
channels = 3
cropType = "Random"
cropType = "RandomSide"
jitterType = "UniRatio"
cropRatio = 0.46666:0.875
sideRatio = 0.46666:0.875
hflip = true
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
@ -130,7 +130,7 @@ TrainNetwork = {
height = 224
channels = 3
cropType = "Center"
cropRatio = 0.875
sideRatio = 0.875
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
labels = {
@ -156,9 +156,9 @@ BNStatistics = {
width = 224
height = 224
channels = 3
cropType = "Random"
cropType = "RandomSide"
hflip = true
cropRatio = 0.46666:0.875
sideRatio = 0.46666:0.875
jitterType = "UniRatio"
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
@ -183,7 +183,7 @@ Eval = {
height = 224
channels = 3
cropType = "Center"
cropRatio = 0.875
sideRatio = 0.875
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
labels = {

Просмотреть файл

@ -40,7 +40,7 @@ def create_reader(map_file, mean_file, train):
transforms = []
if train:
transforms += [
ImageDeserializer.crop(crop_type='Random', ratio=0.8, jitter_type='uniRatio') # train uses jitter
ImageDeserializer.crop(crop_type='randomside', side_ratio=0.8, jitter_type='uniratio') # train uses jitter
]
transforms += [
ImageDeserializer.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'),

Просмотреть файл

@ -40,7 +40,7 @@ def create_reader(map_file, mean_file, train, total_data_size, distributed_after
transforms = []
if train:
transforms += [
ImageDeserializer.crop(crop_type='Random', ratio=0.8, jitter_type='uniRatio') # train uses jitter
ImageDeserializer.crop(crop_type='randomside', side_ratio=0.8, jitter_type='uniratio') # train uses jitter
]
transforms += [
ImageDeserializer.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'),

Просмотреть файл

@ -58,13 +58,13 @@ Train=[
channels=3
# Below are the optional parameters.
# Possible values: Center, RandomSide, RandomArea, MultiView10. Default: Center
cropType="Random"
# Horizontal random flip, will be enabled by default if cropType=Random
cropType="RandomSide"
# Horizontal random flip, will be enabled by default because cropType=RandomSide
#hflip="true"
# Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1.
cropRatio=0.875
# Crop scale side ratio. Examples: sideRatio=0.9, sideRatio=0.7:0.9.
sideRatio=0.875
# Crop scale ratio jitter type.
# Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio
# Possible values: None, UniRatio. Default: None
jitterType="UniRatio"
# Interpolation to use when scaling image to width x height size.
# Possible values: nearest, linear, cubic, lanczos. Default: linear.

Просмотреть файл

@ -67,13 +67,13 @@ Train=[
channels=3
# Below are the optional parameters.
# Possible values: Center, RandomSide, RandomArea, MultiView10. Default: Center
cropType="Random"
# Horizontal random flip, will be enabled by default if cropType=Random
cropType="RandomSide"
# Horizontal random flip, will be enabled because cropType=RandomSide
#hflip="true"
# Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1.
cropRatio=0.875
# Crop scale side ratio. Examples: sideRatio=0.9, sideRatio=0.7:0.9.
sideRatio=0.875
# Crop scale ratio jitter type.
# Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio
# Possible values: None, UniRatio. Default: None
jitterType="UniRatio"
# Interpolation to use when scaling image to width x height size.
# Possible values: nearest, linear, cubic, lanczos. Default: linear.

Просмотреть файл

@ -67,13 +67,13 @@ Train=[
channels=3
# Below are the optional parameters.
# Possible values: Center, RandomSide, RandomArea, MultiView10. Default: Center
cropType="Random"
# Horizontal random flip, will be enabled by default if cropType=Random
cropType="RandomSide"
# Horizontal random flip, will be enabled because cropType=RandomSide
#hflip="true"
# Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1.
cropRatio=0.875
# Crop scale side ratio. Examples: sideRatio=0.9, sideRatio=0.7:0.9.
sideRatio=0.875
# Crop scale ratio jitter type.
# Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio
# Possible values: None, UniRatio. Default: None
jitterType="UniRatio"
# Interpolation to use when scaling image to width x height size.
# Possible values: nearest, linear, cubic, lanczos. Default: linear.

Просмотреть файл

@ -126,9 +126,14 @@ CropType ImageConfigHelper::ParseCropType(const std::string &src)
return CropType::Center;
}
if (AreEqualIgnoreCase(src, "random"))
if (AreEqualIgnoreCase(src, "randomside"))
{
return CropType::Random;
return CropType::RandomSide;
}
if (AreEqualIgnoreCase(src, "randomarea"))
{
return CropType::RandomArea;
}
if (AreEqualIgnoreCase(src, "multiview10"))

Просмотреть файл

@ -14,9 +14,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
enum class CropType
{
Center = 0,
Random = 1,
MultiView10 = 2
Center = 0, // center crop with a given size
RandomSide = 1, // random scale resized with shorter side sampled from min and max (ResNet-style)
RandomArea = 2, // random scale resized with area size ratio between min and max (Inception-style)
MultiView10 = 3 // 10 view crop
};
// A helper class for image specific parameters.

Просмотреть файл

@ -41,25 +41,60 @@ SequenceDataPtr ImageTransformerBase::Transform(SequenceDataPtr sequence)
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
CropTransformer::CropTransformer(const ConfigParameters& config) : ImageTransformerBase(config)
{
floatargvector cropRatio = config(L"cropRatio", "1.0");
m_cropRatioMin = cropRatio[0];
m_cropRatioMax = cropRatio[1];
if (!(0 < m_cropRatioMin && m_cropRatioMin <= 1.0) ||
!(0 < m_cropRatioMax && m_cropRatioMax <= 1.0) ||
m_cropRatioMin > m_cropRatioMax)
intargvector cropSize = config(L"cropSize", "0");
m_cropWidth = cropSize[0];
m_cropHeight = cropSize[1];
if (m_cropWidth < 0 || m_cropHeight < 0)
{
RuntimeError("Invalid cropRatio value, must be > 0 and <= 1. cropMin must "
"<= cropMax");
RuntimeError("Invalid cropSize value, must be >= 0");
}
m_useSideRatio = true;
floatargvector sideRatio = config(L"sideRatio", "0.0");
m_sideRatioMin = sideRatio[0];
m_sideRatioMax = sideRatio[1];
if (m_sideRatioMin == 0.0 && m_sideRatioMax == 0.0) // taking default value means not specified
{
m_useSideRatio = false;
}
else if (!(m_sideRatioMin > 0 && m_sideRatioMax <= 1.0) ||
m_sideRatioMin > m_sideRatioMax)
{
RuntimeError("Invalid sideRatio value, must be > 0 and <= 1. sideMin must <= sideMax");
}
m_useAreaRatio = true;
floatargvector areaRatio = config(L"areaRatio", "0.0");
m_areaRatioMin = areaRatio[0];
m_areaRatioMax = areaRatio[1];
if (m_areaRatioMin == 0.0 && m_areaRatioMax == 0.0) // taking default value means not specified
{
m_useAreaRatio = false;
}
else if (!(m_areaRatioMin > 0 && m_areaRatioMax <= 1.0) ||
m_areaRatioMin > m_areaRatioMax)
{
RuntimeError("Invalid areaRatio value, must be > 0 and <= 1. areaMin must <= areaMax");
}
if (m_useSideRatio && m_useAreaRatio)
RuntimeError("sideRatio and areaRatio cannot be specified simultaneously");
floatargvector aspectRatio = config(L"aspectRatio", "1.0");
m_aspectRatioMin = aspectRatio[0];
m_aspectRatioMax = aspectRatio[1];
if (!(m_aspectRatioMin > 0 && m_aspectRatioMax <= 1.0) ||
m_aspectRatioMin > m_aspectRatioMax)
{
RuntimeError("Invalid aspectRatio value, must be > 0 and <= 1. aspectMin must <= aspectMax");
}
m_jitterType = ParseJitterType(config(L"jitterType", ""));
m_cropType = ImageConfigHelper::ParseCropType(config(L"cropType", ""));
if (!config.ExistsCurrent(L"hflip"))
{
m_hFlip = m_cropType == CropType::Random;
m_hFlip = (m_cropType == CropType::RandomSide || m_cropType == CropType::RandomArea);
}
else
{
@ -71,47 +106,38 @@ CropTransformer::CropTransformer(const ConfigParameters& config) : ImageTransfor
{
m_hFlip = false;
}
m_aspectRatioRadius = config(L"aspectRatioRadius", ConfigParameters::Array(doubleargvector(vector<double>{0.0})));
}
void CropTransformer::StartEpoch(const EpochConfiguration &config)
{
m_curAspectRatioRadius = m_aspectRatioRadius[config.m_epochIndex];
if (!(0 <= m_curAspectRatioRadius && m_curAspectRatioRadius <= 1.0))
InvalidArgument("aspectRatioRadius must be >= 0.0 and <= 1.0");
ImageTransformerBase::StartEpoch(config);
}
void CropTransformer::Apply(size_t id, cv::Mat &mat)
{
auto seed = GetSeed();
auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique<std::mt19937>(seed); });
double ratio = 1;
switch (m_jitterType)
{
case RatioJitterType::None:
ratio = m_cropRatioMin;
break;
case RatioJitterType::UniRatio:
if (m_cropRatioMin == m_cropRatioMax)
{
ratio = m_cropRatioMin;
}
else
{
ratio = UniRealT(m_cropRatioMin, m_cropRatioMax)(*rng);
assert(m_cropRatioMin <= ratio && ratio < m_cropRatioMax);
}
break;
default:
RuntimeError("Jitter type currently not implemented.");
}
auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique<std::mt19937>(seed); });
int viewIndex = m_cropType == CropType::MultiView10 ? (int)(id % 10) : 0;
mat = mat(GetCropRect(m_cropType, viewIndex, mat.rows, mat.cols, ratio, *rng));
switch (m_cropType)
{
case CropType::Center:
mat = mat(GetCropRectCenter(mat.rows, mat.cols, *rng));
break;
case CropType::RandomSide:
mat = mat(GetCropRectRandomSide(mat.rows, mat.cols, *rng));
break;
case CropType::RandomArea:
mat = mat(GetCropRectRandomArea(mat.rows, mat.cols, *rng));
break;
case CropType::MultiView10:
mat = mat(GetCropRectMultiView10(viewIndex, mat.rows, mat.cols, *rng));
break;
default:
RuntimeError("Invalid crop type.");
break;
}
// for MultiView10 m_hFlip is false, hence the first 5 will be unflipped, the later 5 will be flipped
if ((m_hFlip && boost::random::bernoulli_distribution<>()(*rng)) ||
viewIndex >= 5)
@ -135,106 +161,152 @@ CropTransformer::ParseJitterType(const std::string &src)
return RatioJitterType::UniRatio;
}
if (AreEqualIgnoreCase(src, "unilength"))
{
return RatioJitterType::UniLength;
}
if (AreEqualIgnoreCase(src, "uniarea"))
{
return RatioJitterType::UniArea;
}
RuntimeError("Invalid jitter type: %s.", src.c_str());
}
cv::Rect CropTransformer::GetCropRect(CropType type, int viewIndex, int crow, int ccol,
double cropRatio, std::mt19937 &rng)
// Picks the ratio to use for one crop from the configured [minVal, maxVal] range,
// according to m_jitterType.
//   minVal, maxVal - bounds of the ratio; expected to satisfy 0 < minVal <= maxVal (asserted).
//   rng            - random engine, consumed only when a value actually has to be sampled.
// Returns minVal when jitter is disabled or the range is degenerate, otherwise a value
// produced by UniRealT(minVal, maxVal). Any other jitter type is reported via RuntimeError.
double CropTransformer::ApplyRatioJitter(const double minVal, const double maxVal, std::mt19937 &rng)
{
    assert(minVal > 0 && minVal <= maxVal); // ratio should always be > 0

    if (m_jitterType == RatioJitterType::None)
        return minVal;

    if (m_jitterType != RatioJitterType::UniRatio)
    {
        RuntimeError("Jitter type currently not implemented.");
        return -1; // only reached if RuntimeError returns
    }

    // A degenerate range needs no sampling, so the generator is left untouched.
    const bool isSingleValue = (minVal == maxVal);
    return isSingleValue ? minVal : UniRealT(minVal, maxVal)(rng);
}
// Computes a crop rectangle centered in an image of crow x ccol pixels.
//
// Size selection, in priority order:
//   1. If both m_cropWidth and m_cropHeight are positive, that fixed size is used
//      (clamped to the image) and no random numbers are drawn.
//   2. Otherwise a square size is derived from sideRatio (fraction of the shorter image
//      side) or from areaRatio (fraction of the image area); when neither is in use the
//      current size (initially the whole image) is kept. The size is then reshaped by
//      the (possibly jittered) aspect ratio.
// A candidate that does not fit inside the image is re-drawn, up to kMaxAttempts times;
// if none fits, the whole image is returned.
//
//   crow, ccol - image height and width in pixels, both expected to be > 0 (asserted).
//   rng        - random engine used for ratio jitter and for choosing which side the
//                aspect ratio stretches.
// Returns the selected rectangle; also used by the other GetCropRect*() helpers to
// obtain the crop size.
cv::Rect CropTransformer::GetCropRectCenter(int crow, int ccol, std::mt19937 &rng)
{
    assert(crow > 0);
    assert(ccol > 0);
    assert(!(m_useSideRatio && m_useAreaRatio)); // cannot be applied simultaneously

    int cropSizeX = ccol, cropSizeY = crow;
    if (m_cropWidth > 0 && m_cropHeight > 0) // crop sizes are specified with meaningful values
    {
        cropSizeX = std::min(ccol, m_cropWidth);
        cropSizeY = std::min(crow, m_cropHeight);
        int xOff = (ccol - cropSizeX) / 2;
        int yOff = (crow - cropSizeY) / 2;
        return cv::Rect(xOff, yOff, cropSizeX, cropSizeY);
    }

    const int kMaxAttempts = 10;
    for (int nAttempt = 0; nAttempt < kMaxAttempts; nAttempt++)
    {
        if (m_useSideRatio)
        {
            double sideRatio = ApplyRatioJitter(m_sideRatioMin, m_sideRatioMax, rng);
            assert(sideRatio >= m_sideRatioMin && sideRatio <= m_sideRatioMax);
            cropSizeX = cropSizeY = (int)std::round(std::min(crow, ccol) * sideRatio); // we always crop square shape unless aspectRatio is not 1.0
        }
        else if (m_useAreaRatio)
        {
            double areaRatio = ApplyRatioJitter(m_areaRatioMin, m_areaRatioMax, rng);
            // Check against the area bounds (the side bounds are unrelated here).
            assert(areaRatio >= m_areaRatioMin && areaRatio <= m_areaRatioMax);
            // Multiply in double so large images cannot overflow int before the sqrt.
            cropSizeX = cropSizeY = (int)std::round(std::sqrt((double)crow * ccol * areaRatio)); // we always crop square shape unless aspectRatio is not 1.0
        }

        double aspectRatio = ApplyRatioJitter(m_aspectRatioMin, m_aspectRatioMax, rng);
        assert(aspectRatio >= m_aspectRatioMin && aspectRatio <= m_aspectRatioMax);
        if (aspectRatio != 1.0)
        {
            // Keep the area, change the shape; a coin flip decides which side shrinks.
            double area = (double)cropSizeX * cropSizeY;
            if (boost::random::bernoulli_distribution<>()(rng))
            {
                cropSizeX = (int)std::sqrt(area * aspectRatio);
                cropSizeY = (int)std::sqrt(area / aspectRatio);
            }
            else
            {
                cropSizeY = (int)std::sqrt(area * aspectRatio);
                cropSizeX = (int)std::sqrt(area / aspectRatio);
            }
        }

        if (cropSizeX <= ccol && cropSizeY <= crow)
        {
            int xOff = (ccol - cropSizeX) / 2;
            int yOff = (crow - cropSizeY) / 2;
            return cv::Rect(xOff, yOff, cropSizeX, cropSizeY);
        }
    }

    // No candidate fit inside the image: fall back to return the whole image.
    return cv::Rect(0, 0, ccol, crow);
}
// Crop rectangle for CropType::RandomSide.
// The size is taken from GetCropRectCenter(); the position is then re-drawn with
// UniIntT over the offsets that keep the rectangle inside the crow x ccol image.
// Expects sideRatio to be configured (asserted).
cv::Rect CropTransformer::GetCropRectRandomSide(int crow, int ccol, std::mt19937 &rng)
{
    assert(m_useSideRatio);

    // Only the width/height of the centered rectangle are kept.
    const cv::Rect centered = GetCropRectCenter(crow, ccol, rng);
    const int maxLeft = ccol - centered.width;
    const int maxTop = crow - centered.height;

    // x is drawn before y so the sequence of values taken from rng is unchanged.
    const int left = UniIntT(0, maxLeft)(rng);
    const int top = UniIntT(0, maxTop)(rng);
    return cv::Rect(left, top, centered.width, centered.height);
}
// Crop rectangle for CropType::RandomArea.
// The size is taken from GetCropRectCenter(); the position is then re-drawn with
// UniIntT over the offsets that keep the rectangle inside the crow x ccol image.
// Expects areaRatio to be configured (asserted).
cv::Rect CropTransformer::GetCropRectRandomArea(int crow, int ccol, std::mt19937 &rng)
{
    assert(m_useAreaRatio);

    // Only the width/height of the centered rectangle are kept.
    const cv::Rect centered = GetCropRectCenter(crow, ccol, rng);
    const int maxLeft = ccol - centered.width;
    const int maxTop = crow - centered.height;

    // x is drawn before y so the sequence of values taken from rng is unchanged.
    const int left = UniIntT(0, maxLeft)(rng);
    const int top = UniIntT(0, maxTop)(rng);
    return cv::Rect(left, top, centered.width, centered.height);
}
// Crop rectangle for CropType::MultiView10.
// The crop size is taken from GetCropRectCenter(); the position depends on the view:
// views 0-3 are the four corners, view 4 is the center, and views 5-9 repeat the same
// five positions (the horizontal flip for those is applied in CropTransformer::Apply()).
//
//   viewIndex  - index of the view, expected to be >= 0 (asserted); reduced modulo 10.
//   crow, ccol - image height and width in pixels.
//   rng        - random engine forwarded to GetCropRectCenter().
cv::Rect CropTransformer::GetCropRectMultiView10(int viewIndex, int crow, int ccol, std::mt19937 &rng)
{
    assert(viewIndex >= 0);

    cv::Rect rc = GetCropRectCenter(crow, ccol, rng);
    viewIndex = viewIndex % 10;
    // 0 - 4: 4 corners + center crop. 5 - 9: same, but with a flip in CropTransformer::Apply().
    int isubView = viewIndex % 5;

    int xOff = -1, yOff = -1, cropSizeX = rc.width, cropSizeY = rc.height;
    switch (isubView)
    {
    case 0: // top-left
        xOff = 0;
        yOff = 0;
        break;
    case 1: // top-right
        xOff = ccol - cropSizeX;
        yOff = 0;
        break;
    case 2: // bottom-left
        xOff = 0;
        yOff = crow - cropSizeY;
        break;
    case 3: // bottom-right
        xOff = ccol - cropSizeX;
        yOff = crow - cropSizeY;
        break;
    case 4: // center
        xOff = (ccol - cropSizeX) / 2;
        yOff = (crow - cropSizeY) / 2;
        break;
    default: // should never happen
        assert(false);
        break;
    }

    assert(xOff >= 0 && xOff <= ccol - cropSizeX);
    assert(yOff >= 0 && yOff <= crow - cropSizeY);
    return cv::Rect(xOff, yOff, cropSizeX, cropSizeY);
}
@ -501,7 +573,7 @@ SequenceDataPtr TransposeTransformer::TypedTranspose<TElementTo>::Apply(ImageSeq
IntensityTransformer::IntensityTransformer(const ConfigParameters &config) : ImageTransformerBase(config)
{
m_stdDev = config(L"intensityStdDev", ConfigParameters::Array(doubleargvector(vector<double>{0.0})));
m_stdDev = config(L"intensityStdDev", "0.0");
std::wstring intFile = config(L"intensityFile", L"");
if (intFile.empty())
{
@ -526,7 +598,6 @@ IntensityTransformer::IntensityTransformer(const ConfigParameters &config) : Ima
void IntensityTransformer::StartEpoch(const EpochConfiguration &config)
{
m_curStdDev = m_stdDev[config.m_epochIndex];
ImageTransformerBase::StartEpoch(config);
}
@ -534,7 +605,7 @@ void IntensityTransformer::Apply(size_t id, cv::Mat &mat)
{
UNUSED(id);
if (m_eigVal.empty() || m_eigVec.empty() || m_curStdDev == 0)
if (m_eigVal.empty() || m_eigVec.empty() || m_stdDev == 0.0)
return;
// Have to convert to float.
@ -557,7 +628,7 @@ void IntensityTransformer::Apply(cv::Mat &mat)
auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique<std::mt19937>(seed); } );
// Using single precision as EigVal and EigVec matrices are single precision.
boost::random::normal_distribution<float> d(0, (float)m_curStdDev);
boost::random::normal_distribution<float> d(0, (float)m_stdDev);
cv::Mat alphas(1, 3, CV_32FC1);
assert(m_eigVal.rows == 1 && m_eigVec.cols == 3);
alphas.at<float>(0) = d(*rng) * m_eigVal.at<float>(0);
@ -587,25 +658,21 @@ void IntensityTransformer::Apply(cv::Mat &mat)
ColorTransformer::ColorTransformer(const ConfigParameters &config) : ImageTransformerBase(config)
{
m_brightnessRadius = config(L"brightnessRadius", ConfigParameters::Array(doubleargvector(vector<double>{0.0})));
m_contrastRadius = config(L"contrastRadius", ConfigParameters::Array(doubleargvector(vector<double>{0.0})));
m_saturationRadius = config(L"saturationRadius", ConfigParameters::Array(doubleargvector(vector<double>{0.0})));
m_brightnessRadius = config(L"brightnessRadius", "0.0");
if (m_brightnessRadius < 0 || m_brightnessRadius > 1.0)
InvalidArgument("brightnessRadius must be >= 0.0 and <= 1.0");
m_contrastRadius = config(L"contrastRadius", "0.0");
if (m_contrastRadius < 0 || m_contrastRadius > 1.0)
InvalidArgument("contrastRadius must be >= 0.0 and <= 1.0");
m_saturationRadius = config(L"saturationRadius", "0.0");
if (m_saturationRadius < 0 || m_saturationRadius > 1.0)
InvalidArgument("saturationRadius must be >= 0.0 and <= 1.0");
}
void ColorTransformer::StartEpoch(const EpochConfiguration &config)
{
m_curBrightnessRadius = m_brightnessRadius[config.m_epochIndex];
if (!(0 <= m_curBrightnessRadius && m_curBrightnessRadius <= 1.0))
InvalidArgument("brightnessRadius must be >= 0.0 and <= 1.0");
m_curContrastRadius = m_contrastRadius[config.m_epochIndex];
if (!(0 <= m_curContrastRadius && m_curContrastRadius <= 1.0))
InvalidArgument("contrastRadius must be >= 0.0 and <= 1.0");
m_curSaturationRadius = m_saturationRadius[config.m_epochIndex];
if (!(0 <= m_curSaturationRadius && m_curSaturationRadius <= 1.0))
InvalidArgument("saturationRadius must be >= 0.0 and <= 1.0");
ImageTransformerBase::StartEpoch(config);
}
@ -613,7 +680,7 @@ void ColorTransformer::Apply(size_t id, cv::Mat &mat)
{
UNUSED(id);
if (m_curBrightnessRadius == 0 && m_curContrastRadius == 0 && m_curSaturationRadius == 0)
if (m_brightnessRadius == 0.0 && m_contrastRadius == 0.0 && m_saturationRadius == 0.0)
return;
// Have to convert to float
@ -633,15 +700,15 @@ void ColorTransformer::Apply(cv::Mat &mat)
auto seed = GetSeed();
auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique<std::mt19937>(seed); });
if (m_curBrightnessRadius > 0 || m_curContrastRadius > 0)
if (m_brightnessRadius > 0 || m_contrastRadius > 0)
{
// To change brightness and/or contrast the following standard transformation is used:
// Xij = alpha * Xij + beta, where
// alpha is a contrast adjustment and beta - brightness adjustment.
ElemType beta = 0;
if (m_curBrightnessRadius > 0)
if (m_brightnessRadius > 0)
{
UniRealT d(-m_curBrightnessRadius, m_curBrightnessRadius);
UniRealT d(-m_brightnessRadius, m_brightnessRadius);
// Compute mean value of the image.
cv::Scalar imgMean = cv::sum(cv::sum(mat));
// Compute beta as a fraction of the mean.
@ -649,9 +716,9 @@ void ColorTransformer::Apply(cv::Mat &mat)
}
ElemType alpha = 1;
if (m_curContrastRadius > 0)
if (m_contrastRadius > 0)
{
UniRealT d(-m_curContrastRadius, m_curContrastRadius);
UniRealT d(-m_contrastRadius, m_contrastRadius);
alpha = (ElemType)(1 + d(*rng));
}
@ -665,9 +732,9 @@ void ColorTransformer::Apply(cv::Mat &mat)
}
}
if (m_curSaturationRadius > 0 && mat.channels() == 3)
if (m_saturationRadius > 0 && mat.channels() == 3)
{
UniRealT d(-m_curSaturationRadius, m_curSaturationRadius);
UniRealT d(-m_saturationRadius, m_saturationRadius);
double ratio = 1.0 + d(*rng);
assert(0 <= ratio && ratio <= 2);

Просмотреть файл

@ -89,24 +89,37 @@ private:
enum class RatioJitterType
{
None = 0,
UniRatio = 1,
UniLength = 2,
UniArea = 3
UniRatio = 1
};
void StartEpoch(const EpochConfiguration &config) override;
RatioJitterType ParseJitterType(const std::string &src);
cv::Rect GetCropRect(CropType type, int viewIndex, int crow, int ccol, double cropRatio, std::mt19937 &rng);
// Helper functions for the GetCropRect*() family.
double ApplyRatioJitter(const double minVal, const double maxVal, std::mt19937 &rng);
cv::Rect GetCropRectCenter(int crow, int ccol, std::mt19937 &rng);
cv::Rect GetCropRectRandomSide(int crow, int ccol, std::mt19937 &rng);
cv::Rect GetCropRectRandomArea(int crow, int ccol, std::mt19937 &rng);
cv::Rect GetCropRectMultiView10(int viewIndex, int crow, int ccol, std::mt19937 &rng);
conc_stack<std::unique_ptr<std::mt19937>> m_rngs;
CropType m_cropType;
double m_cropRatioMin;
double m_cropRatioMax;
CropType m_cropType;
int m_cropWidth;
int m_cropHeight;
bool m_useSideRatio;
double m_sideRatioMin;
double m_sideRatioMax;
bool m_useAreaRatio;
double m_areaRatioMin;
double m_areaRatioMax;
double m_aspectRatioMin;
double m_aspectRatioMax;
RatioJitterType m_jitterType;
bool m_hFlip;
doubleargvector m_aspectRatioRadius;
double m_curAspectRatioRadius;
};
// Scale transformation of the image.
@ -199,8 +212,7 @@ private:
template <typename ElemType>
void Apply(cv::Mat &mat);
doubleargvector m_stdDev;
double m_curStdDev;
double m_stdDev;
cv::Mat m_eigVal;
cv::Mat m_eigVec;
@ -222,12 +234,9 @@ private:
template <typename ElemType>
void Apply(cv::Mat &mat);
doubleargvector m_brightnessRadius;
double m_curBrightnessRadius;
doubleargvector m_contrastRadius;
double m_curContrastRadius;
doubleargvector m_saturationRadius;
double m_curSaturationRadius;
double m_brightnessRadius;
double m_contrastRadius;
double m_saturationRadius;
conc_stack<std::unique_ptr<std::mt19937>> m_rngs;
conc_stack<std::unique_ptr<cv::Mat>> m_hsvTemp;

Просмотреть файл

@ -72,9 +72,9 @@ Train = [
width = 32
height = 32
channels = 3
cropType = "random"
cropRatio = 0.8
jitterType = "uniRatio"
cropType = "RandomSide"
sideRatio = 0.8
jitterType = "UniRatio"
interpolations = "linear"
meanFile = "$DataDir$/CIFAR-10_mean.xml"
]
@ -98,9 +98,9 @@ Test = [
width = 32
height = 32
channels = 3
cropType = "center"
cropRatio = 1
jitterType = "uniRatio"
cropType = "Center"
sideRatio = 1
jitterType = "UniRatio"
interpolations = "linear"
meanFile = "$DataDir$/CIFAR-10_mean.xml"
]

Просмотреть файл

@ -70,9 +70,9 @@ Train=[
width=32
height=32
channels=3
cropType="random"
cropRatio=0.8
jitterType="uniRatio"
cropType="RandomSide"
sideRatio=0.8
jitterType="UniRatio"
interpolations="linear"
meanFile="$DataDir$/CIFAR-10_mean.xml"
]
@ -96,9 +96,9 @@ Test=[
width=32
height=32
channels=3
cropType="center"
cropRatio=1
jitterType="uniRatio"
cropType="Center"
sideRatio=1
jitterType="UniRatio"
interpolations="linear"
meanFile="$DataDir$/CIFAR-10_mean.xml"
]

Просмотреть файл

@ -15,13 +15,13 @@ Train=[
channels=3
# Below are the optional parameters.
# Possible values: Center, RandomSide, RandomArea, MultiView10. Default: Center
cropType=Random
cropType=RandomSide
# Horizontal random flip, will be enabled by default if cropType=RandomSide or RandomArea
#hflip=0
# Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1.
cropRatio=0.875
# Crop scale side ratio. Examples: sideRatio=0.9, sideRatio=0.7:0.9.
sideRatio=0.875
# Crop scale ratio jitter type.
# Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio
# Possible values: None, UniRatio. Default: None
jitterType=UniRatio
# Interpolation to use when scaling image to width x height size.
# Possible values: nearest, linear, cubic, lanczos. Default: linear.

Просмотреть файл

@ -26,12 +26,12 @@ Train = [
[
type = "Crop"
# Possible values: Center, RandomSide, RandomArea, MultiView10. Default: Center
cropType = "random"
# Crop scale ratio. Examples: cropRatio = 0.9, cropRatio = 0.7:0.9. Default: 1.
cropRatio = 0.875
cropType = "RandomSide"
# Crop scale side ratio. Examples: sideRatio = 0.9, sideRatio = 0.7:0.9.
sideRatio = 0.875
# Crop scale ratio jitter type.
# Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio
jitterType = "uniRatio"
# Possible values: None, UniRatio. Default: None
jitterType = "UniRatio"
]:[
type = "Scale"
width = 224

Просмотреть файл

@ -89,9 +89,9 @@ Train = [
width = 32
height = 32
channels = 3
cropType = "random"
cropRatio = 0.8
jitterType = "uniRatio"
cropType = "RandomSide"
sideRatio = 0.8
jitterType = "UniRatio"
interpolations = "linear"
meanFile = "$DataDir$/CIFAR-10_mean.xml"
]
@ -108,9 +108,9 @@ Train = [
width = 32
height = 32
channels = 3
cropType = "center"
cropRatio = 1
jitterType = "uniRatio"
cropType = "Center"
sideRatio = 1
jitterType = "UniRatio"
interpolations = "linear"
meanFile = "$DataDir$/CIFAR-10_mean.xml"
]
@ -134,9 +134,9 @@ Test = [
width = 32
height = 32
channels = 3
cropType = "center"
cropRatio = 1
jitterType = "uniRatio"
cropType = "Center"
sideRatio = 1
jitterType = "UniRatio"
interpolations = "linear"
meanFile = "$DataDir$/cifar-10-batches-py/CIFAR-10_mean.xml"
]

Просмотреть файл

@ -42,9 +42,9 @@ reader = [
transforms = (
[
type = "Crop"
cropType = "center"
cropRatio = 1.0
jitterType = "uniRatio"
cropType = "Center"
sideRatio = 1.0
jitterType = "UniRatio"
]:[
type = "Scale"
width = 4

Просмотреть файл

@ -21,9 +21,9 @@ reader = [
transforms = (
[
type = "Crop"
cropType = "center"
cropRatio = 1.0
jitterType = "uniRatio"
cropType = "Center"
sideRatio = 1.0
jitterType = "UniRatio"
]:[
type = "Scale"
width = 4

Просмотреть файл

@ -28,7 +28,7 @@ Simple_Test = [
height=8
channels=3
cropType=Center
cropRatio=1.0
sideRatio=1.0
jitterType=UniRatio
interpolations=linear
#meanFile=$RootDir$/ImageReaderSimple_mean.xml

Просмотреть файл

@ -28,7 +28,7 @@ Simple_Test = [
height=8
channels=3
cropType=Center
cropRatio=1.0
sideRatio=1.0
jitterType=UniRatio
interpolations=linear
#meanFile=$RootDir$/ImageReaderSimple_mean.xml

Просмотреть файл

@ -27,12 +27,12 @@ ColorTransform_Test = [
width=4
height=4
channels=3
cropType=center
cropRatio=1
cropType=Center
sideRatio=1
jitterType=UniRatio
brightnessRadius=0:0.2
contrastRadius=0:0.2
saturationRadius=0:0.4
brightnessRadius=0.2
contrastRadius=0.2
saturationRadius=0.4
interpolations=linear
]
labels=[

Просмотреть файл

@ -30,7 +30,7 @@ Grayscale_Test = [
height=4
channels=1
cropType=Center
cropRatio=1.0
sideRatio=1.0
jitterType=UniRatio
interpolations=linear
]

Просмотреть файл

@ -27,12 +27,12 @@ IntensityTransform_Test = [
width=4
height=4
channels=3
cropType=center
cropRatio=1
cropType=Center
sideRatio=1
jitterType=UniRatio
interpolations=linear
intensityFile="$RootDir$/ImageNet1K_intensity.xml"
intensityStdDev=0:0.1
intensityStdDev=0.1
]
labels=[
labelDim=4

Просмотреть файл

@ -28,7 +28,7 @@ Simple_Test = [
height=8
channels=3
cropType=Center
cropRatio=1.0
sideRatio=1.0
jitterType=UniRatio
interpolations=linear
#meanFile=$RootDir$/ImageReaderSimple_mean.xml

Просмотреть файл

@ -18,7 +18,7 @@ MissingImage_Test = [
height=8
channels=3
cropType=Center
cropRatio=1.0
sideRatio=1.0
jitterType=UniRatio
interpolations=Linear
#meanFile=$RootDir$/ImageReaderSimple_mean.xml

Просмотреть файл

@ -27,8 +27,8 @@ MultiView_Test = [
width=2
height=2
channels=3
cropType=multiview10
cropRatio=0.5
cropType=Multiview10
sideRatio=0.5
jitterType=UniRatio
interpolations=linear
#meanFile=$RootDir$/ImageReaderMultiView_mean.xml

Просмотреть файл

@ -28,7 +28,7 @@ Simple_Test = [
height=8
channels=3
cropType=Center
cropRatio=1.0
sideRatio=1.0
jitterType=UniRatio
interpolations=linear
#meanFile=$RootDir$/ImageReaderSimple_mean.xml
@ -54,7 +54,7 @@ Composite_Test= {
input = {
features = {
transforms = (
{ type = "Crop" ; cropType = "center" ; cropRatio = 1.0 ; jitterType = "uniRatio" }:
{ type = "Crop" ; cropType = "Center" ; sideRatio = 1.0 ; jitterType = "UniRatio" }:
{ type = "Scale" ; width = 4 ; height = 8 ; channels = 3 ; interpolations = "linear" }:
{ type = "Mean" ; }:
{ type = "Transpose" }

Просмотреть файл

@ -28,7 +28,7 @@ ZipMissing_Test = [
height=8
channels=3
cropType=Center
cropRatio=1.0
sideRatio=1.0
jitterType=UniRatio
interpolations=Linear
]

Просмотреть файл

@ -28,7 +28,7 @@ Zip_Test = [
height=8
channels=3
cropType=Center
cropRatio=1.0
sideRatio=1.0
jitterType=UniRatio
interpolations=linear
#meanFile=$RootDir$/ImageReaderZip_mean.xml

Просмотреть файл

@ -1,2 +1,3 @@
255 0 0 0 255 255 128 0 0 255 255 0 0 0 255 255 255 0 0 255 0 255 128 0 255 0 255 0 0 255 0 255 255 0 255 0 0 255 128 255 0 0 255 0 255 0 0 255
255 2.33451 0 0 255 255 135.088 0 0 255 255 2.33451 0 0 255 255 255 2.33451 0 255 0 255 135.088 0 255 0 255 2.33451 0 255 0 255 255 2.33451 255 0 0 255 135.088 255 0 0 255 2.33451 255 0 0 255
255 16.4645 0 0 255 255 149.726 0 0 255 255 16.4645 0 0 255 255 255 16.4645 0 255 0 255 149.726 0 255 0 255 16.4645 0 255 0 255 255 16.4645 255 0 0 255 149.726 255 0 0 255 16.4645 255 0 0 255

Просмотреть файл

@ -1,2 +1,2 @@
255 0 0 0 255 255 128 0 0 255 255 0 0 0 255 255 255 0 0 255 0 255 128 0 255 0 255 0 0 255 0 255 255 0 255 0 0 255 128 255 0 0 255 0 255 0 0 255
255 0.00711415 0.00711415 0.00711415 255 255 128.007 0.00711415 0.00711415 255 255 0.00711415 0.00711415 0.00711415 255 255 255 0.00813221 0.00813221 255 0.00813221 255 128.008 0.00813221 255 0.00813221 255 0.00813221 0.00813221 255 0.00813221 255 255 0.0103512 255 0.0103512 0.0103512 255 128.01 255 0.0103512 0.0103512 255 0.0103512 255 0.0103512 0.0103512 255
255 0.0101945 0.0101945 0.0101945 255 255 128.01 0.0101945 0.0101945 255 255 0.0101945 0.0101945 0.0101945 255 255 255 0.0115043 0.0115043 255 0.0115043 255 128.012 0.0115043 255 0.0115043 255 0.0115043 0.0115043 255 0.0115043 255 255 0.0130829 255 0.0130829 0.0130829 255 128.013 255 0.0130829 0.0130829 255 0.0130829 255 0.0130829 0.0130829 255

Просмотреть файл

@ -20,8 +20,8 @@ MinibatchSourcePtr CreateCifarMinibatchSource(size_t epochSize)
Dictionary cropTransformConfig;
cropTransformConfig[L"type"] = L"Crop";
cropTransformConfig[L"cropType"] = L"Random";
cropTransformConfig[L"cropRatio"] = L"0.8";
cropTransformConfig[L"cropType"] = L"RandomSide";
cropTransformConfig[L"sideRatio"] = L"0.8";
cropTransformConfig[L"jitterType"] = L"uniRatio";
Dictionary scaleTransformConfig;

Просмотреть файл

@ -261,7 +261,7 @@
" transforms = []\n",
" if train:\n",
" transforms += [\n",
" ImageDeserializer.crop(crop_type='Random', ratio=0.8, jitter_type='uniRatio') # train uses jitter\n",
" ImageDeserializer.crop(crop_type='randomside', side_ratio=0.8) # train uses data augmentation (translation only)\n",
" ]\n",
" transforms += [\n",
" ImageDeserializer.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'),\n",

Просмотреть файл

@ -65,7 +65,7 @@ TrainConvNet = {
file = "$dataDir$/cifar-10-batches-py/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } :
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Transpose" }
)}

Просмотреть файл

@ -67,7 +67,7 @@ TrainConvNet = {
file = "$dataDir$/cifar-10-batches-py/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } :
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Transpose" }
)}

Просмотреть файл

@ -69,7 +69,7 @@ TrainConvNet = {
file = "$dataDir$/cifar-10-batches-py/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } :
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Transpose" }
)}

Просмотреть файл

@ -73,7 +73,7 @@ TrainConvNet = {
file = "$dataDir$/cifar-10-batches-py/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } :
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Transpose" }
)}

Просмотреть файл

@ -96,7 +96,7 @@ TrainConvNet = {
file = "$dataDir$/cifar-10-batches-py/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } :
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Transpose" }
)}

Просмотреть файл

@ -102,7 +102,7 @@ TrainConvNet = {
file = "$dataDir$/cifar-10-batches-py/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } :
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Transpose" }
)}

Просмотреть файл

@ -93,7 +93,7 @@ TrainConvNet = {
file = "$dataDir$/cifar-10-batches-py/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } :
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Transpose" }
)}

Просмотреть файл

@ -115,7 +115,7 @@ TrainConvNet = {
file = "$dataDir$/cifar-10-batches-py/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } :
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Transpose" }
)}

Просмотреть файл

@ -372,33 +372,55 @@ class ImageDeserializer(Deserializer):
self.input[node] = dict(labelDim=num_classes) # reader distinguishes labels from features by calling this 'labelDim'
@staticmethod
def crop(crop_type='center', ratio=1.0, jitter_type='uniRatio'):
def crop(crop_type='center', crop_size=0, side_ratio=0.0, area_ratio=0.0, aspect_ratio=1.0, jitter_type='none'):
'''
Crop transform that can be used to pass to `map_features`
Args:
crop_type (str, default 'center'): 'center' or 'random'. 'random'
is usually used during training while 'center' is usually for testing.
crop_type (str, default 'center'): 'center', 'randomside', 'randomarea',
or 'multiview10'. 'randomside' and 'randomarea' are usually used during
training, while 'center' and 'multiview10' are usually used during testing.
Random cropping is a popular data augmentation technique used to improve
generalization of the DNN.
ratio (`float`, default 1.0): crop ratio. It specifies the ratio of
final image dimension, e.g. width , to the size of the random crop
taken from the image. For example, the ratio 224 / 256 = 0.875 means
crop of size 224 will be taken from the image rescaled to 256 (implementation
detail: ImageReader takes the crop and then rescales instead of doing
the other way around). To enable scale jitter (another popular data
augmentation technique), use colon-delimited values like cropRatio=0.875:0.466
which means 224 crop will be taken from images randomly scaled to have
size in [256, 480] range.
jitter_type (str, default 'uniRatio'): crop scale jitter type, possible
values are 'None', 'UniRatio'. 'uniRatio' means uniform distributed jitter
scale between the minimum and maximum cropRatio values.
crop_size (`int`, default 0): crop size in pixels. Ignored if set to 0.
When crop_size is non-zero, for example, crop_size=256, it means a cropping
window of size 256x256 pixels will be taken. If one wants to crop with
non-square shapes, specifying crop_size=256:224 will crop 256x224 (width x height)
pixels. `When crop_size is specified, side_ratio, area_ratio and aspect_ratio
will be ignored.`
side_ratio (`float`, default 0.0): It specifies the ratio of final image
side (width or height) with respect to the original image. Ignored if set
to 0.0. Otherwise, must be set within `(0,1]`. For example, with an input
image size of 640x480, side_ratio of 0.5 means we crop a square region
(if aspect_ratio is 1.0) of the input image, whose width and height are
equal to 0.5*min(640, 480) = 240. To enable scale jitter (a popular data
augmentation technique), use colon-delimited values like side_ratio=0.5:0.75,
which means the crop will have size between 240 (0.5*min(640, 480)) and 360
(0.75*min(640, 480)).
area_ratio (`float`, default 0.0): It specifies the area ratio of final image
with respect to the original image. Ignored if set to 0.0. Otherwise, must be
set within `(0,1]`. For example, for an input image size of 200x150 pixels,
the area is 30,000. If area_ratio is 0.3333, we crop a square region (if
aspect_ratio is 1.0) with width and height equal to sqrt(30,000*0.3333)=100.
To enable scale jitter, use colon-delimited values such as area_ratio=0.3333:0.8,
which means the crop will have size between 100 (sqrt(30,000*0.3333)) and
155 (sqrt(30,000*0.8)).
aspect_ratio (`float`, default 1.0): It specifies the aspect ratio (width/height
or height/width) of the crop window. Must be set within `(0,1]`. For example,
if due to side_ratio the crop size is 240x240, an aspect_ratio of 0.64 will
change the window size to non-square: 192x300 or 300x192, each having 50%
chance. Note the area of the crop window does not change. To enable aspect
ratio jitter, use colon-delimited values such as aspect_ratio=0.64:1.0, which means
the crop will have size between 192x300 (or equally likely 300x192) and 240x240.
jitter_type (str, default 'none'): crop scale jitter type, possible
values are 'none' and 'uniratio'. 'uniratio' means uniform distributed jitter
scale between the minimum and maximum ratio values.
Returns:
dict describing the crop transform
'''
return dict(type='Crop', cropType=crop_type, cropRatio=ratio,
jitterType=jitter_type)
return dict(type='Crop', cropType=crop_type, cropSize=crop_size, sideRatio=side_ratio,
areaRatio=area_ratio, aspectRatio=aspect_ratio, jitterType=jitter_type)
@staticmethod
def scale(width, height, channels, interpolations='linear', scale_mode="fill", pad_value=-1):
@ -438,8 +460,48 @@ class ImageDeserializer(Deserializer):
'''
return dict(type='Mean', meanFile=filename)
# TODO color transpose
@staticmethod
def color(brightness_radius=0.0, contrast_radius=0.0, saturation_radius=0.0):
'''
Color transform that can be used to pass to `map_features` for data augmentation.
Args:
brightness_radius (float, default 0.0): Radius for brightness change. Must be
set within [0.0, 1.0]. For example, assume brightness_radius = 0.2, a random
number `x` is uniformly drawn from [-0.2, 0.2], and every pixel's value is
added by `x*meanVal`, where meanVal is the mean of the image pixel intensity
combining all color channels.
contrast_radius (float, default 0.0): Radius for contrast change. Must be
set within [0.0, 1.0]. For example, assume contrast_radius = 0.2, a random
number `x` is uniformly drawn from [-0.2, 0.2], and every pixel's value is
multiplied by `1+x`.
saturation_radius (float, default 0.0): Radius for saturation change. Only for
color images and must be set within [0.0, 1.0]. For example, assume
saturation_radius = 0.2, a random number `x` is uniformly drawn from [-0.2, 0.2],
and every pixel's saturation is multiplied by `1+x`.
Returns:
dict describing the color transform (type 'Color', with the keys
brightnessRadius, contrastRadius and saturationRadius)
'''
return dict(type='Color', brightnessRadius=brightness_radius,
contrastRadius=contrast_radius, saturationRadius=saturation_radius)
#@staticmethod
#def intensity(intensity_stddev, intensity_file):
# '''
# Intensity transform that can be used to pass to `map_features` for data augmentation.
# Intensity jittering based on PCA transform as described in original `AlexNet paper
# <http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf>`_
# Currently uses precomputed values from
# https://github.com/facebook/fb.resnet.torch/blob/master/datasets/imagenet.lua
# Args:
# intensity_stddev (float): intensity standard deviation.
# intensity_file (str): intensity file.
# Returns:
# dict describing the intensity transform '''
# return dict(type='Intensity', intensityStdDev=intensity_stddev, intensityFile=intensity_file)
class CTFDeserializer(Deserializer):
'''

Просмотреть файл

@ -83,8 +83,8 @@ def test_image():
image = ImageDeserializer(map_file)
image.map_features(feature_name,
[ImageDeserializer.crop(crop_type='Random', ratio=0.8,
jitter_type='uniRatio'),
[ImageDeserializer.crop(crop_type='randomside', side_ratio=0.8,
jitter_type='uniratio'),
ImageDeserializer.scale(width=image_width, height=image_height,
channels=num_channels, interpolations='linear'),
ImageDeserializer.mean(mean_file)])
@ -109,9 +109,10 @@ def test_image():
assert t0['type'] == 'Crop'
assert t1['type'] == 'Scale'
assert t2['type'] == 'Mean'
t0['cropType'] == 'Random'
t0['cropRatio'] == 0.8
t0['jitterType'] == 'uniRatio'
t0['cropType'] == 'randomside'
t0['sideRatio'] == 0.8
t0['aspectRatio'] == 0.9
t0['jitterType'] == 'uniratio'
t1['width'] == image_width
t1['height'] == image_height
t1['channels'] == num_channels