
# -*- coding: utf-8 -*-
# File: conv2d.py
from ..compat import tfv1 as tf  # importing tfv1 directly should be avoided in model code
from ..tfutils.common import get_tf_version_tuple
from ..utils.argtools import get_data_format, shape2d, shape4d, log_once
from .common import VariableHolder, layer_register
from .tflayer import convert_to_tflayer_args, rename_get_variable

__all__ = ['Conv2D', 'Deconv2D', 'Conv2DTranspose', 'GroupedConv2D', 'ResizeImages', 'SeparableConv2D']


@layer_register(log_shape=True)
@convert_to_tflayer_args(
args_names=['filters', 'kernel_size'],
name_mapping={
'out_channel': 'filters',
'kernel_shape': 'kernel_size',
'stride': 'strides',
})
def Conv2D(
inputs,
filters,
kernel_size,
strides=(1, 1),
padding='same',
data_format='channels_last',
dilation_rate=(1, 1),
activation=None,
use_bias=True,
kernel_initializer=None,
bias_initializer=tf.zeros_initializer(),
kernel_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
split=1):
"""
A wrapper around `tf.layers.Conv2D`.
Some differences to maintain backward-compatibility:
1. Default kernel initializer is variance_scaling_initializer(2.0).
2. Default padding is 'same'.
    3. Support the 'split' argument to do grouped convolution. Note that this is
       not efficient without native CUDNN support (see the fallback below).
Variable Names:
* ``W``: weights
* ``b``: bias
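
    Example:
        A minimal usage sketch (shapes and names are illustrative, not from this repo):

        .. code-block:: python

            x = tf.placeholder(tf.float32, [None, 32, 32, 64])
            # 4 groups: each maps 16 input channels to 32 output channels
            y = Conv2D('conv1', x, filters=128, kernel_size=3, split=4)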
"""
if kernel_initializer is None:
if get_tf_version_tuple() <= (1, 12):
kernel_initializer = tf.contrib.layers.variance_scaling_initializer(2.0)
else:
kernel_initializer = tf.keras.initializers.VarianceScaling(2.0, distribution='untruncated_normal')
dilation_rate = shape2d(dilation_rate)
if split == 1 and dilation_rate == [1, 1]:
# tf.layers.Conv2D has bugs with dilations (https://github.com/tensorflow/tensorflow/issues/26797)
with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
layer = tf.layers.Conv2D(
filters,
kernel_size,
strides=strides,
padding=padding,
data_format=data_format,
dilation_rate=dilation_rate,
activation=activation,
use_bias=use_bias,
kernel_initializer=kernel_initializer,
bias_initializer=bias_initializer,
kernel_regularizer=kernel_regularizer,
bias_regularizer=bias_regularizer,
activity_regularizer=activity_regularizer,
_reuse=tf.get_variable_scope().reuse)
ret = layer.apply(inputs, scope=tf.get_variable_scope())
ret = tf.identity(ret, name='output')
ret.variables = VariableHolder(W=layer.kernel)
if use_bias:
ret.variables.b = layer.bias
        # Compute the FLOPs (multiply-accumulates) of the conv: each output
        # position costs in_channel * out_channel * kh * kw MACs; dividing the
        # input H*W (applied below) by the strides gives the output size.
in_shape = inputs.get_shape().as_list()
channel_axis = 3 if data_format == 'channels_last' else 1
h_dim = 1 if data_format == 'channels_last' else 2
w_dim = h_dim + 1
in_channel = in_shape[channel_axis]
out_channel = filters
kernel_shape = shape2d(kernel_size)
stride = shape4d(strides, data_format=data_format)
flops = 1.0 * in_channel * out_channel * \
kernel_shape[0] * kernel_shape[1] / stride[h_dim] / stride[w_dim]
if in_shape[h_dim] is not None and in_shape[h_dim] > 0:
flops *= in_shape[h_dim] * in_shape[w_dim]
ret.info = VariableHolder(flops=flops)
else:
# group conv implementation
data_format = get_data_format(data_format, keras_mode=False)
in_shape = inputs.get_shape().as_list()
channel_axis = -1 if data_format == 'NHWC' else 1
in_channel = in_shape[channel_axis]
assert in_channel is not None, "[Conv2D] Input cannot have unknown channel!"
assert in_channel % split == 0
        assert kernel_regularizer is None and bias_regularizer is None and activity_regularizer is None, \
            "Regularizers are not supported by the grouped conv implementation!"
out_channel = filters
assert out_channel % split == 0
assert dilation_rate == [1, 1] or get_tf_version_tuple() >= (1, 5), 'TF>=1.5 required for dilated conv.'
kernel_shape = shape2d(kernel_size)
        filter_shape = kernel_shape + [in_channel // split, out_channel]  # integer division: per-group input channels
stride = shape4d(strides, data_format=data_format)
kwargs = dict(data_format=data_format)
if get_tf_version_tuple() >= (1, 5):
kwargs['dilations'] = shape4d(dilation_rate, data_format=data_format)
W = tf.get_variable(
'W', filter_shape, initializer=kernel_initializer)
if use_bias:
b = tf.get_variable('b', [out_channel], initializer=bias_initializer)
conv = None
if get_tf_version_tuple() >= (1, 13):
try:
conv = tf.nn.conv2d(inputs, W, stride, padding.upper(), **kwargs)
except ValueError:
conv = None
log_once("CUDNN group convolution support is only available with "
"https://github.com/tensorflow/tensorflow/pull/25818 . "
"Will fall back to a loop-based slow implementation instead!", 'warn')
        if conv is None:
            # Fall back: split the input and the kernel into `split` groups
            # along the channel axis, convolve each group independently,
            # then concatenate the results.
            inputs = tf.split(inputs, split, channel_axis)
            kernels = tf.split(W, split, 3)
            outputs = [tf.nn.conv2d(i, k, stride, padding.upper(), **kwargs)
                       for i, k in zip(inputs, kernels)]
            conv = tf.concat(outputs, channel_axis)
if activation is None:
activation = tf.identity
ret = activation(tf.nn.bias_add(conv, b, data_format=data_format) if use_bias else conv, name='output')
ret.variables = VariableHolder(W=W)
if use_bias:
ret.variables.b = b
h_dim = 1 if data_format == 'NHWC' else 2
w_dim = h_dim + 1
flops = 1.0 * in_channel * out_channel * \
kernel_shape[0] * kernel_shape[1] / stride[h_dim] / stride[w_dim] / split
if in_shape[h_dim] is not None and in_shape[h_dim] > 0:
flops *= in_shape[h_dim] * in_shape[w_dim]
ret.info = VariableHolder(flops=flops)
return ret


@layer_register(log_shape=True)
@convert_to_tflayer_args(
args_names=['filters', 'kernel_size', 'strides'],
name_mapping={
'out_channel': 'filters',
'kernel_shape': 'kernel_size',
'stride': 'strides',
})
def Conv2DTranspose(
inputs,
filters,
kernel_size,
strides=(1, 1),
padding='same',
data_format='channels_last',
activation=None,
use_bias=True,
kernel_initializer=None,
bias_initializer=tf.zeros_initializer(),
kernel_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
dyn_hw=None):
"""
A wrapper around `tf.layers.Conv2DTranspose`.
Some differences to maintain backward-compatibility:
1. Default kernel initializer is variance_scaling_initializer(2.0).
    2. Default padding is 'same'.
Variable Names:
* ``W``: weights
* ``b``: bias
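
    Example:
        A minimal upsampling sketch (shapes are illustrative):

        .. code-block:: python

            x = tf.placeholder(tf.float32, [None, 16, 16, 256])
            # 2x upsampling with 'same' padding: output is [None, 32, 32, 128]
            y = Conv2DTranspose('up1', x, filters=128, kernel_size=4, strides=2)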
"""
if kernel_initializer is None:
if get_tf_version_tuple() <= (1, 12):
kernel_initializer = tf.contrib.layers.variance_scaling_initializer(2.0)
else:
kernel_initializer = tf.keras.initializers.VarianceScaling(2.0, distribution='untruncated_normal')
if get_tf_version_tuple() <= (1, 12):
with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
layer = tf.layers.Conv2DTranspose(
filters,
kernel_size,
strides=strides,
padding=padding,
data_format=data_format,
activation=activation,
use_bias=use_bias,
kernel_initializer=kernel_initializer,
bias_initializer=bias_initializer,
kernel_regularizer=kernel_regularizer,
bias_regularizer=bias_regularizer,
activity_regularizer=activity_regularizer,
_reuse=tf.get_variable_scope().reuse)
ret = layer.apply(inputs, scope=tf.get_variable_scope())
ret = tf.identity(ret, name='output')
ret.variables = VariableHolder(W=layer.kernel)
if use_bias:
ret.variables.b = layer.bias
else:
# Our own implementation, to avoid Keras bugs. https://github.com/tensorflow/tensorflow/issues/25946
assert kernel_regularizer is None and bias_regularizer is None and activity_regularizer is None, \
"Unsupported arguments due to Keras bug in TensorFlow 1.13"
data_format = get_data_format(data_format, keras_mode=False)
shape_dyn = tf.shape(inputs)
strides2d = shape2d(strides)
        channels_in = inputs.shape[1 if data_format == 'NCHW' else 3]
        if data_format == 'NCHW':
out_shape_dyn = tf.stack(
[shape_dyn[0], filters,
shape_dyn[2] * strides2d[0],
shape_dyn[3] * strides2d[1]])
out_shape3_sta = [filters,
None if inputs.shape[2] is None else inputs.shape[2] * strides2d[0],
None if inputs.shape[3] is None else inputs.shape[3] * strides2d[1]]
else:
out_shape_dyn = tf.stack(
[shape_dyn[0],
shape_dyn[1] * strides2d[0],
shape_dyn[2] * strides2d[1],
filters])
out_shape3_sta = [None if inputs.shape[1] is None else inputs.shape[1] * strides2d[0],
None if inputs.shape[2] is None else inputs.shape[2] * strides2d[1],
filters]
kernel_shape = shape2d(kernel_size)
W = tf.get_variable('W', kernel_shape + [filters, channels_in], initializer=kernel_initializer)
if use_bias:
b = tf.get_variable('b', [filters], initializer=bias_initializer)
conv = tf.nn.conv2d_transpose(
inputs, W, out_shape_dyn,
shape4d(strides, data_format=data_format),
padding=padding.upper(),
data_format=data_format)
conv.set_shape(tf.TensorShape([None] + out_shape3_sta))
        if activation is None:
            activation = tf.identity
        ret = activation(tf.nn.bias_add(conv, b, data_format=data_format) if use_bias else conv, name='output')
ret.variables = VariableHolder(W=W)
if use_bias:
ret.variables.b = b
return ret


Deconv2D = Conv2DTranspose


@layer_register(log_shape=True)
def ResizeImages(
images,
size,
method=tf.image.ResizeMethod.BILINEAR,
align_corners=True,
data_format='channels_last'):
"""
Use tf.image.resize_images to resize feature map.
We have to do some transposing first before using image resize if the
data_format is 'channels_first', because resize_images only accept
'channels_last'.
images : tensor representing the feature map to resize
size : 2D int32 tensor of the new shape (h, w)
method : Resize method see tf.image.ResizeMethod
align_corners : Preserving the corner pixels ?
data_format : current data_format of the inputs 'channels_first' or 'channels_last'
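
    Example:
        A minimal sketch of 2x upsampling (shapes are illustrative):

        .. code-block:: python

            x = tf.placeholder(tf.float32, [None, 16, 16, 64])
            y = ResizeImages('resize', x, [32, 32])  # -> [None, 32, 32, 64]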
"""
l = images
    if data_format == 'channels_first':
        l = tf.transpose(l, [0, 2, 3, 1])
    l = tf.image.resize_images(l, size, method, align_corners)
    if data_format == 'channels_first':
        l = tf.transpose(l, [0, 3, 1, 2])
ret = tf.identity(l, name='output')
return ret


@layer_register(log_shape=True)
def GroupedConv2D(x, num_paths, path_ch_out, kernel_shape,
sum_paths=False, padding='SAME', stride=1,
W_init=None, b_init=None, nl=tf.identity,
use_bias=False, data_format='NHWC'):
"""
    Grouped 2D convolution for ResNeXt. Implemented with a depthwise conv2d
    followed by a reshape and a reduce_sum.
Args:
x : 4D tensor of data_format
num_paths : number of groups
path_ch_out : number of ch_out per group
kernel_shape : (h,w) tuple or an int
sum_paths : whether the groups are summed together (if True)
or concatenated (if False (default))
padding, W_init, b_init, nl, use_bias, data_format : see Conv2D
Returns:
tf.Tensor named ``output`` with attribute `variables`.
Variable Names:
* ``W``: weights
* ``b``: bias
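
    Example:
        A minimal ResNeXt-style sketch (shapes are illustrative):

        .. code-block:: python

            x = tf.placeholder(tf.float32, [None, 32, 32, 64])
            # 32 paths, 4 output channels each -> 128 channels concatenated
            l = GroupedConv2D('gconv', x, num_paths=32, path_ch_out=4, kernel_shape=3)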
"""
    data_format = get_data_format(data_format, keras_mode=False)  # 'keras_mode' matches the usage elsewhere in this file
in_shape = x.get_shape().as_list()
ch_dim = 3 if data_format == 'NHWC' else 1
ch_in = in_shape[ch_dim]
assert ch_in % num_paths == 0, "Grouped conv requires n_groups to divide ch_in"
ch_in_per_path = ch_in // num_paths
ch_out = path_ch_out if sum_paths else num_paths * path_ch_out
kernel_shape = shape2d(kernel_shape)
padding = padding.upper()
filter_shape = kernel_shape + [ch_in, path_ch_out]
stride = shape4d(stride, data_format=data_format)
if W_init is None:
W_init = tf.contrib.layers.variance_scaling_initializer(2.0)
if b_init is None:
b_init = tf.constant_initializer()
W = tf.get_variable('W', filter_shape, initializer=W_init)
if use_bias:
b = tf.get_variable('b', [ch_out], initializer=b_init)
x = tf.nn.depthwise_conv2d(x, W, stride, padding, rate=None, data_format=data_format)
out_shape = x.get_shape().as_list()
    # First, reshape to split the depthwise output channels into
    # (num_paths, ch_in_per_path, path_ch_out)
shape_depthwise = [num_paths, ch_in_per_path, path_ch_out]
if data_format == 'NHWC':
x = tf.reshape(x, [-1, out_shape[1], out_shape[2]] + shape_depthwise)
else:
x = tf.reshape(x, [-1] + shape_depthwise + [out_shape[2], out_shape[3]])
    # Then sum over the per-path input channels, leaving the output dim
    # (and the path dim, unless sum_paths collapses it as well)
if sum_paths:
sum_axis = [ch_dim, ch_dim + 1]
else:
sum_axis = ch_dim + 1
x = tf.reduce_sum(x, sum_axis)
# reshape to output shape if path dim did not collapse
if not sum_paths:
if data_format == 'NHWC':
x = tf.reshape(x, [-1, out_shape[1], out_shape[2], ch_out])
else:
x = tf.reshape(x, [-1, ch_out, out_shape[2], out_shape[3]])
ret = nl(tf.nn.bias_add(x, b, data_format=data_format) if use_bias else x, name='output')
ret.variables = VariableHolder(W=W)
if use_bias:
ret.variables.b = b
return ret


@layer_register(log_shape=True)
@convert_to_tflayer_args(
args_names=['filters', 'kernel_size'],
name_mapping={
'out_channel': 'filters',
'kernel_shape': 'kernel_size',
'stride': 'strides',
})
def SeparableConv2D(
inputs,
filters,
kernel_size,
strides=(1,1),
padding='same',
data_format='channels_last',
dilation_rate=(1,1),
depth_multiplier=1,
activation=None,
use_bias=True,
        depthwise_initializer=None,
        pointwise_initializer=None,
bias_initializer=tf.zeros_initializer(),
depthwise_regularizer=None,
pointwise_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
depthwise_constraint=None,
pointwise_constraint=None,
bias_constraint=None,
trainable=True,
name=None,
reuse=None):
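    """
    A wrapper around `tf.layers.SeparableConv2D` (a depthwise conv followed by
    a 1x1 pointwise conv), with the same 'same'-padding and variance-scaling
    defaults as Conv2D.
    Variable Names:
    * ``Wd``: depthwise weights
    * ``Wp``: pointwise weights
    * ``b``: bias

    Example:
        A minimal sketch (shapes are illustrative):

        .. code-block:: python

            x = tf.placeholder(tf.float32, [None, 32, 32, 64])
            y = SeparableConv2D('sepconv', x, filters=128, kernel_size=3)
    """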
    # Choose the default initializer per TF version, mirroring Conv2D above
    # (tf.contrib does not exist in newer TF releases).
    if depthwise_initializer is None or pointwise_initializer is None:
        if get_tf_version_tuple() <= (1, 12):
            default_init = tf.contrib.layers.variance_scaling_initializer(2.0)
        else:
            default_init = tf.keras.initializers.VarianceScaling(2.0, distribution='untruncated_normal')
        if depthwise_initializer is None:
            depthwise_initializer = default_init
        if pointwise_initializer is None:
            pointwise_initializer = default_init
    with rename_get_variable({'depthwise_kernel': 'Wd', 'pointwise_kernel': 'Wp', 'bias': 'b'}):
layer = tf.layers.SeparableConv2D(
filters,
kernel_size,
strides=strides,
padding=padding,
data_format=data_format,
dilation_rate=dilation_rate,
activation=activation,
use_bias=use_bias,
depth_multiplier=depth_multiplier,
depthwise_initializer=depthwise_initializer,
pointwise_initializer=pointwise_initializer,
bias_initializer=bias_initializer,
activity_regularizer=activity_regularizer,
depthwise_regularizer=depthwise_regularizer,
pointwise_regularizer=pointwise_regularizer,
bias_regularizer=bias_regularizer)
ret = layer.apply(inputs, scope=tf.get_variable_scope())
ret = tf.identity(ret, name='output')
ret.variables = VariableHolder(Wd=layer.depthwise_kernel, Wp=layer.pointwise_kernel)
if use_bias:
ret.variables.b = layer.bias
    # Compute the FLOPs of the depthwise and pointwise convs separately;
    # their sum is reported below.
in_shape = inputs.get_shape().as_list()
channel_axis = 3 if data_format == 'channels_last' else 1
h_dim = 1 if data_format == 'channels_last' else 2
w_dim = h_dim + 1
in_channel = in_shape[channel_axis]
out_channel = filters
kernel_shape = shape2d(kernel_size)
stride = shape4d(strides, data_format=data_format)
flops = 1.0 * in_channel * depth_multiplier * \
kernel_shape[0] * kernel_shape[1] / stride[h_dim] / stride[w_dim]
    # the pointwise conv operates on the (strided) depthwise output,
    # so the stride carries over
pt_flops = 1.0 * (in_channel * depth_multiplier) * out_channel \
/ stride[h_dim] / stride[w_dim]
if in_shape[h_dim] is not None and in_shape[h_dim] > 0:
H_times_W = in_shape[h_dim] * in_shape[w_dim]
flops *= H_times_W
pt_flops *= H_times_W
    ret.info = VariableHolder(flops=flops + pt_flops)
return ret