Mirror of https://github.com/microsoft/caffe.git
Merge pull request #3613 from longjon/py-coord-map
Python/net spec coordinate map and crop offset computation
Commit 74cc4970c8
@@ -0,0 +1,185 @@
"""
Determine spatial relationships between layers to relate their coordinates.
Coordinates are mapped from input-to-output (forward), but can
be mapped output-to-input (backward) by the inverse mapping too.
This helps crop and align feature maps among other uses.
"""

from __future__ import division
import numpy as np
from caffe import layers as L

PASS_THROUGH_LAYERS = ['AbsVal', 'BatchNorm', 'Bias', 'BNLL', 'Dropout',
                       'Eltwise', 'ELU', 'Log', 'LRN', 'Exp', 'MVN', 'Power',
                       'ReLU', 'PReLU', 'Scale', 'Sigmoid', 'Split', 'TanH',
                       'Threshold']


def conv_params(fn):
    """
    Extract the spatial parameters that determine the coordinate mapping:
    kernel size, stride, padding, and dilation.

    Implementation detail: Convolution, Deconvolution, and Im2col layers
    define these in the convolution_param message, while Pooling has its
    own fields in pooling_param. This method deals with these details to
    extract canonical parameters.
    """
    params = fn.params.get('convolution_param', fn.params)
    axis = params.get('axis', 1)
    ks = np.array(params['kernel_size'], ndmin=1)
    dilation = np.array(params.get('dilation', 1), ndmin=1)
    assert len({'pad_h', 'pad_w', 'kernel_h', 'kernel_w', 'stride_h',
                'stride_w'} & set(fn.params)) == 0, \
        'cropping does not support legacy _h/_w params'
    return (axis, np.array(params.get('stride', 1), ndmin=1),
            (ks - 1) * dilation + 1,
            np.array(params.get('pad', 0), ndmin=1))


def crop_params(fn):
    """
    Extract the crop layer parameters with defaults.
    """
    params = fn.params.get('crop_param', fn.params)
    axis = params.get('axis', 2)  # default to spatial crop for N, C, H, W
    offset = np.array(params.get('offset', 0), ndmin=1)
    return (axis, offset)


class UndefinedMapException(Exception):
    """
    Exception raised for layers that do not have a defined coordinate mapping.
    """
    pass


def coord_map(fn):
    """
    Define the coordinate mapping by its
    - axis
    - scale: output coord[i * scale] <- input_coord[i]
    - shift: output coord[i + shift] <- input_coord[i]
    s.t. the identity mapping, as for pointwise layers like ReLU, is defined by
    (None, 1, 0) since it is independent of axis and does not transform coords.
    """
    if fn.type_name in ['Convolution', 'Pooling', 'Im2col']:
        axis, stride, ks, pad = conv_params(fn)
        return axis, 1 / stride, (pad - (ks - 1) / 2) / stride
    elif fn.type_name == 'Deconvolution':
        axis, stride, ks, pad = conv_params(fn)
        return axis, stride, (ks - 1) / 2 - pad
    elif fn.type_name in PASS_THROUGH_LAYERS:
        return None, 1, 0
    elif fn.type_name == 'Crop':
        axis, offset = crop_params(fn)
        axis -= 1  # -1 for last non-coordinate dim.
        return axis, 1, - offset
    else:
        raise UndefinedMapException


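# Derivation of the Convolution/Pooling case above: output index j covers the
# input window [j * stride - pad, j * stride - pad + ks - 1], whose center is
# j * stride - pad + (ks - 1) / 2; solving for j, input coordinate i maps to
# output coordinate (i + pad - (ks - 1) / 2) / stride, i.e. scale 1 / stride
# and shift (pad - (ks - 1) / 2) / stride. Deconvolution returns the inverse.
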
class AxisMismatchException(Exception):
    """
    Exception raised for mappings with incompatible axes.
    """
    pass


def compose(base_map, next_map):
    """
    Compose a base coord map with scale a1, shift b1 with a further coord map
    with scale a2, shift b2. The scales multiply and the further shift, b2,
    is scaled by base coord scale a1.
    """
    ax1, a1, b1 = base_map
    ax2, a2, b2 = next_map
    if ax1 is None:
        ax = ax2
    elif ax2 is None or ax1 == ax2:
        ax = ax1
    else:
        raise AxisMismatchException
    return ax, a1 * a2, a1 * b2 + b1


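# The algebra behind compose(): with base(x) = a1 * x + b1 and
# next(x) = a2 * x + b2, base(next(x)) = a1 * (a2 * x + b2) + b1
# = (a1 * a2) * x + (a1 * b2 + b1), matching the (scale, shift) returned above.
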
def inverse(coord_map):
    """
    Invert a coord map by de-scaling and un-shifting;
    this gives the backward mapping for the gradient.
    """
    ax, a, b = coord_map
    return ax, 1 / a, -b / a


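# Likewise for inverse(): solving y = a * x + b for x gives x = y / a - b / a,
# hence the (1 / a, -b / a) returned above.
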
def coord_map_from_to(top_from, top_to):
    """
    Determine the coordinate mapping between a top (from) and a top (to).
    Walk the graph to find a common ancestor while composing the coord maps for
    from and to until they meet. As a last step the from map is inverted.
    """
    # We need to find a common ancestor of top_from and top_to.
    # We'll assume that all ancestors are equivalent here; otherwise the graph
    # is in an inconsistent state (which we could improve this to check for).
    # For now use a brute-force algorithm.

    def collect_bottoms(top):
        """
        Collect the bottoms to walk for the coordinate mapping.
        The general rule is that all the bottoms of a layer can be mapped, as
        most layers have the same coordinate mapping for each bottom.
        Crop layer is a notable exception. Only the first/cropped bottom is
        mappable; the second/dimensions bottom is excluded from the walk.
        """
        bottoms = top.fn.inputs
        if top.fn.type_name == 'Crop':
            bottoms = bottoms[:1]
        return bottoms

    # walk back from top_from, keeping the coord map as we go
    from_maps = {top_from: (None, 1, 0)}
    frontier = {top_from}
    while frontier:
        top = frontier.pop()
        try:
            bottoms = collect_bottoms(top)
            for bottom in bottoms:
                from_maps[bottom] = compose(from_maps[top], coord_map(top.fn))
                frontier.add(bottom)
        except UndefinedMapException:
            pass

    # now walk back from top_to until we hit a common blob
    to_maps = {top_to: (None, 1, 0)}
    frontier = {top_to}
    while frontier:
        top = frontier.pop()
        if top in from_maps:
            return compose(to_maps[top], inverse(from_maps[top]))
        try:
            bottoms = collect_bottoms(top)
            for bottom in bottoms:
                to_maps[bottom] = compose(to_maps[top], coord_map(top.fn))
                frontier.add(bottom)
        except UndefinedMapException:
            continue

    # if we got here, we did not find a blob in common
    raise RuntimeError('Could not compute map between tops; are they '
                       'connected by spatial layers?')


def crop(top_from, top_to):
    """
    Define a Crop layer to crop a top (from) to another top (to) by
    determining the coordinate mapping between the two and net spec'ing
    the axis and shift parameters of the crop.
    """
    ax, a, b = coord_map_from_to(top_from, top_to)
    assert (a == 1).all(), 'scale mismatch on crop (a = {})'.format(a)
    assert (b <= 0).all(), 'cannot crop negative offset (b = {})'.format(b)
    assert (np.round(b) == b).all(), 'cannot crop noninteger offset ' \
        '(b = {})'.format(b)
    return L.Crop(top_from, top_to,
                  crop_param=dict(axis=ax + 1,  # +1 for first cropping dim.
                                  offset=list(-np.round(b).astype(int))))
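For orientation, a minimal usage sketch (illustrative, not part of the patch; it assumes pycaffe is built and importable, and the blob names are arbitrary). It mirrors the conv-pool-deconv pattern of coord_net_spec() in the test file below: compute the (axis, scale, shift) map from the upsampled top back to the reference top, then spec the aligning Crop layer with crop().

import caffe
from caffe import layers as L, params as P
from caffe.coord_map import coord_map_from_to, crop

n = caffe.NetSpec()
n.data = L.Input(shape=dict(dim=[1, 3, 100, 100]))
n.conv = L.Convolution(n.data, num_output=10, kernel_size=3, stride=1, pad=0)
n.pool = L.Pooling(n.conv, pool=P.Pooling.MAX, kernel_size=2, stride=2, pad=0)
n.deconv = L.Deconvolution(n.pool, num_output=10, kernel_size=4, stride=2, pad=0)

# (axis, scale, shift) mapping deconv coordinates onto data coordinates:
# axis 1, unit scale, zero shift for this particular net
print(coord_map_from_to(n.deconv, n.data))
# spec a Crop layer that aligns the upsampled top with the reference top
n.score = crop(n.deconv, n.data)
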
@@ -0,0 +1,192 @@
import unittest

import numpy as np
import random

import caffe
from caffe import layers as L
from caffe import params as P
from caffe.coord_map import coord_map_from_to, crop


def coord_net_spec(ks=3, stride=1, pad=0, pool=2, dstride=2, dpad=0):
    """
    Define net spec for simple conv-pool-deconv pattern common to all
    coordinate mapping tests.
    """
    n = caffe.NetSpec()
    n.data = L.Input(shape=dict(dim=[2, 1, 100, 100]))
    n.aux = L.Input(shape=dict(dim=[2, 1, 20, 20]))
    n.conv = L.Convolution(
        n.data, num_output=10, kernel_size=ks, stride=stride, pad=pad)
    n.pool = L.Pooling(
        n.conv, pool=P.Pooling.MAX, kernel_size=pool, stride=pool, pad=0)
    # for upsampling kernel size is 2x stride
    try:
        deconv_ks = [s * 2 for s in dstride]
    except TypeError:
        deconv_ks = dstride * 2
    n.deconv = L.Deconvolution(
        n.pool, num_output=10, kernel_size=deconv_ks, stride=dstride, pad=dpad)
    return n


class TestCoordMap(unittest.TestCase):
    def setUp(self):
        pass

    def test_conv_pool_deconv(self):
        """
        Map through conv, pool, and deconv.
        """
        n = coord_net_spec()
        # identity for 2x pool, 2x deconv
        ax, a, b = coord_map_from_to(n.deconv, n.data)
        self.assertEquals(ax, 1)
        self.assertEquals(a, 1)
        self.assertEquals(b, 0)
        # shift-by-one for 4x pool, 4x deconv
        n = coord_net_spec(pool=4, dstride=4)
        ax, a, b = coord_map_from_to(n.deconv, n.data)
        self.assertEquals(ax, 1)
        self.assertEquals(a, 1)
        self.assertEquals(b, -1)

    def test_pass(self):
        """
        A pass-through layer (ReLU) and conv (1x1, stride 1, pad 0)
        both do identity mapping.
        """
        n = coord_net_spec()
        ax, a, b = coord_map_from_to(n.deconv, n.data)
        n.relu = L.ReLU(n.deconv)
        n.conv1x1 = L.Convolution(
            n.relu, num_output=10, kernel_size=1, stride=1, pad=0)
        for top in [n.relu, n.conv1x1]:
            ax_pass, a_pass, b_pass = coord_map_from_to(top, n.data)
            self.assertEquals(ax, ax_pass)
            self.assertEquals(a, a_pass)
            self.assertEquals(b, b_pass)

    def test_padding(self):
        """
        Padding conv adds offset while padding deconv subtracts offset.
        """
        n = coord_net_spec()
        ax, a, b = coord_map_from_to(n.deconv, n.data)
        pad = random.randint(0, 10)
        # conv padding
        n = coord_net_spec(pad=pad)
        _, a_pad, b_pad = coord_map_from_to(n.deconv, n.data)
        self.assertEquals(a, a_pad)
        self.assertEquals(b - pad, b_pad)
        # deconv padding
        n = coord_net_spec(dpad=pad)
        _, a_pad, b_pad = coord_map_from_to(n.deconv, n.data)
        self.assertEquals(a, a_pad)
        self.assertEquals(b + pad, b_pad)
        # pad both to cancel out
        n = coord_net_spec(pad=pad, dpad=pad)
        _, a_pad, b_pad = coord_map_from_to(n.deconv, n.data)
        self.assertEquals(a, a_pad)
        self.assertEquals(b, b_pad)

    def test_multi_conv(self):
        """
        Multiple bottoms/tops of a layer are identically mapped.
        """
        n = coord_net_spec()
        # multi bottom/top
        n.conv_data, n.conv_aux = L.Convolution(
            n.data, n.aux, ntop=2, num_output=10, kernel_size=5, stride=2,
            pad=0)
        ax1, a1, b1 = coord_map_from_to(n.conv_data, n.data)
        ax2, a2, b2 = coord_map_from_to(n.conv_aux, n.aux)
        self.assertEquals(ax1, ax2)
        self.assertEquals(a1, a2)
        self.assertEquals(b1, b2)

    def test_rect(self):
        """
        Anisotropic mapping is equivalent to its isotropic parts.
        """
        n3x3 = coord_net_spec(ks=3, stride=1, pad=0)
        n5x5 = coord_net_spec(ks=5, stride=2, pad=10)
        n3x5 = coord_net_spec(ks=[3, 5], stride=[1, 2], pad=[0, 10])
        ax_3x3, a_3x3, b_3x3 = coord_map_from_to(n3x3.deconv, n3x3.data)
        ax_5x5, a_5x5, b_5x5 = coord_map_from_to(n5x5.deconv, n5x5.data)
        ax_3x5, a_3x5, b_3x5 = coord_map_from_to(n3x5.deconv, n3x5.data)
        self.assertTrue(ax_3x3 == ax_5x5 == ax_3x5)
        self.assertEquals(a_3x3, a_3x5[0])
        self.assertEquals(b_3x3, b_3x5[0])
        self.assertEquals(a_5x5, a_3x5[1])
        self.assertEquals(b_5x5, b_3x5[1])

    def test_nd_conv(self):
        """
        ND conv maps the same way in more dimensions.
        """
        n = caffe.NetSpec()
        # define data with 3 spatial dimensions, otherwise the same net
        n.data = L.Input(shape=dict(dim=[2, 3, 100, 100, 100]))
        n.conv = L.Convolution(
            n.data, num_output=10, kernel_size=[3, 3, 3], stride=[1, 1, 1],
            pad=[0, 1, 2])
        n.pool = L.Pooling(
            n.conv, pool=P.Pooling.MAX, kernel_size=2, stride=2, pad=0)
        n.deconv = L.Deconvolution(
            n.pool, num_output=10, kernel_size=4, stride=2, pad=0)
        ax, a, b = coord_map_from_to(n.deconv, n.data)
        self.assertEquals(ax, 1)
        self.assertTrue(len(a) == len(b))
        self.assertTrue(np.all(a == 1))
        self.assertEquals(b[0] - 1, b[1])
        self.assertEquals(b[1] - 1, b[2])

    def test_crop_of_crop(self):
        """
        Map coordinates through Crop layer:
        crop an already-cropped output to the input and check change in offset.
        """
        n = coord_net_spec()
        offset = random.randint(0, 10)
        ax, a, b = coord_map_from_to(n.deconv, n.data)
        n.crop = L.Crop(n.deconv, n.data, axis=2, offset=offset)
        ax_crop, a_crop, b_crop = coord_map_from_to(n.crop, n.data)
        self.assertEquals(ax, ax_crop)
        self.assertEquals(a, a_crop)
        self.assertEquals(b + offset, b_crop)

    def test_crop_helper(self):
        """
        Define Crop layer by crop().
        """
        n = coord_net_spec()
        crop(n.deconv, n.data)

    def test_catch_unconnected(self):
        """
        Catch mapping spatially unconnected tops.
        """
        n = coord_net_spec()
        n.ip = L.InnerProduct(n.deconv, num_output=10)
        with self.assertRaises(RuntimeError):
            coord_map_from_to(n.ip, n.data)

    def test_catch_scale_mismatch(self):
        """
        Catch incompatible scales, such as when the top to be cropped
        is mapped to a differently strided reference top.
        """
        n = coord_net_spec(pool=3, dstride=2)  # pool 3x but deconv 2x
        with self.assertRaises(AssertionError):
            crop(n.deconv, n.data)

    def test_catch_negative_crop(self):
        """
        Catch impossible offsets, such as when the top to be cropped
        is mapped to a larger reference top.
        """
        n = coord_net_spec(dpad=10)  # make output smaller than input
        with self.assertRaises(AssertionError):
            crop(n.deconv, n.data)
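
As a sanity check on test_conv_pool_deconv above, the per-layer (axis, scale, shift) tuples for the default coord_net_spec() pattern can be composed by hand with the module's own helpers (a sketch assuming pycaffe is importable; the tuples are read off the formulas in coord_map()):

from caffe.coord_map import compose, inverse

# conv 3x3, stride 1, pad 0: (1 / stride, (pad - (ks - 1) / 2) / stride)
conv_map = (1, 1.0, -1.0)
# max pool 2x2, stride 2, pad 0
pool_map = (1, 0.5, -0.25)
# deconv kernel 4, stride 2, pad 0: (stride, (ks - 1) / 2 - pad)
deconv_map = (1, 2.0, 1.5)

# map data coordinates forward through conv, then pool, then deconv
data_to_deconv = compose(compose(deconv_map, pool_map), conv_map)
print(data_to_deconv)           # (1, 1.0, 0.0): the identity the test asserts
print(inverse(data_to_deconv))  # same map in the deconv-to-data direction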