зеркало из https://github.com/microsoft/caffe.git
Merge pull request #10 from sergeyk/selective_search_proposals2
Efficient processing of images, with option to use selective search window proposals
This commit is contained in:
Коммит
58a16e5b9e
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,324 @@
|
|||
"""
|
||||
Classify a number of images at once, optionally using the selective
|
||||
search window proposal method.
|
||||
|
||||
This implementation follows
|
||||
Ross Girshick, Jeff Donahue, Trevor Darrell, Jitendra Malik.
|
||||
Rich feature hierarchies for accurate object detection and semantic
|
||||
segmentation.
|
||||
http://arxiv.org/abs/1311.2524
|
||||
|
||||
The selective_search_ijcv_with_python code is available at
|
||||
https://github.com/sergeyk/selective_search_ijcv_with_python
|
||||
|
||||
TODO:
|
||||
- [ ] batch up image filenames as well: don't want to load all of them into memory
|
||||
"""
|
||||
import numpy as np
|
||||
import os
|
||||
import sys
|
||||
import gflags
|
||||
import pandas as pd
|
||||
import time
|
||||
import skimage.io
|
||||
import skimage.transform
|
||||
import selective_search_ijcv_with_python as selective_search
|
||||
import caffe
|
||||
|
||||
# Input image is first resized to IMAGE_DIM x IMAGE_DIM, then a
# CROPPED_DIM x CROPPED_DIM crop is fed to the network.
IMAGE_DIM = 256
CROPPED_DIM = 227
# Offset of the centered CROPPED_DIM crop inside the IMAGE_DIM image.
IMAGE_CENTER = int((IMAGE_DIM - CROPPED_DIM) / 2)

# Valid values for the crop_mode argument of assemble_batches().
CROP_MODES = ['center_only', 'corners', 'selective_search']

# NOTE: this must match the setting in the prototxt that is used!
BATCH_SIZE = 245

# Load the imagenet mean file (expected next to this script), and
# precompute its centered CROPPED_DIM x CROPPED_DIM view for use on
# cropped-size inputs.
IMAGENET_MEAN = np.load(
    os.path.join(os.path.dirname(__file__), 'ilsvrc_2012_mean.npy'))
CROPPED_IMAGENET_MEAN = IMAGENET_MEAN[
    IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM,
    IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM, :]
|
||||
|
||||
|
||||
def load_image(filename):
    """
    Load an image and normalize it to 3-channel RGB.

    Input:
        filename: string
            Path to an image file readable by skimage.io.imread.

    Output:
        image: (H x W x 3) ndarray of type uint8.
            NOTE: the image is returned at its ORIGINAL size; it is not
            resized to IMAGE_DIM here (the previous docstring claiming a
            256 x 256 x 3 return was wrong -- resizing happens in
            format_image).
    """
    img = skimage.io.imread(filename)
    if img.ndim == 2:
        # Grayscale: replicate the single channel into R, G, B.
        img = np.tile(img[:, :, np.newaxis], (1, 1, 3))
    elif img.shape[2] == 4:
        # RGBA: drop the alpha channel.
        img = img[:, :, :3]
    return img
|
||||
|
||||
|
||||
def format_image(image, window=None, cropped_size=False):
    """
    Crop, resize, and preprocess an image for input to the network.

    Input:
        image: (H x W x 3) ndarray, RGB channel order.
        window: (4) ndarray
            (ymin, xmin, ymax, xmax) corner coordinates of a subwindow
            to crop before resizing. (The previous docstring said
            (y, x, h, w), but the slicing below uses window[2] and
            window[3] as the *end* indices, i.e. corner coordinates.)
        cropped_size: bool
            Whether to output cropped size image (CROPPED_DIM) or full
            size image (IMAGE_DIM).

    Output:
        image: (3 x H x W) float ndarray, BGR channel order,
            mean-subtracted, resized to either IMAGE_DIM or CROPPED_DIM.
    """
    # Crop a subimage if window is provided.
    if window is not None:
        image = image[
            window[0]:window[2],
            window[1]:window[3]
        ]

    # Resize to ImageNet size, convert to BGR, subtract mean.
    # skimage.transform.resize returns floats in [0, 1], hence the * 255.
    image = image[:, :, ::-1]
    if cropped_size:
        image = skimage.transform.resize(image, (CROPPED_DIM, CROPPED_DIM)) * 255
        image -= CROPPED_IMAGENET_MEAN
    else:
        image = skimage.transform.resize(image, (IMAGE_DIM, IMAGE_DIM)) * 255
        image -= IMAGENET_MEAN

    # (H x W x 3) -> (3 x H x W) for Caffe's channel-first layout.
    image = image.swapaxes(1, 2).swapaxes(0, 1)
    return image
|
||||
|
||||
|
||||
def _assemble_images_center_only(image_fnames):
    """
    For each image, square the image and crop its center.

    Input:
        image_fnames: list

    Output:
        images_df: pandas.DataFrame
            With 'image', 'filename' columns; one row per input image.
    """
    center = slice(IMAGE_CENTER, IMAGE_CENTER + CROPPED_DIM)
    crops = []
    for fname in image_fnames:
        formatted = format_image(load_image(fname))
        # Leading singleton axis so each row is (1 x 3 x 227 x 227).
        crop = formatted[np.newaxis, :, center, center]
        crops.append(np.ascontiguousarray(crop, dtype=np.float32))

    return pd.DataFrame({
        'image': crops,
        'filename': image_fnames
    })
|
||||
|
||||
|
||||
def _assemble_images_corners(image_fnames):
    """
    For each image, square the image and crop its center, four corners,
    and mirrored versions of the above: 10 crops per image.

    Input:
        image_fnames: list

    Output:
        images_df: pandas.DataFrame
            With 'image', 'filename' columns; 10 rows per input image.
    """
    all_images = []
    for image_filename in image_fnames:
        image = format_image(load_image(image_filename))
        indices = [0, IMAGE_DIM - CROPPED_DIM]

        images = np.empty((10, 3, CROPPED_DIM, CROPPED_DIM), dtype=np.float32)
        curr = 0
        # Four corner crops fill slots 0-3.
        for i in indices:
            for j in indices:
                images[curr] = image[:, i:i + CROPPED_DIM, j:j + CROPPED_DIM]
                curr += 1
        # Center crop in slot 4.
        images[4] = image[
            :,
            IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM,
            IMAGE_CENTER:IMAGE_CENTER + CROPPED_DIM
        ]
        # Slots 5-9: horizontally flipped versions of the five above.
        images[5:] = images[:5, :, :, ::-1]

        all_images.append(images)

    # BUGFIX: the comprehension clauses were reversed
    # ('for row in images for images in all_images'), which iterated the
    # rows of only the LAST image (comprehension clauses nest
    # left-to-right), misaligning crops with the repeated filenames.
    images_df = pd.DataFrame({
        'image': [row[np.newaxis, :]
                  for images in all_images for row in images],
        'filename': np.repeat(image_fnames, 10)
    })
    return images_df
|
||||
|
||||
|
||||
def _assemble_images_selective_search(image_fnames):
    """
    Run Selective Search window proposals on all images, then for each
    image-window pair, extract a square crop.

    Input:
        image_fnames: list

    Output:
        images_df: pandas.DataFrame
            With 'image', 'window', 'filename' columns.
    """
    windows_list = selective_search.get_windows(image_fnames)

    rows = []
    for fname, windows in zip(image_fnames, windows_list):
        img = load_image(fname)
        for window in windows:
            # Third argument is truthy, so format_image produces a
            # CROPPED_DIM-sized crop.
            crop = format_image(img, window, CROPPED_DIM)
            rows.append({
                'image': crop[np.newaxis, :],
                'window': window,
                'filename': fname
            })

    return pd.DataFrame(rows)
|
||||
|
||||
|
||||
def assemble_batches(image_fnames, crop_mode='center_only', batch_size=256):
    """
    Assemble DataFrame of image crops for feature computation.

    Input:
        image_fnames: list of string
        crop_mode: string
            'center_only': the CROPPED_DIM middle of the image is taken as is
            'corners': take CROPPED_DIM-sized boxes at 4 corners and center of
                the image, as well as their flipped versions: a total of 10.
            'selective_search': run Selective Search region proposal on the
                image, and take each enclosing subwindow.
        batch_size: int
            Number of rows per returned batch. NOTE: must match the batch
            size configured in the network prototxt.

    Output:
        df_batches: list of DataFrames, each one of batch_size rows.
            Each row has 'image', 'filename', and 'window' info.
            Column 'image' contains (X x 3 x 227 x 227) ndarrays.
            Column 'filename' contains source filenames.
            If 'filename' is None, then the row is just for padding.
    """
    # Coerce in case the value arrives as a string (e.g. from a flag).
    batch_size = int(batch_size)

    if crop_mode == 'center_only':
        images_df = _assemble_images_center_only(image_fnames)

    elif crop_mode == 'corners':
        images_df = _assemble_images_corners(image_fnames)

    elif crop_mode == 'selective_search':
        images_df = _assemble_images_selective_search(image_fnames)

    else:
        raise Exception("Unknown mode: not in {}".format(CROP_MODES))

    # Make sure the DataFrame has a multiple of batch_size rows:
    # just fill the extra rows with None filenames and all-zero images.
    # BUGFIX: the body previously used the module global BATCH_SIZE (245)
    # here, silently ignoring the batch_size parameter that callers pass.
    N = images_df.shape[0]
    remainder = N % batch_size
    if remainder > 0:
        zero_image = np.zeros_like(images_df['image'].iloc[0])
        remainder_df = pd.DataFrame([{
            'filename': None,
            'image': zero_image,
            'window': [0, 0, 0, 0]
        }] * (batch_size - remainder))
        images_df = images_df.append(remainder_df)
        N = images_df.shape[0]

    # Split into batches of batch_size rows each; explicit floor
    # division keeps the indices integral.
    ind = np.arange(N) // batch_size
    df_batches = [images_df[ind == i] for i in range(N // batch_size)]
    return df_batches
|
||||
|
||||
|
||||
def compute_feats(images_df, layer='imagenet'):
    """
    Run one batch of image crops through the net and replace the
    'image' column with a 'feat' column of flattened outputs.

    NOTE: relies on the module-global `caffenet` created in __main__.
    """
    if layer == 'imagenet':
        num_output = 1000
    else:
        raise ValueError("Unknown layer requested: {}".format(layer))

    num = images_df.shape[0]
    stacked = np.concatenate(images_df['image'].values)
    input_blobs = [np.ascontiguousarray(stacked, dtype='float32')]
    output_blobs = [np.empty((num, num_output, 1, 1), dtype=np.float32)]
    print(input_blobs[0].shape, output_blobs[0].shape)

    caffenet.Forward(input_blobs, output_blobs)
    feats = [blob.flatten() for blob in output_blobs[0]]

    # Swap the images out for their features.
    del images_df['image']
    images_df['feat'] = feats
    return images_df
|
||||
|
||||
|
||||
if __name__ == "__main__":
    ## Parse cmdline options.
    gflags.DEFINE_string(
        "model_def", "", "The model definition file.")
    gflags.DEFINE_string(
        "pretrained_model", "", "The pretrained model.")
    gflags.DEFINE_boolean(
        "gpu", False, "use gpu for computation")
    gflags.DEFINE_string(
        "crop_mode", "center_only", "Crop mode, from {}".format(CROP_MODES))
    gflags.DEFINE_string(
        "images_file", "", "File that contains image filenames.")
    # BUGFIX: batch_size is numeric; DEFINE_string would hand a str to
    # assemble_batches, breaking its modulo/division arithmetic.
    gflags.DEFINE_integer(
        "batch_size", 256, "Number of image crops to let through in one go")
    gflags.DEFINE_string(
        "output", "", "The output DataFrame HDF5 filename.")
    gflags.DEFINE_string(
        "layer", "imagenet", "Layer to output.")
    FLAGS = gflags.FLAGS
    FLAGS(sys.argv)

    ## Load list of image filenames and assemble into batches.
    t = time.time()
    print('Assembling batches...')
    with open(FLAGS.images_file) as f:
        image_fnames = [_.strip() for _ in f.readlines()]
    image_batches = assemble_batches(
        image_fnames, FLAGS.crop_mode, FLAGS.batch_size)
    print('{} batches assembled in {:.3f} s'.format(
        len(image_batches), time.time() - t))

    # Initialize network by loading model definition and weights.
    t = time.time()
    print("Loading Caffe model.")
    caffenet = caffe.CaffeNet(FLAGS.model_def, FLAGS.pretrained_model)
    caffenet.set_phase_test()
    if FLAGS.gpu:
        caffenet.set_mode_gpu()
    print("Caffe model loaded in {:.3f} s".format(time.time() - t))

    # Process the batches.
    t = time.time()
    # Call syntax here for consistency with every other print in this
    # file (this one was a bare Python 2 print statement).
    print('Processing {} files in {} batches'.format(
        len(image_fnames), len(image_batches)))
    dfs_with_feats = []
    for i in range(len(image_batches)):
        if i % 10 == 0:
            print('Batch {}/{}, elapsed time: {:.3f} s'.format(
                i, len(image_batches), time.time() - t))
        dfs_with_feats.append(compute_feats(image_batches[i]))

    # Concatenate, dropping the padding rows.
    df = pd.concat(dfs_with_feats).dropna(subset=['filename'])
    print("Processing complete after {:.3f} s.".format(time.time() - t))

    # Write out the results.
    t = time.time()
    df.to_hdf(FLAGS.output, 'df', mode='w')
    print("Done. Saving to {} took {:.3f} s.".format(
        FLAGS.output, time.time() - t))

    sys.exit()
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
Загрузка…
Ссылка в новой задаче