Mirror of https://github.com/microsoft/caffe.git

image classification in python

Parent: bf4d7262bd
Commit: 2fc32d558a
caffe/__init__.py
@@ -1,2 +1,3 @@
 from .pycaffe import Net, SGDSolver
+from .classifier import Classifier
 import io
caffe/classifier.py (new file)
@@ -0,0 +1,85 @@
#!/usr/bin/env python
"""
Classifier is an image classifier specialization of Net.
"""

import numpy as np

import caffe


class Classifier(caffe.Net):
    """
    Classifier extends Net for image class prediction
    by scaling, center cropping, or oversampling.
    """
    def __init__(self, model_file, pretrained_file, image_dims=None,
                 gpu=False, mean_file=None, input_scale=None, channel_swap=None):
        """
        Take
        image_dims: dimensions to scale input for cropping/sampling.
            Default is to scale to net input size for whole-image crop.
        gpu, mean_file, input_scale, channel_swap: convenience params for
            setting mode, mean, input scale, and channel order.
        """
        caffe.Net.__init__(self, model_file, pretrained_file)
        self.set_phase_test()

        if gpu:
            self.set_mode_gpu()
        else:
            self.set_mode_cpu()

        if mean_file:
            self.set_mean(self.inputs[0], mean_file)
        if input_scale:
            self.set_input_scale(self.inputs[0], input_scale)
        if channel_swap:
            self.set_channel_swap(self.inputs[0], channel_swap)

        self.crop_dims = np.array(self.blobs[self.inputs[0]].data.shape[2:])
        if not image_dims:
            image_dims = self.crop_dims
        self.image_dims = image_dims

    def predict(self, inputs, oversample=True):
        """
        Predict classification probabilities of inputs.

        Take
        inputs: iterable of (H x W x K) input ndarrays.
        oversample: average predictions across center, corners, and mirrors
            when True (default). Center-only prediction when False.

        Give
        predictions: (N x C) ndarray of class probabilities
            for N images and C classes.
        """
        # Scale to standardize input dimensions.
        inputs = np.asarray([caffe.io.resize_image(im, self.image_dims)
                             for im in inputs])

        if oversample:
            # Generate center, corner, and mirrored crops.
            inputs = caffe.io.oversample(inputs, self.crop_dims)
        else:
            # Take center crop.
            center = np.array(self.image_dims) / 2.0
            crop = np.tile(center, (1, 2))[0] + np.concatenate([
                -self.crop_dims / 2.0,
                self.crop_dims / 2.0
            ])
            inputs = inputs[:, crop[0]:crop[2], crop[1]:crop[3], :]

        # Classify
        caffe_in = self.preprocess(self.inputs[0], inputs)
        out = self.forward_all(**{self.inputs[0]: caffe_in})
        predictions = out[self.outputs[0]].squeeze(axis=(2,3))

        # For oversampling, average predictions across crops.
        if oversample:
            predictions = predictions.reshape((len(predictions) / 10, 10, -1))
            predictions = predictions.mean(1)

        return predictions
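For orientation (not part of the diff), a minimal usage sketch of the new interface. The model, weights, and mean paths below are assumptions borrowed from the classify.py defaults added later in this commit, and the image path is a placeholder:

import caffe

# Placeholder paths; substitute your own deploy prototxt, weights, and mean file.
net = caffe.Classifier('examples/imagenet/imagenet_deploy.prototxt',
                       'examples/imagenet/caffe_reference_imagenet_model',
                       image_dims=(256, 256), gpu=False,
                       mean_file='python/caffe/imagenet/ilsvrc_2012_mean.npy',
                       input_scale=255, channel_swap=(2, 1, 0))

# predict() takes H x W x K images as loaded by caffe.io.load_image and
# returns an (N x C) array of class probabilities.
images = [caffe.io.load_image('cat.jpg')]        # placeholder image path
scores = net.predict(images)                     # 10-crop oversampling (default)
scores_center = net.predict(images, False)       # center crop only

With oversampling on, each image is expanded to ten crops internally and the ten predictions are averaged back to one row per image.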
caffe/imagenet/__init__.py (deleted)
@@ -1 +0,0 @@
-from .wrapper import *
caffe/imagenet/wrapper.py (deleted)
@@ -1,128 +0,0 @@
#!/usr/bin/env python
"""wrapper.py implements an end-to-end wrapper that classifies an image read
from disk, using the imagenet classifier.
"""

import numpy as np
import os
from skimage import io
from skimage import transform

import caffe

IMAGE_DIM = 256
CROPPED_DIM = 227

# Load the imagenet mean file
IMAGENET_MEAN = np.load(
    os.path.join(os.path.dirname(__file__), 'ilsvrc_2012_mean.npy'))


def oversample(image, center_only=False):
    """
    Oversamples an image. Currently the indices are hard coded to the
    4 corners and the center of the image, as well as their flipped ones,
    a total of 10 images.

    Input:
        image: an image of size (256 x 256 x 3) with data type uint8.
        center_only: if True, only return the center image.
    Output:
        images: the output of size (10 x 3 x 227 x 227)
    """
    indices = [0, IMAGE_DIM - CROPPED_DIM]
    center = int(indices[1] / 2)
    if center_only:
        return np.ascontiguousarray(
            image[np.newaxis, :, center:center + CROPPED_DIM,
                  center:center + CROPPED_DIM],
            dtype=np.float32)
    else:
        images = np.empty((10, 3, CROPPED_DIM, CROPPED_DIM), dtype=np.float32)
        curr = 0
        for i in indices:
            for j in indices:
                images[curr] = image[:, i:i + CROPPED_DIM, j:j + CROPPED_DIM]
                curr += 1
        images[4] = image[:, center:center + CROPPED_DIM,
                          center:center + CROPPED_DIM]
        # flipped version
        images[5:] = images[:5, :, :, ::-1]
        return images


def prepare_image(filename, center_only=False):
    img = io.imread(filename)
    if img.ndim == 2:
        img = np.tile(img[:, :, np.newaxis], (1, 1, 3))
    elif img.shape[2] == 4:
        img = img[:, :, :3]
    # Resize, convert to BGR, and permute axes to caffe order
    img_reshape = (transform.resize(img, (IMAGE_DIM, IMAGE_DIM)) * 255)[:, :, ::-1]
    img_reshape = img_reshape.swapaxes(1, 2).swapaxes(0, 1)
    # subtract mean
    img_reshape -= IMAGENET_MEAN
    return oversample(img_reshape, center_only)


class ImageNetClassifier(object):
    """
    The ImageNetClassifier is a wrapper class to perform easier deployment
    of models trained on imagenet.
    """
    def __init__(self, model_def_file, pretrained_model, center_only=False,
                 num_output=1000):
        if center_only:
            num = 1
        else:
            num = 10
        self.caffenet = caffe.Net(model_def_file, pretrained_model)
        self._output_blobs = [np.empty((num, num_output, 1, 1), dtype=np.float32)]
        self._center_only = center_only

    def predict(self, filename):
        input_blob = [prepare_image(filename, self._center_only)]
        self.caffenet.Forward(input_blob, self._output_blobs)
        return self._output_blobs[0].mean(0).flatten()


def main(argv):
    """
    The main function will carry out classification.
    """
    import gflags
    import glob
    import time
    gflags.DEFINE_string("root", "", "The folder that contains images.")
    gflags.DEFINE_string("ext", "JPEG", "The image extension.")
    gflags.DEFINE_string("model_def", "", "The model definition file.")
    gflags.DEFINE_string("pretrained_model", "", "The pretrained model.")
    gflags.DEFINE_string("output", "", "The output numpy file.")
    gflags.DEFINE_boolean("gpu", True, "use gpu for computation")
    FLAGS = gflags.FLAGS
    FLAGS(argv)

    net = ImageNetClassifier(FLAGS.model_def, FLAGS.pretrained_model)

    if FLAGS.gpu:
        print 'Use gpu.'
        net.caffenet.set_mode_gpu()

    files = glob.glob(os.path.join(FLAGS.root, "*." + FLAGS.ext))
    files.sort()
    print 'A total of %d files' % len(files)
    output = np.empty((len(files), net._output_blobs[0].shape[1]),
                      dtype=np.float32)
    start = time.time()
    for i, f in enumerate(files):
        output[i] = net.predict(f)
        if i % 1000 == 0 and i > 0:
            print 'Processed %d files, elapsed %.2f s' % (i, time.time() - start)
    # Finally, write the results
    np.save(FLAGS.output, output)
    print 'Done. Saved to %s.' % FLAGS.output


if __name__ == "__main__":
    import sys
    main(sys.argv)
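The hand-rolled pipeline removed above (resize to 256 x 256, scale to [0, 255], RGB to BGR, mean subtraction, 10-crop to 227 x 227) maps onto the new Classifier's convenience arguments. A rough sketch of that correspondence, as an illustration rather than part of the diff, with placeholder paths:

import caffe

model_def_file = 'imagenet_deploy.prototxt'          # placeholder paths
pretrained_model = 'caffe_reference_imagenet_model'

net = caffe.Classifier(
    model_def_file, pretrained_model,
    image_dims=(256, 256),             # transform.resize(img, (IMAGE_DIM, IMAGE_DIM))
    input_scale=255,                   # the "* 255" scaling
    channel_swap=(2, 1, 0),            # the "[:, :, ::-1]" RGB -> BGR flip
    mean_file='ilsvrc_2012_mean.npy')  # the IMAGENET_MEAN subtraction

# Cropping to the net input size (227 for the reference model) and the
# 10-crop mirroring done by the removed oversample() now happen inside predict().
scores = net.predict([caffe.io.load_image('cat.jpg')])   # placeholder image path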
classify.py (new file)
@@ -0,0 +1,120 @@
#!/usr/bin/env python
"""
classify.py is an out-of-the-box image classifier callable from the command line.

By default it configures and runs the Caffe reference ImageNet model.
"""
import numpy as np
import os
import sys
import argparse
import glob
import time

import caffe


def main(argv):
    pycaffe_dir = os.path.dirname(__file__)

    parser = argparse.ArgumentParser()
    # Required arguments: input and output files.
    parser.add_argument(
        "input_file",
        help="Input image, directory, or npy."
    )
    parser.add_argument(
        "output_file",
        help="Output npy filename."
    )
    # Optional arguments.
    parser.add_argument(
        "--model_def",
        default=os.path.join(pycaffe_dir,
                "../examples/imagenet/imagenet_deploy.prototxt"),
        help="Model definition file."
    )
    parser.add_argument(
        "--pretrained_model",
        default=os.path.join(pycaffe_dir,
                "../examples/imagenet/caffe_reference_imagenet_model"),
        help="Trained model weights file."
    )
    parser.add_argument(
        "--gpu",
        action='store_true',
        help="Switch for gpu computation."
    )
    parser.add_argument(
        "--center_only",
        action='store_true',
        help="Switch for prediction from center crop alone instead of " +
             "averaging predictions across crops (default)."
    )
    parser.add_argument(
        "--images_dim",
        default='256,256',
        help="Canonical 'height,width' dimensions of input images."
    )
    parser.add_argument(
        "--mean_file",
        default=os.path.join(pycaffe_dir,
                             'caffe/imagenet/ilsvrc_2012_mean.npy'),
        help="Data set image mean of H x W x K dimensions (numpy array). " +
             "Set to '' for no mean subtraction."
    )
    parser.add_argument(
        "--input_scale",
        type=float,
        default=255,
        help="Multiply input features by this scale before input to net"
    )
    parser.add_argument(
        "--channel_swap",
        default='2,1,0',
        help="Order to permute input channels. The default converts " +
             "RGB -> BGR since BGR is the Caffe default by way of OpenCV."
    )
    parser.add_argument(
        "--ext",
        default='jpg',
        help="Image file extension to take as input when a directory " +
             "is given as the input file."
    )
    args = parser.parse_args()

    image_dims = [int(s) for s in args.images_dim.split(',')]
    channel_swap = [int(s) for s in args.channel_swap.split(',')]

    # Make classifier.
    classifier = caffe.Classifier(args.model_def, args.pretrained_model,
            image_dims=image_dims, gpu=args.gpu, mean_file=args.mean_file,
            input_scale=args.input_scale, channel_swap=channel_swap)

    if args.gpu:
        print 'GPU mode'

    # Load numpy array (.npy), directory glob (*.jpg), or image file.
    args.input_file = os.path.expanduser(args.input_file)
    if args.input_file.endswith('npy'):
        inputs = np.load(args.input_file)
    elif os.path.isdir(args.input_file):
        inputs = [caffe.io.load_image(im_f)
                  for im_f in glob.glob(args.input_file + '/*.' + args.ext)]
    else:
        inputs = [caffe.io.load_image(args.input_file)]

    print "Classifying %d inputs." % len(inputs)

    # Classify.
    start = time.time()
    predictions = classifier.predict(inputs, not args.center_only)
    print "Done in %.2f s." % (time.time() - start)

    # Save
    np.save(args.output_file, predictions)


if __name__ == '__main__':
    main(sys.argv)
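A typical run (paths are hypothetical) would be: python classify.py cat.jpg predictions.npy --gpu, or pass a directory as input_file to classify every *.jpg inside it. A small sketch of how the saved output could then be inspected:

import numpy as np

scores = np.load('predictions.npy')   # hypothetical output_file from the run above
print(scores.shape)                   # (N images, C classes), e.g. (1, 1000)
print(scores.argmax(1))               # highest-scoring class index per image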