enabling native proposal layer and dlib selective search

Philipp Kranen 2017-08-22 09:46:27 +02:00
Parent 7329e94aa3
Commit 63488568fe
41 changed files with 633 additions and 1309 deletions

.gitattributes vendored (4 changes)
View File

@@ -146,8 +146,8 @@ Examples/Text/LightRNN/test/word-0.location text
*.vsdm binary
*.zip binary
*.dnn binary
Examples/Image/Detection/FastRCNN/fastRCNN/*/*.pyd binary
Examples/Image/Detection/FastRCNN/fastRCNN/*/*.so binary
Examples/Image/Detection/FastRCNN/BrainScript/fastRCNN/*/*.pyd binary
Examples/Image/Detection/FastRCNN/BrainScript/fastRCNN/*/*.so binary
Examples/Image/Detection/utils/cython_modules/*.pyd binary
Examples/Image/Detection/utils/cython_modules/*.so binary
Tests/UnitTests/V2LibraryTests/data/*.bin binary

.gitignore vendored (1 change)
View File

@@ -291,6 +291,7 @@ Examples/Image/DataSets/grocery/positive/
Examples/Image/DataSets/grocery/testImages/
Examples/Image/DataSets/grocery/*.txt
PretrainedModels/*.model
Examples/Image/Detection/FastRCNN/BrainScript/Output/
Examples/Image/Detection/FastRCNN/BrainScript/proc/
Examples/Image/Detection/FastRCNN/Output/
Examples/Image/Detection/FasterRCNN/Output/

View File

@@ -13,9 +13,6 @@ sys.path.append(os.path.join(abs_path, "..", "..", "Image", "Detection", "Faster
C.device.try_set_default_device(C.device.cpu())
from FasterRCNN import eval_faster_rcnn_mAP, set_global_vars
from config import cfg
ops.register_native_user_function('ProposalLayerOp', 'Cntk.ProposalLayerLib-' + C.__version__.rstrip('+'), 'CreateProposalLayer')
def clone_with_native_proposal_layer(model):
@@ -26,16 +23,6 @@ def clone_with_native_proposal_layer(model):
def converter(x):
layer_config = copy.deepcopy(x.attributes)
layer_config["test_pre_nms_topN"] = cfg["TEST"].RPN_PRE_NMS_TOP_N
layer_config["test_post_nms_topN"] = cfg["TEST"].RPN_POST_NMS_TOP_N
layer_config["test_nms_thresh"] = float(cfg["TEST"].RPN_NMS_THRESH)
layer_config["test_min_size"] = float(cfg["TEST"].RPN_MIN_SIZE)
layer_config["train_pre_nms_topN"] = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
layer_config["train_post_nms_topN"] = cfg["TRAIN"].RPN_POST_NMS_TOP_N
layer_config["train_nms_thresh"] = float(cfg["TRAIN"].RPN_NMS_THRESH)
layer_config["train_min_size"] = float(cfg["TRAIN"].RPN_MIN_SIZE)
return ops.native_user_function('ProposalLayerOp', list(x.inputs), layer_config, 'native_proposal_layer')
return C.misc.convert(model, filter, converter)
@@ -52,8 +39,21 @@ def evaluate(model_path):
# ProposalLayer currently only runs on the CPU
eval_device = C.cpu()
model = C.Function.load(model_path, device=eval_device)
set_global_vars(False)
return eval_faster_rcnn_mAP(model)
from FasterRCNN.FasterRCNN_config import cfg as detector_cfg
from utils.configs.AlexNet_config import cfg as network_cfg
from utils.configs.Grocery_config import cfg as dataset_cfg
from utils.config_helpers import merge_configs
from FasterRCNN.FasterRCNN_train import prepare
from FasterRCNN.FasterRCNN_eval import compute_test_set_aps
cfg = merge_configs([detector_cfg, network_cfg, dataset_cfg])
cfg["CNTK"].FORCE_DETERMINISTIC = True
prepare(cfg, False)
eval_results = compute_test_set_aps(model, cfg)
meanAP = np.nanmean(list(eval_results.values()))
return meanAP
#############################
# main function boilerplate #
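For context, this is roughly how the pieces above fit together; a minimal sketch assuming the `clone_with_native_proposal_layer` and `evaluate` definitions from this file, with `model_path` as a placeholder:

```
import cntk as C
from cntk import ops

# Register the native ProposalLayer implementation (same call as above).
ops.register_native_user_function('ProposalLayerOp',
                                  'Cntk.ProposalLayerLib-' + C.__version__.rstrip('+'),
                                  'CreateProposalLayer')

# Load a trained Faster R-CNN model, replace the Python proposal layer with
# the native op, and score the result. The native layer currently runs on CPU only.
model = C.Function.load(model_path, device=C.cpu())
native_model = clone_with_native_proposal_layer(model)
native_model.save(model_path + '.native')   # hypothetical output path
mean_ap = evaluate(model_path + '.native')  # evaluate() above reloads the model and computes mAP
```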

View File

@@ -7,6 +7,7 @@
import sys, os
import numpy as np
import scipy.io as sio
import future
import xml.etree.ElementTree
from xml.etree import ElementTree
@@ -21,9 +22,10 @@ use_pad_scale = False
pad_width = 850
pad_height = 850
pascal_voc2007_jpgimg_rel_path = ".../VOCdevkit/VOC2007/JPEGImages/"
pascal_voc2007_imgsets_rel_path = ".../VOCdevkit/VOC2007/ImageSets/Main/"
pascal_voc2007_annotations_rel_path = ".../VOCdevkit/VOC2007/Annotations/"
pascal_voc2007_jpgimg_rel_path = "../VOCdevkit/VOC2007/JPEGImages/"
pascal_voc2007_imgsets_rel_path = "../VOCdevkit/VOC2007/ImageSets/Main/"
pascal_voc2007_annotations_rel_path = "../VOCdevkit/VOC2007/Annotations/"
pascal_voc2007_proposals_rel_path = "../selective_search_data/"
abs_path = os.path.dirname(os.path.abspath(__file__))
cls_file_path = os.path.join(abs_path, "class_map.txt")
@@ -47,9 +49,6 @@ def format_roi(cls_index, xmin, ymin, xmax, ymax, img_file_path):
scale_y = (1.0 * pad_height) / img_height
min_scale = min(scale_x, scale_y)
if round(img_width * min_scale) != pad_width and round(img_height * min_scale) != pad_height:
import pdb; pdb.set_trace()
new_width = round(img_width * min_scale)
new_height = round(img_height * min_scale)
assert(new_width == pad_width or new_height == pad_height)
@@ -87,7 +86,7 @@ def format_roi(cls_index, xmin, ymin, xmax, ymax, img_file_path):
def create_mappings(train, skip_difficult):
file_prefix = "trainval" if train else "test"
img_map_input = "../VOCdevkit/VOC2007/ImageSets/Main/{}.txt".format(file_prefix)
img_map_input = "{}.txt".format(file_prefix)
img_map_output = "{}2007.txt".format(file_prefix)
roi_map_output = "{}2007_rois_{}_{}{}.txt".format(
file_prefix,
@@ -95,11 +94,13 @@ def create_mappings(train, skip_difficult):
"pad" if use_pad_scale else "noPad",
"_skipDif" if skip_difficult else "")
size_map_output = "{}_size_file2007.txt".format(file_prefix)
proposals_output = "{}2007_proposals.txt".format(file_prefix)
in_map_file_path = os.path.join(abs_path, img_map_input)
in_map_file_path = os.path.join(abs_path, pascal_voc2007_imgsets_rel_path, img_map_input)
out_map_file_path = os.path.join(abs_path, img_map_output)
roi_file_path = os.path.join(abs_path, roi_map_output)
size_file_path = os.path.join(abs_path, size_map_output)
proposals_file_path = os.path.join(abs_path, proposals_output)
class_map_file_path = os.path.join(abs_path, "class_map.txt")
# write class map file
@@ -115,11 +116,13 @@ def create_mappings(train, skip_difficult):
input_lines = input_file.readlines()
counter = 0
img_numbers = []
with open(out_map_file_path, 'w') as img_file:
with open(roi_file_path, 'w') as roi_file:
with open(size_file_path, 'w') as size_file:
for in_line in input_lines:
img_number = in_line.strip()
img_numbers.append(img_number)
img_file_path = "{}{}.jpg".format(pascal_voc2007_jpgimg_rel_path, img_number)
img_line = "{}\t{}\t0\n".format(counter, img_file_path)
img_file.write(img_line)
@@ -164,6 +167,31 @@ def create_mappings(train, skip_difficult):
for cls in classes:
cls_file.write("{}\t{}\n".format(cls, class_dict[cls]))
if not skip_difficult: # proposals are the same and need to be processed only once
try:
# convert selective search proposals from matlab to CNTK text format
print("Converting matlab proposal file to CNTK format ({})".format(proposals_file_path))
proposal_input = 'voc_2007_{}.mat'.format(file_prefix)
in_ss_file_path = os.path.join(abs_path, pascal_voc2007_proposals_rel_path, proposal_input)
raw = sio.loadmat(in_ss_file_path)
boxes = raw['boxes'][0]
images = raw['images']
with open(proposals_file_path, 'w') as prop_file:
for i in range(len(img_numbers)):
img_number = img_numbers[i]
img_name = images[i,0][0]
assert img_number == img_name
box_coords = boxes[i]
prop_line = "{} |proposals ".format(i)
for c in range(box_coords.shape[0]):
prop_line += ' ' + ' '.join(str(x) for x in box_coords[c])
prop_file.write(prop_line + '\n')
except:
print("Warning: error converting selective search proposals from matlab to CNTK text format")
if __name__ == '__main__':
create_mappings(True, skip_difficult=True)
create_mappings(False, skip_difficult=True)
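Each line of the generated proposals file follows the CNTK text format, `<index> |proposals x1 y1 x2 y2 x1 y1 x2 y2 ...`, one line per image. A minimal sketch of reading such a line back (hypothetical helper, assuming the layout written by the loop above):

```
import numpy as np

def parse_proposal_line(line):
    # Split off the image index, then reshape the flat coordinate list
    # into one (x1, y1, x2, y2) row per proposal box.
    index_str, coords_str = line.split('|proposals')
    coords = np.array(coords_str.split(), dtype=np.float32)
    return int(index_str), coords.reshape((-1, 4))
```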

View File

@@ -4,17 +4,39 @@
# for full license information.
# ==============================================================================
import os
import os, sys
import numpy as np
import utils.od_utils as od
from utils.config_helpers import merge_configs
available_detectors = ['FastRCNN', 'FasterRCNN']
def get_detector_name(args):
detector_name = None
default_detector = 'FasterRCNN'
if len(args) != 2:
print("Please provide a detector name as the single argument. Usage:")
print(" python DetectionDemo.py <detector_name>")
print("Available detectors: {}".format(available_detectors))
else:
detector_name = args[1]
if not any(detector_name == x for x in available_detectors):
print("Unknown detector: {}.".format(detector_name))
print("Available detectors: {}".format(available_detectors))
detector_name = None
if detector_name is None:
print("Using default detector: {}".format(default_detector))
return default_detector
else:
return detector_name
def get_configuration(detector_name):
# load configs for detector, base network and data set
if detector_name == "FastRCNN":
from FastRCNN.config import cfg as detector_cfg
from FastRCNN.FastRCNN_config import cfg as detector_cfg
elif detector_name == "FasterRCNN":
from FasterRCNN.config import cfg as detector_cfg
from FasterRCNN.FasterRCNN_config import cfg as detector_cfg
else:
print('Unknown detector: {}'.format(detector_name))
@@ -29,7 +51,9 @@ def get_configuration(detector_name):
if __name__ == '__main__':
# Currently supported detectors: 'FastRCNN', 'FasterRCNN'
cfg = get_configuration('FasterRCNN')
args = sys.argv
detector_name = get_detector_name(args)
cfg = get_configuration(detector_name)
# train and test
eval_model = od.train_object_detector(cfg)
@@ -40,7 +64,7 @@ if __name__ == '__main__':
print('Mean AP = {:.4f}'.format(np.nanmean(list(eval_results.values()))))
# detect objects in single image
img_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), r"..\DataSets\Grocery\testImages\WIN_20160803_11_28_42_Pro.jpg")
img_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), r"../DataSets/Grocery/testImages/WIN_20160803_11_28_42_Pro.jpg")
regressed_rois, cls_probs = od.evaluate_single_image(eval_model, img_path, cfg)
bboxes, labels, scores = od.filter_results(regressed_rois, cls_probs, cfg)
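With this change the demo takes the detector name as a command-line argument and falls back to `FasterRCNN` if none (or an unknown one) is given, e.g.:

```
python DetectionDemo.py FastRCNN
```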

View File

@@ -48,7 +48,7 @@ momentum_time_constant = p.cntk_momentum_time_constant
# model specific variables (only AlexNet for now)
base_model = "AlexNet"
if base_model == "AlexNet":
model_file = "../../../../../../../PretrainedModels/AlexNet_ImageNet_CNTK.model"
model_file = "../../../../../../../../PretrainedModels/AlexNet.model"
feature_node_name = "features"
last_conv_node_name = "conv5.y"
pool_node_name = "pool3"

View File

@@ -73,22 +73,22 @@
" cntk.device.try_set_default_device(cntk.device.cpu()) \n",
" else:\n",
" cntk.device.try_set_default_device(cntk.device.gpu(0))\n",
" sys.path.append(os.path.join(*\"../../../../Tests/EndToEndTests/CNTKv2Python/Examples\".split(\"/\")))\n",
" sys.path.append(os.path.join(*\"../../../../../Tests/EndToEndTests/CNTKv2Python/Examples\".split(\"/\")))\n",
" import prepare_test_data as T\n",
" T.prepare_Grocery_data()\n",
" T.prepare_fastrcnn_grocery_100_model()\n",
"\n",
"#Make sure the grocery dataset is installed \n",
"sys.path.append('../../DataSets/Grocery')\n",
"sys.path.append('../../../DataSets/Grocery')\n",
"from install_grocery import download_grocery_data\n",
"download_grocery_data()\n",
"\n",
"# Make sure the FRCNN model exists - check if the model was trained and exists, if not - download the existing model\n",
"\n",
"sys.path.append('../../../../PretrainedModels')\n",
"sys.path.append('../../../../../PretrainedModels')\n",
"from download_model import download_model_by_name\n",
"download_model_by_name(\"Fast-RCNN_grocery100\")\n",
"model_path = '../../../../PretrainedModels/Fast-RCNN_grocery100.model'\n"
"model_path = '../../../../../PretrainedModels/Fast-RCNN_grocery100.model'\n"
]
},
{
@@ -233,7 +233,7 @@
" img = cv2.imread(image_path)\n",
" return resize_and_pad(img, width, height, pad_value), img\n",
"\n",
"test_image_path = r\"../../DataSets/Grocery/testImages/WIN_20160803_11_28_42_Pro.jpg\"\n",
"test_image_path = r\"../../../DataSets/Grocery/testImages/WIN_20160803_11_28_42_Pro.jpg\"\n",
"(test_img, test_img_model_arg), original_img = load_image_and_scale(test_image_path, image_width, image_height)\n",
"\n",
"plt.imshow(cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB))\n",

View File

@@ -6,7 +6,7 @@
from __future__ import print_function
from builtins import str
import pdb, sys, os, time
import sys, os, time
import numpy as np
import selectivesearch
from easydict import EasyDict

View File

@@ -17,7 +17,7 @@ __C.TRAIN = edict()
# If set to 'True' training will be skipped if a trained model exists already
__C.CNTK.MAKE_MODE = True
# set to 'True' to use determininistic algorithms
# set to 'True' to use deterministic algorithms
__C.CNTK.FORCE_DETERMINISTIC = False
# set to 'True' to run only a single epoch
__C.CNTK.FAST_MODE = False
@@ -28,17 +28,14 @@ __C.CNTK.GRAPH_TYPE = "png" # "png" or "pdf"
# Learning parameters
__C.CNTK.L2_REG_WEIGHT = 0.0005
__C.CNTK.MOMENTUM_PER_MB = 0.9
__C.CNTK.MAX_EPOCHS = 15 # use more epochs and more ROIs (NUM_ROI_PROPOSALS) for better results
__C.CNTK.LR_FACTOR = 1.0
__C.CNTK.MAX_EPOCHS = 20
__C.CNTK.LR_FACTOR = 10.0 # 10.0 is used for the Grocery example data. Start with 1.0 for other data sets.
__C.CNTK.LR_PER_SAMPLE = [0.001] * 10 + [0.0001] * 10 + [0.00001]
# The learning rate multiplier for all bias weights
__C.CNTK.BIAS_LR_MULT = 2.0
# Number of regions of interest [ROIs] proposals
__C.NUM_ROI_PROPOSALS = 500 # use 2000 or more for good results
# minimum width and height for proposals in pixels
__C.PROPOSALS_MIN_W = 20
__C.PROPOSALS_MIN_H = 20
__C.NUM_ROI_PROPOSALS = 200 # use 2000 or more for good results
# the minimum IoU (overlap) of a proposal to qualify for training regression targets
__C.BBOX_THRESH = 0.5
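Note that `LR_FACTOR` above scales the per-sample schedule; a one-line sketch, assuming the same scaling pattern used by the Faster R-CNN training code in these examples:

```
# Effective schedule: [0.01]*10 + [0.001]*10 + [0.0001] for LR_FACTOR = 10.0
lr_per_sample_scaled = [lr * cfg["CNTK"].LR_FACTOR for lr in cfg["CNTK"].LR_PER_SAMPLE]
```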
@@ -53,7 +50,7 @@ __C.IMAGE_WIDTH = 850
__C.IMAGE_HEIGHT = 850
# Use horizontally-flipped images during training?
__C.TRAIN.USE_FLIPPED = False
__C.TRAIN.USE_FLIPPED = True
# If set to 'True' conv layers weights from the base model will be trained, too
__C.TRAIN_CONV_LAYERS = True
# Sigma parameter for smooth L1 loss in the RPN and the detector (DET)
@@ -65,13 +62,52 @@ __C.RESULTS_NMS_THRESHOLD = 0.5
__C.RESULTS_NMS_CONF_THRESHOLD = 0.0
# Enable plotting of results generally / also plot background boxes / also plot unregressed boxes
__C.VISUALIZE_RESULTS = True
__C.VISUALIZE_RESULTS = False
__C.DRAW_NEGATIVE_ROIS = False
__C.DRAW_UNREGRESSED_ROIS = False
# only for plotting results: boxes with a score lower than this threshold will be considered background
__C.RESULTS_BGR_PLOT_THRESHOLD = 0.1
# If set to True the following two parameters need to point to the corresponding files that contain the proposals:
# __C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE
# __C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE
__C.USE_PRECOMPUTED_PROPOSALS = False
# roi proposal parameters for selective search, grid and filtering
# The first three parameters are for dlib's selective search. For details see
# http://dlib.net/dlib/image_transforms/segment_image_abstract.h.html#find_candidate_object_locations
#
# The basic segmentation is performed kvals.size() times. The k parameter is set (from, to, step_size)
__C.roi_ss_kvals = (10, 500, 5)
# When doing the basic segmentations prior to any box merging, all
# rectangles that have an area < min_size are discarded. Therefore, all outputs and
# subsequent merged rectangles are built out of rectangles that contain at
# least min_size pixels. Note that setting min_size to a smaller value than
# you might otherwise be interested in using can be useful since it allows a
# larger number of possible merged boxes to be created
__C.roi_ss_min_size = 9
# There are max_merging_iterations rounds of neighboring blob merging.
# Therefore, this parameter has some effect on the number of output rectangles
# you get, with larger values of the parameter giving more output rectangles.
# Hint: set __C.CNTK.DEBUG_OUTPUT=True to see the number of ROIs from selective search
__C.roi_ss_mm_iterations = 30
#
# image size used for ROI generation
__C.roi_ss_img_size = 200
# minimum relative width/height of an ROI
__C.roi_min_side_rel = 0.01
# maximum relative width/height of an ROI
__C.roi_max_side_rel = 1.0
# minimum relative area of an ROI
__C.roi_min_area_rel = 0.0001
# maximum relative area of an ROI
__C.roi_max_area_rel = 0.9
# maximum aspect ratio of an ROI vertically and horizontally
__C.roi_max_aspect_ratio = 4.0
# aspect ratios of ROIs for uniform grid ROIs
__C.roi_grid_aspect_ratios = [1.0, 2.0, 0.5]
# For reproducibility
__C.RND_SEED = 3
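For illustration, a minimal sketch of how these parameters would be handed to dlib's selective search (assuming the documented `dlib.find_candidate_object_locations` API; the resizing step and helper name are illustrative, not the repository's actual implementation):

```
import cv2
import dlib

def run_selective_search(img, cfg):
    # Work on the reduced image size used for ROI generation.
    img_small = cv2.resize(img, (cfg.roi_ss_img_size, cfg.roi_ss_img_size))
    rects = []
    # kvals, min_size and max_merging_iterations correspond to
    # roi_ss_kvals, roi_ss_min_size and roi_ss_mm_iterations above.
    dlib.find_candidate_object_locations(img_small, rects,
                                         kvals=cfg.roi_ss_kvals,
                                         min_size=cfg.roi_ss_min_size,
                                         max_merging_iterations=cfg.roi_ss_mm_iterations)
    return [(r.left(), r.top(), r.right(), r.bottom()) for r in rects]
```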

View File

@@ -5,16 +5,15 @@
# ==============================================================================
import os
import numpy as np
from matplotlib.pyplot import imsave
import cv2
import numpy as np
import cntk
from cntk import input_variable, Axis
from utils.map_helpers import evaluate_detections
from utils.plot_helpers import resize_and_pad
from utils.rpn.bbox_transform import regress_rois
from utils.od_mb_source import ObjectDetectionMinibatchSource
from utils.proposal_helpers import ProposalProvider, compute_proposals, compute_image_stats
from utils.proposal_helpers import ProposalProvider, compute_image_stats, compute_proposals
class FastRCNN_Evaluator:
def __init__(self, eval_model, cfg):
@@ -26,9 +25,7 @@ class FastRCNN_Evaluator:
roi_proposals = input_variable((cfg.NUM_ROI_PROPOSALS, 4), dynamic_axes=[Axis.default_batch_axis()],
name="roi_proposals")
self._eval_model = eval_model(image_input, roi_proposals)
self._min_w = cfg['PROPOSALS_MIN_W']
self._min_h = cfg['PROPOSALS_MIN_H']
self._num_proposals = cfg['NUM_ROI_PROPOSALS']
self._cfg = cfg
def process_image(self, img_path):
out_cls_pred, out_rpn_rois, out_bbox_regr, dims = self.process_image_detailed(img_path)
@@ -41,8 +38,6 @@
img = cv2.imread(img_path)
_, cntk_img_input, dims = resize_and_pad(img, self._img_shape[2], self._img_shape[1])
#import pdb; pdb.set_trace()
# compute ROI proposals and apply scaling and padding to them
# [target_w, target_h, img_width, img_height, top, bottom, left, right, scale_factor]
img_stats = compute_image_stats(len(img[0]), len(img), self._img_shape[2], self._img_shape[1])
@@ -50,7 +45,8 @@
top = img_stats[4]
left = img_stats[6]
proposals = compute_proposals(img, self._num_proposals, self._min_w, self._min_h)
num_proposals = self._cfg['NUM_ROI_PROPOSALS']
proposals = compute_proposals(img, num_proposals, self._cfg)
proposals = proposals * scale_factor
proposals += (left, top, left, top)
@@ -76,7 +72,18 @@ def compute_test_set_aps(eval_model, cfg):
frcn_eval = eval_model(image_input, roi_proposals)
# Create the minibatch source
proposal_provider = ProposalProvider.fromconfig(cfg)
if cfg.USE_PRECOMPUTED_PROPOSALS:
try:
cfg["DATA"].TEST_PRECOMPUTED_PROPOSALS_FILE = os.path.join(cfg["DATA"].MAP_FILE_PATH, cfg["DATA"].TEST_PRECOMPUTED_PROPOSALS_FILE)
proposal_provider = ProposalProvider.fromfile(cfg["DATA"].TEST_PRECOMPUTED_PROPOSALS_FILE, cfg.NUM_ROI_PROPOSALS)
except:
print("To use precomputed proposals please specify the following parameters in your configuration:\n"
"__C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE\n"
"__C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE")
exit(-1)
else:
proposal_provider = ProposalProvider.fromconfig(cfg)
minibatch_source = ObjectDetectionMinibatchSource(
cfg["DATA"].TEST_MAP_FILE,
cfg["DATA"].TEST_ROI_FILE,

View File

@@ -58,9 +58,17 @@ def prepare(cfg, use_arg_parser=True):
cfg["DATA"].TEST_MAP_FILE = os.path.join(data_path, cfg["DATA"].TEST_MAP_FILE)
cfg["DATA"].TRAIN_ROI_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_ROI_FILE)
cfg["DATA"].TEST_ROI_FILE = os.path.join(data_path, cfg["DATA"].TEST_ROI_FILE)
if cfg.USE_PRECOMPUTED_PROPOSALS:
try:
cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE)
except:
print("To use precomputed proposals please specify the following parameters in your configuration:\n"
"__C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE\n"
"__C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE")
exit(-1)
cfg['MODEL_PATH'] = os.path.join(cfg.OUTPUT_PATH, "fast_rcnn_eval_{}.model".format(cfg["MODEL"].BASE_MODEL))
cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "PretrainedModels",
cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "..", "PretrainedModels",
cfg["MODEL"].BASE_MODEL_FILE)
cfg["DATA"].CLASSES = parse_class_map_file(cfg["DATA"].CLASS_MAP_FILE)
@@ -300,7 +308,11 @@ def train_fast_rcnn(cfg):
log_number_of_parameters(loss)
# Create the minibatch source
proposal_provider = ProposalProvider.fromconfig(cfg)
if cfg.USE_PRECOMPUTED_PROPOSALS:
proposal_provider = ProposalProvider.fromfile(cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE, cfg.NUM_ROI_PROPOSALS)
else:
proposal_provider = ProposalProvider.fromconfig(cfg)
od_minibatch_source = ObjectDetectionMinibatchSource(
cfg["DATA"].TRAIN_MAP_FILE, cfg["DATA"].TRAIN_ROI_FILE,
max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE,

View File

@@ -5,7 +5,7 @@
This folder contains an end-to-end solution for using Fast R-CNN to perform object detection.
The original research paper for Fast R-CNN can be found at [https://arxiv.org/abs/1504.08083](https://arxiv.org/abs/1504.08083).
Base models that are supported by the current configuration are AlexNet and VGG16.
Two image set that are preconfigured are Pascal VOC 2007 and Grocery.
Two image sets that are preconfigured are Pascal VOC 2007 and Grocery.
Other base models or image sets can be used by adding a configuration file similar to the examples in `utils/configs` and importing it in `run_fast_rcnn.py`.
## Running the example
@@ -15,14 +15,14 @@ Other base models or image sets can be used by adding a configuration file simil
To run Fast R-CNN you need a CNTK Python environment. Install the following additional packages:
```
pip install opencv-python easydict pyyaml future
pip install opencv-python easydict pyyaml dlib
```
The code uses prebuilt Cython modules for parts of the region proposal network (see `Examples/Image/Detection/utils/cython_modules`).
These binaries are contained in the repository for Python 3.5 under Windows and Python 3.4 under Linux.
If you require other versions please follow the instructions at [https://github.com/rbgirshick/py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn#installation-sufficient-for-the-demo).
If you want to use the debug output you need to run ' pip install pydot_ng) ([website](https://pypi.python.org/pypi/pydot-ng)) and install [graphviz](http://graphviz.org/) (GraphViz executable has to be in the systems PATH) to be able to plot the CNTK graphs.
If you want to use the debug output you need to run `pip install pydot_ng` ([website](https://pypi.python.org/pypi/pydot-ng)) and install [graphviz](http://graphviz.org/) (the GraphViz executable has to be in the system's PATH) to be able to plot the CNTK graphs.
### Getting the data and AlexNet model
@@ -90,7 +90,7 @@ and run `python run_fast_rcnn.py` to train and evaluate Fast R-CNN on your data.
### Parameters
All options and parameters are in `config.py` in the `FastRCNN` folder and all of them are explained there. These include
All options and parameters are in `FastRCNN_config.py` in the `FastRCNN` folder and all of them are explained there. These include
```
# learning parameters
@@ -99,9 +99,10 @@ __C.CNTK.LR_PER_SAMPLE = [0.001] * 10 + [0.0001] * 10 + [0.00001]
# Number of regions of interest [ROIs] proposals
__C.NUM_ROI_PROPOSALS = 1000
# minimum width and height for proposals in pixels
__C.PROPOSALS_MIN_W = 20
__C.PROPOSALS_MIN_H = 20
# minimum relative width/height of an ROI
__C.roi_min_side_rel = 0.01
# maximum relative width/height of an ROI
__C.roi_max_side_rel = 1.0
```
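As a worked example, with `roi_min_side_rel = 0.01` and an 850-pixel input image, proposals narrower or shorter than roughly 8.5 pixels are dropped. A sketch of the filtering these relative parameters imply (hypothetical helper, not the repository's actual code):

```
def roi_passes_filter(w, h, img_w, img_h, cfg):
    # Relative side/area limits from FastRCNN_config.py; the aspect-ratio
    # limit applies both vertically and horizontally.
    rel_w, rel_h = float(w) / img_w, float(h) / img_h
    return (cfg.roi_min_side_rel <= rel_w <= cfg.roi_max_side_rel and
            cfg.roi_min_side_rel <= rel_h <= cfg.roi_max_side_rel and
            cfg.roi_min_area_rel <= rel_w * rel_h <= cfg.roi_max_area_rel and
            max(float(w) / h, float(h) / w) <= cfg.roi_max_aspect_ratio)
```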
### Fast R-CNN CNTK code
@@ -110,4 +111,4 @@ Most of the code is in `FastRCNN_train.py` and `FastRCNN_eval.py` (and `Examples
### Algorithm
All details regarding the Fast R-CNN algortihm can be found in the original research paper: [https://arxiv.org/abs/1504.08083](https://arxiv.org/abs/1504.08083).
All details regarding the Fast R-CNN algorithm can be found in the original research paper: [https://arxiv.org/abs/1504.08083](https://arxiv.org/abs/1504.08083).

View File

@@ -26,7 +26,7 @@ if __name__ == '__main__':
sys.path.append(os.path.join(base_folder, "..", "..", "..", "..", "PretrainedModels"))
from download_model import download_model_by_name
download_model_by_name("AlexNet_ImageNet_CNTK")
download_model_by_name("AlexNet_ImageNet_Caffe")
print("Creating mapping files for Grocery data set..")
create_grocery_mappings(base_folder)

View File

@@ -1,19 +0,0 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
from __future__ import print_function
import zipfile
import os, sys
base_folder = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(base_folder, "..", "..", "DataSets", "Grocery"))
from install_grocery import download_grocery_data
download_grocery_data()
sys.path.append(os.path.join(base_folder, "..", "..", "..", "..", "PretrainedModels"))
from download_model import download_model_by_name
download_model_by_name("AlexNet_ImageNet_CNTK")

View File

@@ -13,7 +13,7 @@ from utils.plot_helpers import plot_test_set_results
def get_configuration():
# load configs for detector, base network and data set
from config import cfg as detector_cfg
from FastRCNN_config import cfg as detector_cfg
# for VGG16 base model use: from utils.configs.VGG16_config import cfg as network_cfg
# for AlexNet base model use: from utils.configs.AlexNet_config import cfg as network_cfg
from utils.configs.AlexNet_config import cfg as network_cfg

View File

@@ -1,750 +0,0 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
from __future__ import print_function
import numpy as np
import os, sys
import argparse
import yaml # pip install pyyaml
import easydict # pip install easydict
import cntk
import easydict
from cntk import Trainer, UnitType, load_model, Axis, input_variable, parameter, times, combine, \
softmax, roipooling, plus, element_times, CloneMethod, alias, Communicator, reduce_sum
from cntk.core import Value
from cntk.io import MinibatchData
from cntk.initializer import normal
from cntk.layers import placeholder, Constant, Sequential
from cntk.learners import momentum_sgd, learning_rate_schedule, momentum_schedule
from cntk.logging import log_number_of_parameters, ProgressPrinter
from cntk.logging.graph import find_by_name, plot
from cntk.losses import cross_entropy_with_softmax
from cntk.metrics import classification_error
from _cntk_py import force_deterministic_algorithms
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path, ".."))
from utils.rpn.rpn_helpers import create_rpn, create_proposal_target_layer
from utils.rpn.cntk_smoothL1_loss import SmoothL1Loss
from utils.map.map_helpers import evaluate_detections
from utils.annotations.annotations_helper import parse_class_map_file
from config import cfg
from od_mb_source import ObjectDetectionMinibatchSource
from cntk_helpers import regress_rois
###############################################################
###############################################################
mb_size = 1
image_width = cfg["CNTK"].IMAGE_WIDTH
image_height = cfg["CNTK"].IMAGE_HEIGHT
num_channels = 3
# dims_input -- (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
dims_input_const = MinibatchData(Value(batch=np.asarray(
[image_width, image_height, image_width, image_height, image_width, image_height], dtype=np.float32)), 1, 1, False)
# Color used for padding and normalization (Caffe model uses [102.98010, 115.94650, 122.77170])
img_pad_value = [103, 116, 123] if cfg["CNTK"].BASE_MODEL == "VGG16" else [114, 114, 114]
normalization_const = Constant([[[103]], [[116]], [[123]]]) if cfg["CNTK"].BASE_MODEL == "VGG16" else Constant([[[114]], [[114]], [[114]]])
globalvars = {}
globalvars['output_path'] = os.path.join(abs_path, "Output")
# dataset specific parameters
map_file_path = os.path.join(abs_path, cfg["CNTK"].MAP_FILE_PATH)
globalvars['class_map_file'] = cfg["CNTK"].CLASS_MAP_FILE
globalvars['train_map_file'] = cfg["CNTK"].TRAIN_MAP_FILE
globalvars['test_map_file'] = cfg["CNTK"].TEST_MAP_FILE
globalvars['train_roi_file'] = cfg["CNTK"].TRAIN_ROI_FILE
globalvars['test_roi_file'] = cfg["CNTK"].TEST_ROI_FILE
epoch_size = cfg["CNTK"].NUM_TRAIN_IMAGES
num_test_images = cfg["CNTK"].NUM_TEST_IMAGES
# model specific parameters
model_folder = os.path.join(abs_path, "..", "..", "..", "..", "PretrainedModels")
base_model_file = os.path.join(model_folder, cfg["CNTK"].BASE_MODEL_FILE)
feature_node_name = cfg["CNTK"].FEATURE_NODE_NAME
last_conv_node_name = cfg["CNTK"].LAST_CONV_NODE_NAME
start_train_conv_node_name = cfg["CNTK"].START_TRAIN_CONV_NODE_NAME
pool_node_name = cfg["CNTK"].POOL_NODE_NAME
last_hidden_node_name = cfg["CNTK"].LAST_HIDDEN_NODE_NAME
roi_dim = cfg["CNTK"].ROI_DIM
###############################################################
###############################################################
def set_global_vars(use_arg_parser = True):
data_path = map_file_path
# set and overwrite learning parameters
globalvars['rpn_lr_factor'] = cfg["CNTK"].RPN_LR_FACTOR
globalvars['frcn_lr_factor'] = cfg["CNTK"].FRCN_LR_FACTOR
globalvars['e2e_lr_factor'] = cfg["CNTK"].E2E_LR_FACTOR
globalvars['momentum_per_mb'] = cfg["CNTK"].MOMENTUM_PER_MB
globalvars['e2e_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].E2E_MAX_EPOCHS
globalvars['rpn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].RPN_EPOCHS
globalvars['frcn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].FRCN_EPOCHS
globalvars['rnd_seed'] = cfg.RNG_SEED
globalvars['train_conv'] = cfg["CNTK"].TRAIN_CONV_LAYERS
globalvars['train_e2e'] = cfg["CNTK"].TRAIN_E2E
if use_arg_parser:
parser = argparse.ArgumentParser()
parser.add_argument('-datadir', '--datadir', help='Data directory where the ImageNet dataset is located',
required=False, default=data_path)
parser.add_argument('-outputdir', '--outputdir', help='Output directory for checkpoints and models',
required=False, default=None)
parser.add_argument('-logdir', '--logdir', help='Log file',
required=False, default=None)
parser.add_argument('-n', '--num_epochs', help='Total number of epochs to train', type=int,
required=False, default=cfg["CNTK"].E2E_MAX_EPOCHS)
parser.add_argument('-m', '--minibatch_size', help='Minibatch size', type=int,
required=False, default=mb_size)
parser.add_argument('-e', '--epoch_size', help='Epoch size', type=int,
required=False, default=epoch_size)
parser.add_argument('-q', '--quantized_bits', help='Number of quantized bits used for gradient aggregation', type=int,
required=False, default='32')
parser.add_argument('-r', '--restart',
help='Indicating whether to restart from scratch (instead of restart from checkpoint file by default)',
action='store_true')
parser.add_argument('-device', '--device', type=int, help="Force to run the script on a specified device",
required=False, default=None)
parser.add_argument('-rpnLrFactor', '--rpnLrFactor', type=float, help="Scale factor for rpn lr schedule", required=False)
parser.add_argument('-frcnLrFactor', '--frcnLrFactor', type=float, help="Scale factor for frcn lr schedule", required=False)
parser.add_argument('-e2eLrFactor', '--e2eLrFactor', type=float, help="Scale factor for e2e lr schedule", required=False)
parser.add_argument('-momentumPerMb', '--momentumPerMb', type=float, help="momentum per minibatch", required=False)
parser.add_argument('-e2eEpochs', '--e2eEpochs', type=int, help="number of epochs for e2e training", required=False)
parser.add_argument('-rpnEpochs', '--rpnEpochs', type=int, help="number of epochs for rpn training", required=False)
parser.add_argument('-frcnEpochs', '--frcnEpochs', type=int, help="number of epochs for frcn training", required=False)
parser.add_argument('-rndSeed', '--rndSeed', type=int, help="the random seed", required=False)
parser.add_argument('-trainConv', '--trainConv', type=int, help="whether to train conv layers", required=False)
parser.add_argument('-trainE2E', '--trainE2E', type=int, help="whether to train e2e (otherwise 4 stage)", required=False)
args = vars(parser.parse_args())
if args['rpnLrFactor'] is not None:
globalvars['rpn_lr_factor'] = args['rpnLrFactor']
if args['frcnLrFactor'] is not None:
globalvars['frcn_lr_factor'] = args['frcnLrFactor']
if args['e2eLrFactor'] is not None:
globalvars['e2e_lr_factor'] = args['e2eLrFactor']
if args['momentumPerMb'] is not None:
globalvars['momentum_per_mb'] = args['momentumPerMb']
if args['e2eEpochs'] is not None:
globalvars['e2e_epochs'] = args['e2eEpochs']
if args['rpnEpochs'] is not None:
globalvars['rpn_epochs'] = args['rpnEpochs']
if args['frcnEpochs'] is not None:
globalvars['frcn_epochs'] = args['frcnEpochs']
if args['rndSeed'] is not None:
globalvars['rnd_seed'] = args['rndSeed']
if args['trainConv'] is not None:
globalvars['train_conv'] = True if args['trainConv']==1 else False
if args['trainE2E'] is not None:
globalvars['train_e2e'] = True if args['trainE2E']==1 else False
if args['outputdir'] is not None:
globalvars['output_path'] = args['outputdir']
if args['logdir'] is not None:
log_dir = args['logdir']
if args['device'] is not None:
# Setting one worker on GPU and one worker on CPU. Otherwise memory consumption is too high for a single GPU.
if Communicator.rank() == 0:
cntk.device.try_set_default_device(cntk.device.gpu(args['device']))
else:
cntk.device.try_set_default_device(cntk.device.cpu())
if args['datadir'] is not None:
data_path = args['datadir']
if not os.path.isdir(data_path):
raise RuntimeError("Directory %s does not exist" % data_path)
globalvars['class_map_file'] = os.path.join(data_path, globalvars['class_map_file'])
globalvars['train_map_file'] = os.path.join(data_path, globalvars['train_map_file'])
globalvars['test_map_file'] = os.path.join(data_path, globalvars['test_map_file'])
globalvars['train_roi_file'] = os.path.join(data_path, globalvars['train_roi_file'])
globalvars['test_roi_file'] = os.path.join(data_path, globalvars['test_roi_file'])
if cfg["CNTK"].FORCE_DETERMINISTIC:
force_deterministic_algorithms()
np.random.seed(seed=globalvars['rnd_seed'])
globalvars['classes'] = parse_class_map_file(globalvars['class_map_file'])
globalvars['num_classes'] = len(globalvars['classes'])
if cfg["CNTK"].DEBUG_OUTPUT:
# report args
print("Using the following parameters:")
print("Flip image : {}".format(cfg["TRAIN"].USE_FLIPPED))
print("Train conv layers: {}".format(globalvars['train_conv']))
print("Random seed : {}".format(globalvars['rnd_seed']))
print("Momentum per MB : {}".format(globalvars['momentum_per_mb']))
if globalvars['train_e2e']:
print("E2E epochs : {}".format(globalvars['e2e_epochs']))
else:
print("RPN lr factor : {}".format(globalvars['rpn_lr_factor']))
print("RPN epochs : {}".format(globalvars['rpn_epochs']))
print("FRCN lr factor : {}".format(globalvars['frcn_lr_factor']))
print("FRCN epochs : {}".format(globalvars['frcn_epochs']))
###############################################################
###############################################################
def clone_model(base_model, from_node_names, to_node_names, clone_method):
from_nodes = [find_by_name(base_model, node_name) for node_name in from_node_names]
if None in from_nodes:
print("Error: could not find all specified 'from_nodes' in clone. Looking for {}, found {}"
.format(from_node_names, from_nodes))
to_nodes = [find_by_name(base_model, node_name) for node_name in to_node_names]
if None in to_nodes:
print("Error: could not find all specified 'to_nodes' in clone. Looking for {}, found {}"
.format(to_node_names, to_nodes))
input_placeholders = dict(zip(from_nodes, [placeholder() for x in from_nodes]))
cloned_net = combine(to_nodes).clone(clone_method, input_placeholders)
return cloned_net
def clone_conv_layers(base_model):
if not globalvars['train_conv']:
conv_layers = clone_model(base_model, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
elif feature_node_name == start_train_conv_node_name:
conv_layers = clone_model(base_model, [feature_node_name], [last_conv_node_name], CloneMethod.clone)
else:
fixed_conv_layers = clone_model(base_model, [feature_node_name], [start_train_conv_node_name],
CloneMethod.freeze)
train_conv_layers = clone_model(base_model, [start_train_conv_node_name], [last_conv_node_name],
CloneMethod.clone)
conv_layers = Sequential([fixed_conv_layers, train_conv_layers])
return conv_layers
# Please keep in sync with Readme.md
def create_fast_rcnn_predictor(conv_out, rois, fc_layers):
# RCNN
roi_out = roipooling(conv_out, rois, cntk.MAX_POOLING, (roi_dim, roi_dim), spatial_scale=1/16.0)
fc_out = fc_layers(roi_out)
# prediction head
W_pred = parameter(shape=(4096, globalvars['num_classes']), init=normal(scale=0.01), name="cls_score.W")
b_pred = parameter(shape=globalvars['num_classes'], init=0, name="cls_score.b")
cls_score = plus(times(fc_out, W_pred), b_pred, name='cls_score')
# regression head
W_regr = parameter(shape=(4096, globalvars['num_classes']*4), init=normal(scale=0.001), name="bbox_regr.W")
b_regr = parameter(shape=globalvars['num_classes']*4, init=0, name="bbox_regr.b")
bbox_pred = plus(times(fc_out, W_regr), b_regr, name='bbox_regr')
return cls_score, bbox_pred
# Please keep in sync with Readme.md
# Defines the Faster R-CNN network model for detecting objects in images
def create_faster_rcnn_predictor(base_model_file_name, features, scaled_gt_boxes, dims_input):
# Load the pre-trained classification net and clone layers
base_model = load_model(base_model_file_name)
conv_layers = clone_conv_layers(base_model)
fc_layers = clone_model(base_model, [pool_node_name], [last_hidden_node_name], clone_method=CloneMethod.clone)
# Normalization and conv layers
feat_norm = features - normalization_const
conv_out = conv_layers(feat_norm)
# RPN and prediction targets
rpn_rois, rpn_losses = \
create_rpn(conv_out, scaled_gt_boxes, dims_input, proposal_layer_param_string=cfg["CNTK"].PROPOSAL_LAYER_PARAMS)
rois, label_targets, bbox_targets, bbox_inside_weights = \
create_proposal_target_layer(rpn_rois, scaled_gt_boxes, num_classes=globalvars['num_classes'])
# Fast RCNN and losses
cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers)
detection_losses = create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights)
loss = rpn_losses + detection_losses
pred_error = classification_error(cls_score, label_targets, axis=1)
return loss, pred_error
def create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights):
# classification loss
cls_loss = cross_entropy_with_softmax(cls_score, label_targets, axis=1)
p_cls_loss = placeholder()
p_rois = placeholder()
# The terms that are accounted for in the cls loss are those that correspond to an actual roi proposal --> do not count no-op (all-zero) rois
roi_indicator = reduce_sum(p_rois, axis=1)
cls_num_terms = reduce_sum(cntk.greater_equal(roi_indicator, 0.0))
cls_normalization_factor = 1.0 / cls_num_terms
normalized_cls_loss = reduce_sum(p_cls_loss) * cls_normalization_factor
reduced_cls_loss = cntk.as_block(normalized_cls_loss,
[(p_cls_loss, cls_loss), (p_rois, rois)],
'Normalize', 'norm_cls_loss')
# regression loss
p_bbox_pred = placeholder()
p_bbox_targets = placeholder()
p_bbox_inside_weights = placeholder()
bbox_loss = SmoothL1Loss(cfg["CNTK"].SIGMA_DET_L1, p_bbox_pred, p_bbox_targets, p_bbox_inside_weights, 1.0)
# The bbox loss is normalized by the batch size
bbox_normalization_factor = 1.0 / cfg["TRAIN"].BATCH_SIZE
normalized_bbox_loss = reduce_sum(bbox_loss) * bbox_normalization_factor
reduced_bbox_loss = cntk.as_block(normalized_bbox_loss,
[(p_bbox_pred, bbox_pred), (p_bbox_targets, bbox_targets), (p_bbox_inside_weights, bbox_inside_weights)],
'SmoothL1Loss', 'norm_bbox_loss')
detection_losses = plus(reduced_cls_loss, reduced_bbox_loss, name="detection_losses")
return detection_losses
def create_eval_model(model, image_input, dims_input, rpn_model=None):
print("creating eval model")
conv_layers = clone_model(model, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
conv_out = conv_layers(image_input)
model_with_rpn = model if rpn_model is None else rpn_model
rpn = clone_model(model_with_rpn, [last_conv_node_name, "dims_input"], ["rpn_rois"], CloneMethod.freeze)
rpn_rois = rpn(conv_out, dims_input)
roi_fc_layers = clone_model(model, [last_conv_node_name, "rpn_target_rois"], ["cls_score", "bbox_regr"], CloneMethod.freeze)
pred_net = roi_fc_layers(conv_out, rpn_rois)
cls_score = pred_net.outputs[0]
bbox_regr = pred_net.outputs[1]
if cfg["TRAIN"].BBOX_NORMALIZE_TARGETS and cfg["TRAIN"].BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
num_boxes = int(bbox_regr.shape[1] / 4)
bbox_normalize_means = np.array(cfg["TRAIN"].BBOX_NORMALIZE_MEANS * num_boxes)
bbox_normalize_stds = np.array(cfg["TRAIN"].BBOX_NORMALIZE_STDS * num_boxes)
bbox_regr = plus(element_times(bbox_regr, bbox_normalize_stds), bbox_normalize_means, name='bbox_regr')
cls_pred = softmax(cls_score, axis=1, name='cls_pred')
eval_model = combine([cls_pred, rpn_rois, bbox_regr])
return eval_model
def train_model(image_input, roi_input, dims_input, loss, pred_error,
lr_per_sample, mm_schedule, l2_reg_weight, epochs_to_train,
rpn_rois_input=None, buffered_rpn_proposals=None):
if isinstance(loss, cntk.Variable):
loss = combine([loss])
params = loss.parameters
biases = [p for p in params if '.b' in p.name or 'b' == p.name]
others = [p for p in params if not p in biases]
bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT
if cfg["CNTK"].DEBUG_OUTPUT:
print("biases")
for p in biases: print(p)
print("others")
for p in others: print(p)
print("bias_lr_mult: {}".format(bias_lr_mult))
# Instantiate the learners and the trainer object
lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
learner = momentum_sgd(others, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight,
unit_gain=False, use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT)
bias_lr_per_sample = [v * bias_lr_mult for v in lr_per_sample]
bias_lr_schedule = learning_rate_schedule(bias_lr_per_sample, unit=UnitType.sample)
bias_learner = momentum_sgd(biases, bias_lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight,
unit_gain=False, use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT)
trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])
# Get minibatches of images and perform model training
print("Training model for %s epochs." % epochs_to_train)
log_number_of_parameters(loss)
# Create the minibatch source
od_minibatch_source = ObjectDetectionMinibatchSource(
globalvars['train_map_file'], globalvars['train_roi_file'],
max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
pad_width=image_width, pad_height=image_height, pad_value=img_pad_value,
randomize=True, use_flipping=cfg["TRAIN"].USE_FLIPPED,
max_images=cfg["CNTK"].NUM_TRAIN_IMAGES,
buffered_rpn_proposals=buffered_rpn_proposals)
# define mapping from reader streams to network inputs
input_map = {
od_minibatch_source.image_si: image_input,
od_minibatch_source.roi_si: roi_input,
od_minibatch_source.dims_si: dims_input
}
use_buffered_proposals = buffered_rpn_proposals is not None
progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs_to_train, gen_heartbeat=True)
for epoch in range(epochs_to_train): # loop over epochs
sample_count = 0
while sample_count < epoch_size: # loop over minibatches in the epoch
data, proposals = od_minibatch_source.next_minibatch_with_proposals(min(mb_size, epoch_size-sample_count), input_map=input_map)
if use_buffered_proposals:
data[rpn_rois_input] = MinibatchData(Value(batch=np.asarray(proposals, dtype=np.float32)), 1, 1, False)
# remove dims input if no rpn is required to avoid warnings
del data[[k for k in data if '[6]' in str(k)][0]]
trainer.train_minibatch(data) # update model with it
sample_count += trainer.previous_minibatch_sample_count # count samples processed so far
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
if sample_count % 100 == 0:
print("Processed {} samples".format(sample_count))
progress_printer.epoch_summary(with_metric=True)
def compute_rpn_proposals(rpn_model, image_input, roi_input, dims_input):
num_images = cfg["CNTK"].NUM_TRAIN_IMAGES
# Create the minibatch source
od_minibatch_source = ObjectDetectionMinibatchSource(
globalvars['train_map_file'], globalvars['train_roi_file'],
max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
pad_width=image_width, pad_height=image_height, pad_value=img_pad_value,
max_images=num_images,
randomize=False, use_flipping=False)
# define mapping from reader streams to network inputs
input_map = {
od_minibatch_source.image_si: image_input,
od_minibatch_source.roi_si: roi_input,
od_minibatch_source.dims_si: dims_input
}
# setting pre- and post-nms top N to training values since buffered proposals are used for further training
test_pre = cfg["TEST"].RPN_PRE_NMS_TOP_N
test_post = cfg["TEST"].RPN_POST_NMS_TOP_N
cfg["TEST"].RPN_PRE_NMS_TOP_N = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
cfg["TEST"].RPN_POST_NMS_TOP_N = cfg["TRAIN"].RPN_POST_NMS_TOP_N
buffered_proposals = [None for _ in range(num_images)]
sample_count = 0
while sample_count < num_images:
data = od_minibatch_source.next_minibatch(1, input_map=input_map)
output = rpn_model.eval(data)
out_dict = dict([(k.name, k) for k in output])
out_rpn_rois = output[out_dict['rpn_rois']][0]
buffered_proposals[sample_count] = np.round(out_rpn_rois).astype(np.int16)
sample_count += 1
if sample_count % 500 == 0:
print("Buffered proposals for {} samples".format(sample_count))
# resetting config values to original test values
cfg["TEST"].RPN_PRE_NMS_TOP_N = test_pre
cfg["TEST"].RPN_POST_NMS_TOP_N = test_post
return buffered_proposals
# Trains a Faster R-CNN model end-to-end
def train_faster_rcnn_e2e(base_model_file_name, debug_output=False):
# Input variables denoting features and labeled ground truth rois (as 5-tuples per roi)
image_input = input_variable((num_channels, image_height, image_width), dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name)
roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
dims_node = alias(dims_input, name='dims_input')
# Instantiate the Faster R-CNN prediction model and loss function
loss, pred_error = create_faster_rcnn_predictor(base_model_file_name, image_input, roi_input, dims_node)
if debug_output:
print("Storing graphs and models to %s." % globalvars['output_path'])
plot(loss, os.path.join(globalvars['output_path'], "graph_frcn_train_e2e." + cfg["CNTK"].GRAPH_TYPE))
# Set learning parameters
e2e_lr_factor = globalvars['e2e_lr_factor']
e2e_lr_per_sample_scaled = [x * e2e_lr_factor for x in cfg["CNTK"].E2E_LR_PER_SAMPLE]
mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)
print("Using base model: {}".format(cfg["CNTK"].BASE_MODEL))
print("lr_per_sample: {}".format(e2e_lr_per_sample_scaled))
train_model(image_input, roi_input, dims_input, loss, pred_error,
e2e_lr_per_sample_scaled, mm_schedule, cfg["CNTK"].L2_REG_WEIGHT, globalvars['e2e_epochs'])
return create_eval_model(loss, image_input, dims_input)
# Trains a Faster R-CNN model using 4-stage alternating training
def train_faster_rcnn_alternating(base_model_file_name, debug_output=False):
'''
4-Step Alternating Training scheme from the Faster R-CNN paper:
# Create initial network, only rpn, without detection network
# --> train only the rpn (and conv3_1 and up for VGG16)
# buffer region proposals from rpn
# Create full network, initialize conv layers with imagenet, use buffered proposals
# --> train only detection network (and conv3_1 and up for VGG16)
# Keep conv weights from detection network and fix them
# --> train only rpn
# buffer region proposals from rpn
# Keep conv and rpn weights from step 3 and fix them
# --> train only detection network
'''
# Learning parameters
rpn_lr_factor = globalvars['rpn_lr_factor']
rpn_lr_per_sample_scaled = [x * rpn_lr_factor for x in cfg["CNTK"].RPN_LR_PER_SAMPLE]
frcn_lr_factor = globalvars['frcn_lr_factor']
frcn_lr_per_sample_scaled = [x * frcn_lr_factor for x in cfg["CNTK"].FRCN_LR_PER_SAMPLE]
l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
mm_schedule = momentum_schedule(globalvars['momentum_per_mb'])
rpn_epochs = globalvars['rpn_epochs']
frcn_epochs = globalvars['frcn_epochs']
print("Using base model: {}".format(cfg["CNTK"].BASE_MODEL))
print("rpn_lr_per_sample: {}".format(rpn_lr_per_sample_scaled))
print("frcn_lr_per_sample: {}".format(frcn_lr_per_sample_scaled))
if debug_output:
print("Storing graphs and models to %s." % globalvars['output_path'])
# Input variables denoting features, labeled ground truth rois (as 5-tuples per roi) and image dimensions
image_input = input_variable((num_channels, image_height, image_width), dynamic_axes=[Axis.default_batch_axis()],
name=feature_node_name)
feat_norm = image_input - normalization_const
roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
scaled_gt_boxes = alias(roi_input, name='roi_input')
dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
dims_node = alias(dims_input, name='dims_input')
rpn_rois_input = input_variable((cfg["TRAIN"].RPN_POST_NMS_TOP_N, 4), dynamic_axes=[Axis.default_batch_axis()])
rpn_rois_buf = alias(rpn_rois_input, name='rpn_rois')
# base image classification model (e.g. VGG16 or AlexNet)
base_model = load_model(base_model_file_name)
print("stage 1a - rpn")
if True:
# Create initial network, only rpn, without detection network
# initial weights train?
# conv: base_model only conv3_1 and up
# rpn: init new yes
# frcn: - -
# conv layers
conv_layers = clone_conv_layers(base_model)
conv_out = conv_layers(feat_norm)
# RPN and losses
rpn_rois, rpn_losses = create_rpn(conv_out, scaled_gt_boxes, dims_node, proposal_layer_param_string=cfg["CNTK"].PROPOSAL_LAYER_PARAMS)
stage1_rpn_network = combine([rpn_rois, rpn_losses])
# train
if debug_output: plot(stage1_rpn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage1a_rpn." + cfg["CNTK"].GRAPH_TYPE))
train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=rpn_epochs)
print("stage 1a - buffering rpn proposals")
buffered_proposals_s1 = compute_rpn_proposals(stage1_rpn_network, image_input, roi_input, dims_input)
print("stage 1b - frcn")
if True:
# Create full network, initialize conv layers with imagenet, fix rpn weights
# initial weights train?
# conv: base_model only conv3_1 and up
# rpn: stage1a rpn model no --> use buffered proposals
# frcn: base_model + new yes
# conv_layers
conv_layers = clone_conv_layers(base_model)
conv_out = conv_layers(feat_norm)
# use buffered proposals in target layer
rois, label_targets, bbox_targets, bbox_inside_weights = \
create_proposal_target_layer(rpn_rois_buf, scaled_gt_boxes, num_classes=globalvars['num_classes'])
# Fast RCNN and losses
fc_layers = clone_model(base_model, [pool_node_name], [last_hidden_node_name], CloneMethod.clone)
cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers)
detection_losses = create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights)
pred_error = classification_error(cls_score, label_targets, axis=1, name="pred_error")
stage1_frcn_network = combine([rois, cls_score, bbox_pred, detection_losses, pred_error])
# train
if debug_output: plot(stage1_frcn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage1b_frcn." + cfg["CNTK"].GRAPH_TYPE))
train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=frcn_epochs,
rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s1)
buffered_proposals_s1 = None
print("stage 2a - rpn")
if True:
# Keep conv weights from detection network and fix them
# initial weights train?
# conv: stage1b frcn model no
# rpn: stage1a rpn model yes
# frcn: - -
# conv_layers
conv_layers = clone_model(stage1_frcn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
conv_out = conv_layers(image_input)
# RPN and losses
rpn = clone_model(stage1_rpn_network, [last_conv_node_name, "roi_input", "dims_input"], ["rpn_rois", "rpn_losses"], CloneMethod.clone)
rpn_net = rpn(conv_out, dims_node, scaled_gt_boxes)
rpn_rois = rpn_net.outputs[0]
rpn_losses = rpn_net.outputs[1]
stage2_rpn_network = combine([rpn_rois, rpn_losses])
# train
if debug_output: plot(stage2_rpn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage2a_rpn." + cfg["CNTK"].GRAPH_TYPE))
train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=rpn_epochs)
print("stage 2a - buffering rpn proposals")
buffered_proposals_s2 = compute_rpn_proposals(stage2_rpn_network, image_input, roi_input, dims_input)
print("stage 2b - frcn")
if True:
# Keep conv and rpn weights from step 3 and fix them
# initial weights train?
# conv: stage2a rpn model no
# rpn: stage2a rpn model no --> use buffered proposals
# frcn: stage1b frcn model yes -
# conv_layers
conv_layers = clone_model(stage2_rpn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
conv_out = conv_layers(image_input)
# Fast RCNN and losses
frcn = clone_model(stage1_frcn_network, [last_conv_node_name, "rpn_rois", "roi_input"],
["cls_score", "bbox_regr", "rpn_target_rois", "detection_losses", "pred_error"], CloneMethod.clone)
stage2_frcn_network = frcn(conv_out, rpn_rois_buf, scaled_gt_boxes)
detection_losses = stage2_frcn_network.outputs[3]
pred_error = stage2_frcn_network.outputs[4]
# train
if debug_output: plot(stage2_frcn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage2b_frcn." + cfg["CNTK"].GRAPH_TYPE))
train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=frcn_epochs,
rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s2)
buffered_proposals_s2 = None
return create_eval_model(stage2_frcn_network, image_input, dims_input, rpn_model=stage2_rpn_network)
def eval_faster_rcnn_mAP(eval_model):
img_map_file = globalvars['test_map_file']
roi_map_file = globalvars['test_roi_file']
classes = globalvars['classes']
image_input = input_variable((num_channels, image_height, image_width), dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name)
roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
frcn_eval = eval_model(image_input, dims_input)
# Create the minibatch source
minibatch_source = ObjectDetectionMinibatchSource(
img_map_file, roi_map_file,
max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
pad_width=image_width, pad_height=image_height, pad_value=img_pad_value,
randomize=False, use_flipping=False,
max_images=cfg["CNTK"].NUM_TEST_IMAGES)
# define mapping from reader streams to network inputs
input_map = {
minibatch_source.image_si: image_input,
minibatch_source.roi_si: roi_input,
minibatch_source.dims_si: dims_input
}
# all detections are collected into:
# all_boxes[cls][image] = N x 5 array of detections in
# (x1, y1, x2, y2, score)
all_boxes = [[[] for _ in range(num_test_images)] for _ in range(globalvars['num_classes'])]
# evaluate test images and write network output to file
print("Evaluating Faster R-CNN model for %s images." % num_test_images)
all_gt_infos = {key: [] for key in classes}
for img_i in range(0, num_test_images):
mb_data = minibatch_source.next_minibatch(1, input_map=input_map)
gt_row = mb_data[roi_input].asarray()
gt_row = gt_row.reshape((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5))
all_gt_boxes = gt_row[np.where(gt_row[:,-1] > 0)]
for cls_index, cls_name in enumerate(classes):
if cls_index == 0: continue
cls_gt_boxes = all_gt_boxes[np.where(all_gt_boxes[:,-1] == cls_index)]
all_gt_infos[cls_name].append({'bbox': np.array(cls_gt_boxes),
'difficult': [False] * len(cls_gt_boxes),
'det': [False] * len(cls_gt_boxes)})
output = frcn_eval.eval({image_input: mb_data[image_input], dims_input: mb_data[dims_input]})
out_dict = dict([(k.name, k) for k in output])
out_cls_pred = output[out_dict['cls_pred']][0]
out_rpn_rois = output[out_dict['rpn_rois']][0]
out_bbox_regr = output[out_dict['bbox_regr']][0]
labels = out_cls_pred.argmax(axis=1)
scores = out_cls_pred.max(axis=1)
regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, mb_data[dims_input].asarray())
labels.shape = labels.shape + (1,)
scores.shape = scores.shape + (1,)
coords_score_label = np.hstack((regressed_rois, scores, labels))
# shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
for cls_j in range(1, globalvars['num_classes']):
coords_score_label_for_cls = coords_score_label[np.where(coords_score_label[:,-1] == cls_j)]
all_boxes[cls_j][img_i] = coords_score_label_for_cls[:,:-1].astype(np.float32, copy=False)
if (img_i+1) % 100 == 0:
print("Processed {} samples".format(img_i+1))
# calculate mAP
aps = evaluate_detections(all_boxes, all_gt_infos, classes,
nms_threshold=cfg["CNTK"].RESULTS_NMS_THRESHOLD,
conf_threshold = cfg["CNTK"].RESULTS_NMS_CONF_THRESHOLD)
ap_list = []
for class_name in aps:
ap_list += [aps[class_name]]
print('AP for {:>15} = {:.4f}'.format(class_name, aps[class_name]))
meanAP = np.nanmean(ap_list)
print('Mean AP = {:.4f}'.format(meanAP))
return meanAP
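# A minimal usage sketch (hypothetical model file name; set_global_vars must have
# been called first so that globalvars and the cfg-derived inputs are populated):
# eval_model = load_model("faster_rcnn_eval_AlexNet_e2e.model")
# meanAP = eval_faster_rcnn_mAP(eval_model)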
# The main method trains and evaluates a Faster R-CNN model.
# If a trained model is already available it is loaded and no training will be performed (if MAKE_MODE=True).
if __name__ == '__main__':
running_locally = os.path.exists(map_file_path)
if running_locally:
os.chdir(map_file_path)
if not os.path.exists(os.path.join(abs_path, "Output")):
os.makedirs(os.path.join(abs_path, "Output"))
if not os.path.exists(os.path.join(abs_path, "Output", cfg["CNTK"].DATASET)):
os.makedirs(os.path.join(abs_path, "Output", cfg["CNTK"].DATASET))
else:
# disable debug and plot outputs when running on GPU cluster
cfg["CNTK"].DEBUG_OUTPUT = False
cfg["CNTK"].VISUALIZE_RESULTS = False
set_global_vars()
model_path = os.path.join(globalvars['output_path'], "faster_rcnn_eval_{}_{}.model"
.format(cfg["CNTK"].BASE_MODEL, "e2e" if globalvars['train_e2e'] else "4stage"))
# Train only if no model exists yet
if os.path.exists(model_path) and cfg["CNTK"].MAKE_MODE:
print("Loading existing model from %s" % model_path)
eval_model = load_model(model_path)
else:
if globalvars['train_e2e']:
eval_model = train_faster_rcnn_e2e(base_model_file, debug_output=cfg["CNTK"].DEBUG_OUTPUT)
else:
eval_model = train_faster_rcnn_alternating(base_model_file, debug_output=cfg["CNTK"].DEBUG_OUTPUT)
eval_model.save(model_path)
if cfg["CNTK"].DEBUG_OUTPUT:
plot(eval_model, os.path.join(globalvars['output_path'], "graph_frcn_eval_{}_{}.{}"
.format(cfg["CNTK"].BASE_MODEL, "e2e" if globalvars['train_e2e'] else "4stage", cfg["CNTK"].GRAPH_TYPE)))
print("Stored eval model at %s" % model_path)
# Compute mean average precision on test set
eval_faster_rcnn_mAP(eval_model)
# Plot results on test set
if cfg["CNTK"].VISUALIZE_RESULTS:
from plot_helpers import eval_and_plot_faster_rcnn
num_eval = min(num_test_images, 100)
img_shape = (num_channels, image_height, image_width)
results_folder = os.path.join(globalvars['output_path'], cfg["CNTK"].DATASET)
eval_and_plot_faster_rcnn(eval_model, num_eval, globalvars['test_map_file'], img_shape,
results_folder, feature_node_name, globalvars['classes'],
drawUnregressedRois=cfg["CNTK"].DRAW_UNREGRESSED_ROIS,
drawNegativeRois=cfg["CNTK"].DRAW_NEGATIVE_ROIS,
nmsThreshold=cfg["CNTK"].RESULTS_NMS_THRESHOLD,
nmsConfThreshold=cfg["CNTK"].RESULTS_NMS_CONF_THRESHOLD,
bgrPlotThreshold=cfg["CNTK"].RESULTS_BGR_PLOT_THRESHOLD)


@ -23,13 +23,15 @@ __C.CNTK = edict()
__C.CNTK.MAKE_MODE = False
# E2E or 4-stage training
__C.CNTK.TRAIN_E2E = True
# set to 'True' to use determininistic algorithms
# set to 'True' to use deterministic algorithms
__C.CNTK.FORCE_DETERMINISTIC = False
# set to 'True' to run only a single epoch
__C.CNTK.FAST_MODE = True
__C.CNTK.FAST_MODE = False
# Debug parameters
__C.CNTK.DEBUG_OUTPUT = False
__C.CNTK.GRAPH_TYPE = "png" # "png" or "pdf"
# Set to True if you want to store an eval model with native UDFs (e.g. for inference using C++ or C#)
__C.STORE_EVAL_MODEL_WITH_NATIVE_UDF = False
# Learning parameters
__C.CNTK.L2_REG_WEIGHT = 0.0005
@ -62,7 +64,7 @@ __C.RESULTS_NMS_THRESHOLD = 0.5
__C.RESULTS_NMS_CONF_THRESHOLD = 0.0
# Enable plotting of results generally / also plot background boxes / also plot unregressed boxes
__C.VISUALIZE_RESULTS = True
__C.VISUALIZE_RESULTS = False
__C.DRAW_NEGATIVE_ROIS = False
__C.DRAW_UNREGRESSED_ROIS = False
# only for plotting results: boxes with a score lower than this threshold will be considered background


@ -6,64 +6,12 @@
import os
import numpy as np
from matplotlib.pyplot import imsave
import cv2
import cntk
from cntk import input_variable, Axis
from utils.nms_wrapper import apply_nms_to_single_image_results
from utils.map_helpers import evaluate_detections
from utils.plot_helpers import load_resize_and_pad, resize_and_pad, visualize_detections
from utils.plot_helpers import load_resize_and_pad
from utils.rpn.bbox_transform import regress_rois
from utils.od_mb_source import ObjectDetectionMinibatchSource
from utils.proposal_helpers import ProposalProvider, compute_proposals, compute_image_stats
class FastRCNN_Evaluator:
def __init__(self, eval_model, cfg):
# load model once in constructor and push images through the model in 'process_image()'
self._img_shape = (cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH)
image_input = input_variable(shape=self._img_shape,
dynamic_axes=[Axis.default_batch_axis()],
name=cfg["MODEL"].FEATURE_NODE_NAME)
roi_proposals = input_variable((cfg.NUM_ROI_PROPOSALS, 4), dynamic_axes=[Axis.default_batch_axis()],
name="roi_proposals")
self._eval_model = eval_model(image_input, roi_proposals)
self._min_w = cfg['PROPOSALS_MIN_W']
self._min_h = cfg['PROPOSALS_MIN_H']
self._num_proposals = cfg['NUM_ROI_PROPOSALS']
def process_image(self, img_path):
out_cls_pred, out_rpn_rois, out_bbox_regr, dims = self.process_image_detailed(img_path)
labels = out_cls_pred.argmax(axis=1)
regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims)
return regressed_rois, out_cls_pred
def process_image_detailed(self, img_path):
img = cv2.imread(img_path)
_, cntk_img_input, dims = resize_and_pad(img, self._img_shape[2], self._img_shape[1])
#import pdb; pdb.set_trace()
# compute ROI proposals and apply scaling and padding to them
# [target_w, target_h, img_width, img_height, top, bottom, left, right, scale_factor]
img_stats = compute_image_stats(len(img[0]), len(img), self._img_shape[2], self._img_shape[1])
scale_factor = img_stats[-1]
top = img_stats[4]
left = img_stats[6]
proposals = compute_proposals(img, self._num_proposals, self._min_w, self._min_h)
proposals = proposals * scale_factor
proposals += (left, top, left, top)
output = self._eval_model.eval({self._eval_model.arguments[0]: [cntk_img_input],
self._eval_model.arguments[1]: np.array(proposals, dtype=np.float32)})
out_dict = dict([(k.name, k) for k in output])
out_cls_pred = output[out_dict['cls_pred']][0]
out_rpn_rois = proposals
out_bbox_regr = output[out_dict['bbox_regr']][0]
return out_cls_pred, out_rpn_rois, out_bbox_regr, dims
class FasterRCNN_Evaluator:
def __init__(self, eval_model, cfg):
@ -178,133 +126,3 @@ def compute_test_set_aps(eval_model, cfg):
conf_threshold = cfg.RESULTS_NMS_CONF_THRESHOLD)
return aps
def plot_test_set_results(evaluator, num_images_to_plot, results_base_path, cfg):
# get image paths
with open(cfg["DATA"].TEST_MAP_FILE) as f:
content = f.readlines()
img_base_path = os.path.dirname(os.path.abspath(cfg["DATA"].TEST_MAP_FILE))
img_file_names = [os.path.join(img_base_path, x.split('\t')[1]) for x in content]
img_shape = (cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH)
print("Plotting results from Faster R-CNN model for %s images." % num_images_to_plot)
for i in range(0, num_images_to_plot):
img_path = img_file_names[i]
out_cls_pred, out_rpn_rois, out_bbox_regr, dims = evaluator.process_image_detailed(img_path)
labels = out_cls_pred.argmax(axis=1)
scores = out_cls_pred.max(axis=1)
if cfg.DRAW_UNREGRESSED_ROIS:
# plot results without final regression
imgDebug = visualize_detections(img_path, out_rpn_rois, labels, scores,
img_shape[2], img_shape[1],
classes=cfg["DATA"].CLASSES,
draw_negative_rois=cfg.DRAW_NEGATIVE_ROIS,
decision_threshold=cfg.RESULTS_BGR_PLOT_THRESHOLD)
imsave("{}/{}_{}".format(results_base_path, i, os.path.basename(img_path)), imgDebug)
# apply regression and nms to bbox coordinates
regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims)
nmsKeepIndices = apply_nms_to_single_image_results(regressed_rois, labels, scores,
use_gpu_nms=cfg.USE_GPU_NMS,
device_id=cfg.GPU_ID,
nms_threshold=cfg.RESULTS_NMS_THRESHOLD,
conf_threshold=cfg.RESULTS_NMS_CONF_THRESHOLD)
filtered_bboxes = regressed_rois[nmsKeepIndices]
filtered_labels = labels[nmsKeepIndices]
filtered_scores = scores[nmsKeepIndices]
img = visualize_detections(img_path, filtered_bboxes, filtered_labels, filtered_scores,
img_shape[2], img_shape[1],
classes=cfg["DATA"].CLASSES,
draw_negative_rois=cfg.DRAW_NEGATIVE_ROIS,
decision_threshold=cfg.RESULTS_BGR_PLOT_THRESHOLD)
imsave("{}/{}_regr_{}".format(results_base_path, i, os.path.basename(img_path)), img)
def compute_test_set_aps_fast_rcnn(eval_model, cfg):
num_test_images = cfg["DATA"].NUM_TEST_IMAGES
classes = cfg["DATA"].CLASSES
image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH),
dynamic_axes=[Axis.default_batch_axis()],
name=cfg["MODEL"].FEATURE_NODE_NAME)
roi_input = input_variable((cfg.INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
roi_proposals = input_variable((cfg.NUM_ROI_PROPOSALS, 4), dynamic_axes=[Axis.default_batch_axis()], name="roi_proposals")
dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
frcn_eval = eval_model(image_input, roi_proposals)
# Create the minibatch source
proposal_provider = ProposalProvider.fromconfig(cfg)
minibatch_source = ObjectDetectionMinibatchSource(
cfg["DATA"].TEST_MAP_FILE,
cfg["DATA"].TEST_ROI_FILE,
max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE,
pad_width=cfg.IMAGE_WIDTH,
pad_height=cfg.IMAGE_HEIGHT,
pad_value=cfg["MODEL"].IMG_PAD_COLOR,
randomize=False, use_flipping=False,
max_images=cfg["DATA"].NUM_TEST_IMAGES,
num_classes=cfg["DATA"].NUM_CLASSES,
proposal_provider=proposal_provider,
provide_targets=False)
# define mapping from reader streams to network inputs
input_map = {
minibatch_source.image_si: image_input,
minibatch_source.roi_si: roi_input,
minibatch_source.proposals_si: roi_proposals,
minibatch_source.dims_si: dims_input
}
# all detections are collected into:
# all_boxes[cls][image] = N x 5 array of detections in (x1, y1, x2, y2, score)
all_boxes = [[[] for _ in range(num_test_images)] for _ in range(cfg["DATA"].NUM_CLASSES)]
# evaluate test images and write network output to file
print("Evaluating Fast R-CNN model for %s images." % num_test_images)
all_gt_infos = {key: [] for key in classes}
for img_i in range(0, num_test_images):
mb_data = minibatch_source.next_minibatch(1, input_map=input_map)
gt_row = mb_data[roi_input].asarray()
gt_row = gt_row.reshape((cfg.INPUT_ROIS_PER_IMAGE, 5))
all_gt_boxes = gt_row[np.where(gt_row[:,-1] > 0)]
for cls_index, cls_name in enumerate(classes):
if cls_index == 0: continue
cls_gt_boxes = all_gt_boxes[np.where(all_gt_boxes[:,-1] == cls_index)]
all_gt_infos[cls_name].append({'bbox': np.array(cls_gt_boxes),
'difficult': [False] * len(cls_gt_boxes),
'det': [False] * len(cls_gt_boxes)})
output = frcn_eval.eval({image_input: mb_data[image_input], roi_proposals: mb_data[roi_proposals]})
out_dict = dict([(k.name, k) for k in output])
out_cls_pred = output[out_dict['cls_pred']][0]
out_rpn_rois = mb_data[roi_proposals].data.asarray()
out_bbox_regr = output[out_dict['bbox_regr']][0]
labels = out_cls_pred.argmax(axis=1)
scores = out_cls_pred.max(axis=1)
regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, mb_data[dims_input].asarray())
labels.shape = labels.shape + (1,)
scores.shape = scores.shape + (1,)
coords_score_label = np.hstack((regressed_rois, scores, labels))
# shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
for cls_j in range(1, cfg["DATA"].NUM_CLASSES):
coords_score_label_for_cls = coords_score_label[np.where(coords_score_label[:,-1] == cls_j)]
all_boxes[cls_j][img_i] = coords_score_label_for_cls[:,:-1].astype(np.float32, copy=False)
if (img_i+1) % 100 == 0:
print("Processed {} samples".format(img_i+1))
# calculate mAP
aps = evaluate_detections(all_boxes, all_gt_infos, classes,
use_gpu_nms = cfg.USE_GPU_NMS,
device_id = cfg.GPU_ID,
nms_threshold=cfg.RESULTS_NMS_THRESHOLD,
conf_threshold = cfg.RESULTS_NMS_CONF_THRESHOLD)
return aps
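# Minimal usage sketch (assumes a trained Fast R-CNN eval model and a cfg
# prepared beforehand by FastRCNN_train.prepare):
# aps = compute_test_set_aps_fast_rcnn(eval_model, cfg)
# meanAP = np.nanmean(list(aps.values()))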


@ -25,7 +25,7 @@ from _cntk_py import force_deterministic_algorithms
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path, ".."))
from utils.rpn.rpn_helpers import create_rpn, create_proposal_target_layer, add_proposal_layer
from utils.rpn.rpn_helpers import create_rpn, create_proposal_target_layer, create_proposal_layer
from utils.annotations.annotations_helper import parse_class_map_file
from utils.od_mb_source import ObjectDetectionMinibatchSource
from utils.proposal_helpers import ProposalProvider
@ -64,13 +64,11 @@ def prepare(cfg, use_arg_parser=True):
cfg['MODEL_PATH'] = os.path.join(cfg.OUTPUT_PATH, "faster_rcnn_eval_{}_{}.model"
.format(cfg["MODEL"].BASE_MODEL, "e2e" if cfg["CNTK"].TRAIN_E2E else "4stage"))
cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "PretrainedModels",
cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "..", "PretrainedModels",
cfg["MODEL"].BASE_MODEL_FILE)
cfg["DATA"].CLASSES = parse_class_map_file(cfg["DATA"].CLASS_MAP_FILE)
cfg["DATA"].NUM_CLASSES = len(cfg["DATA"].CLASSES)
cfg.PROPOSAL_LAYER_PARAMS = "'feat_stride': {}\n'scales':\n - {}".\
format(cfg["MODEL"].FEATURE_STRIDE, "\n - ".join([str(v) for v in cfg["DATA"].PROPOSAL_LAYER_SCALES]))
if cfg["CNTK"].FAST_MODE:
cfg["CNTK"].E2E_MAX_EPOCHS = 1
@ -207,7 +205,7 @@ def create_faster_rcnn_eval_model(model, image_input, dims_input, cfg, rpn_model
rpn = clone_model(model_with_rpn, [last_conv_node_name], ["rpn_cls_prob_reshape", "rpn_bbox_pred"], CloneMethod.freeze)
rpn_out = rpn(conv_out)
# we need to add the proposal layer anew to account for changing configs when buffering proposals in 4-stage training
rpn_rois = add_proposal_layer(rpn_out.outputs[0], rpn_out.outputs[1], dims_input, cfg)
rpn_rois = create_proposal_layer(rpn_out.outputs[0], rpn_out.outputs[1], dims_input, cfg)
roi_fc_layers = clone_model(model, [last_conv_node_name, "rpn_target_rois"], ["cls_score", "bbox_regr"], CloneMethod.freeze)
pred_net = roi_fc_layers(conv_out, rpn_rois)
@ -225,6 +223,27 @@ def create_faster_rcnn_eval_model(model, image_input, dims_input, cfg, rpn_model
return eval_model
def store_eval_model_with_native_udf(eval_model, cfg):
import copy
sys.path.append(os.path.join(abs_path, "..", "..", "Extensibility", "ProposalLayer"))
cntk.ops.register_native_user_function('ProposalLayerOp',
'Cntk.ProposalLayerLib-' + cntk.__version__.rstrip('+'),
'CreateProposalLayer')
def filter(x):
return type(x) == cntk.Function and x.op_name == 'UserFunction' and x.name == 'ProposalLayer'
def converter(x):
layer_config = copy.deepcopy(x.attributes)
return cntk.ops.native_user_function('ProposalLayerOp', list(x.inputs), layer_config, 'native_proposal_layer')
model_w_native_udf = cntk.misc.convert(eval_model, filter, converter)
model_path = cfg['MODEL_PATH']
new_model_path = model_path[:-6] + '_native.model'
model_w_native_udf.save(new_model_path)
print("Stored eval model with native UDF to {}".format(new_model_path))
def compute_rpn_proposals(rpn_model, image_input, roi_input, dims_input, cfg):
num_images = cfg["DATA"].NUM_TRAIN_IMAGES
# Create the minibatch source


@ -5,7 +5,7 @@
This folder contains an end-to-end solution for using Faster R-CNN to perform object detection.
The original research paper for Faster R-CNN can be found at [https://arxiv.org/abs/1506.01497](https://arxiv.org/abs/1506.01497).
Base models that are supported by the current configuration are AlexNet and VGG16.
Two image set that are preconfigured are Pascal VOC 2007 and Grocery.
Two image sets that are preconfigured are Pascal VOC 2007 and Grocery.
Other base models or image sets can be used by adding a configuration file similar to the examples in `utils/configs` and importing it in `run_faster_rcnn.py`.
## Running the example
@ -15,7 +15,7 @@ Other base models or image sets can be used by adding a configuration file simil
To run Faster R-CNN you need a CNTK Python environment. Install the following additional packages:
```
pip install opencv-python easydict pyyaml future
pip install opencv-python easydict pyyaml
```
The code uses prebuilt Cython modules for parts of the region proposal network (see `Examples/Image/Detection/utils/cython_modules`).
@ -112,7 +112,7 @@ and run `python run_faster_rcnn.py` to train and evaluate Faster R-CNN on your d
### Parameters
All options and parameters are in `config.py` in the `FasterRCNN` folder and all of them are explained there. These include
All options and parameters are in `FasterRCNN_config.py` in the `FasterRCNN` folder and all of them are explained there. These include
```
# E2E or 4-stage training
@ -134,4 +134,4 @@ Most of the code is in `FasterRCNN_train.py` and `FasterRCNN_eval.py` (and `Exam
### Algorithm
All details regarding the Faster R-CNN algortihm can be found in the original research paper: [https://arxiv.org/abs/1506.01497](https://arxiv.org/abs/1506.01497).
All details regarding the Faster R-CNN algorithm can be found in the original research paper: [https://arxiv.org/abs/1506.01497](https://arxiv.org/abs/1506.01497).


@ -6,14 +6,15 @@
import os
import numpy as np
from FasterRCNN_train import prepare, train_faster_rcnn
import cntk
from FasterRCNN_train import prepare, train_faster_rcnn, store_eval_model_with_native_udf
from FasterRCNN_eval import compute_test_set_aps, FasterRCNN_Evaluator
from utils.config_helpers import merge_configs
from utils.plot_helpers import plot_test_set_results
def get_configuration():
# load configs for detector, base network and data set
from config import cfg as detector_cfg
from FasterRCNN_config import cfg as detector_cfg
# for VGG16 base model use: from utils.configs.VGG16_config import cfg as network_cfg
# for AlexNet base model use: from utils.configs.AlexNet_config import cfg as network_cfg
from utils.configs.AlexNet_config import cfg as network_cfg
@ -42,3 +43,8 @@ if __name__ == '__main__':
results_folder = os.path.join(cfg.OUTPUT_PATH, cfg["DATA"].DATASET)
evaluator = FasterRCNN_Evaluator(trained_model, cfg)
plot_test_set_results(evaluator, num_eval, results_folder, cfg)
if cfg.STORE_EVAL_MODEL_WITH_NATIVE_UDF:
store_eval_model_with_native_udf(trained_model, cfg)


@ -13,7 +13,7 @@ This folder contains an end-to-end demo to try different object detectors, base
### Setup
To run Fast R-CNN you need a CNTK Python environment. Install the following additional packages:
To run the object detection demo you need a CNTK Python environment. Install the following additional packages:
```
pip install opencv-python easydict pyyaml future


@ -19,10 +19,6 @@ python setup.py build_ext --inplace
```
Copy the compiled `.pyd` (Windows) or `.so` (Linux) files into the `cython_modules` subfolder of this utils folder.
##### `default_config`
Contains all required parameters for using a region proposal network in training or evaluation. You can overwrite these parameters by specifying a `config.py` file of the same format inside your working directory.
### `rpn` module overview
The rpn module contains helper methods and required layers to generate region proposal networks for object detection.
@ -48,7 +44,3 @@ Bbox regression targets are specified when the classification label is > 0.
Generates training targets/labels for each object proposal: classification labels 0 - K (bg or object class 1, ... , K)
and bbox regression targets in the case that the label is > 0.
##### `generate.py`
Generate object detection proposals from an imdb using an RPN.


@ -13,13 +13,13 @@ cfg = __C
# model config
__C.MODEL.BASE_MODEL = "AlexNet"
__C.MODEL.BASE_MODEL_FILE = "AlexNet.model"
__C.MODEL.BASE_MODEL_FILE = "AlexNet_ImageNet_Caffe.model"
__C.MODEL.IMG_PAD_COLOR = [114, 114, 114]
__C.MODEL.FEATURE_NODE_NAME = "features"
__C.MODEL.LAST_CONV_NODE_NAME = "conv5.y"
__C.MODEL.FEATURE_NODE_NAME = "data"
__C.MODEL.LAST_CONV_NODE_NAME = "relu5"
__C.MODEL.START_TRAIN_CONV_NODE_NAME = __C.MODEL.FEATURE_NODE_NAME
__C.MODEL.POOL_NODE_NAME = "pool3"
__C.MODEL.LAST_HIDDEN_NODE_NAME = "h2_d"
__C.MODEL.POOL_NODE_NAME = "pool5"
__C.MODEL.LAST_HIDDEN_NODE_NAME = "drop7"
__C.MODEL.FEATURE_STRIDE = 16
__C.MODEL.RPN_NUM_CHANNELS = 256
__C.MODEL.ROI_DIM = 6


@ -22,3 +22,18 @@ __C.DATA.TEST_ROI_FILE = "test_roi_file.txt"
__C.DATA.NUM_TRAIN_IMAGES = 20
__C.DATA.NUM_TEST_IMAGES = 5
__C.DATA.PROPOSAL_LAYER_SCALES = [4, 8, 12]
# overwriting proposal parameters for Fast R-CNN
# minimum relative width/height of an ROI
__C.roi_min_side_rel = 0.04
# maximum relative width/height of an ROI
__C.roi_max_side_rel = 0.4
# minimum relative area of an ROI
__C.roi_min_area_rel = 2 * __C.roi_min_side_rel * __C.roi_min_side_rel
# maximum relative area of an ROI
__C.roi_max_area_rel = 0.33 * __C.roi_max_side_rel * __C.roi_max_side_rel
# maximum aspect ratio of an ROI vertically and horizontally
__C.roi_max_aspect_ratio = 4.0
# For this data set use the following lr factor for Fast R-CNN:
# __C.CNTK.LR_FACTOR = 10.0


@ -22,3 +22,6 @@ __C.DATA.TEST_ROI_FILE = "test2007_rois_abs-xyxy_noPad_skipDif.txt"
__C.DATA.NUM_TRAIN_IMAGES = 5010
__C.DATA.NUM_TEST_IMAGES = 4952
__C.DATA.PROPOSAL_LAYER_SCALES = [8, 16, 32]
__C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE = "trainval2007_proposals.txt"
__C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE = "test2007_proposals.txt"
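# These files are consumed via ProposalProvider.fromfile(<file>, max_num_proposals)
# in utils/proposal_helpers.py: per-image proposal lists longer than
# max_num_proposals are cut, shorter ones are padded with zero-boxes.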


@ -222,7 +222,7 @@ class ObjectDetectionReader:
if self._flip_image:
resized_with_pad = cv2.flip(resized_with_pad, 1)
# transpose(2,0,1) converts the image to the HWC format which CNTK accepts
# transpose(2,0,1) converts the image to the HWC format which CNTK expects
model_arg_rep = np.ascontiguousarray(np.array(resized_with_pad, dtype=np.float32).transpose(2, 0, 1))
# dims = pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height


@ -12,7 +12,6 @@ from builtins import range
import copy, textwrap
from PIL import Image, ImageFont, ImageDraw
from PIL.ExifTags import TAGS
from matplotlib.pyplot import imsave
import cntk
from cntk import input_variable, Axis
from utils.nms_wrapper import apply_nms_to_single_image_results
@ -121,7 +120,10 @@ def visualize_detections(img_path, roi_coords, roi_labels, roi_scores,
thickness = 4
drawRectangles(result_img, [rect], color=color, thickness=thickness)
elif iter == 2 and label > 0:
font = ImageFont.truetype(available_font, 18)
try:
font = ImageFont.truetype(available_font, 18)
except:
font = ImageFont.load_default()
text = classes[label]
if roi_scores is not None:
text += "(" + str(round(score, 2)) + ")"
@ -129,6 +131,8 @@ def visualize_detections(img_path, roi_coords, roi_labels, roi_scores,
return result_img
def plot_test_set_results(evaluator, num_images_to_plot, results_base_path, cfg):
from matplotlib.pyplot import imsave
# get image paths
with open(cfg["DATA"].TEST_MAP_FILE) as f:
content = f.readlines()
@ -284,12 +288,12 @@ def ptClip(pt, maxWidth, maxHeight):
pt[1] = min(pt[1], maxHeight)
return pt
def drawText(img, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = ImageFont.truetype("arial.ttf", 16)):
def drawText(img, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = None):
pilImg = imconvertCv2Pil(img)
pilImg = pilDrawText(pilImg, pt, text, textWidth, color, colorBackground, font)
return imconvertPil2Cv(pilImg)
def pilDrawText(pilImg, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = ImageFont.truetype("arial.ttf", 16)):
def pilDrawText(pilImg, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = None):
textY = pt[1]
draw = ImageDraw.Draw(pilImg)
if textWidth == None:


@ -1,9 +1,29 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
import os, sys
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path, ".."))
import numpy as np
from dlib import find_candidate_object_locations
import cv2
from utils.rpn.bbox_transform import bbox_transform
from utils.cython_modules.cython_bbox import bbox_overlaps
random_seed = 23
global ss_lib_loaded, find_candidate_object_locations
ss_lib_loaded = False
def load_selective_search_lib():
global find_candidate_object_locations
from dlib import find_candidate_object_locations as algo
find_candidate_object_locations = algo
global ss_lib_loaded
ss_lib_loaded = True
def compute_image_stats(img_width, img_height, pad_width, pad_height):
do_scale_w = img_width > img_height
@ -23,63 +43,117 @@ def compute_image_stats(img_width, img_height, pad_width, pad_height):
right = pad_width - left - target_w
return [target_w, target_h, img_width, img_height, top, bottom, left, right, scale_factor]
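# Worked example (assumed values, and assuming the elided middle of this
# function scales the longer side to the padded size): a 1000x600 image padded
# to 850x850 gives scale_factor = 0.85, target_w = 850, target_h = 510,
# top = bottom = 170, left = right = 0.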
def compute_proposals(img, num_proposals, min_w, min_h):
all_rects = []
min_size = min_w * min_h
find_candidate_object_locations(img, all_rects, min_size=min_size)
rects = []
for k, d in enumerate(all_rects):
w = d.right() - d.left()
h = d.bottom() - d.top()
if w < min_w or h < min_h:
def filterRois(rects, img_w, img_h, roi_min_area, roi_max_area, roi_min_side, roi_max_side, roi_max_aspect_ratio):
filteredRects = []
filteredRectsSet = set()
for rect in rects:
if tuple(rect) in filteredRectsSet: # excluding rectangles with same co-ordinates
continue
rects.append([d.left(), d.top(), d.right(), d.bottom()])
np_rects = np.array(rects)
num_rects = np_rects.shape[0]
x, y, x2, y2 = rect
w = x2 - x
h = y2 - y
assert(w>=0 and h>=0)
# apply filters
if h == 0 or w == 0 or \
x2 > img_w or y2 > img_h or \
w < roi_min_side or h < roi_min_side or \
w > roi_max_side or h > roi_max_side or \
w * h < roi_min_area or w * h > roi_max_area or \
w / h > roi_max_aspect_ratio or h / w > roi_max_aspect_ratio:
continue
filteredRects.append(rect)
filteredRectsSet.add(tuple(rect))
# could combine rectangles using non-maximum suppression or with similar co-ordinates
# groupedRectangles, weights = cv2.groupRectangles(np.asanyarray(rectsInput, np.float).tolist(), 1, 0.3)
# groupedRectangles = nms_python(np.asarray(rectsInput, np.float), 0.5)
assert(len(filteredRects) > 0)
return filteredRects
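# Illustrative call (hypothetical numbers, matching the relative defaults below
# for a 200x200 selective-search image): keep ROIs whose sides are 8..80 px,
# whose area is 128..2112 px^2, and whose aspect ratio is at most 4:1:
# kept = filterRois(rects, 200, 200, 128, 2112, 8, 80, 4.0)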
def compute_proposals(img, num_proposals, cfg):
img_w = len(img[0])
img_h = len(img)
if cfg is None: cfg = {}
roi_ss_kvals = (10, 500, 5) if 'roi_ss_kvals' not in cfg else tuple(cfg['roi_ss_kvals'])
roi_ss_mm_iterations = 30 if 'roi_ss_mm_iterations' not in cfg else cfg['roi_ss_mm_iterations']
roi_ss_min_size = 9 if 'roi_ss_min_size' not in cfg else cfg['roi_ss_min_size']
roi_ss_img_size = 200 if 'roi_ss_img_size' not in cfg else cfg['roi_ss_img_size']
roi_min_side_rel = 0.04 if 'roi_min_side_rel' not in cfg else cfg['roi_min_side_rel']
roi_max_side_rel = 0.4 if 'roi_max_side_rel' not in cfg else cfg['roi_max_side_rel']
roi_min_area_rel = 2 * roi_min_side_rel * roi_min_side_rel if 'roi_min_area_rel' not in cfg else cfg['roi_min_area_rel']
roi_max_area_rel = 0.33 * roi_max_side_rel * roi_max_side_rel if 'roi_max_area_rel' not in cfg else cfg['roi_max_area_rel']
roi_max_aspect_ratio = 4.0 if 'roi_max_aspect_ratio' not in cfg else cfg['roi_max_aspect_ratio']
roi_grid_aspect_ratios = [1.0, 2.0, 0.5] if 'roi_grid_aspect_ratios' not in cfg else cfg['roi_grid_aspect_ratios']
debug_output = False if not ('CNTK' in cfg and 'DEBUG_OUTPUT' in cfg.CNTK) else cfg.CNTK.DEBUG_OUTPUT
scale = 1.0 * roi_ss_img_size / max(img.shape[:2])
img = cv2.resize(img, (0,0), fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
roi_min_side = roi_min_side_rel * roi_ss_img_size
roi_max_side = roi_max_side_rel * roi_ss_img_size
roi_min_area = roi_min_area_rel * roi_ss_img_size * roi_ss_img_size
roi_max_area = roi_max_area_rel * roi_ss_img_size * roi_ss_img_size
if not ss_lib_loaded: load_selective_search_lib()
rects = []
tmp = []
find_candidate_object_locations(img, tmp, kvals=roi_ss_kvals, min_size=roi_ss_min_size, max_merging_iterations=roi_ss_mm_iterations)
for k, d in enumerate(tmp):
rects.append([d.left(), d.top(), d.right(), d.bottom()])
filtered_rects = filterRois(rects, img_w, img_h, roi_min_area, roi_max_area, roi_min_side, roi_max_side, roi_max_aspect_ratio)
scaled_rects = np.array(filtered_rects) * (1/scale)
if debug_output:
print("selective search rois before | after filtering: {} | {}. Requested: {}".format(len(rects), len(filtered_rects), num_proposals))
num_rects = scaled_rects.shape[0]
np.random.seed(random_seed)
if num_rects < num_proposals:
img_w = len(img[0])
img_h = len(img)
grid_proposals = compute_grid_proposals(num_proposals - len(rects), img_w, img_h, min_w, min_h)
np_rects = np.vstack([np_rects, grid_proposals])
elif len(rects) > num_proposals:
try:
shuffle = not cfg.CNTK.FORCE_DETERMINISTIC
except:
shuffle = True
roi_min_side = roi_min_side_rel * min(img_w, img_h)
roi_max_side = roi_max_side_rel * max(img_w, img_h)
grid_proposals = compute_grid_proposals(num_proposals - num_rects, img_w, img_h, roi_min_side, roi_max_side, roi_grid_aspect_ratios, shuffle)
scaled_rects = np.vstack([scaled_rects, grid_proposals])
elif num_rects > num_proposals:
keep_inds = range(num_rects)
keep_inds = np.random.choice(keep_inds, size=num_proposals, replace=False)
np_rects = np_rects[keep_inds]
scaled_rects = scaled_rects[keep_inds]
return np_rects
def compute_grid_proposals(num_proposals, img_w, img_h, min_w, min_h, max_w=None, max_h=None, aspect_ratios = [1.0], shuffle=True):
min_wh = max(min_w, min_h)
max_wh = min(img_h, img_w) / 2
if max_w is not None: max_wh = min(max_wh, max_w)
if max_h is not None: max_wh = min(max_wh, max_h)
return scaled_rects
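# Minimal sketch (hypothetical image path; cfg=None falls back to the defaults
# above, as in the timing test at the bottom of this file):
# img = cv2.imread("000015.jpg")
# proposals = compute_proposals(img, 2000, cfg=None)  # (2000, 4) array of x1 y1 x2 y2 rows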
def compute_grid_proposals(num_proposals, img_w, img_h, min_wh, max_wh, aspect_ratios = [1.0, 2.0, 0.5], shuffle=True):
rects = []
iter = 0
while len(rects) < num_proposals:
new_ar = []
for ar in aspect_ratios:
new_ar.append(ar * (0.9 ** iter))
new_ar.append(ar * (1.1 ** iter))
if iter == 0:
new_ar = aspect_ratios
else:
new_ar = []
for ar in aspect_ratios:
new_ar.append(ar * (0.9 ** iter))
new_ar.append(ar * (1.1 ** iter))
new_rects = _compute_grid_proposals(img_w, img_h, min_wh, max_wh, new_ar)
new_rects = np.array(_compute_grid_proposals(img_w, img_h, min_wh, max_wh, new_ar))
take = min(num_proposals - len(rects), len(new_rects))
new_rects = new_rects[:take]
if shuffle and take < len(new_rects):
keep_inds = range(len(new_rects))
keep_inds = np.random.choice(keep_inds, size=take, replace=False)
new_rects = new_rects[keep_inds]
else:
new_rects = new_rects[:take]
rects.extend(new_rects)
iter = iter + 1
np_rects = np.array(rects)
num_rects = np_rects.shape[0]
if shuffle and num_proposals < num_rects:
keep_inds = range(num_rects)
keep_inds = np.random.choice(keep_inds, size=num_proposals, replace=False)
np_rects = np_rects[keep_inds]
else:
np_rects = np_rects[:num_proposals]
assert np_rects.shape[0] == num_proposals
return np_rects
def _compute_grid_proposals(img_w, img_h, min_wh, max_wh, aspect_ratios):
@ -152,17 +226,30 @@ class ProposalProvider:
self._requires_scaling = requires_scaling
@classmethod
def fromfile(cls, filename):
def fromfile(cls, filename, max_num_proposals):
print('Reading proposals from file ({}) ...'.format(filename))
with open(filename) as f:
lines = f.readlines()
proposal_list = [[] for _ in lines]
index = 0
cut_counter = 0
for line in lines:
# TODO: parse line
index = 0
rects = np.zeros((4, 200))
# parse line
numbers = line[line.find('|') + 11:]
parsed_numbers = np.fromstring(numbers, dtype=int, sep=' ')
parsed_rects = parsed_numbers.reshape((int(parsed_numbers.shape[0] / 4), 4))
num_rects = parsed_rects.shape[0]
if num_rects > max_num_proposals:
rects = parsed_rects[:max_num_proposals,:]
cut_counter += 1
else:
pad_rects = np.zeros((max_num_proposals - num_rects, 4))
rects = np.vstack([parsed_rects, pad_rects])
proposal_list[index] = rects
index += 1
print('Done. {} images had more than {} proposals.'.format(cut_counter, max_num_proposals))
return cls(proposal_list)
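# Assumed line layout (inferred from the slicing above, which skips 11
# characters starting at the '|'): an image index, a 10-character tag such as
# "|proposals" plus one space, then flattened x1 y1 x2 y2 quadruples, e.g.:
# 0	|proposals 10 20 110 170 30 40 80 90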
@classmethod
@ -183,33 +270,25 @@ class ProposalProvider:
return self._proposal_cfg['NUM_ROI_PROPOSALS']
def get_proposals(self, index, img=None):
#import pdb; pdb.set_trace()
if index in self._proposal_dict:
return self._proposal_dict[index]
else:
return self._compute_proposals(img)
def _compute_proposals(self, img):
min_w = self._proposal_cfg['PROPOSALS_MIN_W']
min_h = self._proposal_cfg['PROPOSALS_MIN_H']
num_proposals = self._proposal_cfg.NUM_ROI_PROPOSALS
return compute_proposals(img, num_proposals, min_w, min_h)
num_proposals = self._proposal_cfg.NUM_ROI_PROPOSALS
return compute_proposals(img, num_proposals, self._proposal_cfg)
if __name__ == '__main__':
import cv2
image_file = r"C:\src\CNTK\Examples\Image\DataSets\Pascal\VOCdevkit\VOC2007\JPEGImages\000015.jpg"
image_file = os.path.join(abs_path, r"..\..\DataSets\Pascal\VOCdevkit\VOC2007\JPEGImages\000015.jpg")
img = cv2.imread(image_file)
# 0.18 sec for 4000
# 0.15 sec for 2000
# 0.13 sec for 1000
num_proposals = 2000
num_runs = 100
num_runs = 500
proposals = compute_proposals(img, num_proposals, cfg=None)
import time
start = int(time.time())
for i in range(num_runs):
proposals = compute_proposals(img, num_proposals, 20, 20)
proposals = compute_proposals(img, num_proposals, cfg=None)
total = int(time.time() - start)
print ("time: {}".format(total / (1.0 * num_runs)))
print ("time for {} proposals: {} (total time for {} runs: {}".format(num_proposals, total / (1.0 * num_runs), num_runs, total))
assert len(proposals) == num_proposals, "{} != {}".format(len(proposals), num_proposals)


@ -20,39 +20,17 @@ class ProposalLayer(UserFunction):
transformations to a set of regular boxes (called "anchors").
'''
def __init__(self, arg1, arg2, arg3,
train_pre_nms_topN=12000,
train_post_nms_topN=2000,
train_nms_thresh=0.7,
train_min_size=16,
test_pre_nms_topN=6000,
test_post_nms_topN=300,
test_nms_thresh=0.7,
test_min_size=16,
param_str = None,
name='ProposalLayer'):
super(ProposalLayer, self).__init__([arg1, arg2, arg3], name=name)
self._train_pre_nms_topN = train_pre_nms_topN
self._train_post_nms_topN = train_post_nms_topN
self._train_nms_thresh = train_nms_thresh
self._train_min_size = train_min_size
self._test_pre_nms_topN = test_pre_nms_topN
self._test_post_nms_topN = test_post_nms_topN
self._test_nms_thresh = test_nms_thresh
self._test_min_size = test_min_size
self._param_str = param_str if param_str is not None else "'feat_stride': 16\n'scales':\n - 8 \n - 16 \n - 32"
def __init__(self, arg1, arg2, arg3, layer_config, name='ProposalLayer'):
super(ProposalLayer, self).__init__([arg1, arg2, arg3], attributes=layer_config, name=name)
self._layer_config = layer_config
self._feat_stride = 16 if 'feat_stride' not in layer_config else layer_config['feat_stride']
anchor_scales = [8, 16, 32] if 'scales' not in layer_config else layer_config['scales']
# parse the layer parameter string, which must be valid YAML
layer_params = yaml.load(self._param_str)
self._feat_stride = layer_params['feat_stride']
anchor_scales = layer_params.get('scales', (8, 16, 32))
self._anchors = generate_anchors(scales=np.array(anchor_scales))
self._num_anchors = self._anchors.shape[0]
attributes = {'feat_stride' : self._feat_stride, 'scales' : anchor_scales}
super(ProposalLayer, self).__init__([arg1, arg2, arg3], attributes=attributes, name=name)
if DEBUG:
print ('feat_stride: {}'.format(self._feat_stride))
print ('anchors:')
@ -85,15 +63,15 @@ class ProposalLayer(UserFunction):
# use potentially different number of proposals for training vs evaluation
if len(outputs_to_retain) == 0:
# print("EVAL")
pre_nms_topN = self._test_pre_nms_topN
post_nms_topN = self._test_post_nms_topN
nms_thresh = self._test_nms_thresh
min_size = self._test_min_size
pre_nms_topN = self._layer_config['test_pre_nms_topN']
post_nms_topN = self._layer_config['test_post_nms_topN']
nms_thresh = self._layer_config['test_nms_thresh']
min_size = self._layer_config['test_min_size']
else:
pre_nms_topN = self._train_pre_nms_topN
post_nms_topN = self._train_post_nms_topN
nms_thresh = self._train_nms_thresh
min_size = self._train_min_size
pre_nms_topN = self._layer_config['train_pre_nms_topN']
post_nms_topN = self._layer_config['train_post_nms_topN']
nms_thresh = self._layer_config['train_nms_thresh']
min_size = self._layer_config['train_min_size']
bottom = arguments
assert bottom[0].shape[0] == 1, \
@ -205,44 +183,16 @@ class ProposalLayer(UserFunction):
pass
def clone(self, cloned_inputs):
return ProposalLayer(cloned_inputs[0], cloned_inputs[1], cloned_inputs[2],
train_pre_nms_topN=self._train_pre_nms_topN,
train_post_nms_topN=self._train_post_nms_topN,
train_nms_thresh=self._train_nms_thresh,
train_min_size=self._train_min_size,
test_pre_nms_topN=self._test_pre_nms_topN,
test_post_nms_topN=self._test_post_nms_topN,
test_nms_thresh=self._test_nms_thresh,
test_min_size=self._test_min_size,
param_str=self._param_str)
return ProposalLayer(cloned_inputs[0], cloned_inputs[1], cloned_inputs[2], layer_config=self._layer_config)
def serialize(self):
internal_state = {}
internal_state['param_str'] = self._param_str
internal_state['train_pre_nms_topN'] = self._train_pre_nms_topN
internal_state['train_post_nms_topN'] = self._train_post_nms_topN
internal_state['train_nms_thresh'] = self._train_nms_thresh
internal_state['train_min_size'] = self._train_min_size
internal_state['test_pre_nms_topN'] = self._test_pre_nms_topN
internal_state['test_post_nms_topN'] = self._test_post_nms_topN
internal_state['test_nms_thresh'] = self._test_nms_thresh
internal_state['test_min_size'] = self._test_min_size
internal_state['layer_config'] = self._layer_config
return internal_state
@staticmethod
def deserialize(inputs, name, state):
return ProposalLayer(inputs[0], inputs[1], inputs[2],
train_pre_nms_topN=state['train_pre_nms_topN'],
train_post_nms_topN=state['train_post_nms_topN'],
train_nms_thresh=state['train_nms_thresh'],
train_min_size=state['train_min_size'],
test_pre_nms_topN=state['test_pre_nms_topN'],
test_post_nms_topN=state['test_post_nms_topN'],
test_nms_thresh=state['test_nms_thresh'],
test_min_size=state['test_min_size'],
param_str=state['param_str'],
name=name)
return ProposalLayer(inputs[0], inputs[1], inputs[2], layer_config=state['layer_config'], name=name)
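# For reference, a layer_config carrying the former constructor defaults would
# look like this (values taken from the removed keyword arguments above):
# layer_config = {'feat_stride': 16, 'scales': [8, 16, 32],
#                 'train_pre_nms_topN': 12000, 'train_post_nms_topN': 2000,
#                 'train_nms_thresh': 0.7, 'train_min_size': 16,
#                 'test_pre_nms_topN': 6000, 'test_post_nms_topN': 300,
#                 'test_nms_thresh': 0.7, 'test_min_size': 16}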
def _filter_boxes(boxes, min_size):
"""Remove all boxes with any side smaller than min_size."""


@ -6,7 +6,7 @@
import numpy as np
import cntk
from cntk import reduce_sum
from cntk import reduce_sum, ops
from cntk import user_function, relu, softmax, slice, splice, reshape, element_times, plus, minus, alias, classification_error
from cntk.initializer import glorot_uniform, normal
from cntk.layers import Convolution
@ -16,7 +16,6 @@ from utils.rpn.proposal_layer import ProposalLayer
from utils.rpn.proposal_target_layer import ProposalTargetLayer
from utils.rpn.cntk_smoothL1_loss import SmoothL1Loss
# Please keep in sync with Readme.md
def create_rpn(conv_out, scaled_gt_boxes, im_info, cfg, add_loss_functions=True):
'''
Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:
@ -59,19 +58,21 @@ def create_rpn(conv_out, scaled_gt_boxes, im_info, cfg, add_loss_functions=True)
rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape, name="rpn_cls_prob_reshape")
# proposal layer
rpn_rois = add_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg)
rpn_rois = create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg)
rpn_losses = None
if(add_loss_functions):
# RPN targets
# Comment: rpn_cls_score is only passed vvv to get width and height of the conv feature map ...
proposal_layer_params = "'feat_stride': {}\n'scales':\n - {}". \
format(cfg["MODEL"].FEATURE_STRIDE, "\n - ".join([str(v) for v in cfg["DATA"].PROPOSAL_LAYER_SCALES]))
atl = user_function(AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info,
rpn_batch_size=cfg["TRAIN"].RPN_BATCHSIZE,
rpn_fg_fraction=cfg["TRAIN"].RPN_FG_FRACTION,
clobber_positives=cfg["TRAIN"].RPN_CLOBBER_POSITIVES,
positive_overlap=cfg["TRAIN"].RPN_POSITIVE_OVERLAP,
negative_overlap=cfg["TRAIN"].RPN_NEGATIVE_OVERLAP,
param_str=cfg.PROPOSAL_LAYER_PARAMS))
param_str=proposal_layer_params))
rpn_labels = atl.outputs[0]
rpn_bbox_targets = atl.outputs[1]
rpn_bbox_inside_weights = atl.outputs[2]
@ -114,17 +115,30 @@ def create_rpn(conv_out, scaled_gt_boxes, im_info, cfg, add_loss_functions=True)
return rpn_rois, rpn_losses
def add_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg):
rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info,
train_pre_nms_topN=cfg["TRAIN"].RPN_PRE_NMS_TOP_N,
train_post_nms_topN=cfg["TRAIN"].RPN_POST_NMS_TOP_N,
train_nms_thresh=cfg["TRAIN"].RPN_NMS_THRESH,
train_min_size=cfg["TRAIN"].RPN_MIN_SIZE,
test_pre_nms_topN=cfg["TEST"].RPN_PRE_NMS_TOP_N,
test_post_nms_topN=cfg["TEST"].RPN_POST_NMS_TOP_N,
test_nms_thresh=cfg["TEST"].RPN_NMS_THRESH,
test_min_size=cfg["TEST"].RPN_MIN_SIZE,
param_str=cfg.PROPOSAL_LAYER_PARAMS))
def create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg, use_native_proposal_layer=False):
layer_config = {}
layer_config["feat_stride"] = cfg["MODEL"].FEATURE_STRIDE
layer_config["scales"] = cfg["DATA"].PROPOSAL_LAYER_SCALES
layer_config["train_pre_nms_topN"] = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
layer_config["train_post_nms_topN"] = cfg["TRAIN"].RPN_POST_NMS_TOP_N
layer_config["train_nms_thresh"] = float(cfg["TRAIN"].RPN_NMS_THRESH)
layer_config["train_min_size"] = float(cfg["TRAIN"].RPN_MIN_SIZE)
layer_config["test_pre_nms_topN"] = cfg["TEST"].RPN_PRE_NMS_TOP_N
layer_config["test_post_nms_topN"] = cfg["TEST"].RPN_POST_NMS_TOP_N
layer_config["test_nms_thresh"] = float(cfg["TEST"].RPN_NMS_THRESH)
layer_config["test_min_size"] = float(cfg["TEST"].RPN_MIN_SIZE)
if use_native_proposal_layer:
cntk.ops.register_native_user_function('ProposalLayerOp',
'Cntk.ProposalLayerLib-' + cntk.__version__.rstrip('+'),
'CreateProposalLayer')
rpn_rois_raw = ops.native_user_function('ProposalLayerOp', [rpn_cls_prob_reshape, rpn_bbox_pred, im_info],
layer_config, 'native_proposal_layer')
else:
rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, layer_config))
return alias(rpn_rois_raw, name='rpn_rois')
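# Example (mirrors the call sites in FasterRCNN_train.py): by default the
# Python UDF above is used; use_native_proposal_layer=True swaps in the C++
# ProposalLayerLib implementation, e.g. for models evaluated from C++ or C#:
# rpn_rois = create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg)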
def create_proposal_target_layer(rpn_rois, scaled_gt_boxes, cfg):


@ -14,7 +14,7 @@ except ImportError:
# Add models here like this: (category, model_name, model_url)
models = (('Image Classification', 'AlexNet_ImageNet_CNTK', 'https://www.cntk.ai/Models/CNTK_Pretrained/AlexNet_ImageNet_CNTK.model'),
('Image Classification', 'AlexNet_ImageNet_Caffe', 'https://www.cntk.ai/Models/CNTK_Pretrained/AlexNet_ImageNet_Caffe.model'),
('Image Classification', 'AlexNet_ImageNet_Caffe', 'https://www.cntk.ai/Models/Caffe_Converted/AlexNet_ImageNet_Caffe.model'),
('Image Classification', 'InceptionV3_ImageNet_CNTK', 'https://www.cntk.ai/Models/CNTK_Pretrained/InceptionV3_ImageNet_CNTK.model'),
('Image Classification', 'BNInception_ImageNet_Caffe', 'https://www.cntk.ai/Models/Caffe_Converted/BNInception_ImageNet_Caffe.model'),
('Image Classification', 'ResNet18_ImageNet_CNTK', 'https://www.cntk.ai/Models/CNTK_Pretrained/ResNet18_ImageNet_CNTK.model'),


@ -23,6 +23,7 @@ dependencies:
- setuptools=27.2.0=py34_0
- six=1.10.0=py34_0
- wheel=0.29.0=py34_0
- dlib=19.0=np111py34_blas_openblas_200
- pip:
- easydict==1.6.0
- future==0.16.0
@ -35,3 +36,4 @@ dependencies:
- sphinx==1.5.4
- twine==1.8.1
- protobuf==3.2.0


@ -23,6 +23,7 @@ dependencies:
- six=1.10.0=py35_0
- wheel=0.29.0=py35_0
- opencv=3.1.0=np111py35_1
- dlib=19.0=np111py35_200
- pip:
- gym==0.5.2
- keras==2.0.6


@ -0,0 +1,77 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
import numpy as np
import os
import pytest
import sys
from cntk import load_model
from cntk.cntk_py import DeviceKind_GPU
from cntk.device import try_set_default_device, gpu
from cntk.logging.graph import get_node_outputs
from cntk.ops.tests.ops_test_utils import cntk_device
from _cntk_py import force_deterministic_algorithms
force_deterministic_algorithms()
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(abs_path)
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection"))
win35_linux34 = pytest.mark.skipif(not ((sys.platform == 'win32' and sys.version_info[:2] == (3,5)) or
(sys.platform != 'win32' and sys.version_info[:2] == (3,4))),
reason="it runs currently only in windows-py35 and linux-py34 due to precompiled cython modules")
@win35_linux34
def test_detection_demo(device_id):
if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slow in CPU
try_set_default_device(cntk_device(device_id))
from prepare_test_data import prepare_Grocery_data, prepare_alexnet_v0_model
grocery_path = prepare_Grocery_data()
prepare_alexnet_v0_model()
from FastRCNN.install_data_and_model import create_grocery_mappings
create_grocery_mappings(grocery_path)
from DetectionDemo import get_configuration
import utils.od_utils as od
cfg = get_configuration('FasterRCNN')
cfg["CNTK"].FORCE_DETERMINISTIC = True
cfg["CNTK"].DEBUG_OUTPUT = False
cfg["CNTK"].MAKE_MODE = False
cfg["CNTK"].FAST_MODE = False
cfg.CNTK.E2E_MAX_EPOCHS = 3
cfg.CNTK.RPN_EPOCHS = 2
cfg.CNTK.FRCN_EPOCHS = 2
cfg.IMAGE_WIDTH = 400
cfg.IMAGE_HEIGHT = 400
cfg["CNTK"].TRAIN_E2E = True
cfg.USE_GPU_NMS = False
cfg.VISUALIZE_RESULTS = False
cfg["DATA"].MAP_FILE_PATH = grocery_path
externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ
if externalData:
extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
cfg['BASE_MODEL_PATH'] = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v1", "AlexNet_ImageNet_Caffe.model")
else:
cfg['BASE_MODEL_PATH'] = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet_ImageNet_Caffe.model".split("/"))
# train and test
eval_model = od.train_object_detector(cfg)
eval_results = od.evaluate_test_set(eval_model, cfg)
meanAP = np.nanmean(list(eval_results.values()))
print('meanAP={}'.format(meanAP))
assert meanAP > 0.01
# detect objects in single image
img_path = os.path.join(grocery_path, "testImages", "WIN_20160803_11_28_42_Pro.jpg")
regressed_rois, cls_probs = od.evaluate_single_image(eval_model, img_path, cfg)
bboxes, labels, scores = od.filter_results(regressed_rois, cls_probs, cfg)
assert bboxes.shape[0] == labels.shape[0]


@ -69,7 +69,7 @@ def test_fastrcnn_grocery_training(device_id):
extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
model_file = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v0", "AlexNet.model")
else:
model_file = os.path.join(abs_path, *"../../../../Examples/Image/PretrainedModels/AlexNet.model".split("/"))
model_file = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet.model".split("/"))
from A2_RunWithPyModel import train_fast_rcnn, evaluate_fast_rcnn
trained_model = train_fast_rcnn(model_path=model_file)


@ -19,14 +19,14 @@ force_deterministic_algorithms()
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(abs_path)
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection"))
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection", "FastRCNN"))
from prepare_test_data import prepare_Grocery_data, prepare_alexnet_v0_model
grocery_path = prepare_Grocery_data()
prepare_alexnet_v0_model()
from FastRCNN.install_data_and_model import create_grocery_mappings
from install_data_and_model import create_grocery_mappings
create_grocery_mappings(grocery_path)
from utils.config_helpers import merge_configs
win35_linux34 = pytest.mark.skipif(not ((sys.platform == 'win32' and sys.version_info[:2] == (3,5)) or
(sys.platform != 'win32' and sys.version_info[:2] == (3,4))),
@ -34,7 +34,12 @@ win35_linux34 = pytest.mark.skipif(not ((sys.platform == 'win32' and sys.version
@win35_linux34
def test_fastrcnnpy_grocery_training(device_id):
from FastRCNN.config import cfg as detector_cfg
if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slow in CPU
try_set_default_device(cntk_device(device_id))
from utils.config_helpers import merge_configs
from FastRCNN_config import cfg as detector_cfg
from utils.configs.AlexNet_config import cfg as network_cfg
from utils.configs.Grocery_config import cfg as dataset_cfg
@ -43,27 +48,25 @@ def test_fastrcnnpy_grocery_training(device_id):
cfg["CNTK"].DEBUG_OUTPUT = False
cfg["CNTK"].MAKE_MODE = False
cfg["CNTK"].FAST_MODE = False
cfg["CNTK"].MAX_EPOCHS = 2
cfg.NUM_ROI_PROPOSALS = 100
cfg.USE_GPU_NMS = True
cfg["CNTK"].MAX_EPOCHS = 4
cfg.IMAGE_WIDTH = 600
cfg.IMAGE_HEIGHT = 600
cfg.NUM_ROI_PROPOSALS = 200
cfg.USE_GPU_NMS = False
cfg.VISUALIZE_RESULTS = False
cfg["DATA"].MAP_FILE_PATH = grocery_path
externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ
if externalData:
extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
cfg['BASE_MODEL_PATH'] = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v0", "AlexNet.model")
cfg['BASE_MODEL_PATH'] = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v1", "AlexNet_ImageNet_Caffe.model")
else:
cfg['BASE_MODEL_PATH'] = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet.model".split("/"))
cfg['BASE_MODEL_PATH'] = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet_ImageNet_Caffe.model".split("/"))
from FastRCNN.FastRCNN_train import prepare, train_fast_rcnn
from FastRCNN.FastRCNN_eval import compute_test_set_aps
from FastRCNN_train import prepare, train_fast_rcnn
from FastRCNN_eval import compute_test_set_aps
prepare(cfg, False)
if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slow in CPU
try_set_default_device(cntk_device(device_id))
np.random.seed(seed=3)
trained_model = train_fast_rcnn(cfg)
eval_results = compute_test_set_aps(trained_model, cfg)


@ -18,22 +18,24 @@ force_deterministic_algorithms()
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(abs_path)
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection"))
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Extensibility", "ProposalLayer"))
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection", "FastRCNN"))
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection", "FasterRCNN"))
from prepare_test_data import prepare_Grocery_data, prepare_alexnet_v0_model
grocery_path = prepare_Grocery_data()
prepare_alexnet_v0_model()
from FastRCNN.install_data_and_model import create_grocery_mappings
from install_data_and_model import create_grocery_mappings
create_grocery_mappings(grocery_path)
from utils.config_helpers import merge_configs
win35_linux34 = pytest.mark.skipif(not ((sys.platform == 'win32' and sys.version_info[:2] == (3,5)) or
(sys.platform != 'win32' and sys.version_info[:2] == (3,4))),
reason="it runs currently only in windows-py35 and linux-py34 due to precompiled cython modules")
def run_fasterrcnn_grocery_training(device_id, e2e):
from FasterRCNN.config import cfg as detector_cfg
def run_fasterrcnn_grocery_training(e2e):
from FasterRCNN_eval import compute_test_set_aps
from utils.config_helpers import merge_configs
from FasterRCNN_config import cfg as detector_cfg
from utils.configs.AlexNet_config import cfg as network_cfg
from utils.configs.Grocery_config import cfg as dataset_cfg
@ -41,101 +43,68 @@ def run_fasterrcnn_grocery_training(device_id, e2e):
cfg["CNTK"].FORCE_DETERMINISTIC = True
cfg["CNTK"].DEBUG_OUTPUT = False
cfg["CNTK"].MAKE_MODE = False
cfg["CNTK"].FAST_MODE = True
cfg["CNTK"].FAST_MODE = False
cfg.CNTK.E2E_MAX_EPOCHS = 3
cfg.CNTK.RPN_EPOCHS = 2
cfg.CNTK.FRCN_EPOCHS = 2
cfg.IMAGE_WIDTH = 400
cfg.IMAGE_HEIGHT = 400
cfg["CNTK"].TRAIN_E2E = e2e
cfg.USE_GPU_NMS = True
cfg.USE_GPU_NMS = False
cfg.VISUALIZE_RESULTS = False
cfg["DATA"].MAP_FILE_PATH = grocery_path
externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ
if externalData:
extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
cfg['BASE_MODEL_PATH'] = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v0", "AlexNet.model")
model_file = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v1", "AlexNet_ImageNet_Caffe.model")
else:
model_file = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet.model".split("/"))
model_file = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet_ImageNet_Caffe.model".split("/"))
from FasterRCNN import train_faster_rcnn_e2e, eval_faster_rcnn_mAP
from FasterRCNN_train import prepare, train_faster_rcnn
np.random.seed(seed=3)
eval_model = train_faster_rcnn_e2e(model_file, debug_output=False)
meanAP = eval_faster_rcnn_mAP(eval_model)
assert meanAP > 0.01
@win35_linux34
def test_native_fasterrcnn_eval(tmpdir, device_id):
from config import cfg
cfg["CNTK"].FORCE_DETERMINISTIC = True
cfg["CNTK"].DEBUG_OUTPUT = False
cfg["CNTK"].VISUALIZE_RESULTS = False
cfg["CNTK"].FAST_MODE = True
cfg["CNTK"].MAP_FILE_PATH = grocery_path
from FasterRCNN import set_global_vars
set_global_vars(False)
if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slow in CPU
try_set_default_device(cntk_device(device_id))
# since we do not use a reader for evaluation we need unzipped data
externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ
if externalData:
extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
model_file = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v0", "AlexNet.model")
else:
model_file = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet.model".split("/"))
from FasterRCNN import train_faster_rcnn_e2e, eval_faster_rcnn_mAP
np.random.seed(seed=3)
eval_model = train_faster_rcnn_e2e(model_file, debug_output=False)
meanAP_python = eval_faster_rcnn_mAP(eval_model)
cntk_py.always_allow_setting_default_device()
try_set_default_device(cpu())
from native_proposal_layer import clone_with_native_proposal_layer
model_with_native_pl = clone_with_native_proposal_layer(eval_model)
meanAP_native = eval_faster_rcnn_mAP(model_with_native_pl)
# 0.2067 (python) vs 0.2251 (native) -- the difference stems
# from different sorting algorithms: quicksort in python and
# heapsort in c++ (both are not stable).
assert abs(meanAP_python - meanAP_native) < 0.1
@win35_linux34
def test_fasterrcnn_grocery_training_4stage(device_id):
from config import cfg
cfg["CNTK"].FORCE_DETERMINISTIC = True
cfg["CNTK"].DEBUG_OUTPUT = False
cfg["CNTK"].VISUALIZE_RESULTS = False
cfg["CNTK"].FAST_MODE = True
cfg["CNTK"].MAP_FILE_PATH = grocery_path
from FasterRCNN.FasterRCNN_train import prepare, train_faster_rcnn
from FasterRCNN.FasterRCNN_eval import compute_test_set_aps
prepare(cfg, False)
if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slow in CPU
try_set_default_device(cntk_device(device_id))
np.random.seed(seed=3)
cfg['BASE_MODEL_PATH'] = model_file
trained_model = train_faster_rcnn(cfg)
eval_results = compute_test_set_aps(trained_model, cfg)
meanAP = np.nanmean(list(eval_results.values()))
print('meanAP={}'.format(meanAP))
assert meanAP > 0.01
return trained_model, meanAP, cfg
@win35_linux34
def reenable_once_sorting_is_stable_test_native_fasterrcnn_eval(device_id):
if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slow in CPU
try_set_default_device(cntk_device(device_id))
from FasterRCNN_eval import compute_test_set_aps
eval_model, meanAP_python, cfg = run_fasterrcnn_grocery_training(True)
cntk_py.always_allow_setting_default_device()
try_set_default_device(cpu())
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Extensibility", "ProposalLayer"))
from native_proposal_layer import clone_with_native_proposal_layer
model_with_native_pl = clone_with_native_proposal_layer(eval_model)
eval_results = compute_test_set_aps(model_with_native_pl, cfg)
meanAP_native = np.nanmean(list(eval_results.values()))
# 0.2067 (python) vs 0.2251 (native) -- the difference stems
# from different sorting algorithms: quicksort in Python and
# heapsort in C++ (neither of which is stable).
print("Python: {}, native: {}".format(meanAP_python, meanAP_native))
assert abs(meanAP_python - meanAP_native) < 0.1
@win35_linux34
def test_fasterrcnn_grocery_training_e2e(device_id):
if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slowly on CPU
try_set_default_device(cntk_device(device_id))
_, _, _ = run_fasterrcnn_grocery_training(e2e = True)
@win35_linux34
def test_fasterrcnn_grocery_training_4stage(device_id):
run_fasterrcnn_grocery_training(device_id, e2e = False)
@win35_linux34
def test_fasterrcnn_grocery_training_e2e(device_id):
run_fasterrcnn_grocery_training(device_id, e2e = True)
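# The two new one-line wrapper tests above differ only in the e2e flag. A
# hedged alternative sketch using pytest parametrization (not what this
# suite actually does):
#
#     @win35_linux34
#     @pytest.mark.parametrize("e2e", [True, False])
#     def test_fasterrcnn_grocery_training(device_id, e2e):
#         run_fasterrcnn_grocery_training(device_id, e2e=e2e)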

View file

@@ -152,11 +152,18 @@ def prepare_alexnet_v0_model():
*"../../../../PretrainedModels".split("/"))
local_base_path = os.path.normpath(local_base_path)
# v0 model:
model_file = os.path.join(local_base_path, "AlexNet.model")
if not os.path.isfile(model_file):
external_model_path = os.path.join(os.environ[envvar], "PreTrainedModels", "AlexNet", "v0", "AlexNet.model")
copyfile(external_model_path, model_file)
# v1 model:
model_file = os.path.join(local_base_path, "AlexNet_ImageNet_Caffe.model")
if not os.path.isfile(model_file):
external_model_path = os.path.join(os.environ[envvar], "PreTrainedModels", "AlexNet", "v1", "AlexNet_ImageNet_Caffe.model")
copyfile(external_model_path, model_file)
return local_base_path
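# Hypothetical usage from a test, assuming the file names prepared above:
#
#     base_path = prepare_alexnet_v0_model()
#     model_file = os.path.join(base_path, "AlexNet_ImageNet_Caffe.model")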
def prepare_UCF11_data():

View file

@@ -5,23 +5,24 @@
# ==============================================================================
import os, sys
import pytest
import numpy as np
from cntk import user_function
from cntk.ops import input_variable
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path))
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection"))
import pytest
import numpy as np
import cntk
from cntk import user_function
from cntk.ops import input_variable
from utils.rpn.proposal_layer import ProposalLayer as CntkProposalLayer
from utils.rpn.proposal_target_layer import ProposalTargetLayer as CntkProposalTargetLayer
from utils.rpn.anchor_target_layer import AnchorTargetLayer as CntkAnchorTargetLayer
from utils.caffe_layers.proposal_layer import ProposalLayer as CaffeProposalLayer
from utils.caffe_layers.proposal_target_layer import ProposalTargetLayer as CaffeProposalTargetLayer
from utils.caffe_layers.anchor_target_layer import AnchorTargetLayer as CaffeAnchorTargetLayer
win35_linux34 = pytest.mark.skipif(not ((sys.platform == 'win32' and sys.version_info[:2] == (3,5)) or
(sys.platform != 'win32' and sys.version_info[:2] == (3,4))),
reason="it runs currently only in windows-py35 and linux-py34 due to precompiled cython modules")
@win35_linux34
def test_proposal_layer():
from utils.rpn.proposal_layer import ProposalLayer as CntkProposalLayer
from utils.caffe_layers.proposal_layer import ProposalLayer as CaffeProposalLayer
from FasterRCNN.FasterRCNN_config import cfg
cls_prob_shape_cntk = (18,61,61)
cls_prob_shape_caffe = (18,61,61)
rpn_bbox_shape = (36, 61, 61)
@@ -38,7 +39,21 @@ def test_proposal_layer():
rpn_bbox_var = input_variable(rpn_bbox_shape)
dims_info_var = input_variable(dims_info_shape)
cntk_layer = user_function(CntkProposalLayer(cls_prob_var, rpn_bbox_var, dims_info_var))
layer_config = {}
layer_config["feat_stride"] = 16
layer_config["scales"] = [8, 16, 32]
layer_config["train_pre_nms_topN"] = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
layer_config["train_post_nms_topN"] = cfg["TRAIN"].RPN_POST_NMS_TOP_N
layer_config["train_nms_thresh"] = float(cfg["TRAIN"].RPN_NMS_THRESH)
layer_config["train_min_size"] = float(cfg["TRAIN"].RPN_MIN_SIZE)
layer_config["test_pre_nms_topN"] = cfg["TEST"].RPN_PRE_NMS_TOP_N
layer_config["test_post_nms_topN"] = cfg["TEST"].RPN_POST_NMS_TOP_N
layer_config["test_nms_thresh"] = float(cfg["TEST"].RPN_NMS_THRESH)
layer_config["test_min_size"] = float(cfg["TEST"].RPN_MIN_SIZE)
cntk_layer = user_function(CntkProposalLayer(cls_prob_var, rpn_bbox_var, dims_info_var, layer_config))
state, cntk_output = cntk_layer.forward({cls_prob_var: [cls_prob], rpn_bbox_var: [rpn_bbox_pred], dims_info_var: dims_input})
cntk_proposals = cntk_output[next(iter(cntk_output))][0]
@@ -59,7 +74,11 @@ def test_proposal_layer():
assert np.allclose(cntk_proposals, caffe_proposals, rtol=0.0, atol=0.0)
print("Verified ProposalLayer")
@win35_linux34
def test_proposal_target_layer():
from utils.rpn.proposal_target_layer import ProposalTargetLayer as CntkProposalTargetLayer
from utils.caffe_layers.proposal_target_layer import ProposalTargetLayer as CaffeProposalTargetLayer
num_rois = 400
all_rois_shape_cntk = (num_rois,4)
num_gt_boxes = 50
@@ -147,7 +166,11 @@ def test_proposal_target_layer():
assert np.allclose(cntk_bbox_inside_weights, caffe_bbox_inside_weights, rtol=0.0, atol=0.0)
print("Verified ProposalTargetLayer")
@win35_linux34
def test_anchor_target_layer():
from utils.rpn.anchor_target_layer import AnchorTargetLayer as CntkAnchorTargetLayer
from utils.caffe_layers.anchor_target_layer import AnchorTargetLayer as CaffeAnchorTargetLayer
rpn_cls_score_shape_cntk = (1, 18, 61, 61)
num_gt_boxes = 50
gt_boxes_shape_cntk = (num_gt_boxes,5)
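# gt_boxes rows are assumed to use the usual Faster R-CNN layout
# (x1, y1, x2, y2, class_label), hence the 5 columns per ground-truth box.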

View file

@@ -10,7 +10,7 @@ import sys
import pytest
abs_path = os.path.dirname(os.path.abspath(__file__))
notebook = os.path.join(abs_path, "..", "..", "..", "..", "Examples","Image","Detection","FastRCNN", "CNTK_FastRCNN_Eval.ipynb")
notebook = os.path.join(abs_path, "..", "..", "..", "..", "Examples","Image","Detection","FastRCNN", "BrainScript", "CNTK_FastRCNN_Eval.ipynb")
sys.path.append(abs_path)