enabling native proposal layer and dlib selective search

This commit is contained in:
Parent: 7329e94aa3
Commit: 63488568fe
@@ -146,8 +146,8 @@ Examples/Text/LightRNN/test/word-0.location text
*.vsdm binary
*.zip binary
*.dnn binary
Examples/Image/Detection/FastRCNN/fastRCNN/*/*.pyd binary
Examples/Image/Detection/FastRCNN/fastRCNN/*/*.so binary
Examples/Image/Detection/FastRCNN/BrainScript/fastRCNN/*/*.pyd binary
Examples/Image/Detection/FastRCNN/BrainScript/fastRCNN/*/*.so binary
Examples/Image/Detection/utils/cython_modules/*.pyd binary
Examples/Image/Detection/utils/cython_modules/*.so binary
Tests/UnitTests/V2LibraryTests/data/*.bin binary
@@ -291,6 +291,7 @@ Examples/Image/DataSets/grocery/positive/
Examples/Image/DataSets/grocery/testImages/
Examples/Image/DataSets/grocery/*.txt
PretrainedModels/*.model
Examples/Image/Detection/FastRCNN/BrainScript/Output/
Examples/Image/Detection/FastRCNN/BrainScript/proc/
Examples/Image/Detection/FastRCNN/Output/
Examples/Image/Detection/FasterRCNN/Output/
@@ -13,9 +13,6 @@ sys.path.append(os.path.join(abs_path, "..", "..", "Image", "Detection", "Faster

C.device.try_set_default_device(C.device.cpu())

from FasterRCNN import eval_faster_rcnn_mAP, set_global_vars
from config import cfg

ops.register_native_user_function('ProposalLayerOp', 'Cntk.ProposalLayerLib-' + C.__version__.rstrip('+'), 'CreateProposalLayer')

def clone_with_native_proposal_layer(model):

@@ -26,16 +23,6 @@ def clone_with_native_proposal_layer(model):

    def converter(x):
        layer_config = copy.deepcopy(x.attributes)
        layer_config["test_pre_nms_topN"] = cfg["TEST"].RPN_PRE_NMS_TOP_N
        layer_config["test_post_nms_topN"] = cfg["TEST"].RPN_POST_NMS_TOP_N
        layer_config["test_nms_thresh"] = float(cfg["TEST"].RPN_NMS_THRESH)
        layer_config["test_min_size"] = float(cfg["TEST"].RPN_MIN_SIZE)

        layer_config["train_pre_nms_topN"] = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
        layer_config["train_post_nms_topN"] = cfg["TRAIN"].RPN_POST_NMS_TOP_N
        layer_config["train_nms_thresh"] = float(cfg["TRAIN"].RPN_NMS_THRESH)
        layer_config["train_min_size"] = float(cfg["TRAIN"].RPN_MIN_SIZE)

        return ops.native_user_function('ProposalLayerOp', list(x.inputs), layer_config, 'native_proposal_layer')

    return C.misc.convert(model, filter, converter)
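The `filter` predicate passed to `C.misc.convert` above is defined outside this hunk. For orientation only, a minimal sketch of what such a predicate could look like; the matching condition here is an assumption, not the test's actual code:

```python
# Illustrative only: a predicate for C.misc.convert that selects the Python
# ProposalLayer user-function nodes which `converter` then swaps for the
# registered native 'ProposalLayerOp'. The real filter in this test may
# identify the nodes differently.
def filter(x):
    return x.op_name == 'UserFunction' and 'ProposalLayer' in str(x.name)
```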
@@ -52,8 +39,21 @@ def evaluate(model_path):
    # ProposalLayer currently only runs on the CPU
    eval_device = C.cpu()
    model = C.Function.load(model_path, device=eval_device)
    set_global_vars(False)
    return eval_faster_rcnn_mAP(model)

    from FasterRCNN.FasterRCNN_config import cfg as detector_cfg
    from utils.configs.AlexNet_config import cfg as network_cfg
    from utils.configs.Grocery_config import cfg as dataset_cfg
    from utils.config_helpers import merge_configs
    from FasterRCNN.FasterRCNN_train import prepare
    from FasterRCNN.FasterRCNN_eval import compute_test_set_aps

    cfg = merge_configs([detector_cfg, network_cfg, dataset_cfg])
    cfg["CNTK"].FORCE_DETERMINISTIC = True

    prepare(cfg, False)
    eval_results = compute_test_set_aps(model, cfg)
    meanAP = np.nanmean(list(eval_results.values()))
    return meanAP

#############################
# main function boilerplate #
@@ -7,6 +7,7 @@

import sys, os
import numpy as np
import scipy.io as sio
import future
import xml.etree.ElementTree
from xml.etree import ElementTree
@@ -21,9 +22,10 @@ use_pad_scale = False
pad_width = 850
pad_height = 850

pascal_voc2007_jpgimg_rel_path = ".../VOCdevkit/VOC2007/JPEGImages/"
pascal_voc2007_imgsets_rel_path = ".../VOCdevkit/VOC2007/ImageSets/Main/"
pascal_voc2007_annotations_rel_path = ".../VOCdevkit/VOC2007/Annotations/"
pascal_voc2007_jpgimg_rel_path = "../VOCdevkit/VOC2007/JPEGImages/"
pascal_voc2007_imgsets_rel_path = "../VOCdevkit/VOC2007/ImageSets/Main/"
pascal_voc2007_annotations_rel_path = "../VOCdevkit/VOC2007/Annotations/"
pascal_voc2007_proposals_rel_path = "../selective_search_data/"

abs_path = os.path.dirname(os.path.abspath(__file__))
cls_file_path = os.path.join(abs_path, "class_map.txt")
@@ -47,9 +49,6 @@ def format_roi(cls_index, xmin, ymin, xmax, ymax, img_file_path):
        scale_y = (1.0 * pad_height) / img_height

        min_scale = min(scale_x, scale_y)
        if round(img_width * min_scale) != pad_width and round(img_height * min_scale) != pad_height:
            import pdb; pdb.set_trace()

        new_width = round(img_width * min_scale)
        new_height = round(img_height * min_scale)
        assert(new_width == pad_width or new_height == pad_height)
@@ -87,7 +86,7 @@ def format_roi(cls_index, xmin, ymin, xmax, ymax, img_file_path):

def create_mappings(train, skip_difficult):
    file_prefix = "trainval" if train else "test"
    img_map_input = "../VOCdevkit/VOC2007/ImageSets/Main/{}.txt".format(file_prefix)
    img_map_input = "{}.txt".format(file_prefix)
    img_map_output = "{}2007.txt".format(file_prefix)
    roi_map_output = "{}2007_rois_{}_{}{}.txt".format(
        file_prefix,
@@ -95,11 +94,13 @@ def create_mappings(train, skip_difficult):
        "pad" if use_pad_scale else "noPad",
        "_skipDif" if skip_difficult else "")
    size_map_output = "{}_size_file2007.txt".format(file_prefix)
    proposals_output = "{}2007_proposals.txt".format(file_prefix)

    in_map_file_path = os.path.join(abs_path, img_map_input)
    in_map_file_path = os.path.join(abs_path, pascal_voc2007_imgsets_rel_path, img_map_input)
    out_map_file_path = os.path.join(abs_path, img_map_output)
    roi_file_path = os.path.join(abs_path, roi_map_output)
    size_file_path = os.path.join(abs_path, size_map_output)
    proposals_file_path = os.path.join(abs_path, proposals_output)
    class_map_file_path = os.path.join(abs_path, "class_map.txt")

    # write class map file
@@ -115,11 +116,13 @@ def create_mappings(train, skip_difficult):
    input_lines = input_file.readlines()

    counter = 0
    img_numbers = []
    with open(out_map_file_path, 'w') as img_file:
        with open(roi_file_path, 'w') as roi_file:
            with open(size_file_path, 'w') as size_file:
                for in_line in input_lines:
                    img_number = in_line.strip()
                    img_numbers.append(img_number)
                    img_file_path = "{}{}.jpg".format(pascal_voc2007_jpgimg_rel_path, img_number)
                    img_line = "{}\t{}\t0\n".format(counter, img_file_path)
                    img_file.write(img_line)
@@ -164,6 +167,31 @@ def create_mappings(train, skip_difficult):
        for cls in classes:
            cls_file.write("{}\t{}\n".format(cls, class_dict[cls]))

    if not skip_difficult: # proposals are the same and need to be processed only once
        try:
            # convert selective search proposals from matlab to CNTK text format
            print("Converting matlab proposal file to CNTK format ({})".format(proposals_file_path))
            proposal_input = 'voc_2007_{}.mat'.format(file_prefix)
            in_ss_file_path = os.path.join(abs_path, pascal_voc2007_proposals_rel_path, proposal_input)
            raw = sio.loadmat(in_ss_file_path)
            boxes = raw['boxes'][0]
            images = raw['images']

            with open(proposals_file_path, 'w') as prop_file:
                for i in range(len(img_numbers)):
                    img_number = img_numbers[i]
                    img_name = images[i,0][0]
                    assert img_number == img_name

                    box_coords = boxes[i]
                    prop_line = "{} |proposals ".format(i)
                    for c in range(box_coords.shape[0]):
                        prop_line += ' ' + ' '.join(str(x) for x in box_coords[c])

                    prop_file.write(prop_line + '\n')
        except:
            print("Warning: error converting selective search proposals from matlab to CNTK text format")

if __name__ == '__main__':
    create_mappings(True, skip_difficult=True)
    create_mappings(False, skip_difficult=True)
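Each line written by the loop above follows the CNTK text-format reader convention: a sequence id followed by a `|proposals` stream holding four coordinates per box, with all of an image's boxes on one line. An illustrative line (the coordinates are made up):

```
0 |proposals  18 25 240 310 4 4 120 96
```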
@@ -4,17 +4,39 @@
# for full license information.
# ==============================================================================

import os
import os, sys
import numpy as np
import utils.od_utils as od
from utils.config_helpers import merge_configs

available_detectors = ['FastRCNN', 'FasterRCNN']

def get_detector_name(args):
    detector_name = None
    default_detector = 'FasterRCNN'
    if len(args) != 2:
        print("Please provide a detector name as the single argument. Usage:")
        print("    python DetectionDemo.py <detector_name>")
        print("Available detectors: {}".format(available_detectors))
    else:
        detector_name = args[1]
        if not any(detector_name == x for x in available_detectors):
            print("Unknown detector: {}.".format(detector_name))
            print("Available detectors: {}".format(available_detectors))
            detector_name = None

    if detector_name is None:
        print("Using default detector: {}".format(default_detector))
        return default_detector
    else:
        return detector_name

def get_configuration(detector_name):
    # load configs for detector, base network and data set
    if detector_name == "FastRCNN":
        from FastRCNN.config import cfg as detector_cfg
        from FastRCNN.FastRCNN_config import cfg as detector_cfg
    elif detector_name == "FasterRCNN":
        from FasterRCNN.config import cfg as detector_cfg
        from FasterRCNN.FasterRCNN_config import cfg as detector_cfg
    else:
        print('Unknown detector: {}'.format(detector_name))
@@ -29,7 +51,9 @@ def get_configuration(detector_name):

if __name__ == '__main__':
    # Currently supported detectors: 'FastRCNN', 'FasterRCNN'
    cfg = get_configuration('FasterRCNN')
    args = sys.argv
    detector_name = get_detector_name(args)
    cfg = get_configuration(detector_name)

    # train and test
    eval_model = od.train_object_detector(cfg)

@@ -40,7 +64,7 @@ if __name__ == '__main__':
    print('Mean AP = {:.4f}'.format(np.nanmean(list(eval_results.values()))))

    # detect objects in single image
    img_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), r"..\DataSets\Grocery\testImages\WIN_20160803_11_28_42_Pro.jpg")
    img_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), r"../DataSets/Grocery/testImages/WIN_20160803_11_28_42_Pro.jpg")
    regressed_rois, cls_probs = od.evaluate_single_image(eval_model, img_path, cfg)
    bboxes, labels, scores = od.filter_results(regressed_rois, cls_probs, cfg)
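With this change the demo picks the detector from the command line instead of the hard-coded 'FasterRCNN', e.g.:

```
python DetectionDemo.py FastRCNN
```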
@@ -48,7 +48,7 @@ momentum_time_constant = p.cntk_momentum_time_constant
# model specific variables (only AlexNet for now)
base_model = "AlexNet"
if base_model == "AlexNet":
    model_file = "../../../../../../../PretrainedModels/AlexNet_ImageNet_CNTK.model"
    model_file = "../../../../../../../../PretrainedModels/AlexNet.model"
    feature_node_name = "features"
    last_conv_node_name = "conv5.y"
    pool_node_name = "pool3"
@@ -73,22 +73,22 @@
"        cntk.device.try_set_default_device(cntk.device.cpu()) \n",
"    else:\n",
"        cntk.device.try_set_default_device(cntk.device.gpu(0))\n",
"    sys.path.append(os.path.join(*\"../../../../Tests/EndToEndTests/CNTKv2Python/Examples\".split(\"/\")))\n",
"    sys.path.append(os.path.join(*\"../../../../../Tests/EndToEndTests/CNTKv2Python/Examples\".split(\"/\")))\n",
"    import prepare_test_data as T\n",
"    T.prepare_Grocery_data()\n",
"    T.prepare_fastrcnn_grocery_100_model()\n",
"\n",
"#Make sure the grocery dataset is installed \n",
"sys.path.append('../../DataSets/Grocery')\n",
"sys.path.append('../../../DataSets/Grocery')\n",
"from install_grocery import download_grocery_data\n",
"download_grocery_data()\n",
"\n",
"# Make sure the FRCNN model exists - check if the model was trained and exists, if not - download the existing model\n",
"\n",
"sys.path.append('../../../../PretrainedModels')\n",
"sys.path.append('../../../../../PretrainedModels')\n",
"from download_model import download_model_by_name\n",
"download_model_by_name(\"Fast-RCNN_grocery100\")\n",
"model_path = '../../../../PretrainedModels/Fast-RCNN_grocery100.model'\n"
"model_path = '../../../../../PretrainedModels/Fast-RCNN_grocery100.model'\n"
]
},
{

@@ -233,7 +233,7 @@
"    img = cv2.imread(image_path)\n",
"    return resize_and_pad(img, width, height, pad_value), img\n",
"\n",
"test_image_path = r\"../../DataSets/Grocery/testImages/WIN_20160803_11_28_42_Pro.jpg\"\n",
"test_image_path = r\"../../../DataSets/Grocery/testImages/WIN_20160803_11_28_42_Pro.jpg\"\n",
"(test_img, test_img_model_arg), original_img = load_image_and_scale(test_image_path, image_width, image_height)\n",
"\n",
"plt.imshow(cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB))\n",
@@ -6,7 +6,7 @@

from __future__ import print_function
from builtins import str
import pdb, sys, os, time
import sys, os, time
import numpy as np
import selectivesearch
from easydict import EasyDict
@@ -17,7 +17,7 @@ __C.TRAIN = edict()

# If set to 'True' training will be skipped if a trained model exists already
__C.CNTK.MAKE_MODE = True
# set to 'True' to use determininistic algorithms
# set to 'True' to use deterministic algorithms
__C.CNTK.FORCE_DETERMINISTIC = False
# set to 'True' to run only a single epoch
__C.CNTK.FAST_MODE = False
@@ -28,17 +28,14 @@ __C.CNTK.GRAPH_TYPE = "png" # "png" or "pdf"
# Learning parameters
__C.CNTK.L2_REG_WEIGHT = 0.0005
__C.CNTK.MOMENTUM_PER_MB = 0.9
__C.CNTK.MAX_EPOCHS = 15 # use more epochs and more ROIs (NUM_ROI_PROPOSALS) for better results
__C.CNTK.LR_FACTOR = 1.0
__C.CNTK.MAX_EPOCHS = 20
__C.CNTK.LR_FACTOR = 10.0 # 10.0 is used for the Grocery example data. Start with 1.0 for other data sets.
__C.CNTK.LR_PER_SAMPLE = [0.001] * 10 + [0.0001] * 10 + [0.00001]
# The learning rate multiplier for all bias weights
__C.CNTK.BIAS_LR_MULT = 2.0

# Number of regions of interest [ROIs] proposals
__C.NUM_ROI_PROPOSALS = 500 # use 2000 or more for good results
# minimum width and height for proposals in pixels
__C.PROPOSALS_MIN_W = 20
__C.PROPOSALS_MIN_H = 20
__C.NUM_ROI_PROPOSALS = 200 # use 2000 or more for good results
# the minimum IoU (overlap) of a proposal to qualify for training regression targets
__C.BBOX_THRESH = 0.5
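For orientation, `LR_FACTOR` scales the per-sample schedule multiplicatively, mirroring the `[x * lr_factor for x in ...]` pattern used in the Faster R-CNN training code further down in this commit. A sketch of the effect (the scaling expression is an assumption based on that pattern):

```python
# Assumed from the scaling pattern in FasterRCNN.py below:
lr_per_sample_scaled = [x * __C.CNTK.LR_FACTOR for x in __C.CNTK.LR_PER_SAMPLE]
# With LR_FACTOR = 10.0, [0.001]*10 + [0.0001]*10 + [0.00001]
# becomes [0.01]*10 + [0.001]*10 + [0.0001].
```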
@@ -53,7 +50,7 @@ __C.IMAGE_WIDTH = 850
__C.IMAGE_HEIGHT = 850

# Use horizontally-flipped images during training?
__C.TRAIN.USE_FLIPPED = False
__C.TRAIN.USE_FLIPPED = True
# If set to 'True' conv layers weights from the base model will be trained, too
__C.TRAIN_CONV_LAYERS = True
# Sigma parameter for smooth L1 loss in the RPN and the detector (DET)
@@ -65,13 +62,52 @@ __C.RESULTS_NMS_THRESHOLD = 0.5
__C.RESULTS_NMS_CONF_THRESHOLD = 0.0

# Enable plotting of results generally / also plot background boxes / also plot unregressed boxes
__C.VISUALIZE_RESULTS = True
__C.VISUALIZE_RESULTS = False
__C.DRAW_NEGATIVE_ROIS = False
__C.DRAW_UNREGRESSED_ROIS = False
# only for plotting results: boxes with a score lower than this threshold will be considered background
__C.RESULTS_BGR_PLOT_THRESHOLD = 0.1


# If set to True the following two parameters need to point to the corresponding files that contain the proposals:
# __C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE
# __C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE
__C.USE_PRECOMPUTED_PROPOSALS = False

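# Example (illustrative only; the file names below are assumptions and must
# match the proposal files that the mapping scripts actually wrote, e.g.
# "{trainval,test}2007_proposals.txt"):
#   __C.USE_PRECOMPUTED_PROPOSALS = True
#   __C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE = "trainval2007_proposals.txt"
#   __C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE = "test2007_proposals.txt"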
# roi proposal parameters for selective search, grid and filtering
# The first three parameters are for dlib's selective search. For details see
# http://dlib.net/dlib/image_transforms/segment_image_abstract.h.html#find_candidate_object_locations
#
# The basic segmentation is performed kvals.size() times. The k parameter is set (from, to, step_size)
__C.roi_ss_kvals = (10, 500, 5)
# When doing the basic segmentations prior to any box merging, all
# rectangles that have an area < min_size are discarded. Therefore, all outputs and
# subsequent merged rectangles are built out of rectangles that contain at
# least min_size pixels. Note that setting min_size to a smaller value than
# you might otherwise be interested in using can be useful since it allows a
# larger number of possible merged boxes to be created
__C.roi_ss_min_size = 9
# There are max_merging_iterations rounds of neighboring blob merging.
# Therefore, this parameter has some effect on the number of output rectangles
# you get, with larger values of the parameter giving more output rectangles.
# Hint: set __C.CNTK.DEBUG_OUTPUT=True to see the number of ROIs from selective search
__C.roi_ss_mm_iterations = 30
#
# image size used for ROI generation
__C.roi_ss_img_size = 200
# minimum relative width/height of an ROI
__C.roi_min_side_rel = 0.01
# maximum relative width/height of an ROI
__C.roi_max_side_rel = 1.0
# minimum relative area of an ROI
__C.roi_min_area_rel = 0.0001
# maximum relative area of an ROI
__C.roi_max_area_rel = 0.9
# maximum aspect ratio of an ROI vertically and horizontally
__C.roi_max_aspect_ratio = 4.0
# aspect ratios of ROIs for uniform grid ROIs
__C.roi_grid_aspect_ratios = [1.0, 2.0, 0.5]

# For reproducibility
__C.RND_SEED = 3
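For context, the `roi_ss_*` values above feed dlib's selective search. A minimal sketch of the call, assuming the stock dlib Python binding; the actual call site lives in `utils/proposal_helpers.py` and may differ in detail:

```python
import dlib

# Run dlib selective search with the configured values; purely illustrative.
img = dlib.load_rgb_image("testImage.jpg")   # hypothetical input image
rects = []
dlib.find_candidate_object_locations(img, rects,
                                     kvals=(10, 500, 5),         # __C.roi_ss_kvals
                                     min_size=9,                 # __C.roi_ss_min_size
                                     max_merging_iterations=30)  # __C.roi_ss_mm_iterations
print("selective search returned {} candidate boxes".format(len(rects)))
```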
@@ -5,16 +5,15 @@
# ==============================================================================

import os
import numpy as np
from matplotlib.pyplot import imsave
import cv2
import numpy as np
import cntk
from cntk import input_variable, Axis
from utils.map_helpers import evaluate_detections
from utils.plot_helpers import resize_and_pad
from utils.rpn.bbox_transform import regress_rois
from utils.od_mb_source import ObjectDetectionMinibatchSource
from utils.proposal_helpers import ProposalProvider, compute_proposals, compute_image_stats
from utils.proposal_helpers import ProposalProvider, compute_image_stats, compute_proposals

class FastRCNN_Evaluator:
    def __init__(self, eval_model, cfg):

@@ -26,9 +25,7 @@ class FastRCNN_Evaluator:
        roi_proposals = input_variable((cfg.NUM_ROI_PROPOSALS, 4), dynamic_axes=[Axis.default_batch_axis()],
                                       name="roi_proposals")
        self._eval_model = eval_model(image_input, roi_proposals)
        self._min_w = cfg['PROPOSALS_MIN_W']
        self._min_h = cfg['PROPOSALS_MIN_H']
        self._num_proposals = cfg['NUM_ROI_PROPOSALS']
        self._cfg = cfg

    def process_image(self, img_path):
        out_cls_pred, out_rpn_rois, out_bbox_regr, dims = self.process_image_detailed(img_path)

@@ -41,8 +38,6 @@ class FastRCNN_Evaluator:
        img = cv2.imread(img_path)
        _, cntk_img_input, dims = resize_and_pad(img, self._img_shape[2], self._img_shape[1])

        #import pdb; pdb.set_trace()

        # compute ROI proposals and apply scaling and padding to them
        # [target_w, target_h, img_width, img_height, top, bottom, left, right, scale_factor]
        img_stats = compute_image_stats(len(img[0]), len(img), self._img_shape[2], self._img_shape[1])

@@ -50,7 +45,8 @@ class FastRCNN_Evaluator:
        top = img_stats[4]
        left = img_stats[6]

        proposals = compute_proposals(img, self._num_proposals, self._min_w, self._min_h)
        num_proposals = self._cfg['NUM_ROI_PROPOSALS']
        proposals = compute_proposals(img, num_proposals, self._cfg)
        proposals = proposals * scale_factor
        proposals += (left, top, left, top)

@@ -76,7 +72,18 @@ def compute_test_set_aps(eval_model, cfg):
    frcn_eval = eval_model(image_input, roi_proposals)

    # Create the minibatch source
    proposal_provider = ProposalProvider.fromconfig(cfg)
    if cfg.USE_PRECOMPUTED_PROPOSALS:
        try:
            cfg["DATA"].TEST_PRECOMPUTED_PROPOSALS_FILE = os.path.join(cfg["DATA"].MAP_FILE_PATH, cfg["DATA"].TEST_PRECOMPUTED_PROPOSALS_FILE)
            proposal_provider = ProposalProvider.fromfile(cfg["DATA"].TEST_PRECOMPUTED_PROPOSALS_FILE, cfg.NUM_ROI_PROPOSALS)
        except:
            print("To use precomputed proposals please specify the following parameters in your configuration:\n"
                  "__C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE\n"
                  "__C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE")
            exit(-1)
    else:
        proposal_provider = ProposalProvider.fromconfig(cfg)

    minibatch_source = ObjectDetectionMinibatchSource(
        cfg["DATA"].TEST_MAP_FILE,
        cfg["DATA"].TEST_ROI_FILE,
@@ -58,9 +58,17 @@ def prepare(cfg, use_arg_parser=True):
    cfg["DATA"].TEST_MAP_FILE = os.path.join(data_path, cfg["DATA"].TEST_MAP_FILE)
    cfg["DATA"].TRAIN_ROI_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_ROI_FILE)
    cfg["DATA"].TEST_ROI_FILE = os.path.join(data_path, cfg["DATA"].TEST_ROI_FILE)
    if cfg.USE_PRECOMPUTED_PROPOSALS:
        try:
            cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE)
        except:
            print("To use precomputed proposals please specify the following parameters in your configuration:\n"
                  "__C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE\n"
                  "__C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE")
            exit(-1)

    cfg['MODEL_PATH'] = os.path.join(cfg.OUTPUT_PATH, "fast_rcnn_eval_{}.model".format(cfg["MODEL"].BASE_MODEL))
    cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "PretrainedModels",
    cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "..", "PretrainedModels",
                                          cfg["MODEL"].BASE_MODEL_FILE)

    cfg["DATA"].CLASSES = parse_class_map_file(cfg["DATA"].CLASS_MAP_FILE)

@@ -300,7 +308,11 @@ def train_fast_rcnn(cfg):
    log_number_of_parameters(loss)

    # Create the minibatch source
    proposal_provider = ProposalProvider.fromconfig(cfg)
    if cfg.USE_PRECOMPUTED_PROPOSALS:
        proposal_provider = ProposalProvider.fromfile(cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE, cfg.NUM_ROI_PROPOSALS)
    else:
        proposal_provider = ProposalProvider.fromconfig(cfg)

    od_minibatch_source = ObjectDetectionMinibatchSource(
        cfg["DATA"].TRAIN_MAP_FILE, cfg["DATA"].TRAIN_ROI_FILE,
        max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE,
@@ -5,7 +5,7 @@
This folder contains an end-to-end solution for using Fast R-CNN to perform object detection.
The original research paper for Fast R-CNN can be found at [https://arxiv.org/abs/1504.08083](https://arxiv.org/abs/1504.08083).
Base models that are supported by the current configuration are AlexNet and VGG16.
Two image set that are preconfigured are Pascal VOC 2007 and Grocery.
Two image sets that are preconfigured are Pascal VOC 2007 and Grocery.
Other base models or image sets can be used by adding a configuration file similar to the examples in `utils/configs` and importing it in `run_fast_rcnn.py`.

## Running the example
@@ -15,14 +15,14 @@ Other base models or image sets can be used by adding a configuration file simil
To run Fast R-CNN you need a CNTK Python environment. Install the following additional packages:

```
pip install opencv-python easydict pyyaml future
pip install opencv-python easydict pyyaml dlib
```

The code uses prebuilt Cython modules for parts of the region proposal network (see `Examples/Image/Detection/utils/cython_modules`).
These binaries are contained in the repository for Python 3.5 under Windows and Python 3.4 under Linux.
If you require other versions please follow the instructions at [https://github.com/rbgirshick/py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn#installation-sufficient-for-the-demo).

If you want to use the debug output you need to run ' pip install pydot_ng) ([website](https://pypi.python.org/pypi/pydot-ng)) and install [graphviz](http://graphviz.org/) (GraphViz executable has to be in the system’s PATH) to be able to plot the CNTK graphs.
If you want to use the debug output you need to run `pip install pydot_ng` ([website](https://pypi.python.org/pypi/pydot-ng)) and install [graphviz](http://graphviz.org/) (GraphViz executable has to be in the system’s PATH) to be able to plot the CNTK graphs.

### Getting the data and AlexNet model
@@ -90,7 +90,7 @@ and run `python run_fast_rcnn.py` to train and evaluate Fast R-CNN on your data.

### Parameters

All options and parameters are in `config.py` in the `FastRCNN` folder and all of them are explained there. These include
All options and parameters are in `FastRCNN_config.py` in the `FastRCNN` folder and all of them are explained there. These include

```
# learning parameters

@@ -99,9 +99,10 @@ __C.CNTK.LR_PER_SAMPLE = [0.001] * 10 + [0.0001] * 10 + [0.00001]

# Number of regions of interest [ROIs] proposals
__C.NUM_ROI_PROPOSALS = 1000
# minimum width and height for proposals in pixels
__C.PROPOSALS_MIN_W = 20
__C.PROPOSALS_MIN_H = 20
# minimum relative width/height of an ROI
__C.roi_min_side_rel = 0.01
# maximum relative width/height of an ROI
__C.roi_max_side_rel = 1.0
```

### Fast R-CNN CNTK code

@@ -110,4 +111,4 @@ Most of the code is in `FastRCNN_train.py` and `FastRCNN_eval.py` (and `Examples

### Algorithm

All details regarding the Fast R-CNN algortihm can be found in the original research paper: [https://arxiv.org/abs/1504.08083](https://arxiv.org/abs/1504.08083).
All details regarding the Fast R-CNN algorithm can be found in the original research paper: [https://arxiv.org/abs/1504.08083](https://arxiv.org/abs/1504.08083).
@@ -26,7 +26,7 @@ if __name__ == '__main__':

    sys.path.append(os.path.join(base_folder, "..", "..", "..", "..", "PretrainedModels"))
    from download_model import download_model_by_name
    download_model_by_name("AlexNet_ImageNet_CNTK")
    download_model_by_name("AlexNet_ImageNet_Caffe")

    print("Creating mapping files for Grocery data set..")
    create_grocery_mappings(base_folder)
@@ -1,19 +0,0 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================

from __future__ import print_function
import zipfile
import os, sys

base_folder = os.path.dirname(os.path.abspath(__file__))

sys.path.append(os.path.join(base_folder, "..", "..", "DataSets", "Grocery"))
from install_grocery import download_grocery_data
download_grocery_data()

sys.path.append(os.path.join(base_folder, "..", "..", "..", "..", "PretrainedModels"))
from download_model import download_model_by_name
download_model_by_name("AlexNet_ImageNet_CNTK")
@@ -13,7 +13,7 @@ from utils.plot_helpers import plot_test_set_results

def get_configuration():
    # load configs for detector, base network and data set
    from config import cfg as detector_cfg
    from FastRCNN_config import cfg as detector_cfg
    # for VGG16 base model use: from utils.configs.VGG16_config import cfg as network_cfg
    # for AlexNet base model use: from utils.configs.AlexNet_config import cfg as network_cfg
    from utils.configs.AlexNet_config import cfg as network_cfg
@@ -1,750 +0,0 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================

from __future__ import print_function
import numpy as np
import os, sys
import argparse
import yaml # pip install pyyaml
import easydict # pip install easydict
import cntk
import easydict
from cntk import Trainer, UnitType, load_model, Axis, input_variable, parameter, times, combine, \
    softmax, roipooling, plus, element_times, CloneMethod, alias, Communicator, reduce_sum
from cntk.core import Value
from cntk.io import MinibatchData
from cntk.initializer import normal
from cntk.layers import placeholder, Constant, Sequential
from cntk.learners import momentum_sgd, learning_rate_schedule, momentum_schedule
from cntk.logging import log_number_of_parameters, ProgressPrinter
from cntk.logging.graph import find_by_name, plot
from cntk.losses import cross_entropy_with_softmax
from cntk.metrics import classification_error
from _cntk_py import force_deterministic_algorithms

abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path, ".."))
from utils.rpn.rpn_helpers import create_rpn, create_proposal_target_layer
from utils.rpn.cntk_smoothL1_loss import SmoothL1Loss
from utils.map.map_helpers import evaluate_detections
from utils.annotations.annotations_helper import parse_class_map_file
from config import cfg
from od_mb_source import ObjectDetectionMinibatchSource
from cntk_helpers import regress_rois

###############################################################
###############################################################
mb_size = 1
image_width = cfg["CNTK"].IMAGE_WIDTH
image_height = cfg["CNTK"].IMAGE_HEIGHT
num_channels = 3

# dims_input -- (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
dims_input_const = MinibatchData(Value(batch=np.asarray(
    [image_width, image_height, image_width, image_height, image_width, image_height], dtype=np.float32)), 1, 1, False)

# Color used for padding and normalization (Caffe model uses [102.98010, 115.94650, 122.77170])
img_pad_value = [103, 116, 123] if cfg["CNTK"].BASE_MODEL == "VGG16" else [114, 114, 114]
normalization_const = Constant([[[103]], [[116]], [[123]]]) if cfg["CNTK"].BASE_MODEL == "VGG16" else Constant([[[114]], [[114]], [[114]]])

globalvars = {}
globalvars['output_path'] = os.path.join(abs_path, "Output")

# dataset specific parameters
map_file_path = os.path.join(abs_path, cfg["CNTK"].MAP_FILE_PATH)
globalvars['class_map_file'] = cfg["CNTK"].CLASS_MAP_FILE
globalvars['train_map_file'] = cfg["CNTK"].TRAIN_MAP_FILE
globalvars['test_map_file'] = cfg["CNTK"].TEST_MAP_FILE
globalvars['train_roi_file'] = cfg["CNTK"].TRAIN_ROI_FILE
globalvars['test_roi_file'] = cfg["CNTK"].TEST_ROI_FILE
epoch_size = cfg["CNTK"].NUM_TRAIN_IMAGES
num_test_images = cfg["CNTK"].NUM_TEST_IMAGES

# model specific parameters
model_folder = os.path.join(abs_path, "..", "..", "..", "..", "PretrainedModels")
base_model_file = os.path.join(model_folder, cfg["CNTK"].BASE_MODEL_FILE)
feature_node_name = cfg["CNTK"].FEATURE_NODE_NAME
last_conv_node_name = cfg["CNTK"].LAST_CONV_NODE_NAME
start_train_conv_node_name = cfg["CNTK"].START_TRAIN_CONV_NODE_NAME
pool_node_name = cfg["CNTK"].POOL_NODE_NAME
last_hidden_node_name = cfg["CNTK"].LAST_HIDDEN_NODE_NAME
roi_dim = cfg["CNTK"].ROI_DIM
###############################################################
###############################################################
def set_global_vars(use_arg_parser = True):
    data_path = map_file_path

    # set and overwrite learning parameters
    globalvars['rpn_lr_factor'] = cfg["CNTK"].RPN_LR_FACTOR
    globalvars['frcn_lr_factor'] = cfg["CNTK"].FRCN_LR_FACTOR
    globalvars['e2e_lr_factor'] = cfg["CNTK"].E2E_LR_FACTOR
    globalvars['momentum_per_mb'] = cfg["CNTK"].MOMENTUM_PER_MB
    globalvars['e2e_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].E2E_MAX_EPOCHS
    globalvars['rpn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].RPN_EPOCHS
    globalvars['frcn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].FRCN_EPOCHS
    globalvars['rnd_seed'] = cfg.RNG_SEED
    globalvars['train_conv'] = cfg["CNTK"].TRAIN_CONV_LAYERS
    globalvars['train_e2e'] = cfg["CNTK"].TRAIN_E2E

    if use_arg_parser:
        parser = argparse.ArgumentParser()
        parser.add_argument('-datadir', '--datadir', help='Data directory where the ImageNet dataset is located',
                            required=False, default=data_path)
        parser.add_argument('-outputdir', '--outputdir', help='Output directory for checkpoints and models',
                            required=False, default=None)
        parser.add_argument('-logdir', '--logdir', help='Log file',
                            required=False, default=None)
        parser.add_argument('-n', '--num_epochs', help='Total number of epochs to train', type=int,
                            required=False, default=cfg["CNTK"].E2E_MAX_EPOCHS)
        parser.add_argument('-m', '--minibatch_size', help='Minibatch size', type=int,
                            required=False, default=mb_size)
        parser.add_argument('-e', '--epoch_size', help='Epoch size', type=int,
                            required=False, default=epoch_size)
        parser.add_argument('-q', '--quantized_bits', help='Number of quantized bits used for gradient aggregation', type=int,
                            required=False, default='32')
        parser.add_argument('-r', '--restart',
                            help='Indicating whether to restart from scratch (instead of restart from checkpoint file by default)',
                            action='store_true')
        parser.add_argument('-device', '--device', type=int, help="Force to run the script on a specified device",
                            required=False, default=None)
        parser.add_argument('-rpnLrFactor', '--rpnLrFactor', type=float, help="Scale factor for rpn lr schedule", required=False)
        parser.add_argument('-frcnLrFactor', '--frcnLrFactor', type=float, help="Scale factor for frcn lr schedule", required=False)
        parser.add_argument('-e2eLrFactor', '--e2eLrFactor', type=float, help="Scale factor for e2e lr schedule", required=False)
        parser.add_argument('-momentumPerMb', '--momentumPerMb', type=float, help="momentum per minibatch", required=False)
        parser.add_argument('-e2eEpochs', '--e2eEpochs', type=int, help="number of epochs for e2e training", required=False)
        parser.add_argument('-rpnEpochs', '--rpnEpochs', type=int, help="number of epochs for rpn training", required=False)
        parser.add_argument('-frcnEpochs', '--frcnEpochs', type=int, help="number of epochs for frcn training", required=False)
        parser.add_argument('-rndSeed', '--rndSeed', type=int, help="the random seed", required=False)
        parser.add_argument('-trainConv', '--trainConv', type=int, help="whether to train conv layers", required=False)
        parser.add_argument('-trainE2E', '--trainE2E', type=int, help="whether to train e2e (otherwise 4 stage)", required=False)

        args = vars(parser.parse_args())

        if args['rpnLrFactor'] is not None:
            globalvars['rpn_lr_factor'] = args['rpnLrFactor']
        if args['frcnLrFactor'] is not None:
            globalvars['frcn_lr_factor'] = args['frcnLrFactor']
        if args['e2eLrFactor'] is not None:
            globalvars['e2e_lr_factor'] = args['e2eLrFactor']
        if args['momentumPerMb'] is not None:
            globalvars['momentum_per_mb'] = args['momentumPerMb']
        if args['e2eEpochs'] is not None:
            globalvars['e2e_epochs'] = args['e2eEpochs']
        if args['rpnEpochs'] is not None:
            globalvars['rpn_epochs'] = args['rpnEpochs']
        if args['frcnEpochs'] is not None:
            globalvars['frcn_epochs'] = args['frcnEpochs']
        if args['rndSeed'] is not None:
            globalvars['rnd_seed'] = args['rndSeed']
        if args['trainConv'] is not None:
            globalvars['train_conv'] = True if args['trainConv']==1 else False
        if args['trainE2E'] is not None:
            globalvars['train_e2e'] = True if args['trainE2E']==1 else False

        if args['outputdir'] is not None:
            globalvars['output_path'] = args['outputdir']
        if args['logdir'] is not None:
            log_dir = args['logdir']
        if args['device'] is not None:
            # Setting one worker on GPU and one worker on CPU. Otherwise memory consumption is too high for a single GPU.
            if Communicator.rank() == 0:
                cntk.device.try_set_default_device(cntk.device.gpu(args['device']))
            else:
                cntk.device.try_set_default_device(cntk.device.cpu())

        if args['datadir'] is not None:
            data_path = args['datadir']

    if not os.path.isdir(data_path):
        raise RuntimeError("Directory %s does not exist" % data_path)

    globalvars['class_map_file'] = os.path.join(data_path, globalvars['class_map_file'])
    globalvars['train_map_file'] = os.path.join(data_path, globalvars['train_map_file'])
    globalvars['test_map_file'] = os.path.join(data_path, globalvars['test_map_file'])
    globalvars['train_roi_file'] = os.path.join(data_path, globalvars['train_roi_file'])
    globalvars['test_roi_file'] = os.path.join(data_path, globalvars['test_roi_file'])

    if cfg["CNTK"].FORCE_DETERMINISTIC:
        force_deterministic_algorithms()
    np.random.seed(seed=globalvars['rnd_seed'])
    globalvars['classes'] = parse_class_map_file(globalvars['class_map_file'])
    globalvars['num_classes'] = len(globalvars['classes'])

    if cfg["CNTK"].DEBUG_OUTPUT:
        # report args
        print("Using the following parameters:")
        print("Flip image       : {}".format(cfg["TRAIN"].USE_FLIPPED))
        print("Train conv layers: {}".format(globalvars['train_conv']))
        print("Random seed      : {}".format(globalvars['rnd_seed']))
        print("Momentum per MB  : {}".format(globalvars['momentum_per_mb']))
        if globalvars['train_e2e']:
            print("E2E epochs       : {}".format(globalvars['e2e_epochs']))
        else:
            print("RPN lr factor    : {}".format(globalvars['rpn_lr_factor']))
            print("RPN epochs       : {}".format(globalvars['rpn_epochs']))
            print("FRCN lr factor   : {}".format(globalvars['frcn_lr_factor']))
            print("FRCN epochs      : {}".format(globalvars['frcn_epochs']))

###############################################################
###############################################################
def clone_model(base_model, from_node_names, to_node_names, clone_method):
    from_nodes = [find_by_name(base_model, node_name) for node_name in from_node_names]
    if None in from_nodes:
        print("Error: could not find all specified 'from_nodes' in clone. Looking for {}, found {}"
              .format(from_node_names, from_nodes))
    to_nodes = [find_by_name(base_model, node_name) for node_name in to_node_names]
    if None in to_nodes:
        print("Error: could not find all specified 'to_nodes' in clone. Looking for {}, found {}"
              .format(to_node_names, to_nodes))
    input_placeholders = dict(zip(from_nodes, [placeholder() for x in from_nodes]))
    cloned_net = combine(to_nodes).clone(clone_method, input_placeholders)
    return cloned_net

def clone_conv_layers(base_model):
    if not globalvars['train_conv']:
        conv_layers = clone_model(base_model, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
    elif feature_node_name == start_train_conv_node_name:
        conv_layers = clone_model(base_model, [feature_node_name], [last_conv_node_name], CloneMethod.clone)
    else:
        fixed_conv_layers = clone_model(base_model, [feature_node_name], [start_train_conv_node_name],
                                        CloneMethod.freeze)
        train_conv_layers = clone_model(base_model, [start_train_conv_node_name], [last_conv_node_name],
                                        CloneMethod.clone)
        conv_layers = Sequential([fixed_conv_layers, train_conv_layers])
    return conv_layers

# Please keep in sync with Readme.md
def create_fast_rcnn_predictor(conv_out, rois, fc_layers):
    # RCNN
    roi_out = roipooling(conv_out, rois, cntk.MAX_POOLING, (roi_dim, roi_dim), spatial_scale=1/16.0)
    fc_out = fc_layers(roi_out)

    # prediction head
    W_pred = parameter(shape=(4096, globalvars['num_classes']), init=normal(scale=0.01), name="cls_score.W")
    b_pred = parameter(shape=globalvars['num_classes'], init=0, name="cls_score.b")
    cls_score = plus(times(fc_out, W_pred), b_pred, name='cls_score')

    # regression head
    W_regr = parameter(shape=(4096, globalvars['num_classes']*4), init=normal(scale=0.001), name="bbox_regr.W")
    b_regr = parameter(shape=globalvars['num_classes']*4, init=0, name="bbox_regr.b")
    bbox_pred = plus(times(fc_out, W_regr), b_regr, name='bbox_regr')

    return cls_score, bbox_pred

# Please keep in sync with Readme.md
# Defines the Faster R-CNN network model for detecting objects in images
def create_faster_rcnn_predictor(base_model_file_name, features, scaled_gt_boxes, dims_input):
    # Load the pre-trained classification net and clone layers
    base_model = load_model(base_model_file_name)
    conv_layers = clone_conv_layers(base_model)
    fc_layers = clone_model(base_model, [pool_node_name], [last_hidden_node_name], clone_method=CloneMethod.clone)

    # Normalization and conv layers
    feat_norm = features - normalization_const
    conv_out = conv_layers(feat_norm)

    # RPN and prediction targets
    rpn_rois, rpn_losses = \
        create_rpn(conv_out, scaled_gt_boxes, dims_input, proposal_layer_param_string=cfg["CNTK"].PROPOSAL_LAYER_PARAMS)
    rois, label_targets, bbox_targets, bbox_inside_weights = \
        create_proposal_target_layer(rpn_rois, scaled_gt_boxes, num_classes=globalvars['num_classes'])

    # Fast RCNN and losses
    cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers)
    detection_losses = create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights)
    loss = rpn_losses + detection_losses
    pred_error = classification_error(cls_score, label_targets, axis=1)

    return loss, pred_error

def create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights):
    # classification loss
    cls_loss = cross_entropy_with_softmax(cls_score, label_targets, axis=1)

    p_cls_loss = placeholder()
    p_rois = placeholder()
    # The terms that are accounted for in the cls loss are those that correspond to an actual roi proposal --> do not count no-op (all-zero) rois
    roi_indicator = reduce_sum(p_rois, axis=1)
    cls_num_terms = reduce_sum(cntk.greater_equal(roi_indicator, 0.0))
    cls_normalization_factor = 1.0 / cls_num_terms
    normalized_cls_loss = reduce_sum(p_cls_loss) * cls_normalization_factor

    reduced_cls_loss = cntk.as_block(normalized_cls_loss,
                                     [(p_cls_loss, cls_loss), (p_rois, rois)],
                                     'Normalize', 'norm_cls_loss')

    # regression loss
    p_bbox_pred = placeholder()
    p_bbox_targets = placeholder()
    p_bbox_inside_weights = placeholder()
    bbox_loss = SmoothL1Loss(cfg["CNTK"].SIGMA_DET_L1, p_bbox_pred, p_bbox_targets, p_bbox_inside_weights, 1.0)
    # The bbox loss is normalized by the batch size
    bbox_normalization_factor = 1.0 / cfg["TRAIN"].BATCH_SIZE
    normalized_bbox_loss = reduce_sum(bbox_loss) * bbox_normalization_factor

    reduced_bbox_loss = cntk.as_block(normalized_bbox_loss,
                                      [(p_bbox_pred, bbox_pred), (p_bbox_targets, bbox_targets), (p_bbox_inside_weights, bbox_inside_weights)],
                                      'SmoothL1Loss', 'norm_bbox_loss')

    detection_losses = plus(reduced_cls_loss, reduced_bbox_loss, name="detection_losses")

    return detection_losses
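# Note on SmoothL1Loss above: it implements the robust regression loss from
# the Fast R-CNN paper (https://arxiv.org/abs/1504.08083), scaled by sigma:
#     smooth_L1(x) = 0.5 * (sigma * x)^2    if |x| < 1 / sigma^2
#                  = |x| - 0.5 / sigma^2    otherwise
# where sigma is cfg["CNTK"].SIGMA_DET_L1 for the detector head.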
def create_eval_model(model, image_input, dims_input, rpn_model=None):
    print("creating eval model")
    conv_layers = clone_model(model, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
    conv_out = conv_layers(image_input)

    model_with_rpn = model if rpn_model is None else rpn_model
    rpn = clone_model(model_with_rpn, [last_conv_node_name, "dims_input"], ["rpn_rois"], CloneMethod.freeze)
    rpn_rois = rpn(conv_out, dims_input)

    roi_fc_layers = clone_model(model, [last_conv_node_name, "rpn_target_rois"], ["cls_score", "bbox_regr"], CloneMethod.freeze)
    pred_net = roi_fc_layers(conv_out, rpn_rois)
    cls_score = pred_net.outputs[0]
    bbox_regr = pred_net.outputs[1]

    if cfg["TRAIN"].BBOX_NORMALIZE_TARGETS and cfg["TRAIN"].BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        num_boxes = int(bbox_regr.shape[1] / 4)
        bbox_normalize_means = np.array(cfg["TRAIN"].BBOX_NORMALIZE_MEANS * num_boxes)
        bbox_normalize_stds = np.array(cfg["TRAIN"].BBOX_NORMALIZE_STDS * num_boxes)
        bbox_regr = plus(element_times(bbox_regr, bbox_normalize_stds), bbox_normalize_means, name='bbox_regr')

    cls_pred = softmax(cls_score, axis=1, name='cls_pred')
    eval_model = combine([cls_pred, rpn_rois, bbox_regr])

    return eval_model

def train_model(image_input, roi_input, dims_input, loss, pred_error,
                lr_per_sample, mm_schedule, l2_reg_weight, epochs_to_train,
                rpn_rois_input=None, buffered_rpn_proposals=None):
    if isinstance(loss, cntk.Variable):
        loss = combine([loss])

    params = loss.parameters
    biases = [p for p in params if '.b' in p.name or 'b' == p.name]
    others = [p for p in params if not p in biases]
    bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT

    if cfg["CNTK"].DEBUG_OUTPUT:
        print("biases")
        for p in biases: print(p)
        print("others")
        for p in others: print(p)
        print("bias_lr_mult: {}".format(bias_lr_mult))

    # Instantiate the learners and the trainer object
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    learner = momentum_sgd(others, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight,
                           unit_gain=False, use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT)

    bias_lr_per_sample = [v * bias_lr_mult for v in lr_per_sample]
    bias_lr_schedule = learning_rate_schedule(bias_lr_per_sample, unit=UnitType.sample)
    bias_learner = momentum_sgd(biases, bias_lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight,
                                unit_gain=False, use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT)
    trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

    # Get minibatches of images and perform model training
    print("Training model for %s epochs." % epochs_to_train)
    log_number_of_parameters(loss)

    # Create the minibatch source
    od_minibatch_source = ObjectDetectionMinibatchSource(
        globalvars['train_map_file'], globalvars['train_roi_file'],
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width, pad_height=image_height, pad_value=img_pad_value,
        randomize=True, use_flipping=cfg["TRAIN"].USE_FLIPPED,
        max_images=cfg["CNTK"].NUM_TRAIN_IMAGES,
        buffered_rpn_proposals=buffered_rpn_proposals)

    # define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.roi_si: roi_input,
        od_minibatch_source.dims_si: dims_input
    }

    use_buffered_proposals = buffered_rpn_proposals is not None
    progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs_to_train, gen_heartbeat=True)
    for epoch in range(epochs_to_train):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data, proposals = od_minibatch_source.next_minibatch_with_proposals(min(mb_size, epoch_size-sample_count), input_map=input_map)
            if use_buffered_proposals:
                data[rpn_rois_input] = MinibatchData(Value(batch=np.asarray(proposals, dtype=np.float32)), 1, 1, False)
                # remove dims input if no rpn is required to avoid warnings
                del data[[k for k in data if '[6]' in str(k)][0]]

            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
            if sample_count % 100 == 0:
                print("Processed {} samples".format(sample_count))

        progress_printer.epoch_summary(with_metric=True)
def compute_rpn_proposals(rpn_model, image_input, roi_input, dims_input):
    num_images = cfg["CNTK"].NUM_TRAIN_IMAGES
    # Create the minibatch source
    od_minibatch_source = ObjectDetectionMinibatchSource(
        globalvars['train_map_file'], globalvars['train_roi_file'],
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width, pad_height=image_height, pad_value=img_pad_value,
        max_images=num_images,
        randomize=False, use_flipping=False)

    # define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.roi_si: roi_input,
        od_minibatch_source.dims_si: dims_input
    }

    # setting pre- and post-nms top N to training values since buffered proposals are used for further training
    test_pre = cfg["TEST"].RPN_PRE_NMS_TOP_N
    test_post = cfg["TEST"].RPN_POST_NMS_TOP_N
    cfg["TEST"].RPN_PRE_NMS_TOP_N = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
    cfg["TEST"].RPN_POST_NMS_TOP_N = cfg["TRAIN"].RPN_POST_NMS_TOP_N

    buffered_proposals = [None for _ in range(num_images)]
    sample_count = 0
    while sample_count < num_images:
        data = od_minibatch_source.next_minibatch(1, input_map=input_map)
        output = rpn_model.eval(data)
        out_dict = dict([(k.name, k) for k in output])
        out_rpn_rois = output[out_dict['rpn_rois']][0]
        buffered_proposals[sample_count] = np.round(out_rpn_rois).astype(np.int16)
        sample_count += 1
        if sample_count % 500 == 0:
            print("Buffered proposals for {} samples".format(sample_count))

    # resetting config values to original test values
    cfg["TEST"].RPN_PRE_NMS_TOP_N = test_pre
    cfg["TEST"].RPN_POST_NMS_TOP_N = test_post

    return buffered_proposals
# Trains a Faster R-CNN model end-to-end
def train_faster_rcnn_e2e(base_model_file_name, debug_output=False):
    # Input variables denoting features and labeled ground truth rois (as 5-tuples per roi)
    image_input = input_variable((num_channels, image_height, image_width), dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name)
    roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    dims_node = alias(dims_input, name='dims_input')

    # Instantiate the Faster R-CNN prediction model and loss function
    loss, pred_error = create_faster_rcnn_predictor(base_model_file_name, image_input, roi_input, dims_node)

    if debug_output:
        print("Storing graphs and models to %s." % globalvars['output_path'])
        plot(loss, os.path.join(globalvars['output_path'], "graph_frcn_train_e2e." + cfg["CNTK"].GRAPH_TYPE))

    # Set learning parameters
    e2e_lr_factor = globalvars['e2e_lr_factor']
    e2e_lr_per_sample_scaled = [x * e2e_lr_factor for x in cfg["CNTK"].E2E_LR_PER_SAMPLE]
    mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)

    print("Using base model: {}".format(cfg["CNTK"].BASE_MODEL))
    print("lr_per_sample: {}".format(e2e_lr_per_sample_scaled))

    train_model(image_input, roi_input, dims_input, loss, pred_error,
                e2e_lr_per_sample_scaled, mm_schedule, cfg["CNTK"].L2_REG_WEIGHT, globalvars['e2e_epochs'])

    return create_eval_model(loss, image_input, dims_input)
# Trains a Faster R-CNN model using 4-stage alternating training
|
||||
def train_faster_rcnn_alternating(base_model_file_name, debug_output=False):
|
||||
'''
|
||||
4-Step Alternating Training scheme from the Faster R-CNN paper:
|
||||
|
||||
# Create initial network, only rpn, without detection network
|
||||
# --> train only the rpn (and conv3_1 and up for VGG16)
|
||||
# buffer region proposals from rpn
|
||||
# Create full network, initialize conv layers with imagenet, use buffered proposals
|
||||
# --> train only detection network (and conv3_1 and up for VGG16)
|
||||
# Keep conv weights from detection network and fix them
|
||||
# --> train only rpn
|
||||
# buffer region proposals from rpn
|
||||
# Keep conv and rpn weights from step 3 and fix them
|
||||
# --> train only detection network
|
||||
'''
|
||||
|
||||
    # Learning parameters
    rpn_lr_factor = globalvars['rpn_lr_factor']
    rpn_lr_per_sample_scaled = [x * rpn_lr_factor for x in cfg["CNTK"].RPN_LR_PER_SAMPLE]
    frcn_lr_factor = globalvars['frcn_lr_factor']
    frcn_lr_per_sample_scaled = [x * frcn_lr_factor for x in cfg["CNTK"].FRCN_LR_PER_SAMPLE]

    l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
    mm_schedule = momentum_schedule(globalvars['momentum_per_mb'])
    rpn_epochs = globalvars['rpn_epochs']
    frcn_epochs = globalvars['frcn_epochs']

    print("Using base model: {}".format(cfg["CNTK"].BASE_MODEL))
    print("rpn_lr_per_sample: {}".format(rpn_lr_per_sample_scaled))
    print("frcn_lr_per_sample: {}".format(frcn_lr_per_sample_scaled))
    if debug_output:
        print("Storing graphs and models to %s." % globalvars['output_path'])

    # Input variables denoting features, labeled ground truth rois (as 5-tuples per roi) and image dimensions
    image_input = input_variable((num_channels, image_height, image_width), dynamic_axes=[Axis.default_batch_axis()],
                                 name=feature_node_name)
    feat_norm = image_input - normalization_const
    roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
    scaled_gt_boxes = alias(roi_input, name='roi_input')
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    dims_node = alias(dims_input, name='dims_input')
    rpn_rois_input = input_variable((cfg["TRAIN"].RPN_POST_NMS_TOP_N, 4), dynamic_axes=[Axis.default_batch_axis()])
    rpn_rois_buf = alias(rpn_rois_input, name='rpn_rois')

    # base image classification model (e.g. VGG16 or AlexNet)
    base_model = load_model(base_model_file_name)

    print("stage 1a - rpn")
    if True:
        # Create initial network, only rpn, without detection network
        #           initial weights     train?
        # conv:     base_model          only conv3_1 and up
        # rpn:      init new            yes
        # frcn:     -                   -

        # conv layers
        conv_layers = clone_conv_layers(base_model)
        conv_out = conv_layers(feat_norm)

        # RPN and losses
        rpn_rois, rpn_losses = create_rpn(conv_out, scaled_gt_boxes, dims_node, proposal_layer_param_string=cfg["CNTK"].PROPOSAL_LAYER_PARAMS)
        stage1_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output: plot(stage1_rpn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage1a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=rpn_epochs)

    print("stage 1a - buffering rpn proposals")
    buffered_proposals_s1 = compute_rpn_proposals(stage1_rpn_network, image_input, roi_input, dims_input)

    print("stage 1b - frcn")
    if True:
        # Create full network, initialize conv layers with imagenet, fix rpn weights
        #           initial weights     train?
        # conv:     base_model          only conv3_1 and up
        # rpn:      stage1a rpn model   no --> use buffered proposals
        # frcn:     base_model + new    yes

        # conv_layers
        conv_layers = clone_conv_layers(base_model)
        conv_out = conv_layers(feat_norm)

        # use buffered proposals in target layer
        rois, label_targets, bbox_targets, bbox_inside_weights = \
            create_proposal_target_layer(rpn_rois_buf, scaled_gt_boxes, num_classes=globalvars['num_classes'])

        # Fast RCNN and losses
        fc_layers = clone_model(base_model, [pool_node_name], [last_hidden_node_name], CloneMethod.clone)
        cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers)
        detection_losses = create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights)
        pred_error = classification_error(cls_score, label_targets, axis=1, name="pred_error")
        stage1_frcn_network = combine([rois, cls_score, bbox_pred, detection_losses, pred_error])

        # train
        if debug_output: plot(stage1_frcn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage1b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=frcn_epochs,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s1)
        buffered_proposals_s1 = None

    print("stage 2a - rpn")
    if True:
        # Keep conv weights from detection network and fix them
        #           initial weights     train?
        # conv:     stage1b frcn model  no
        # rpn:      stage1a rpn model   yes
        # frcn:     -                   -

        # conv_layers
        conv_layers = clone_model(stage1_frcn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # RPN and losses
        rpn = clone_model(stage1_rpn_network, [last_conv_node_name, "roi_input", "dims_input"], ["rpn_rois", "rpn_losses"], CloneMethod.clone)
        rpn_net = rpn(conv_out, dims_node, scaled_gt_boxes)
        rpn_rois = rpn_net.outputs[0]
        rpn_losses = rpn_net.outputs[1]
        stage2_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output: plot(stage2_rpn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage2a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=rpn_epochs)

    print("stage 2a - buffering rpn proposals")
    buffered_proposals_s2 = compute_rpn_proposals(stage2_rpn_network, image_input, roi_input, dims_input)

    print("stage 2b - frcn")
    if True:
        # Keep conv and rpn weights from step 3 and fix them
        #           initial weights     train?
        # conv:     stage2a rpn model   no
        # rpn:      stage2a rpn model   no --> use buffered proposals
        # frcn:     stage1b frcn model  yes

        # conv_layers
        conv_layers = clone_model(stage2_rpn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # Fast RCNN and losses
        frcn = clone_model(stage1_frcn_network, [last_conv_node_name, "rpn_rois", "roi_input"],
                           ["cls_score", "bbox_regr", "rpn_target_rois", "detection_losses", "pred_error"], CloneMethod.clone)
        stage2_frcn_network = frcn(conv_out, rpn_rois_buf, scaled_gt_boxes)
        detection_losses = stage2_frcn_network.outputs[3]
        pred_error = stage2_frcn_network.outputs[4]

        # train
        if debug_output: plot(stage2_frcn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage2b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=frcn_epochs,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s2)
        buffered_proposals_s2 = None

    return create_eval_model(stage2_frcn_network, image_input, dims_input, rpn_model=stage2_rpn_network)

def eval_faster_rcnn_mAP(eval_model):
    img_map_file = globalvars['test_map_file']
    roi_map_file = globalvars['test_roi_file']
    classes = globalvars['classes']
    image_input = input_variable((num_channels, image_height, image_width), dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name)
    roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    frcn_eval = eval_model(image_input, dims_input)

    # Create the minibatch source
    minibatch_source = ObjectDetectionMinibatchSource(
        img_map_file, roi_map_file,
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width, pad_height=image_height, pad_value=img_pad_value,
        randomize=False, use_flipping=False,
        max_images=cfg["CNTK"].NUM_TEST_IMAGES)

    # define mapping from reader streams to network inputs
    input_map = {
        minibatch_source.image_si: image_input,
        minibatch_source.roi_si: roi_input,
        minibatch_source.dims_si: dims_input
    }

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_test_images)] for _ in range(globalvars['num_classes'])]

    # evaluate test images and write network output to file
    print("Evaluating Faster R-CNN model for %s images." % num_test_images)
    all_gt_infos = {key: [] for key in classes}
    for img_i in range(0, num_test_images):
        mb_data = minibatch_source.next_minibatch(1, input_map=input_map)

        gt_row = mb_data[roi_input].asarray()
        gt_row = gt_row.reshape((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5))
        all_gt_boxes = gt_row[np.where(gt_row[:,-1] > 0)]

        for cls_index, cls_name in enumerate(classes):
            if cls_index == 0: continue
            cls_gt_boxes = all_gt_boxes[np.where(all_gt_boxes[:,-1] == cls_index)]
            all_gt_infos[cls_name].append({'bbox': np.array(cls_gt_boxes),
                                           'difficult': [False] * len(cls_gt_boxes),
                                           'det': [False] * len(cls_gt_boxes)})

        output = frcn_eval.eval({image_input: mb_data[image_input], dims_input: mb_data[dims_input]})
        out_dict = dict([(k.name, k) for k in output])
        out_cls_pred = output[out_dict['cls_pred']][0]
        out_rpn_rois = output[out_dict['rpn_rois']][0]
        out_bbox_regr = output[out_dict['bbox_regr']][0]

        labels = out_cls_pred.argmax(axis=1)
        scores = out_cls_pred.max(axis=1)
        regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, mb_data[dims_input].asarray())

        labels.shape = labels.shape + (1,)
        scores.shape = scores.shape + (1,)
        coords_score_label = np.hstack((regressed_rois, scores, labels))

        # shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
        for cls_j in range(1, globalvars['num_classes']):
            coords_score_label_for_cls = coords_score_label[np.where(coords_score_label[:,-1] == cls_j)]
            all_boxes[cls_j][img_i] = coords_score_label_for_cls[:,:-1].astype(np.float32, copy=False)

        if (img_i+1) % 100 == 0:
            print("Processed {} samples".format(img_i+1))

    # calculate mAP
    aps = evaluate_detections(all_boxes, all_gt_infos, classes,
                              nms_threshold=cfg["CNTK"].RESULTS_NMS_THRESHOLD,
                              conf_threshold = cfg["CNTK"].RESULTS_NMS_CONF_THRESHOLD)
    ap_list = []
    for class_name in aps:
        ap_list += [aps[class_name]]
        print('AP for {:>15} = {:.4f}'.format(class_name, aps[class_name]))
    meanAP = np.nanmean(ap_list)
    print('Mean AP = {:.4f}'.format(meanAP))
    return meanAP

# The main method trains and evaluates a Faster R-CNN model.
# If a trained model is already available it is loaded and no training will be performed (if MAKE_MODE=True).
if __name__ == '__main__':
    running_locally = os.path.exists(map_file_path)
    if running_locally:
        os.chdir(map_file_path)
        if not os.path.exists(os.path.join(abs_path, "Output")):
            os.makedirs(os.path.join(abs_path, "Output"))
        if not os.path.exists(os.path.join(abs_path, "Output", cfg["CNTK"].DATASET)):
            os.makedirs(os.path.join(abs_path, "Output", cfg["CNTK"].DATASET))
    else:
        # disable debug and plot outputs when running on GPU cluster
        cfg["CNTK"].DEBUG_OUTPUT = False
        cfg["CNTK"].VISUALIZE_RESULTS = False

    set_global_vars()
    model_path = os.path.join(globalvars['output_path'], "faster_rcnn_eval_{}_{}.model"
                              .format(cfg["CNTK"].BASE_MODEL, "e2e" if globalvars['train_e2e'] else "4stage"))

    # Train only if no model exists yet
    if os.path.exists(model_path) and cfg["CNTK"].MAKE_MODE:
        print("Loading existing model from %s" % model_path)
        eval_model = load_model(model_path)
    else:
        if globalvars['train_e2e']:
            eval_model = train_faster_rcnn_e2e(base_model_file, debug_output=cfg["CNTK"].DEBUG_OUTPUT)
        else:
            eval_model = train_faster_rcnn_alternating(base_model_file, debug_output=cfg["CNTK"].DEBUG_OUTPUT)

        eval_model.save(model_path)
        if cfg["CNTK"].DEBUG_OUTPUT:
            plot(eval_model, os.path.join(globalvars['output_path'], "graph_frcn_eval_{}_{}.{}"
                                          .format(cfg["CNTK"].BASE_MODEL, "e2e" if globalvars['train_e2e'] else "4stage", cfg["CNTK"].GRAPH_TYPE)))

        print("Stored eval model at %s" % model_path)

    # Compute mean average precision on test set
    eval_faster_rcnn_mAP(eval_model)

    # Plot results on test set
    if cfg["CNTK"].VISUALIZE_RESULTS:
        from plot_helpers import eval_and_plot_faster_rcnn
        num_eval = min(num_test_images, 100)
        img_shape = (num_channels, image_height, image_width)
        results_folder = os.path.join(globalvars['output_path'], cfg["CNTK"].DATASET)
        eval_and_plot_faster_rcnn(eval_model, num_eval, globalvars['test_map_file'], img_shape,
                                  results_folder, feature_node_name, globalvars['classes'],
                                  drawUnregressedRois=cfg["CNTK"].DRAW_UNREGRESSED_ROIS,
                                  drawNegativeRois=cfg["CNTK"].DRAW_NEGATIVE_ROIS,
                                  nmsThreshold=cfg["CNTK"].RESULTS_NMS_THRESHOLD,
                                  nmsConfThreshold=cfg["CNTK"].RESULTS_NMS_CONF_THRESHOLD,
                                  bgrPlotThreshold=cfg["CNTK"].RESULTS_BGR_PLOT_THRESHOLD)

@ -23,13 +23,15 @@ __C.CNTK = edict()
__C.CNTK.MAKE_MODE = False
# E2E or 4-stage training
__C.CNTK.TRAIN_E2E = True
# set to 'True' to use determininistic algorithms
# set to 'True' to use deterministic algorithms
__C.CNTK.FORCE_DETERMINISTIC = False
# set to 'True' to run only a single epoch
__C.CNTK.FAST_MODE = True
__C.CNTK.FAST_MODE = False
# Debug parameters
__C.CNTK.DEBUG_OUTPUT = False
__C.CNTK.GRAPH_TYPE = "png" # "png" or "pdf"
# Set to True if you want to store an eval model with native UDFs (e.g. for inference using C++ or C#)
__C.STORE_EVAL_MODEL_WITH_NATIVE_UDF = False

# Learning parameters
__C.CNTK.L2_REG_WEIGHT = 0.0005

@ -62,7 +64,7 @@ __C.RESULTS_NMS_THRESHOLD = 0.5
__C.RESULTS_NMS_CONF_THRESHOLD = 0.0

# Enable plotting of results generally / also plot background boxes / also plot unregressed boxes
__C.VISUALIZE_RESULTS = True
__C.VISUALIZE_RESULTS = False
__C.DRAW_NEGATIVE_ROIS = False
__C.DRAW_UNREGRESSED_ROIS = False
# only for plotting results: boxes with a score lower than this threshold will be considered background

@ -6,64 +6,12 @@

import os
import numpy as np
from matplotlib.pyplot import imsave
import cv2
import cntk
from cntk import input_variable, Axis
from utils.nms_wrapper import apply_nms_to_single_image_results
from utils.map_helpers import evaluate_detections
from utils.plot_helpers import load_resize_and_pad, resize_and_pad, visualize_detections
from utils.plot_helpers import load_resize_and_pad
from utils.rpn.bbox_transform import regress_rois
from utils.od_mb_source import ObjectDetectionMinibatchSource
from utils.proposal_helpers import ProposalProvider, compute_proposals, compute_image_stats

class FastRCNN_Evaluator:
    def __init__(self, eval_model, cfg):
        # load model once in constructor and push images through the model in 'process_image()'
        self._img_shape = (cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH)
        image_input = input_variable(shape=self._img_shape,
                                     dynamic_axes=[Axis.default_batch_axis()],
                                     name=cfg["MODEL"].FEATURE_NODE_NAME)
        roi_proposals = input_variable((cfg.NUM_ROI_PROPOSALS, 4), dynamic_axes=[Axis.default_batch_axis()],
                                       name="roi_proposals")
        self._eval_model = eval_model(image_input, roi_proposals)
        self._min_w = cfg['PROPOSALS_MIN_W']
        self._min_h = cfg['PROPOSALS_MIN_H']
        self._num_proposals = cfg['NUM_ROI_PROPOSALS']

    def process_image(self, img_path):
        out_cls_pred, out_rpn_rois, out_bbox_regr, dims = self.process_image_detailed(img_path)
        labels = out_cls_pred.argmax(axis=1)
        regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims)

        return regressed_rois, out_cls_pred

    def process_image_detailed(self, img_path):
        img = cv2.imread(img_path)
        _, cntk_img_input, dims = resize_and_pad(img, self._img_shape[2], self._img_shape[1])

        #import pdb; pdb.set_trace()

        # compute ROI proposals and apply scaling and padding to them
        # [target_w, target_h, img_width, img_height, top, bottom, left, right, scale_factor]
        img_stats = compute_image_stats(len(img[0]), len(img), self._img_shape[2], self._img_shape[1])
        scale_factor = img_stats[-1]
        top = img_stats[4]
        left = img_stats[6]

        proposals = compute_proposals(img, self._num_proposals, self._min_w, self._min_h)
        proposals = proposals * scale_factor
        proposals += (left, top, left, top)

        output = self._eval_model.eval({self._eval_model.arguments[0]: [cntk_img_input],
                                        self._eval_model.arguments[1]: np.array(proposals, dtype=np.float32)})

        out_dict = dict([(k.name, k) for k in output])
        out_cls_pred = output[out_dict['cls_pred']][0]
        out_rpn_rois = proposals
        out_bbox_regr = output[out_dict['bbox_regr']][0]

        return out_cls_pred, out_rpn_rois, out_bbox_regr, dims

class FasterRCNN_Evaluator:
    def __init__(self, eval_model, cfg):

@ -178,133 +126,3 @@ def compute_test_set_aps(eval_model, cfg):
                              conf_threshold = cfg.RESULTS_NMS_CONF_THRESHOLD)

    return aps

def plot_test_set_results(evaluator, num_images_to_plot, results_base_path, cfg):
    # get image paths
    with open(cfg["DATA"].TEST_MAP_FILE) as f:
        content = f.readlines()
    img_base_path = os.path.dirname(os.path.abspath(cfg["DATA"].TEST_MAP_FILE))
    img_file_names = [os.path.join(img_base_path, x.split('\t')[1]) for x in content]
    img_shape = (cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH)

    print("Plotting results from Faster R-CNN model for %s images." % num_images_to_plot)
    for i in range(0, num_images_to_plot):
        img_path = img_file_names[i]
        out_cls_pred, out_rpn_rois, out_bbox_regr, dims = evaluator.process_image_detailed(img_path)
        labels = out_cls_pred.argmax(axis=1)
        scores = out_cls_pred.max(axis=1)

        if cfg.DRAW_UNREGRESSED_ROIS:
            # plot results without final regression
            imgDebug = visualize_detections(img_path, out_rpn_rois, labels, scores,
                                            img_shape[2], img_shape[1],
                                            classes=cfg["DATA"].CLASSES,
                                            draw_negative_rois=cfg.DRAW_NEGATIVE_ROIS,
                                            decision_threshold=cfg.RESULTS_BGR_PLOT_THRESHOLD)
            imsave("{}/{}_{}".format(results_base_path, i, os.path.basename(img_path)), imgDebug)

        # apply regression and nms to bbox coordinates
        regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims)
        nmsKeepIndices = apply_nms_to_single_image_results(regressed_rois, labels, scores,
                                                           use_gpu_nms=cfg.USE_GPU_NMS,
                                                           device_id=cfg.GPU_ID,
                                                           nms_threshold=cfg.RESULTS_NMS_THRESHOLD,
                                                           conf_threshold=cfg.RESULTS_NMS_CONF_THRESHOLD)

        filtered_bboxes = regressed_rois[nmsKeepIndices]
        filtered_labels = labels[nmsKeepIndices]
        filtered_scores = scores[nmsKeepIndices]

        img = visualize_detections(img_path, filtered_bboxes, filtered_labels, filtered_scores,
                                   img_shape[2], img_shape[1],
                                   classes=cfg["DATA"].CLASSES,
                                   draw_negative_rois=cfg.DRAW_NEGATIVE_ROIS,
                                   decision_threshold=cfg.RESULTS_BGR_PLOT_THRESHOLD)
        imsave("{}/{}_regr_{}".format(results_base_path, i, os.path.basename(img_path)), img)

def compute_test_set_aps_fast_rcnn(eval_model, cfg):
    num_test_images = cfg["DATA"].NUM_TEST_IMAGES
    classes = cfg["DATA"].CLASSES
    image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH),
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=cfg["MODEL"].FEATURE_NODE_NAME)
    roi_input = input_variable((cfg.INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
    roi_proposals = input_variable((cfg.NUM_ROI_PROPOSALS, 4), dynamic_axes=[Axis.default_batch_axis()], name="roi_proposals")
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    frcn_eval = eval_model(image_input, roi_proposals)

    # Create the minibatch source
    proposal_provider = ProposalProvider.fromconfig(cfg)
    minibatch_source = ObjectDetectionMinibatchSource(
        cfg["DATA"].TEST_MAP_FILE,
        cfg["DATA"].TEST_ROI_FILE,
        max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE,
        pad_width=cfg.IMAGE_WIDTH,
        pad_height=cfg.IMAGE_HEIGHT,
        pad_value=cfg["MODEL"].IMG_PAD_COLOR,
        randomize=False, use_flipping=False,
        max_images=cfg["DATA"].NUM_TEST_IMAGES,
        num_classes=cfg["DATA"].NUM_CLASSES,
        proposal_provider=proposal_provider,
        provide_targets=False)

    # define mapping from reader streams to network inputs
    input_map = {
        minibatch_source.image_si: image_input,
        minibatch_source.roi_si: roi_input,
        minibatch_source.proposals_si: roi_proposals,
        minibatch_source.dims_si: dims_input
    }

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_test_images)] for _ in range(cfg["DATA"].NUM_CLASSES)]

    # evaluate test images and write network output to file
    print("Evaluating Fast R-CNN model for %s images." % num_test_images)
    all_gt_infos = {key: [] for key in classes}
    for img_i in range(0, num_test_images):
        mb_data = minibatch_source.next_minibatch(1, input_map=input_map)

        gt_row = mb_data[roi_input].asarray()
        gt_row = gt_row.reshape((cfg.INPUT_ROIS_PER_IMAGE, 5))
        all_gt_boxes = gt_row[np.where(gt_row[:,-1] > 0)]

        for cls_index, cls_name in enumerate(classes):
            if cls_index == 0: continue
            cls_gt_boxes = all_gt_boxes[np.where(all_gt_boxes[:,-1] == cls_index)]
            all_gt_infos[cls_name].append({'bbox': np.array(cls_gt_boxes),
                                           'difficult': [False] * len(cls_gt_boxes),
                                           'det': [False] * len(cls_gt_boxes)})

        output = frcn_eval.eval({image_input: mb_data[image_input], roi_proposals: mb_data[roi_proposals]})
        out_dict = dict([(k.name, k) for k in output])
        out_cls_pred = output[out_dict['cls_pred']][0]
        out_rpn_rois = mb_data[roi_proposals].data.asarray()
        out_bbox_regr = output[out_dict['bbox_regr']][0]

        labels = out_cls_pred.argmax(axis=1)
        scores = out_cls_pred.max(axis=1)
        regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, mb_data[dims_input].asarray())

        labels.shape = labels.shape + (1,)
        scores.shape = scores.shape + (1,)
        coords_score_label = np.hstack((regressed_rois, scores, labels))

        # shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
        for cls_j in range(1, cfg["DATA"].NUM_CLASSES):
            coords_score_label_for_cls = coords_score_label[np.where(coords_score_label[:,-1] == cls_j)]
            all_boxes[cls_j][img_i] = coords_score_label_for_cls[:,:-1].astype(np.float32, copy=False)

        if (img_i+1) % 100 == 0:
            print("Processed {} samples".format(img_i+1))

    # calculate mAP
    aps = evaluate_detections(all_boxes, all_gt_infos, classes,
                              use_gpu_nms = cfg.USE_GPU_NMS,
                              device_id = cfg.GPU_ID,
                              nms_threshold=cfg.RESULTS_NMS_THRESHOLD,
                              conf_threshold = cfg.RESULTS_NMS_CONF_THRESHOLD)

    return aps

@ -25,7 +25,7 @@ from _cntk_py import force_deterministic_algorithms

abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path, ".."))
from utils.rpn.rpn_helpers import create_rpn, create_proposal_target_layer, add_proposal_layer
from utils.rpn.rpn_helpers import create_rpn, create_proposal_target_layer, create_proposal_layer
from utils.annotations.annotations_helper import parse_class_map_file
from utils.od_mb_source import ObjectDetectionMinibatchSource
from utils.proposal_helpers import ProposalProvider

@ -64,13 +64,11 @@ def prepare(cfg, use_arg_parser=True):

    cfg['MODEL_PATH'] = os.path.join(cfg.OUTPUT_PATH, "faster_rcnn_eval_{}_{}.model"
                                     .format(cfg["MODEL"].BASE_MODEL, "e2e" if cfg["CNTK"].TRAIN_E2E else "4stage"))
    cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "PretrainedModels",
    cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "..", "PretrainedModels",
                                          cfg["MODEL"].BASE_MODEL_FILE)

    cfg["DATA"].CLASSES = parse_class_map_file(cfg["DATA"].CLASS_MAP_FILE)
    cfg["DATA"].NUM_CLASSES = len(cfg["DATA"].CLASSES)
    cfg.PROPOSAL_LAYER_PARAMS = "'feat_stride': {}\n'scales':\n - {}".\
        format(cfg["MODEL"].FEATURE_STRIDE, "\n - ".join([str(v) for v in cfg["DATA"].PROPOSAL_LAYER_SCALES]))
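    # Editor's note: with the default AlexNet/Grocery settings (FEATURE_STRIDE = 16,
    # PROPOSAL_LAYER_SCALES = [4, 8, 12]) the string above renders as the YAML
    #   'feat_stride': 16
    #   'scales':
    #    - 4
    #    - 8
    #    - 12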

    if cfg["CNTK"].FAST_MODE:
        cfg["CNTK"].E2E_MAX_EPOCHS = 1

@ -207,7 +205,7 @@ def create_faster_rcnn_eval_model(model, image_input, dims_input, cfg, rpn_model
    rpn = clone_model(model_with_rpn, [last_conv_node_name], ["rpn_cls_prob_reshape", "rpn_bbox_pred"], CloneMethod.freeze)
    rpn_out = rpn(conv_out)
    # we need to add the proposal layer anew to account for changing configs when buffering proposals in 4-stage training
    rpn_rois = add_proposal_layer(rpn_out.outputs[0], rpn_out.outputs[1], dims_input, cfg)
    rpn_rois = create_proposal_layer(rpn_out.outputs[0], rpn_out.outputs[1], dims_input, cfg)

    roi_fc_layers = clone_model(model, [last_conv_node_name, "rpn_target_rois"], ["cls_score", "bbox_regr"], CloneMethod.freeze)
    pred_net = roi_fc_layers(conv_out, rpn_rois)

@ -225,6 +223,27 @@ def create_faster_rcnn_eval_model(model, image_input, dims_input, cfg, rpn_model

    return eval_model

def store_eval_model_with_native_udf(eval_model, cfg):
    import copy
    sys.path.append(os.path.join(abs_path, "..", "..", "Extensibility", "ProposalLayer"))
    cntk.ops.register_native_user_function('ProposalLayerOp',
                                           'Cntk.ProposalLayerLib-' + cntk.__version__.rstrip('+'),
                                           'CreateProposalLayer')

    def filter(x):
        return type(x) == cntk.Function and x.op_name == 'UserFunction' and x.name == 'ProposalLayer'

    def converter(x):
        layer_config = copy.deepcopy(x.attributes)
        return cntk.ops.native_user_function('ProposalLayerOp', list(x.inputs), layer_config, 'native_proposal_layer')

    model_w_native_udf = cntk.misc.convert(eval_model, filter, converter)
    model_path = cfg['MODEL_PATH']
    new_model_path = model_path[:-6] + '_native.model'
    model_w_native_udf.save(new_model_path)
    print("Stored eval model with native UDF to {}".format(new_model_path))

def compute_rpn_proposals(rpn_model, image_input, roi_input, dims_input, cfg):
    num_images = cfg["DATA"].NUM_TRAIN_IMAGES
    # Create the minibatch source

@ -5,7 +5,7 @@
This folder contains an end-to-end solution for using Faster R-CNN to perform object detection.
The original research paper for Faster R-CNN can be found at [https://arxiv.org/abs/1506.01497](https://arxiv.org/abs/1506.01497).
Base models that are supported by the current configuration are AlexNet and VGG16.
Two image set that are preconfigured are Pascal VOC 2007 and Grocery.
Two image sets that are preconfigured are Pascal VOC 2007 and Grocery.
Other base models or image sets can be used by adding a configuration file similar to the examples in `utils/configs` and importing it in `run_faster_rcnn.py`.

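A minimal sketch of such a custom setup (the `MyDataSet_config` module is a hypothetical name for your new file; the other imports exist in this repo):

```
from utils.config_helpers import merge_configs
from FasterRCNN_config import cfg as detector_cfg
from utils.configs.AlexNet_config import cfg as network_cfg
from utils.configs.MyDataSet_config import cfg as dataset_cfg  # hypothetical custom config

cfg = merge_configs([detector_cfg, network_cfg, dataset_cfg])
```
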
## Running the example

@ -15,7 +15,7 @@ Other base models or image sets can be used by adding a configuration file simil
To run Faster R-CNN you need a CNTK Python environment. Install the following additional packages:

```
pip install opencv-python easydict pyyaml future
pip install opencv-python easydict pyyaml
```

The code uses prebuilt Cython modules for parts of the region proposal network (see `Examples/Image/Detection/utils/cython_modules`).

@ -112,7 +112,7 @@ and run `python run_faster_rcnn.py` to train and evaluate Faster R-CNN on your d

### Parameters

All options and parameters are in `config.py` in the `FasterRCNN` folder and all of them are explained there. These include
All options and parameters are in `FasterRCNN_config.py` in the `FasterRCNN` folder and all of them are explained there. These include

```
# E2E or 4-stage training
...
```

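Since the merged configuration behaves like a dictionary, individual values can also be overridden in code after merging. A hedged example (`TRAIN_E2E` and `FORCE_DETERMINISTIC` are parameters defined in `FasterRCNN_config.py`):

```
cfg = get_configuration()
cfg["CNTK"].TRAIN_E2E = False            # switch to 4-stage alternating training
cfg["CNTK"].FORCE_DETERMINISTIC = True   # trade speed for reproducible results
```
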
@ -134,4 +134,4 @@ Most of the code is in `FasterRCNN_train.py` and `FasterRCNN_eval.py` (and `Exam

### Algorithm

All details regarding the Faster R-CNN algortihm can be found in the original research paper: [https://arxiv.org/abs/1506.01497](https://arxiv.org/abs/1506.01497).
All details regarding the Faster R-CNN algorithm can be found in the original research paper: [https://arxiv.org/abs/1506.01497](https://arxiv.org/abs/1506.01497).

@ -6,14 +6,15 @@

import os
import numpy as np
from FasterRCNN_train import prepare, train_faster_rcnn
import cntk
from FasterRCNN_train import prepare, train_faster_rcnn, store_eval_model_with_native_udf
from FasterRCNN_eval import compute_test_set_aps, FasterRCNN_Evaluator
from utils.config_helpers import merge_configs
from utils.plot_helpers import plot_test_set_results

def get_configuration():
    # load configs for detector, base network and data set
    from config import cfg as detector_cfg
    from FasterRCNN_config import cfg as detector_cfg
    # for VGG16 base model use:   from utils.configs.VGG16_config import cfg as network_cfg
    # for AlexNet base model use: from utils.configs.AlexNet_config import cfg as network_cfg
    from utils.configs.AlexNet_config import cfg as network_cfg

@ -42,3 +43,8 @@ if __name__ == '__main__':
        results_folder = os.path.join(cfg.OUTPUT_PATH, cfg["DATA"].DATASET)
        evaluator = FasterRCNN_Evaluator(trained_model, cfg)
        plot_test_set_results(evaluator, num_eval, results_folder, cfg)

    if cfg.STORE_EVAL_MODEL_WITH_NATIVE_UDF:
        store_eval_model_with_native_udf(trained_model, cfg)

@ -13,7 +13,7 @@ This folder contains an end-to-end demo to try different object detectors, base

### Setup

To run Fast R-CNN you need a CNTK Python environment. Install the following additional packages:
To run the object detection demo you need a CNTK Python environment. Install the following additional packages:

```
pip install opencv-python easydict pyyaml future
```

@ -19,10 +19,6 @@ python setup.py build_ext --inplace
```
Copy the compiled `.pyd` (Windows) or `.so` (Linux) files into the `cython_modules` subfolder of this utils folder.

##### `default_config`

Contains all required parameters for using a region proposal network in training or evaluation. You can overwrite these parameters by specifying a `config.py` file of the same format inside your working directory.

### `rpn` module overview

The rpn module contains helper methods and required layers to generate region proposal networks for object detection.

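For orientation, a sketched call site (assuming `conv_out`, `scaled_gt_boxes` and `im_info` are existing CNTK nodes and `cfg` is a merged configuration as used in the detector scripts):

```
from utils.rpn.rpn_helpers import create_rpn, create_proposal_target_layer

rpn_rois, rpn_losses = create_rpn(conv_out, scaled_gt_boxes, im_info, cfg)
rois, label_targets, bbox_targets, bbox_inside_weights = \
    create_proposal_target_layer(rpn_rois, scaled_gt_boxes, cfg)
```
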
@ -48,7 +44,3 @@ Bbox regression targets are specified when the classification label is > 0.
Generates training targets/labels for each object proposal: classification labels 0 - K (bg or object class 1, ... , K)
and bbox regression targets in case the label is > 0.

##### `generate.py`

Generate object detection proposals from an imdb using an RPN.

@ -13,13 +13,13 @@ cfg = __C

# model config
__C.MODEL.BASE_MODEL = "AlexNet"
__C.MODEL.BASE_MODEL_FILE = "AlexNet.model"
__C.MODEL.BASE_MODEL_FILE = "AlexNet_ImageNet_Caffe.model"
__C.MODEL.IMG_PAD_COLOR = [114, 114, 114]
__C.MODEL.FEATURE_NODE_NAME = "features"
__C.MODEL.LAST_CONV_NODE_NAME = "conv5.y"
__C.MODEL.FEATURE_NODE_NAME = "data"
__C.MODEL.LAST_CONV_NODE_NAME = "relu5"
__C.MODEL.START_TRAIN_CONV_NODE_NAME = __C.MODEL.FEATURE_NODE_NAME
__C.MODEL.POOL_NODE_NAME = "pool3"
__C.MODEL.LAST_HIDDEN_NODE_NAME = "h2_d"
__C.MODEL.POOL_NODE_NAME = "pool5"
__C.MODEL.LAST_HIDDEN_NODE_NAME = "drop7"
__C.MODEL.FEATURE_STRIDE = 16
__C.MODEL.RPN_NUM_CHANNELS = 256
__C.MODEL.ROI_DIM = 6

@ -22,3 +22,18 @@ __C.DATA.TEST_ROI_FILE = "test_roi_file.txt"
__C.DATA.NUM_TRAIN_IMAGES = 20
__C.DATA.NUM_TEST_IMAGES = 5
__C.DATA.PROPOSAL_LAYER_SCALES = [4, 8, 12]

# overwriting proposal parameters for Fast R-CNN
# minimum relative width/height of an ROI
__C.roi_min_side_rel = 0.04
# maximum relative width/height of an ROI
__C.roi_max_side_rel = 0.4
# minimum relative area of an ROI
__C.roi_min_area_rel = 2 * __C.roi_min_side_rel * __C.roi_min_side_rel
# maximum relative area of an ROI
__C.roi_max_area_rel = 0.33 * __C.roi_max_side_rel * __C.roi_max_side_rel
# maximum aspect ratio of an ROI vertically and horizontally
__C.roi_max_aspect_ratio = 4.0

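# Editor's note, a quick worked example (assuming the default selective search
# image size of 200 pixels from utils/proposal_helpers.py): the relative values
# above translate to absolute filters of
#   roi_min_side = 0.04 * 200 = 8 pixels
#   roi_max_side = 0.4  * 200 = 80 pixels
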
# For this data set use the following lr factor for Fast R-CNN:
# __C.CNTK.LR_FACTOR = 10.0

@ -22,3 +22,6 @@ __C.DATA.TEST_ROI_FILE = "test2007_rois_abs-xyxy_noPad_skipDif.txt"
__C.DATA.NUM_TRAIN_IMAGES = 5010
__C.DATA.NUM_TEST_IMAGES = 4952
__C.DATA.PROPOSAL_LAYER_SCALES = [8, 16, 32]

__C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE = "trainval2007_proposals.txt"
__C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE = "test2007_proposals.txt"

@ -222,7 +222,7 @@ class ObjectDetectionReader:
        if self._flip_image:
            resized_with_pad = cv2.flip(resized_with_pad, 1)

        # transpose(2,0,1) converts the image to the HWC format which CNTK accepts
        # transpose(2,0,1) converts the image from HWC to the CHW format which CNTK expects
        model_arg_rep = np.ascontiguousarray(np.array(resized_with_pad, dtype=np.float32).transpose(2, 0, 1))

        # dims = pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height

@ -12,7 +12,6 @@ from builtins import range
import copy, textwrap
from PIL import Image, ImageFont, ImageDraw
from PIL.ExifTags import TAGS
from matplotlib.pyplot import imsave
import cntk
from cntk import input_variable, Axis
from utils.nms_wrapper import apply_nms_to_single_image_results

@ -121,7 +120,10 @@ def visualize_detections(img_path, roi_coords, roi_labels, roi_scores,
                thickness = 4
            drawRectangles(result_img, [rect], color=color, thickness=thickness)
        elif iter == 2 and label > 0:
            font = ImageFont.truetype(available_font, 18)
            try:
                font = ImageFont.truetype(available_font, 18)
            except:
                font = ImageFont.load_default()
            text = classes[label]
            if roi_scores is not None:
                text += "(" + str(round(score, 2)) + ")"

@ -129,6 +131,8 @@ def visualize_detections(img_path, roi_coords, roi_labels, roi_scores,
    return result_img

def plot_test_set_results(evaluator, num_images_to_plot, results_base_path, cfg):
    from matplotlib.pyplot import imsave

    # get image paths
    with open(cfg["DATA"].TEST_MAP_FILE) as f:
        content = f.readlines()

@ -284,12 +288,12 @@ def ptClip(pt, maxWidth, maxHeight):
    pt[1] = min(pt[1], maxHeight)
    return pt

def drawText(img, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = ImageFont.truetype("arial.ttf", 16)):
def drawText(img, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = None):
    pilImg = imconvertCv2Pil(img)
    pilImg = pilDrawText(pilImg, pt, text, textWidth, color, colorBackground, font)
    return imconvertPil2Cv(pilImg)

def pilDrawText(pilImg, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = ImageFont.truetype("arial.ttf", 16)):
def pilDrawText(pilImg, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = None):
    textY = pt[1]
    draw = ImageDraw.Draw(pilImg)
    if textWidth == None:

@ -1,9 +1,29 @@
# Copyright (c) Microsoft. All rights reserved.

# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================

import os, sys
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path, ".."))

import numpy as np
from dlib import find_candidate_object_locations
import cv2
from utils.rpn.bbox_transform import bbox_transform
from utils.cython_modules.cython_bbox import bbox_overlaps

random_seed = 23
global ss_lib_loaded, find_candidate_object_locations
ss_lib_loaded = False

def load_selective_search_lib():
    global find_candidate_object_locations
    from dlib import find_candidate_object_locations as algo
    find_candidate_object_locations = algo

    global ss_lib_loaded
    ss_lib_loaded = True
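# Editor's note: importing dlib lazily keeps this module usable when dlib is not
# installed, as long as selective search is never invoked; compute_proposals()
# below checks the ss_lib_loaded flag and calls load_selective_search_lib() on
# first use.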

def compute_image_stats(img_width, img_height, pad_width, pad_height):
    do_scale_w = img_width > img_height

@ -23,63 +43,117 @@ def compute_image_stats(img_width, img_height, pad_width, pad_height):
    right = pad_width - left - target_w
    return [target_w, target_h, img_width, img_height, top, bottom, left, right, scale_factor]


def compute_proposals(img, num_proposals, min_w, min_h):
    all_rects = []
    min_size = min_w * min_h
    find_candidate_object_locations(img, all_rects, min_size=min_size)

    rects = []
    for k, d in enumerate(all_rects):
        w = d.right() - d.left()
        h = d.bottom() - d.top()
        if w < min_w or h < min_h:
def filterRois(rects, img_w, img_h, roi_min_area, roi_max_area, roi_min_side, roi_max_side, roi_max_aspect_ratio):
    filteredRects = []
    filteredRectsSet = set()
    for rect in rects:
        if tuple(rect) in filteredRectsSet: # excluding rectangles with same co-ordinates
            continue
        rects.append([d.left(), d.top(), d.right(), d.bottom()])

    np_rects = np.array(rects)
    num_rects = np_rects.shape[0]
        x, y, x2, y2 = rect
        w = x2 - x
        h = y2 - y
        assert(w>=0 and h>=0)

        # apply filters
        if h == 0 or w == 0 or \
           x2 > img_w or y2 > img_h or \
           w < roi_min_side or h < roi_min_side or \
           w > roi_max_side or h > roi_max_side or \
           w * h < roi_min_area or w * h > roi_max_area or \
           w / h > roi_max_aspect_ratio or h / w > roi_max_aspect_ratio:
            continue
        filteredRects.append(rect)
        filteredRectsSet.add(tuple(rect))

    # could combine rectangles using non-maximum suppression or with similar co-ordinates
    # groupedRectangles, weights = cv2.groupRectangles(np.asanyarray(rectsInput, np.float).tolist(), 1, 0.3)
    # groupedRectangles = nms_python(np.asarray(rectsInput, np.float), 0.5)
    assert(len(filteredRects) > 0)
    return filteredRects

def compute_proposals(img, num_proposals, cfg):
    img_w = len(img[0])
    img_h = len(img)

    if cfg is None: cfg = {}
    roi_ss_kvals = (10, 500, 5) if 'roi_ss_kvals' not in cfg else tuple(cfg['roi_ss_kvals'])
    roi_ss_mm_iterations = 30 if 'roi_ss_mm_iterations' not in cfg else cfg['roi_ss_mm_iterations']
    roi_ss_min_size = 9 if 'roi_ss_min_size' not in cfg else cfg['roi_ss_min_size']
    roi_ss_img_size = 200 if 'roi_ss_img_size' not in cfg else cfg['roi_ss_img_size']
    roi_min_side_rel = 0.04 if 'roi_min_side_rel' not in cfg else cfg['roi_min_side_rel']
    roi_max_side_rel = 0.4 if 'roi_max_side_rel' not in cfg else cfg['roi_max_side_rel']
    roi_min_area_rel = 2 * roi_min_side_rel * roi_min_side_rel if 'roi_min_area_rel' not in cfg else cfg['roi_min_area_rel']
    roi_max_area_rel = 0.33 * roi_max_side_rel * roi_max_side_rel if 'roi_max_area_rel' not in cfg else cfg['roi_max_area_rel']
    roi_max_aspect_ratio = 4.0 if 'roi_max_aspect_ratio' not in cfg else cfg['roi_max_aspect_ratio']
    roi_grid_aspect_ratios = [1.0, 2.0, 0.5] if 'roi_grid_aspect_ratios' not in cfg else cfg['roi_grid_aspect_ratios']
    debug_output = False if not ('CNTK' in cfg and 'DEBUG_OUTPUT' in cfg.CNTK) else cfg.CNTK.DEBUG_OUTPUT

    scale = 1.0 * roi_ss_img_size / max(img.shape[:2])
    img = cv2.resize(img, (0,0), fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)

    roi_min_side = roi_min_side_rel * roi_ss_img_size
    roi_max_side = roi_max_side_rel * roi_ss_img_size
    roi_min_area = roi_min_area_rel * roi_ss_img_size * roi_ss_img_size
    roi_max_area = roi_max_area_rel * roi_ss_img_size * roi_ss_img_size

    if not ss_lib_loaded: load_selective_search_lib()
    rects = []
    tmp = []
    find_candidate_object_locations(img, tmp, kvals=roi_ss_kvals, min_size=roi_ss_min_size, max_merging_iterations=roi_ss_mm_iterations)
    for k, d in enumerate(tmp):
        rects.append([d.left(), d.top(), d.right(), d.bottom()])
    filtered_rects = filterRois(rects, img_w, img_h, roi_min_area, roi_max_area, roi_min_side, roi_max_side, roi_max_aspect_ratio)
    scaled_rects = np.array(filtered_rects) * (1/scale)
    if debug_output:
        print("selective search rois before | after filtering: {} | {}. Requested: {}".format(len(rects), len(filtered_rects), num_proposals))

    num_rects = scaled_rects.shape[0]
    np.random.seed(random_seed)
    if num_rects < num_proposals:
        img_w = len(img[0])
        img_h = len(img)
        grid_proposals = compute_grid_proposals(num_proposals - len(rects), img_w, img_h, min_w, min_h)
        np_rects = np.vstack([np_rects, grid_proposals])
    elif len(rects) > num_proposals:
        try:
            shuffle = not cfg.CNTK.FORCE_DETERMINISTIC
        except:
            shuffle = True

        roi_min_side = roi_min_side_rel * min(img_w, img_h)
        roi_max_side = roi_max_side_rel * max(img_w, img_h)
        grid_proposals = compute_grid_proposals(num_proposals - num_rects, img_w, img_h, roi_min_side, roi_max_side, roi_grid_aspect_ratios, shuffle)
        scaled_rects = np.vstack([scaled_rects, grid_proposals])
    elif num_rects > num_proposals:
        keep_inds = range(num_rects)
        keep_inds = np.random.choice(keep_inds, size=num_proposals, replace=False)
        np_rects = np_rects[keep_inds]
        scaled_rects = scaled_rects[keep_inds]

    return np_rects

def compute_grid_proposals(num_proposals, img_w, img_h, min_w, min_h, max_w=None, max_h=None, aspect_ratios = [1.0], shuffle=True):
    min_wh = max(min_w, min_h)
    max_wh = min(img_h, img_w) / 2
    if max_w is not None: max_wh = min(max_wh, max_w)
    if max_h is not None: max_wh = min(max_wh, max_h)
    return scaled_rects

def compute_grid_proposals(num_proposals, img_w, img_h, min_wh, max_wh, aspect_ratios = [1.0, 2.0, 0.5], shuffle=True):
    rects = []
    iter = 0
    while len(rects) < num_proposals:
        new_ar = []
        for ar in aspect_ratios:
            new_ar.append(ar * (0.9 ** iter))
            new_ar.append(ar * (1.1 ** iter))
        if iter == 0:
            new_ar = aspect_ratios
        else:
            new_ar = []
            for ar in aspect_ratios:
                new_ar.append(ar * (0.9 ** iter))
                new_ar.append(ar * (1.1 ** iter))

        new_rects = _compute_grid_proposals(img_w, img_h, min_wh, max_wh, new_ar)
        new_rects = np.array(_compute_grid_proposals(img_w, img_h, min_wh, max_wh, new_ar))
        take = min(num_proposals - len(rects), len(new_rects))
        new_rects = new_rects[:take]

        if shuffle and take < len(new_rects):
            keep_inds = range(len(new_rects))
            keep_inds = np.random.choice(keep_inds, size=take, replace=False)
            new_rects = new_rects[keep_inds]
        else:
            new_rects = new_rects[:take]

        rects.extend(new_rects)
        iter = iter + 1

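    # Editor's note on the loop above: every retry widens the aspect-ratio set
    # geometrically, e.g. for aspect_ratios = [1.0, 2.0, 0.5] the pass with
    # iter == 1 tries 0.9, 1.1, 1.8, 2.2, 0.45 and 0.55, so the grid keeps
    # yielding new, slightly different boxes until num_proposals rects have
    # been collected.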
    np_rects = np.array(rects)
    num_rects = np_rects.shape[0]
    if shuffle and num_proposals < num_rects:
        keep_inds = range(num_rects)
        keep_inds = np.random.choice(keep_inds, size=num_proposals, replace=False)
        np_rects = np_rects[keep_inds]
    else:
        np_rects = np_rects[:num_proposals]

    assert np_rects.shape[0] == num_proposals
    return np_rects

def _compute_grid_proposals(img_w, img_h, min_wh, max_wh, aspect_ratios):

@ -152,17 +226,30 @@ class ProposalProvider:
        self._requires_scaling = requires_scaling

    @classmethod
    def fromfile(cls, filename):
    def fromfile(cls, filename, max_num_proposals):
        print('Reading proposals from file ({}) ...'.format(filename))
        with open(filename) as f:
            lines = f.readlines()

        proposal_list = [[] for _ in lines]
        index = 0
        cut_counter = 0
        for line in lines:
            # TODO: parse line
            index = 0
            rects = np.zeros((4, 200))
            # parse line
            numbers = line[line.find('|') + 11:]
            parsed_numbers = np.fromstring(numbers, dtype=int, sep=' ')
            parsed_rects = parsed_numbers.reshape((int(parsed_numbers.shape[0] / 4), 4))
            num_rects = parsed_rects.shape[0]
            if num_rects > max_num_proposals:
                rects = parsed_rects[:max_num_proposals,:]
                cut_counter += 1
            else:
                pad_rects = np.zeros((max_num_proposals - num_rects, 4))
                rects = np.vstack([parsed_rects, pad_rects])
            proposal_list[index] = rects
            index += 1

        print('Done. {} images had more than {} proposals.'.format(cut_counter, max_num_proposals))
        return cls(proposal_list)
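    # Editor's note (inferred from the parsing above, not verified): each line is
    # expected to carry its proposals 11 characters after the first '|', e.g. in a
    # field like "|proposals 10 20 110 120 30 40 130 140 ...", i.e. absolute pixel
    # coordinates in groups of four (x1 y1 x2 y2).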

    @classmethod

@ -183,33 +270,25 @@ class ProposalProvider:
        return self._proposal_cfg['NUM_ROI_PROPOSALS']

    def get_proposals(self, index, img=None):
        #import pdb; pdb.set_trace()
        if index in self._proposal_dict:
            return self._proposal_dict[index]
        else:
            return self._compute_proposals(img)

    def _compute_proposals(self, img):
        min_w = self._proposal_cfg['PROPOSALS_MIN_W']
        min_h = self._proposal_cfg['PROPOSALS_MIN_H']
        num_proposals = self._proposal_cfg.NUM_ROI_PROPOSALS
        return compute_proposals(img, num_proposals, min_w, min_h)
        num_proposals = self._proposal_cfg.NUM_ROI_PROPOSALS
        return compute_proposals(img, num_proposals, self._proposal_cfg)

if __name__ == '__main__':
    import cv2
    image_file = r"C:\src\CNTK\Examples\Image\DataSets\Pascal\VOCdevkit\VOC2007\JPEGImages\000015.jpg"
    image_file = os.path.join(abs_path, r"..\..\DataSets\Pascal\VOCdevkit\VOC2007\JPEGImages\000015.jpg")
    img = cv2.imread(image_file)

    # 0.18 sec for 4000
    # 0.15 sec for 2000
    # 0.13 sec for 1000
    num_proposals = 2000
    num_runs = 100
    num_runs = 500
    proposals = compute_proposals(img, num_proposals, cfg=None)
    import time
    start = int(time.time())
    for i in range(num_runs):
        proposals = compute_proposals(img, num_proposals, 20, 20)
        proposals = compute_proposals(img, num_proposals, cfg=None)
    total = int(time.time() - start)
    print ("time: {}".format(total / (1.0 * num_runs)))
    print ("time for {} proposals: {} (total time for {} runs: {}".format(num_proposals, total / (1.0 * num_runs), num_runs, total))

    assert len(proposals) == num_proposals, "{} != {}".format(len(proposals), num_proposals)

@ -20,39 +20,17 @@ class ProposalLayer(UserFunction):
    transformations to a set of regular boxes (called "anchors").
    '''

    def __init__(self, arg1, arg2, arg3,
                 train_pre_nms_topN=12000,
                 train_post_nms_topN=2000,
                 train_nms_thresh=0.7,
                 train_min_size=16,
                 test_pre_nms_topN=6000,
                 test_post_nms_topN=300,
                 test_nms_thresh=0.7,
                 test_min_size=16,
                 param_str = None,
                 name='ProposalLayer'):
        super(ProposalLayer, self).__init__([arg1, arg2, arg3], name=name)
        self._train_pre_nms_topN = train_pre_nms_topN
        self._train_post_nms_topN = train_post_nms_topN
        self._train_nms_thresh = train_nms_thresh
        self._train_min_size = train_min_size
        self._test_pre_nms_topN = test_pre_nms_topN
        self._test_post_nms_topN = test_post_nms_topN
        self._test_nms_thresh = test_nms_thresh
        self._test_min_size = test_min_size
        self._param_str = param_str if param_str is not None else "'feat_stride': 16\n'scales':\n - 8 \n - 16 \n - 32"
    def __init__(self, arg1, arg2, arg3, layer_config, name='ProposalLayer'):
        super(ProposalLayer, self).__init__([arg1, arg2, arg3], attributes=layer_config, name=name)

        self._layer_config = layer_config
        self._feat_stride = 16 if 'feat_stride' not in layer_config else layer_config['feat_stride']
        anchor_scales = [8, 16, 32] if 'scales' not in layer_config else layer_config['scales']

        # parse the layer parameter string, which must be valid YAML
        layer_params = yaml.load(self._param_str)
        self._feat_stride = layer_params['feat_stride']
        anchor_scales = layer_params.get('scales', (8, 16, 32))
        self._anchors = generate_anchors(scales=np.array(anchor_scales))
        self._num_anchors = self._anchors.shape[0]

        attributes = {'feat_stride' : self._feat_stride, 'scales' : anchor_scales}

        super(ProposalLayer, self).__init__([arg1, arg2, arg3], attributes=attributes, name=name)

        if DEBUG:
            print ('feat_stride: {}'.format(self._feat_stride))
            print ('anchors:')

@ -85,15 +63,15 @@ class ProposalLayer(UserFunction):
        # use potentially different number of proposals for training vs evaluation
        if len(outputs_to_retain) == 0:
            # print("EVAL")
            pre_nms_topN = self._test_pre_nms_topN
            post_nms_topN = self._test_post_nms_topN
            nms_thresh = self._test_nms_thresh
            min_size = self._test_min_size
            pre_nms_topN = self._layer_config['test_pre_nms_topN']
            post_nms_topN = self._layer_config['test_post_nms_topN']
            nms_thresh = self._layer_config['test_nms_thresh']
            min_size = self._layer_config['test_min_size']
        else:
            pre_nms_topN = self._train_pre_nms_topN
            post_nms_topN = self._train_post_nms_topN
            nms_thresh = self._train_nms_thresh
            min_size = self._train_min_size
            pre_nms_topN = self._layer_config['train_pre_nms_topN']
            post_nms_topN = self._layer_config['train_post_nms_topN']
            nms_thresh = self._layer_config['train_nms_thresh']
            min_size = self._layer_config['train_min_size']

        bottom = arguments
        assert bottom[0].shape[0] == 1, \

@ -205,44 +183,16 @@ class ProposalLayer(UserFunction):
        pass

    def clone(self, cloned_inputs):
        return ProposalLayer(cloned_inputs[0], cloned_inputs[1], cloned_inputs[2],
                             train_pre_nms_topN=self._train_pre_nms_topN,
                             train_post_nms_topN=self._train_post_nms_topN,
                             train_nms_thresh=self._train_nms_thresh,
                             train_min_size=self._train_min_size,
                             test_pre_nms_topN=self._test_pre_nms_topN,
                             test_post_nms_topN=self._test_post_nms_topN,
                             test_nms_thresh=self._test_nms_thresh,
                             test_min_size=self._test_min_size,
                             param_str=self._param_str)
        return ProposalLayer(cloned_inputs[0], cloned_inputs[1], cloned_inputs[2], layer_config=self._layer_config)

    def serialize(self):
        internal_state = {}
        internal_state['param_str'] = self._param_str
        internal_state['train_pre_nms_topN'] = self._train_pre_nms_topN
        internal_state['train_post_nms_topN'] = self._train_post_nms_topN
        internal_state['train_nms_thresh'] = self._train_nms_thresh
        internal_state['train_min_size'] = self._train_min_size
        internal_state['test_pre_nms_topN'] = self._test_pre_nms_topN
        internal_state['test_post_nms_topN'] = self._test_post_nms_topN
        internal_state['test_nms_thresh'] = self._test_nms_thresh
        internal_state['test_min_size'] = self._test_min_size

        internal_state['layer_config'] = self._layer_config
        return internal_state

    @staticmethod
    def deserialize(inputs, name, state):
        return ProposalLayer(inputs[0], inputs[1], inputs[2],
                             train_pre_nms_topN=state['train_pre_nms_topN'],
                             train_post_nms_topN=state['train_post_nms_topN'],
                             train_nms_thresh=state['train_nms_thresh'],
                             train_min_size=state['train_min_size'],
                             test_pre_nms_topN=state['test_pre_nms_topN'],
                             test_post_nms_topN=state['test_post_nms_topN'],
                             test_nms_thresh=state['test_nms_thresh'],
                             test_min_size=state['test_min_size'],
                             param_str=state['param_str'],
                             name=name)
        return ProposalLayer(inputs[0], inputs[1], inputs[2], layer_config=state['layer_config'], name=name)

def _filter_boxes(boxes, min_size):
    """Remove all boxes with any side smaller than min_size."""

@ -6,7 +6,7 @@

import numpy as np
import cntk
from cntk import reduce_sum
from cntk import reduce_sum, ops
from cntk import user_function, relu, softmax, slice, splice, reshape, element_times, plus, minus, alias, classification_error
from cntk.initializer import glorot_uniform, normal
from cntk.layers import Convolution

@ -16,7 +16,6 @@ from utils.rpn.proposal_layer import ProposalLayer
from utils.rpn.proposal_target_layer import ProposalTargetLayer
from utils.rpn.cntk_smoothL1_loss import SmoothL1Loss

# Please keep in sync with Readme.md
def create_rpn(conv_out, scaled_gt_boxes, im_info, cfg, add_loss_functions=True):
    '''
    Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:

@ -59,19 +58,21 @@ def create_rpn(conv_out, scaled_gt_boxes, im_info, cfg, add_loss_functions=True)
    rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape, name="rpn_cls_prob_reshape")

    # proposal layer
    rpn_rois = add_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg)
    rpn_rois = create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg)

    rpn_losses = None
    if(add_loss_functions):
        # RPN targets
        # Comment: rpn_cls_score is only passed   vvv   to get width and height of the conv feature map ...
        proposal_layer_params = "'feat_stride': {}\n'scales':\n - {}". \
            format(cfg["MODEL"].FEATURE_STRIDE, "\n - ".join([str(v) for v in cfg["DATA"].PROPOSAL_LAYER_SCALES]))
        atl = user_function(AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info,
                                              rpn_batch_size=cfg["TRAIN"].RPN_BATCHSIZE,
                                              rpn_fg_fraction=cfg["TRAIN"].RPN_FG_FRACTION,
                                              clobber_positives=cfg["TRAIN"].RPN_CLOBBER_POSITIVES,
                                              positive_overlap=cfg["TRAIN"].RPN_POSITIVE_OVERLAP,
                                              negative_overlap=cfg["TRAIN"].RPN_NEGATIVE_OVERLAP,
                                              param_str=cfg.PROPOSAL_LAYER_PARAMS))
                                              param_str=proposal_layer_params))
        rpn_labels = atl.outputs[0]
        rpn_bbox_targets = atl.outputs[1]
        rpn_bbox_inside_weights = atl.outputs[2]

@ -114,17 +115,30 @@ def create_rpn(conv_out, scaled_gt_boxes, im_info, cfg, add_loss_functions=True)

    return rpn_rois, rpn_losses

def add_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg):
    rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info,
                                               train_pre_nms_topN=cfg["TRAIN"].RPN_PRE_NMS_TOP_N,
                                               train_post_nms_topN=cfg["TRAIN"].RPN_POST_NMS_TOP_N,
                                               train_nms_thresh=cfg["TRAIN"].RPN_NMS_THRESH,
                                               train_min_size=cfg["TRAIN"].RPN_MIN_SIZE,
                                               test_pre_nms_topN=cfg["TEST"].RPN_PRE_NMS_TOP_N,
                                               test_post_nms_topN=cfg["TEST"].RPN_POST_NMS_TOP_N,
                                               test_nms_thresh=cfg["TEST"].RPN_NMS_THRESH,
                                               test_min_size=cfg["TEST"].RPN_MIN_SIZE,
                                               param_str=cfg.PROPOSAL_LAYER_PARAMS))
def create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg, use_native_proposal_layer=False):
    layer_config = {}
    layer_config["feat_stride"] = cfg["MODEL"].FEATURE_STRIDE
    layer_config["scales"] = cfg["DATA"].PROPOSAL_LAYER_SCALES

    layer_config["train_pre_nms_topN"] = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
    layer_config["train_post_nms_topN"] = cfg["TRAIN"].RPN_POST_NMS_TOP_N
    layer_config["train_nms_thresh"] = float(cfg["TRAIN"].RPN_NMS_THRESH)
    layer_config["train_min_size"] = float(cfg["TRAIN"].RPN_MIN_SIZE)

    layer_config["test_pre_nms_topN"] = cfg["TEST"].RPN_PRE_NMS_TOP_N
    layer_config["test_post_nms_topN"] = cfg["TEST"].RPN_POST_NMS_TOP_N
    layer_config["test_nms_thresh"] = float(cfg["TEST"].RPN_NMS_THRESH)
    layer_config["test_min_size"] = float(cfg["TEST"].RPN_MIN_SIZE)

    if use_native_proposal_layer:
        cntk.ops.register_native_user_function('ProposalLayerOp',
                                               'Cntk.ProposalLayerLib-' + cntk.__version__.rstrip('+'),
                                               'CreateProposalLayer')
        rpn_rois_raw = ops.native_user_function('ProposalLayerOp', [rpn_cls_prob_reshape, rpn_bbox_pred, im_info],
                                                layer_config, 'native_proposal_layer')
    else:
        rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, layer_config))

    return alias(rpn_rois_raw, name='rpn_rois')

def create_proposal_target_layer(rpn_rois, scaled_gt_boxes, cfg):
|
||||
|
|
|
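A hedged usage sketch of the new helper: the Python ProposalLayer stays the default, and the native ProposalLayerOp is opted into per call. Here cfg stands for any merged config exposing the MODEL/DATA/TRAIN/TEST fields read above:

# default path: Python user function
rpn_rois = create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg)

# opt-in path: registers Cntk.ProposalLayerLib and builds the native op
# from the very same layer_config dict (CPU-only at this point)
rpn_rois_native = create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg,
                                        use_native_proposal_layer=True)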
@ -14,7 +14,7 @@ except ImportError:
# Add models here like this: (category, model_name, model_url)
models = (('Image Classification', 'AlexNet_ImageNet_CNTK', 'https://www.cntk.ai/Models/CNTK_Pretrained/AlexNet_ImageNet_CNTK.model'),
('Image Classification', 'AlexNet_ImageNet_Caffe', 'https://www.cntk.ai/Models/CNTK_Pretrained/AlexNet_ImageNet_Caffe.model'),
('Image Classification', 'AlexNet_ImageNet_Caffe', 'https://www.cntk.ai/Models/Caffe_Converted/AlexNet_ImageNet_Caffe.model'),
('Image Classification', 'InceptionV3_ImageNet_CNTK', 'https://www.cntk.ai/Models/CNTK_Pretrained/InceptionV3_ImageNet_CNTK.model'),
('Image Classification', 'BNInception_ImageNet_Caffe', 'https://www.cntk.ai/Models/Caffe_Converted/BNInception_ImageNet_Caffe.model'),
('Image Classification', 'ResNet18_ImageNet_CNTK', 'https://www.cntk.ai/Models/CNTK_Pretrained/ResNet18_ImageNet_CNTK.model'),
@ -23,6 +23,7 @@ dependencies:
- setuptools=27.2.0=py34_0
- six=1.10.0=py34_0
- wheel=0.29.0=py34_0
- dlib=19.0=np111py34_blas_openblas_200
- pip:
- easydict==1.6.0
- future==0.16.0

@ -35,3 +36,4 @@ dependencies:
- sphinx==1.5.4
- twine==1.8.1
- protobuf==3.2.0

@ -23,6 +23,7 @@ dependencies:
- six=1.10.0=py35_0
- wheel=0.29.0=py35_0
- opencv=3.1.0=np111py35_1
- dlib=19.0=np111py35_200
- pip:
- gym==0.5.2
- keras==2.0.6
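The dlib pins added to both environments back the selective-search proposal path this commit enables. A minimal sketch of generating candidate ROIs with dlib's built-in selective search (the min_size value and the zero image are illustrative stand-ins):

import dlib
import numpy as np

img = np.zeros((400, 400, 3), dtype=np.uint8)  # stand-in for a real RGB image

rects = []
dlib.find_candidate_object_locations(img, rects, min_size=500)  # selective search
rois = [(r.left(), r.top(), r.right(), r.bottom()) for r in rects]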
@ -0,0 +1,77 @@
# Copyright (c) Microsoft. All rights reserved.

# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================

import numpy as np
import os
import pytest
import sys
from cntk import load_model
from cntk.cntk_py import DeviceKind_GPU
from cntk.device import try_set_default_device, gpu
from cntk.logging.graph import get_node_outputs
from cntk.ops.tests.ops_test_utils import cntk_device
from _cntk_py import force_deterministic_algorithms
force_deterministic_algorithms()

abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(abs_path)
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection"))

win35_linux34 = pytest.mark.skipif(not ((sys.platform == 'win32' and sys.version_info[:2] == (3,5)) or
(sys.platform != 'win32' and sys.version_info[:2] == (3,4))),
reason="it runs currently only in windows-py35 and linux-py34 due to precompiled cython modules")

@win35_linux34
def test_detection_demo(device_id):
if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slow in CPU
try_set_default_device(cntk_device(device_id))

from prepare_test_data import prepare_Grocery_data, prepare_alexnet_v0_model
grocery_path = prepare_Grocery_data()
prepare_alexnet_v0_model()

from FastRCNN.install_data_and_model import create_grocery_mappings
create_grocery_mappings(grocery_path)

from DetectionDemo import get_configuration
import utils.od_utils as od

cfg = get_configuration('FasterRCNN')
cfg["CNTK"].FORCE_DETERMINISTIC = True
cfg["CNTK"].DEBUG_OUTPUT = False
cfg["CNTK"].MAKE_MODE = False
cfg["CNTK"].FAST_MODE = False
cfg.CNTK.E2E_MAX_EPOCHS = 3
cfg.CNTK.RPN_EPOCHS = 2
cfg.CNTK.FRCN_EPOCHS = 2
cfg.IMAGE_WIDTH = 400
cfg.IMAGE_HEIGHT = 400
cfg["CNTK"].TRAIN_E2E = True
cfg.USE_GPU_NMS = False
cfg.VISUALIZE_RESULTS = False
cfg["DATA"].MAP_FILE_PATH = grocery_path

externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ
if externalData:
extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
cfg['BASE_MODEL_PATH'] = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v1", "AlexNet_ImageNet_Caffe.model")
else:
cfg['BASE_MODEL_PATH'] = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet_ImageNet_Caffe.model".split("/"))

# train and test
eval_model = od.train_object_detector(cfg)
eval_results = od.evaluate_test_set(eval_model, cfg)

meanAP = np.nanmean(list(eval_results.values()))
print('meanAP={}'.format(meanAP))
assert meanAP > 0.01

# detect objects in single image
img_path = os.path.join(grocery_path, "testImages", "WIN_20160803_11_28_42_Pro.jpg")
regressed_rois, cls_probs = od.evaluate_single_image(eval_model, img_path, cfg)
bboxes, labels, scores = od.filter_results(regressed_rois, cls_probs, cfg)
assert bboxes.shape[0] == labels.shape[0]
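On the mAP aggregation at the end of the test: the evaluation returns a mapping from class name to average precision, and classes without ground-truth boxes come back as NaN, which np.nanmean skips. A tiny illustration with made-up AP values:

import numpy as np

eval_results = {'gerkins': 0.53, 'orange': 0.61, 'butter': float('nan')}  # illustrative APs
meanAP = np.nanmean(list(eval_results.values()))
print(meanAP)  # ~0.57: the NaN class is excluded from the mean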
@ -69,7 +69,7 @@ def test_fastrcnn_grocery_training(device_id):
extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
model_file = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v0", "AlexNet.model")
else:
model_file = os.path.join(abs_path, *"../../../../Examples/Image/PretrainedModels/AlexNet.model".split("/"))
model_file = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet.model".split("/"))

from A2_RunWithPyModel import train_fast_rcnn, evaluate_fast_rcnn
trained_model = train_fast_rcnn(model_path=model_file)
@ -19,14 +19,14 @@ force_deterministic_algorithms()
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(abs_path)
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection"))
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection", "FastRCNN"))

from prepare_test_data import prepare_Grocery_data, prepare_alexnet_v0_model
grocery_path = prepare_Grocery_data()
prepare_alexnet_v0_model()

from FastRCNN.install_data_and_model import create_grocery_mappings
from install_data_and_model import create_grocery_mappings
create_grocery_mappings(grocery_path)
from utils.config_helpers import merge_configs

win35_linux34 = pytest.mark.skipif(not ((sys.platform == 'win32' and sys.version_info[:2] == (3,5)) or
(sys.platform != 'win32' and sys.version_info[:2] == (3,4))),
@ -34,7 +34,12 @@ win35_linux34 = pytest.mark.skipif(not ((sys.platform == 'win32' and sys.version

@win35_linux34
def test_fastrcnnpy_grocery_training(device_id):
from FastRCNN.config import cfg as detector_cfg
if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slow in CPU
try_set_default_device(cntk_device(device_id))

from utils.config_helpers import merge_configs
from FastRCNN_config import cfg as detector_cfg
from utils.configs.AlexNet_config import cfg as network_cfg
from utils.configs.Grocery_config import cfg as dataset_cfg
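The three config objects imported above are combined with merge_configs before the per-test overrides below; a compact sketch of the pattern (the merge order shown is assumed, with later entries layered over earlier ones):

cfg = merge_configs([detector_cfg, network_cfg, dataset_cfg])
cfg["CNTK"].FORCE_DETERMINISTIC = True  # then apply test-specific overrides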
@ -43,27 +48,25 @@ def test_fastrcnnpy_grocery_training(device_id):
cfg["CNTK"].DEBUG_OUTPUT = False
cfg["CNTK"].MAKE_MODE = False
cfg["CNTK"].FAST_MODE = False
cfg["CNTK"].MAX_EPOCHS = 2
cfg.NUM_ROI_PROPOSALS = 100
cfg.USE_GPU_NMS = True
cfg["CNTK"].MAX_EPOCHS = 4
cfg.IMAGE_WIDTH = 600
cfg.IMAGE_HEIGHT = 600
cfg.NUM_ROI_PROPOSALS = 200
cfg.USE_GPU_NMS = False
cfg.VISUALIZE_RESULTS = False
cfg["DATA"].MAP_FILE_PATH = grocery_path

externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ
if externalData:
extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
cfg['BASE_MODEL_PATH'] = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v0", "AlexNet.model")
cfg['BASE_MODEL_PATH'] = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v1", "AlexNet_ImageNet_Caffe.model")
else:
cfg['BASE_MODEL_PATH'] = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet.model".split("/"))
cfg['BASE_MODEL_PATH'] = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet_ImageNet_Caffe.model".split("/"))

from FastRCNN.FastRCNN_train import prepare, train_fast_rcnn
from FastRCNN.FastRCNN_eval import compute_test_set_aps
from FastRCNN_train import prepare, train_fast_rcnn
from FastRCNN_eval import compute_test_set_aps
prepare(cfg, False)

if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slow in CPU
try_set_default_device(cntk_device(device_id))

np.random.seed(seed=3)
trained_model = train_fast_rcnn(cfg)
eval_results = compute_test_set_aps(trained_model, cfg)
@ -18,22 +18,24 @@ force_deterministic_algorithms()
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(abs_path)
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection"))
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Extensibility", "ProposalLayer"))
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection", "FastRCNN"))
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection", "FasterRCNN"))

from prepare_test_data import prepare_Grocery_data, prepare_alexnet_v0_model
grocery_path = prepare_Grocery_data()
prepare_alexnet_v0_model()

from FastRCNN.install_data_and_model import create_grocery_mappings
from install_data_and_model import create_grocery_mappings
create_grocery_mappings(grocery_path)
from utils.config_helpers import merge_configs

win35_linux34 = pytest.mark.skipif(not ((sys.platform == 'win32' and sys.version_info[:2] == (3,5)) or
(sys.platform != 'win32' and sys.version_info[:2] == (3,4))),
reason="it runs currently only in windows-py35 and linux-py34 due to precompiled cython modules")

def run_fasterrcnn_grocery_training(device_id, e2e):
from FasterRCNN.config import cfg as detector_cfg
def run_fasterrcnn_grocery_training(e2e):
from FasterRCNN_eval import compute_test_set_aps
from utils.config_helpers import merge_configs
from FasterRCNN_config import cfg as detector_cfg
from utils.configs.AlexNet_config import cfg as network_cfg
from utils.configs.Grocery_config import cfg as dataset_cfg
@ -41,101 +43,68 @@ def run_fasterrcnn_grocery_training(device_id, e2e):
cfg["CNTK"].FORCE_DETERMINISTIC = True
cfg["CNTK"].DEBUG_OUTPUT = False
cfg["CNTK"].MAKE_MODE = False
cfg["CNTK"].FAST_MODE = True
cfg["CNTK"].FAST_MODE = False
cfg.CNTK.E2E_MAX_EPOCHS = 3
cfg.CNTK.RPN_EPOCHS = 2
cfg.CNTK.FRCN_EPOCHS = 2
cfg.IMAGE_WIDTH = 400
cfg.IMAGE_HEIGHT = 400
cfg["CNTK"].TRAIN_E2E = e2e
cfg.USE_GPU_NMS = True
cfg.USE_GPU_NMS = False
cfg.VISUALIZE_RESULTS = False
cfg["DATA"].MAP_FILE_PATH = grocery_path

externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ
if externalData:
extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
cfg['BASE_MODEL_PATH'] = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v0", "AlexNet.model")
model_file = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v1", "AlexNet_ImageNet_Caffe.model")
else:
model_file = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet.model".split("/"))
model_file = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet_ImageNet_Caffe.model".split("/"))

from FasterRCNN import train_faster_rcnn_e2e, eval_faster_rcnn_mAP
from FasterRCNN_train import prepare, train_faster_rcnn

np.random.seed(seed=3)
eval_model = train_faster_rcnn_e2e(model_file, debug_output=False)
meanAP = eval_faster_rcnn_mAP(eval_model)
assert meanAP > 0.01

@win35_linux34
def test_native_fasterrcnn_eval(tmpdir, device_id):
from config import cfg
cfg["CNTK"].FORCE_DETERMINISTIC = True
cfg["CNTK"].DEBUG_OUTPUT = False
cfg["CNTK"].VISUALIZE_RESULTS = False
cfg["CNTK"].FAST_MODE = True
cfg["CNTK"].MAP_FILE_PATH = grocery_path

from FasterRCNN import set_global_vars
set_global_vars(False)

if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slow in CPU
try_set_default_device(cntk_device(device_id))

# since we do not use a reader for evaluation we need unzipped data
externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ

if externalData:
extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
model_file = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v0", "AlexNet.model")
else:
model_file = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet.model".split("/"))

from FasterRCNN import train_faster_rcnn_e2e, eval_faster_rcnn_mAP

np.random.seed(seed=3)

eval_model = train_faster_rcnn_e2e(model_file, debug_output=False)

meanAP_python = eval_faster_rcnn_mAP(eval_model)

cntk_py.always_allow_setting_default_device()

try_set_default_device(cpu())

from native_proposal_layer import clone_with_native_proposal_layer

model_with_native_pl = clone_with_native_proposal_layer(eval_model)
meanAP_native = eval_faster_rcnn_mAP(model_with_native_pl)

# 0.2067 (python) vs 0.2251 (native) -- the difference stems
# from different sorting algorithms: quicksort in python and
# heapsort in c++ (both are not stable).
assert abs(meanAP_python - meanAP_native) < 0.1

@win35_linux34
def test_fasterrcnn_grocery_training_4stage(device_id):
from config import cfg
cfg["CNTK"].FORCE_DETERMINISTIC = True
cfg["CNTK"].DEBUG_OUTPUT = False
cfg["CNTK"].VISUALIZE_RESULTS = False
cfg["CNTK"].FAST_MODE = True
cfg["CNTK"].MAP_FILE_PATH = grocery_path

from FasterRCNN.FasterRCNN_train import prepare, train_faster_rcnn
from FasterRCNN.FasterRCNN_eval import compute_test_set_aps
prepare(cfg, False)

if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slow in CPU
try_set_default_device(cntk_device(device_id))

np.random.seed(seed=3)
cfg['BASE_MODEL_PATH'] = model_file
trained_model = train_faster_rcnn(cfg)
eval_results = compute_test_set_aps(trained_model, cfg)
meanAP = np.nanmean(list(eval_results.values()))
print('meanAP={}'.format(meanAP))
assert meanAP > 0.01
return trained_model, meanAP, cfg

@win35_linux34
def reenable_once_sorting_is_stable_test_native_fasterrcnn_eval(device_id):
if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slow in CPU
try_set_default_device(cntk_device(device_id))

from FasterRCNN_eval import compute_test_set_aps
eval_model, meanAP_python, cfg = run_fasterrcnn_grocery_training(True)

cntk_py.always_allow_setting_default_device()
try_set_default_device(cpu())

sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Extensibility", "ProposalLayer"))
from native_proposal_layer import clone_with_native_proposal_layer
model_with_native_pl = clone_with_native_proposal_layer(eval_model)
eval_results = compute_test_set_aps(model_with_native_pl, cfg)
meanAP_native = np.nanmean(list(eval_results.values()))

# 0.2067 (python) vs 0.2251 (native) -- the difference stems
# from different sorting algorithms: quicksort in python and
# heapsort in c++ (both are not stable).
print("Python: {}, native: {}".format(meanAP_python, meanAP_native))
assert abs(meanAP_python - meanAP_native) < 0.1
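The 0.1 tolerance above is needed because neither sort is stable: quicksort on the Python side and heapsort on the C++ side may order equal-scoring proposals differently, so NMS can keep different boxes. A tiny illustration with arbitrary scores:

import numpy as np

scores = np.array([0.9, 0.5, 0.5, 0.5, 0.1])
order_quick = np.argsort(-scores, kind='quicksort')
order_heap = np.argsort(-scores, kind='heapsort')

# the sorted score values always agree ...
assert np.array_equal(scores[order_quick], scores[order_heap])
# ... but the three tied indices may come back in a different relative
# order, which is enough to change the NMS survivors downstream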
@win35_linux34
def test_fasterrcnn_grocery_training_e2e(device_id):
try_set_default_device(cntk_device(device_id))
_, _, _ = run_fasterrcnn_grocery_training(e2e = True)

@win35_linux34
def test_fasterrcnn_grocery_training_4stage(device_id):
run_fasterrcnn_grocery_training(device_id, e2e = False)

@win35_linux34
def test_fasterrcnn_grocery_training_e2e(device_id, e2e=True):
run_fasterrcnn_grocery_training(device_id, e2e = True)
if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slow in CPU
try_set_default_device(cntk_device(device_id))
_, _, _ = run_fasterrcnn_grocery_training(e2e = False)
@ -152,11 +152,18 @@ def prepare_alexnet_v0_model():
*"../../../../PretrainedModels".split("/"))
local_base_path = os.path.normpath(local_base_path)

# v0 model:
model_file = os.path.join(local_base_path, "AlexNet.model")

if not os.path.isfile(model_file):
external_model_path = os.path.join(os.environ[envvar], "PreTrainedModels", "AlexNet", "v0", "AlexNet.model")
copyfile(external_model_path, model_file)

# v1 model:
model_file = os.path.join(local_base_path, "AlexNet_ImageNet_Caffe.model")
if not os.path.isfile(model_file):
external_model_path = os.path.join(os.environ[envvar], "PreTrainedModels", "AlexNet", "v1", "AlexNet_ImageNet_Caffe.model")
copyfile(external_model_path, model_file)

return local_base_path

def prepare_UCF11_data():
@ -5,23 +5,24 @@
# ==============================================================================

import os, sys
import pytest
import numpy as np
from cntk import user_function
from cntk.ops import input_variable
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path))
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection"))

import pytest
import numpy as np
import cntk
from cntk import user_function
from cntk.ops import input_variable
from utils.rpn.proposal_layer import ProposalLayer as CntkProposalLayer
from utils.rpn.proposal_target_layer import ProposalTargetLayer as CntkProposalTargetLayer
from utils.rpn.anchor_target_layer import AnchorTargetLayer as CntkAnchorTargetLayer
from utils.caffe_layers.proposal_layer import ProposalLayer as CaffeProposalLayer
from utils.caffe_layers.proposal_target_layer import ProposalTargetLayer as CaffeProposalTargetLayer
from utils.caffe_layers.anchor_target_layer import AnchorTargetLayer as CaffeAnchorTargetLayer
win35_linux34 = pytest.mark.skipif(not ((sys.platform == 'win32' and sys.version_info[:2] == (3,5)) or
(sys.platform != 'win32' and sys.version_info[:2] == (3,4))),
reason="it runs currently only in windows-py35 and linux-py34 due to precompiled cython modules")

@win35_linux34
def test_proposal_layer():
from utils.rpn.proposal_layer import ProposalLayer as CntkProposalLayer
from utils.caffe_layers.proposal_layer import ProposalLayer as CaffeProposalLayer
from FasterRCNN.FasterRCNN_config import cfg

cls_prob_shape_cntk = (18,61,61)
cls_prob_shape_caffe = (18,61,61)
rpn_bbox_shape = (36, 61, 61)
@ -38,7 +39,21 @@ def test_proposal_layer():
rpn_bbox_var = input_variable(rpn_bbox_shape)
dims_info_var = input_variable(dims_info_shape)

cntk_layer = user_function(CntkProposalLayer(cls_prob_var, rpn_bbox_var, dims_info_var))
layer_config = {}
layer_config["feat_stride"] = 16
layer_config["scales"] = [8, 16, 32]

layer_config["train_pre_nms_topN"] = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
layer_config["train_post_nms_topN"] = cfg["TRAIN"].RPN_POST_NMS_TOP_N
layer_config["train_nms_thresh"] = float(cfg["TRAIN"].RPN_NMS_THRESH)
layer_config["train_min_size"] = float(cfg["TRAIN"].RPN_MIN_SIZE)

layer_config["test_pre_nms_topN"] = cfg["TEST"].RPN_PRE_NMS_TOP_N
layer_config["test_post_nms_topN"] = cfg["TEST"].RPN_POST_NMS_TOP_N
layer_config["test_nms_thresh"] = float(cfg["TEST"].RPN_NMS_THRESH)
layer_config["test_min_size"] = float(cfg["TEST"].RPN_MIN_SIZE)

cntk_layer = user_function(CntkProposalLayer(cls_prob_var, rpn_bbox_var, dims_info_var, layer_config))
state, cntk_output = cntk_layer.forward({cls_prob_var: [cls_prob], rpn_bbox_var: [rpn_bbox_pred], dims_info_var: dims_input})
cntk_proposals = cntk_output[next(iter(cntk_output))][0]
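Since the test now builds layer_config explicitly, the same dict could in principle drive the native op for a three-way comparison; a hedged sketch mirroring the registration call from rpn_helpers above (not part of the committed test):

import cntk
cntk.ops.register_native_user_function('ProposalLayerOp',
                                       'Cntk.ProposalLayerLib-' + cntk.__version__.rstrip('+'),
                                       'CreateProposalLayer')
native_layer = cntk.ops.native_user_function('ProposalLayerOp',
                                             [cls_prob_var, rpn_bbox_var, dims_info_var],
                                             layer_config, 'native_proposal_layer')
_, native_out = native_layer.forward({cls_prob_var: [cls_prob],
                                      rpn_bbox_var: [rpn_bbox_pred],
                                      dims_info_var: dims_input})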
@ -59,7 +74,11 @@ def test_proposal_layer():
assert np.allclose(cntk_proposals, caffe_proposals, rtol=0.0, atol=0.0)
print("Verified ProposalLayer")

@win35_linux34
def test_proposal_target_layer():
from utils.rpn.proposal_target_layer import ProposalTargetLayer as CntkProposalTargetLayer
from utils.caffe_layers.proposal_target_layer import ProposalTargetLayer as CaffeProposalTargetLayer

num_rois = 400
all_rois_shape_cntk = (num_rois,4)
num_gt_boxes = 50
@ -147,7 +166,11 @@ def test_proposal_target_layer():
assert np.allclose(cntk_bbox_inside_weights, caffe_bbox_inside_weights, rtol=0.0, atol=0.0)
print("Verified ProposalTargetLayer")

@win35_linux34
def test_anchor_target_layer():
from utils.rpn.anchor_target_layer import AnchorTargetLayer as CntkAnchorTargetLayer
from utils.caffe_layers.anchor_target_layer import AnchorTargetLayer as CaffeAnchorTargetLayer

rpn_cls_score_shape_cntk = (1, 18, 61, 61)
num_gt_boxes = 50
gt_boxes_shape_cntk = (num_gt_boxes,5)
@ -10,7 +10,7 @@ import sys
import pytest

abs_path = os.path.dirname(os.path.abspath(__file__))
notebook = os.path.join(abs_path, "..", "..", "..", "..", "Examples","Image","Detection","FastRCNN", "CNTK_FastRCNN_Eval.ipynb")
notebook = os.path.join(abs_path, "..", "..", "..", "..", "Examples","Image","Detection","FastRCNN", "BrainScript", "CNTK_FastRCNN_Eval.ipynb")

sys.path.append(abs_path)