From 63488568fe026b19a0ec881c3a8a5d9c3b5db696 Mon Sep 17 00:00:00 2001 From: Philipp Kranen Date: Tue, 22 Aug 2017 09:46:27 +0200 Subject: [PATCH] enabling native proposal layer and dlib selective search --- .gitattributes | 4 +- .gitignore | 1 + .../ProposalLayer/native_proposal_layer.py | 30 +- .../Pascal/mappings/create_mappings.py | 44 +- Examples/Image/Detection/DetectionDemo.py | 34 +- .../FastRCNN/BrainScript/A2_RunWithPyModel.py | 2 +- .../BrainScript/CNTK_FastRCNN_Eval.ipynb | 10 +- .../FastRCNN/BrainScript/cntk_helpers.py | 2 +- .../{config.py => FastRCNN_config.py} | 54 +- .../Image/Detection/FastRCNN/FastRCNN_eval.py | 27 +- .../Detection/FastRCNN/FastRCNN_train.py | 16 +- Examples/Image/Detection/FastRCNN/README.md | 17 +- .../FastRCNN/install_data_and_model.py | 2 +- .../Detection/FastRCNN/install_fastrcnn.py | 19 - .../Image/Detection/FastRCNN/run_fast_rcnn.py | 2 +- .../Image/Detection/FasterRCNN/FasterRCNN.py | 750 ------------------ .../{config.py => FasterRCNN_config.py} | 8 +- .../Detection/FasterRCNN/FasterRCNN_eval.py | 184 +---- .../Detection/FasterRCNN/FasterRCNN_train.py | 29 +- Examples/Image/Detection/FasterRCNN/README.md | 8 +- .../Detection/FasterRCNN/run_faster_rcnn.py | 10 +- Examples/Image/Detection/README.md | 2 +- Examples/Image/Detection/utils/Readme.md | 8 - .../Detection/utils/configs/AlexNet_config.py | 10 +- .../Detection/utils/configs/Grocery_config.py | 15 + .../Detection/utils/configs/Pascal_config.py | 3 + Examples/Image/Detection/utils/od_reader.py | 2 +- .../Image/Detection/utils/plot_helpers.py | 12 +- .../Image/Detection/utils/proposal_helpers.py | 201 +++-- .../Detection/utils/rpn/proposal_layer.py | 84 +- .../Image/Detection/utils/rpn/rpn_helpers.py | 44 +- PretrainedModels/download_model.py | 2 +- .../conda-linux-cntk-py34-environment.yml | 2 + .../conda-windows-cntk-py35-environment.yml | 1 + .../Examples/DetectionDemo_test.py | 77 ++ .../CNTKv2Python/Examples/FastRCNNBS_test.py | 2 +- .../CNTKv2Python/Examples/FastRCNN_test.py | 31 +- .../CNTKv2Python/Examples/FasterRCNN_test.py | 135 ++-- .../Examples/prepare_test_data.py | 9 +- .../CNTKv2Python/Examples/rpn_unit_test.py | 47 +- .../Tutorials/CNTK_FastRCNNEval_test.py | 2 +- 41 files changed, 633 insertions(+), 1309 deletions(-) rename Examples/Image/Detection/FastRCNN/{config.py => FastRCNN_config.py} (51%) delete mode 100644 Examples/Image/Detection/FastRCNN/install_fastrcnn.py delete mode 100644 Examples/Image/Detection/FasterRCNN/FasterRCNN.py rename Examples/Image/Detection/FasterRCNN/{config.py => FasterRCNN_config.py} (94%) create mode 100644 Tests/EndToEndTests/CNTKv2Python/Examples/DetectionDemo_test.py diff --git a/.gitattributes b/.gitattributes index f2707c72b..352d5ac59 100644 --- a/.gitattributes +++ b/.gitattributes @@ -146,8 +146,8 @@ Examples/Text/LightRNN/test/word-0.location text *.vsdm binary *.zip binary *.dnn binary -Examples/Image/Detection/FastRCNN/fastRCNN/*/*.pyd binary -Examples/Image/Detection/FastRCNN/fastRCNN/*/*.so binary +Examples/Image/Detection/FastRCNN/BrainScript/fastRCNN/*/*.pyd binary +Examples/Image/Detection/FastRCNN/BrainScript/fastRCNN/*/*.so binary Examples/Image/Detection/utils/cython_modules/*.pyd binary Examples/Image/Detection/utils/cython_modules/*.so binary Tests/UnitTests/V2LibraryTests/data/*.bin binary diff --git a/.gitignore b/.gitignore index f63939333..8bb57a4a2 100644 --- a/.gitignore +++ b/.gitignore @@ -291,6 +291,7 @@ Examples/Image/DataSets/grocery/positive/ Examples/Image/DataSets/grocery/testImages/ 
Examples/Image/DataSets/grocery/*.txt PretrainedModels/*.model +Examples/Image/Detection/FastRCNN/BrainScript/Output/ Examples/Image/Detection/FastRCNN/BrainScript/proc/ Examples/Image/Detection/FastRCNN/Output/ Examples/Image/Detection/FasterRCNN/Output/ diff --git a/Examples/Extensibility/ProposalLayer/native_proposal_layer.py b/Examples/Extensibility/ProposalLayer/native_proposal_layer.py index 33ae09b49..c58b340d2 100644 --- a/Examples/Extensibility/ProposalLayer/native_proposal_layer.py +++ b/Examples/Extensibility/ProposalLayer/native_proposal_layer.py @@ -13,9 +13,6 @@ sys.path.append(os.path.join(abs_path, "..", "..", "Image", "Detection", "Faster C.device.try_set_default_device(C.device.cpu()) -from FasterRCNN import eval_faster_rcnn_mAP, set_global_vars -from config import cfg - ops.register_native_user_function('ProposalLayerOp', 'Cntk.ProposalLayerLib-' + C.__version__.rstrip('+'), 'CreateProposalLayer') def clone_with_native_proposal_layer(model): @@ -26,16 +23,6 @@ def clone_with_native_proposal_layer(model): def converter(x): layer_config = copy.deepcopy(x.attributes) - layer_config["test_pre_nms_topN"] = cfg["TEST"].RPN_PRE_NMS_TOP_N - layer_config["test_post_nms_topN"] = cfg["TEST"].RPN_POST_NMS_TOP_N - layer_config["test_nms_thresh"] = float(cfg["TEST"].RPN_NMS_THRESH) - layer_config["test_min_size"] = float(cfg["TEST"].RPN_MIN_SIZE) - - layer_config["train_pre_nms_topN"] = cfg["TRAIN"].RPN_PRE_NMS_TOP_N - layer_config["train_post_nms_topN"] = cfg["TRAIN"].RPN_POST_NMS_TOP_N - layer_config["train_nms_thresh"] = float(cfg["TRAIN"].RPN_NMS_THRESH) - layer_config["train_min_size"] = float(cfg["TRAIN"].RPN_MIN_SIZE) - return ops.native_user_function('ProposalLayerOp', list(x.inputs), layer_config, 'native_proposal_layer') return C.misc.convert(model, filter, converter) @@ -52,8 +39,21 @@ def evaluate(model_path): # ProposalLayer currently only runs on the CPU eval_device = C.cpu() model = C.Function.load(model_path, device=eval_device) - set_global_vars(False) - return eval_faster_rcnn_mAP(model) + + from FasterRCNN.FasterRCNN_config import cfg as detector_cfg + from utils.configs.AlexNet_config import cfg as network_cfg + from utils.configs.Grocery_config import cfg as dataset_cfg + from utils.config_helpers import merge_configs + from FasterRCNN.FasterRCNN_train import prepare + from FasterRCNN.FasterRCNN_eval import compute_test_set_aps + + cfg = merge_configs([detector_cfg, network_cfg, dataset_cfg]) + cfg["CNTK"].FORCE_DETERMINISTIC = True + + prepare(cfg, False) + eval_results = compute_test_set_aps(model, cfg) + meanAP = np.nanmean(list(eval_results.values())) + return meanAP ############################# # main function boilerplate # diff --git a/Examples/Image/DataSets/Pascal/mappings/create_mappings.py b/Examples/Image/DataSets/Pascal/mappings/create_mappings.py index dcc0abfde..7f0339753 100644 --- a/Examples/Image/DataSets/Pascal/mappings/create_mappings.py +++ b/Examples/Image/DataSets/Pascal/mappings/create_mappings.py @@ -7,6 +7,7 @@ import sys, os import numpy as np +import scipy.io as sio import future import xml.etree.ElementTree from xml.etree import ElementTree @@ -21,9 +22,10 @@ use_pad_scale = False pad_width = 850 pad_height = 850 -pascal_voc2007_jpgimg_rel_path = ".../VOCdevkit/VOC2007/JPEGImages/" -pascal_voc2007_imgsets_rel_path = ".../VOCdevkit/VOC2007/ImageSets/Main/" -pascal_voc2007_annotations_rel_path = ".../VOCdevkit/VOC2007/Annotations/" +pascal_voc2007_jpgimg_rel_path = "../VOCdevkit/VOC2007/JPEGImages/" +pascal_voc2007_imgsets_rel_path = 
"../VOCdevkit/VOC2007/ImageSets/Main/" +pascal_voc2007_annotations_rel_path = "../VOCdevkit/VOC2007/Annotations/" +pascal_voc2007_proposals_rel_path = "../selective_search_data/" abs_path = os.path.dirname(os.path.abspath(__file__)) cls_file_path = os.path.join(abs_path, "class_map.txt") @@ -47,9 +49,6 @@ def format_roi(cls_index, xmin, ymin, xmax, ymax, img_file_path): scale_y = (1.0 * pad_height) / img_height min_scale = min(scale_x, scale_y) - if round(img_width * min_scale) != pad_width and round(img_height * min_scale) != pad_height: - import pdb; pdb.set_trace() - new_width = round(img_width * min_scale) new_height = round(img_height * min_scale) assert(new_width == pad_width or new_height == pad_height) @@ -87,7 +86,7 @@ def format_roi(cls_index, xmin, ymin, xmax, ymax, img_file_path): def create_mappings(train, skip_difficult): file_prefix = "trainval" if train else "test" - img_map_input = "../VOCdevkit/VOC2007/ImageSets/Main/{}.txt".format(file_prefix) + img_map_input = "{}.txt".format(file_prefix) img_map_output = "{}2007.txt".format(file_prefix) roi_map_output = "{}2007_rois_{}_{}{}.txt".format( file_prefix, @@ -95,11 +94,13 @@ def create_mappings(train, skip_difficult): "pad" if use_pad_scale else "noPad", "_skipDif" if skip_difficult else "") size_map_output = "{}_size_file2007.txt".format(file_prefix) + proposals_output = "{}2007_proposals.txt".format(file_prefix) - in_map_file_path = os.path.join(abs_path, img_map_input) + in_map_file_path = os.path.join(abs_path, pascal_voc2007_imgsets_rel_path, img_map_input) out_map_file_path = os.path.join(abs_path, img_map_output) roi_file_path = os.path.join(abs_path, roi_map_output) size_file_path = os.path.join(abs_path, size_map_output) + proposals_file_path = os.path.join(abs_path, proposals_output) class_map_file_path = os.path.join(abs_path, "class_map.txt") # write class map file @@ -115,11 +116,13 @@ def create_mappings(train, skip_difficult): input_lines = input_file.readlines() counter = 0 + img_numbers = [] with open(out_map_file_path, 'w') as img_file: with open(roi_file_path, 'w') as roi_file: with open(size_file_path, 'w') as size_file: for in_line in input_lines: img_number = in_line.strip() + img_numbers.append(img_number) img_file_path = "{}{}.jpg".format(pascal_voc2007_jpgimg_rel_path, img_number) img_line = "{}\t{}\t0\n".format(counter, img_file_path) img_file.write(img_line) @@ -164,6 +167,31 @@ def create_mappings(train, skip_difficult): for cls in classes: cls_file.write("{}\t{}\n".format(cls, class_dict[cls])) + if not skip_difficult: # proposals are the same and need to be processed only once + try: + # convert selective search proposals from matlab to CNTK text format + print("Converting matlab proposal file to CNTK format ({})".format(proposals_file_path)) + proposal_input = 'voc_2007_{}.mat'.format(file_prefix) + in_ss_file_path = os.path.join(abs_path, pascal_voc2007_proposals_rel_path, proposal_input) + raw = sio.loadmat(in_ss_file_path) + boxes = raw['boxes'][0] + images = raw['images'] + + with open(proposals_file_path, 'w') as prop_file: + for i in range(len(img_numbers)): + img_number = img_numbers[i] + img_name = images[i,0][0] + assert img_number == img_name + + box_coords = boxes[i] + prop_line = "{} |proposals ".format(i) + for c in range(box_coords.shape[0]): + prop_line += ' ' + ' '.join(str(x) for x in box_coords[c]) + + prop_file.write(prop_line + '\n') + except: + print("Warning: error converting selective search proposals from matlab to CNTK text format") + if __name__ == '__main__': 
    create_mappings(True, skip_difficult=True)
    create_mappings(False, skip_difficult=True)
diff --git a/Examples/Image/Detection/DetectionDemo.py b/Examples/Image/Detection/DetectionDemo.py
index 396b947f6..2c6c3e510 100644
--- a/Examples/Image/Detection/DetectionDemo.py
+++ b/Examples/Image/Detection/DetectionDemo.py
@@ -4,17 +4,39 @@
 # for full license information.
 # ==============================================================================
 
-import os
+import os, sys
 import numpy as np
 import utils.od_utils as od
 from utils.config_helpers import merge_configs
 
+available_detectors = ['FastRCNN', 'FasterRCNN']
+
+def get_detector_name(args):
+    detector_name = None
+    default_detector = 'FasterRCNN'
+    if len(args) != 2:
+        print("Please provide a detector name as the single argument. Usage:")
+        print("    python DetectionDemo.py <detector_name>")
+        print("Available detectors: {}".format(available_detectors))
+    else:
+        detector_name = args[1]
+        if not any(detector_name == x for x in available_detectors):
+            print("Unknown detector: {}.".format(detector_name))
+            print("Available detectors: {}".format(available_detectors))
+            detector_name = None
+
+    if detector_name is None:
+        print("Using default detector: {}".format(default_detector))
+        return default_detector
+    else:
+        return detector_name
+
 def get_configuration(detector_name):
     # load configs for detector, base network and data set
     if detector_name == "FastRCNN":
-        from FastRCNN.config import cfg as detector_cfg
+        from FastRCNN.FastRCNN_config import cfg as detector_cfg
    elif detector_name == "FasterRCNN":
-        from FasterRCNN.config import cfg as detector_cfg
+        from FasterRCNN.FasterRCNN_config import cfg as detector_cfg
     else:
         print('Unknown detector: {}'.format(detector_name))
 
@@ -29,7 +51,9 @@ def get_configuration(detector_name):
 
 if __name__ == '__main__':
     # Currently supported detectors: 'FastRCNN', 'FasterRCNN'
-    cfg = get_configuration('FasterRCNN')
+    args = sys.argv
+    detector_name = get_detector_name(args)
+    cfg = get_configuration(detector_name)
 
     # train and test
     eval_model = od.train_object_detector(cfg)
@@ -40,7 +64,7 @@ if __name__ == '__main__':
     print('Mean AP = {:.4f}'.format(np.nanmean(list(eval_results.values()))))
 
     # detect objects in single image
-    img_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), r"..\DataSets\Grocery\testImages\WIN_20160803_11_28_42_Pro.jpg")
+    img_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), r"../DataSets/Grocery/testImages/WIN_20160803_11_28_42_Pro.jpg")
     regressed_rois, cls_probs = od.evaluate_single_image(eval_model, img_path, cfg)
     bboxes, labels, scores = od.filter_results(regressed_rois, cls_probs, cfg)
diff --git a/Examples/Image/Detection/FastRCNN/BrainScript/A2_RunWithPyModel.py b/Examples/Image/Detection/FastRCNN/BrainScript/A2_RunWithPyModel.py
index aefecdbc7..5b17991ec 100644
--- a/Examples/Image/Detection/FastRCNN/BrainScript/A2_RunWithPyModel.py
+++ b/Examples/Image/Detection/FastRCNN/BrainScript/A2_RunWithPyModel.py
@@ -48,7 +48,7 @@ momentum_time_constant = p.cntk_momentum_time_constant
 # model specific variables (only AlexNet for now)
 base_model = "AlexNet"
 if base_model == "AlexNet":
-    model_file = "../../../../../../../PretrainedModels/AlexNet_ImageNet_CNTK.model"
+    model_file = "../../../../../../../../PretrainedModels/AlexNet.model"
     feature_node_name = "features"
     last_conv_node_name = "conv5.y"
     pool_node_name = "pool3"
diff --git a/Examples/Image/Detection/FastRCNN/BrainScript/CNTK_FastRCNN_Eval.ipynb
b/Examples/Image/Detection/FastRCNN/BrainScript/CNTK_FastRCNN_Eval.ipynb index 31cb29ac2..1fb41c751 100644 --- a/Examples/Image/Detection/FastRCNN/BrainScript/CNTK_FastRCNN_Eval.ipynb +++ b/Examples/Image/Detection/FastRCNN/BrainScript/CNTK_FastRCNN_Eval.ipynb @@ -73,22 +73,22 @@ " cntk.device.try_set_default_device(cntk.device.cpu()) \n", " else:\n", " cntk.device.try_set_default_device(cntk.device.gpu(0))\n", - " sys.path.append(os.path.join(*\"../../../../Tests/EndToEndTests/CNTKv2Python/Examples\".split(\"/\")))\n", + " sys.path.append(os.path.join(*\"../../../../../Tests/EndToEndTests/CNTKv2Python/Examples\".split(\"/\")))\n", " import prepare_test_data as T\n", " T.prepare_Grocery_data()\n", " T.prepare_fastrcnn_grocery_100_model()\n", "\n", "#Make sure the grocery dataset is installed \n", - "sys.path.append('../../DataSets/Grocery')\n", + "sys.path.append('../../../DataSets/Grocery')\n", "from install_grocery import download_grocery_data\n", "download_grocery_data()\n", "\n", "# Make sure the FRCNN model exists - check if the model was trained and exists, if not - download the existing model\n", "\n", - "sys.path.append('../../../../PretrainedModels')\n", + "sys.path.append('../../../../../PretrainedModels')\n", "from download_model import download_model_by_name\n", "download_model_by_name(\"Fast-RCNN_grocery100\")\n", - "model_path = '../../../../PretrainedModels/Fast-RCNN_grocery100.model'\n" + "model_path = '../../../../../PretrainedModels/Fast-RCNN_grocery100.model'\n" ] }, { @@ -233,7 +233,7 @@ " img = cv2.imread(image_path)\n", " return resize_and_pad(img, width, height, pad_value), img\n", "\n", - "test_image_path = r\"../../DataSets/Grocery/testImages/WIN_20160803_11_28_42_Pro.jpg\"\n", + "test_image_path = r\"../../../DataSets/Grocery/testImages/WIN_20160803_11_28_42_Pro.jpg\"\n", "(test_img, test_img_model_arg), original_img = load_image_and_scale(test_image_path, image_width, image_height)\n", "\n", "plt.imshow(cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB))\n", diff --git a/Examples/Image/Detection/FastRCNN/BrainScript/cntk_helpers.py b/Examples/Image/Detection/FastRCNN/BrainScript/cntk_helpers.py index f85d1bdef..435d6317d 100644 --- a/Examples/Image/Detection/FastRCNN/BrainScript/cntk_helpers.py +++ b/Examples/Image/Detection/FastRCNN/BrainScript/cntk_helpers.py @@ -6,7 +6,7 @@ from __future__ import print_function from builtins import str -import pdb, sys, os, time +import sys, os, time import numpy as np import selectivesearch from easydict import EasyDict diff --git a/Examples/Image/Detection/FastRCNN/config.py b/Examples/Image/Detection/FastRCNN/FastRCNN_config.py similarity index 51% rename from Examples/Image/Detection/FastRCNN/config.py rename to Examples/Image/Detection/FastRCNN/FastRCNN_config.py index dbd29e506..ecc335059 100644 --- a/Examples/Image/Detection/FastRCNN/config.py +++ b/Examples/Image/Detection/FastRCNN/FastRCNN_config.py @@ -17,7 +17,7 @@ __C.TRAIN = edict() # If set to 'True' training will be skipped if a trained model exists already __C.CNTK.MAKE_MODE = True -# set to 'True' to use determininistic algorithms +# set to 'True' to use deterministic algorithms __C.CNTK.FORCE_DETERMINISTIC = False # set to 'True' to run only a single epoch __C.CNTK.FAST_MODE = False @@ -28,17 +28,14 @@ __C.CNTK.GRAPH_TYPE = "png" # "png" or "pdf" # Learning parameters __C.CNTK.L2_REG_WEIGHT = 0.0005 __C.CNTK.MOMENTUM_PER_MB = 0.9 -__C.CNTK.MAX_EPOCHS = 15 # use more epochs and more ROIs (NUM_ROI_PROPOSALS) for better results -__C.CNTK.LR_FACTOR = 1.0 
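+# Illustrative note (an editorial sketch, not part of the original commit):
+# LR_PER_SAMPLE below is a per-epoch schedule, e.g.
+# [0.001] * 10 + [0.0001] * 10 + [0.00001] expands to 1e-3 for epochs 1-10,
+# 1e-4 for epochs 11-20 and 1e-5 for any remaining epochs; the training
+# scripts are expected to scale each entry by LR_FACTOR before use.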
+__C.CNTK.MAX_EPOCHS = 20 +__C.CNTK.LR_FACTOR = 10.0 # 10.0 is used for the Grocery example data. Start with 1.0 for other data sets. __C.CNTK.LR_PER_SAMPLE = [0.001] * 10 + [0.0001] * 10 + [0.00001] # The learning rate multiplier for all bias weights __C.CNTK.BIAS_LR_MULT = 2.0 # Number of regions of interest [ROIs] proposals -__C.NUM_ROI_PROPOSALS = 500 # use 2000 or more for good results -# minimum width and height for proposals in pixels -__C.PROPOSALS_MIN_W = 20 -__C.PROPOSALS_MIN_H = 20 +__C.NUM_ROI_PROPOSALS = 200 # use 2000 or more for good results # the minimum IoU (overlap) of a proposal to qualify for training regression targets __C.BBOX_THRESH = 0.5 @@ -53,7 +50,7 @@ __C.IMAGE_WIDTH = 850 __C.IMAGE_HEIGHT = 850 # Use horizontally-flipped images during training? -__C.TRAIN.USE_FLIPPED = False +__C.TRAIN.USE_FLIPPED = True # If set to 'True' conv layers weights from the base model will be trained, too __C.TRAIN_CONV_LAYERS = True # Sigma parameter for smooth L1 loss in the RPN and the detector (DET) @@ -65,13 +62,52 @@ __C.RESULTS_NMS_THRESHOLD = 0.5 __C.RESULTS_NMS_CONF_THRESHOLD = 0.0 # Enable plotting of results generally / also plot background boxes / also plot unregressed boxes -__C.VISUALIZE_RESULTS = True +__C.VISUALIZE_RESULTS = False __C.DRAW_NEGATIVE_ROIS = False __C.DRAW_UNREGRESSED_ROIS = False # only for plotting results: boxes with a score lower than this threshold will be considered background __C.RESULTS_BGR_PLOT_THRESHOLD = 0.1 +# If set to True the following two parameters need to point to the corresponding files that contain the proposals: +# __C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE +# __C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE +__C.USE_PRECOMPUTED_PROPOSALS = False + +# roi proposal parameters for selective search, grid and filtering +# The first three parameters are for dlib's selective search. For details see +# http://dlib.net/dlib/image_transforms/segment_image_abstract.h.html#find_candidate_object_locations +# +# The basic segmentation is performed kvals.size() times. The k parameter is set (from, to, step_size) +__C.roi_ss_kvals = (10, 500, 5) +# When doing the basic segmentations prior to any box merging, all +# rectangles that have an area < min_size are discarded. Therefore, all outputs and +# subsequent merged rectangles are built out of rectangles that contain at +# least min_size pixels. Note that setting min_size to a smaller value than +# you might otherwise be interested in using can be useful since it allows a +# larger number of possible merged boxes to be created +__C.roi_ss_min_size = 9 +# There are max_merging_iterations rounds of neighboring blob merging. +# Therefore, this parameter has some effect on the number of output rectangles +# you get, with larger values of the parameter giving more output rectangles. 
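+# As a minimal sketch (assuming the dlib Python bindings and an RGB numpy
+# image `img`, e.g. loaded with OpenCV and converted via cv2.cvtColor), the
+# selective search parameters above map onto dlib roughly as follows:
+#   import dlib
+#   rects = []
+#   dlib.find_candidate_object_locations(img, rects, kvals=(10, 500, 5),
+#                                        min_size=9, max_merging_iterations=30)
+#   boxes = [(r.left(), r.top(), r.right(), r.bottom()) for r in rects]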
+# Hint: set __C.CNTK.DEBUG_OUTPUT=True to see the number of ROIs from selective search +__C.roi_ss_mm_iterations = 30 +# +# image size used for ROI generation +__C.roi_ss_img_size = 200 +# minimum relative width/height of an ROI +__C.roi_min_side_rel = 0.01 +# maximum relative width/height of an ROI +__C.roi_max_side_rel = 1.0 +# minimum relative area of an ROI +__C.roi_min_area_rel = 0.0001 +# maximum relative area of an ROI +__C.roi_max_area_rel = 0.9 +# maximum aspect ratio of an ROI vertically and horizontally +__C.roi_max_aspect_ratio = 4.0 +# aspect ratios of ROIs for uniform grid ROIs +__C.roi_grid_aspect_ratios = [1.0, 2.0, 0.5] + # For reproducibility __C.RND_SEED = 3 diff --git a/Examples/Image/Detection/FastRCNN/FastRCNN_eval.py b/Examples/Image/Detection/FastRCNN/FastRCNN_eval.py index d651bf831..968ccf5eb 100644 --- a/Examples/Image/Detection/FastRCNN/FastRCNN_eval.py +++ b/Examples/Image/Detection/FastRCNN/FastRCNN_eval.py @@ -5,16 +5,15 @@ # ============================================================================== import os -import numpy as np -from matplotlib.pyplot import imsave import cv2 +import numpy as np import cntk from cntk import input_variable, Axis from utils.map_helpers import evaluate_detections from utils.plot_helpers import resize_and_pad from utils.rpn.bbox_transform import regress_rois from utils.od_mb_source import ObjectDetectionMinibatchSource -from utils.proposal_helpers import ProposalProvider, compute_proposals, compute_image_stats +from utils.proposal_helpers import ProposalProvider, compute_image_stats, compute_proposals class FastRCNN_Evaluator: def __init__(self, eval_model, cfg): @@ -26,9 +25,7 @@ class FastRCNN_Evaluator: roi_proposals = input_variable((cfg.NUM_ROI_PROPOSALS, 4), dynamic_axes=[Axis.default_batch_axis()], name="roi_proposals") self._eval_model = eval_model(image_input, roi_proposals) - self._min_w = cfg['PROPOSALS_MIN_W'] - self._min_h = cfg['PROPOSALS_MIN_H'] - self._num_proposals = cfg['NUM_ROI_PROPOSALS'] + self._cfg = cfg def process_image(self, img_path): out_cls_pred, out_rpn_rois, out_bbox_regr, dims = self.process_image_detailed(img_path) @@ -41,8 +38,6 @@ class FastRCNN_Evaluator: img = cv2.imread(img_path) _, cntk_img_input, dims = resize_and_pad(img, self._img_shape[2], self._img_shape[1]) - #import pdb; pdb.set_trace() - # compute ROI proposals and apply scaling and padding to them # [target_w, target_h, img_width, img_height, top, bottom, left, right, scale_factor] img_stats = compute_image_stats(len(img[0]), len(img), self._img_shape[2], self._img_shape[1]) @@ -50,7 +45,8 @@ class FastRCNN_Evaluator: top = img_stats[4] left = img_stats[6] - proposals = compute_proposals(img, self._num_proposals, self._min_w, self._min_h) + num_proposals = self._cfg['NUM_ROI_PROPOSALS'] + proposals = compute_proposals(img, num_proposals, self._cfg) proposals = proposals * scale_factor proposals += (left, top, left, top) @@ -76,7 +72,18 @@ def compute_test_set_aps(eval_model, cfg): frcn_eval = eval_model(image_input, roi_proposals) # Create the minibatch source - proposal_provider = ProposalProvider.fromconfig(cfg) + if cfg.USE_PRECOMPUTED_PROPOSALS: + try: + cfg["DATA"].TEST_PRECOMPUTED_PROPOSALS_FILE = os.path.join(cfg["DATA"].MAP_FILE_PATH, cfg["DATA"].TEST_PRECOMPUTED_PROPOSALS_FILE) + proposal_provider = ProposalProvider.fromfile(cfg["DATA"].TEST_PRECOMPUTED_PROPOSALS_FILE, cfg.NUM_ROI_PROPOSALS) + except: + print("To use precomputed proposals please specify the following parameters in your configuration:\n" + 
"__C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE\n" + "__C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE") + exit(-1) + else: + proposal_provider = ProposalProvider.fromconfig(cfg) + minibatch_source = ObjectDetectionMinibatchSource( cfg["DATA"].TEST_MAP_FILE, cfg["DATA"].TEST_ROI_FILE, diff --git a/Examples/Image/Detection/FastRCNN/FastRCNN_train.py b/Examples/Image/Detection/FastRCNN/FastRCNN_train.py index d43335469..377874189 100644 --- a/Examples/Image/Detection/FastRCNN/FastRCNN_train.py +++ b/Examples/Image/Detection/FastRCNN/FastRCNN_train.py @@ -58,9 +58,17 @@ def prepare(cfg, use_arg_parser=True): cfg["DATA"].TEST_MAP_FILE = os.path.join(data_path, cfg["DATA"].TEST_MAP_FILE) cfg["DATA"].TRAIN_ROI_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_ROI_FILE) cfg["DATA"].TEST_ROI_FILE = os.path.join(data_path, cfg["DATA"].TEST_ROI_FILE) + if cfg.USE_PRECOMPUTED_PROPOSALS: + try: + cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE) + except: + print("To use precomputed proposals please specify the following parameters in your configuration:\n" + "__C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE\n" + "__C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE") + exit(-1) cfg['MODEL_PATH'] = os.path.join(cfg.OUTPUT_PATH, "fast_rcnn_eval_{}.model".format(cfg["MODEL"].BASE_MODEL)) - cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "PretrainedModels", + cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "..", "PretrainedModels", cfg["MODEL"].BASE_MODEL_FILE) cfg["DATA"].CLASSES = parse_class_map_file(cfg["DATA"].CLASS_MAP_FILE) @@ -300,7 +308,11 @@ def train_fast_rcnn(cfg): log_number_of_parameters(loss) # Create the minibatch source - proposal_provider = ProposalProvider.fromconfig(cfg) + if cfg.USE_PRECOMPUTED_PROPOSALS: + proposal_provider = ProposalProvider.fromfile(cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE, cfg.NUM_ROI_PROPOSALS) + else: + proposal_provider = ProposalProvider.fromconfig(cfg) + od_minibatch_source = ObjectDetectionMinibatchSource( cfg["DATA"].TRAIN_MAP_FILE, cfg["DATA"].TRAIN_ROI_FILE, max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE, diff --git a/Examples/Image/Detection/FastRCNN/README.md b/Examples/Image/Detection/FastRCNN/README.md index 879338e75..97cc69e6d 100644 --- a/Examples/Image/Detection/FastRCNN/README.md +++ b/Examples/Image/Detection/FastRCNN/README.md @@ -5,7 +5,7 @@ This folder contains an end-to-end solution for using Fast R-CNN to perform object detection. The original research paper for Fast R-CNN can be found at [https://arxiv.org/abs/1504.08083](https://arxiv.org/abs/1504.08083). Base models that are supported by the current configuration are AlexNet and VGG16. -Two image set that are preconfigured are Pascal VOC 2007 and Grocery. +Two image sets that are preconfigured are Pascal VOC 2007 and Grocery. Other base models or image sets can be used by adding a configuration file similar to the examples in `utils/configs` and importing it in `run_fast_rcnn.py`. ## Running the example @@ -15,14 +15,14 @@ Other base models or image sets can be used by adding a configuration file simil To run Fast R-CNN you need a CNTK Python environment. Install the following additional packages: ``` -pip install opencv-python easydict pyyaml future +pip install opencv-python easydict pyyaml dlib ``` The code uses prebuild Cython modules for parts of the region proposal network (see `Examples/Image/Detection/utils/cython_modules`). 
These binaries are contained in the repository for Python 3.5 under Windows and Python 3.4 under Linux. If you require other versions please follow the instructions at [https://github.com/rbgirshick/py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn#installation-sufficient-for-the-demo). -If you want to use the debug output you need to run ' pip install pydot_ng) ([website](https://pypi.python.org/pypi/pydot-ng)) and install [graphviz](http://graphviz.org/) (GraphViz executable has to be in the system’s PATH) to be able to plot the CNTK graphs. +If you want to use the debug output you need to run `pip install pydot_ng` ([website](https://pypi.python.org/pypi/pydot-ng)) and install [graphviz](http://graphviz.org/) (GraphViz executable has to be in the system’s PATH) to be able to plot the CNTK graphs. ### Getting the data and AlexNet model @@ -90,7 +90,7 @@ and run `python run_fast_rcnn.py` to train and evaluate Fast R-CNN on your data. ### Parameters -All options and parameters are in `config.py` in the `FastRCNN` folder and all of them are explained there. These include +All options and parameters are in `FastRCNN_config.py` in the `FastRCNN` folder and all of them are explained there. These include ``` # learning parameters @@ -99,9 +99,10 @@ __C.CNTK.LR_PER_SAMPLE = [0.001] * 10 + [0.0001] * 10 + [0.00001] # Number of regions of interest [ROIs] proposals __C.NUM_ROI_PROPOSALS = 1000 -# minimum width and height for proposals in pixels -__C.PROPOSALS_MIN_W = 20 -__C.PROPOSALS_MIN_H = 20 +# minimum relative width/height of an ROI +__C.roi_min_side_rel = 0.01 +# maximum relative width/height of an ROI +__C.roi_max_side_rel = 1.0 ``` ### Fast R-CNN CNTK code @@ -110,4 +111,4 @@ Most of the code is in `FastRCNN_train.py` and `FastRCNN_eval.py` (and `Examples ### Algorithm -All details regarding the Fast R-CNN algortihm can be found in the original research paper: [https://arxiv.org/abs/1504.08083](https://arxiv.org/abs/1504.08083). +All details regarding the Fast R-CNN algorithm can be found in the original research paper: [https://arxiv.org/abs/1504.08083](https://arxiv.org/abs/1504.08083). diff --git a/Examples/Image/Detection/FastRCNN/install_data_and_model.py b/Examples/Image/Detection/FastRCNN/install_data_and_model.py index 63d728f64..dbed00fc9 100644 --- a/Examples/Image/Detection/FastRCNN/install_data_and_model.py +++ b/Examples/Image/Detection/FastRCNN/install_data_and_model.py @@ -26,7 +26,7 @@ if __name__ == '__main__': sys.path.append(os.path.join(base_folder, "..", "..", "..", "..", "PretrainedModels")) from download_model import download_model_by_name - download_model_by_name("AlexNet_ImageNet_CNTK") + download_model_by_name("AlexNet_ImageNet_Caffe") print("Creating mapping files for Grocery data set..") create_grocery_mappings(base_folder) diff --git a/Examples/Image/Detection/FastRCNN/install_fastrcnn.py b/Examples/Image/Detection/FastRCNN/install_fastrcnn.py deleted file mode 100644 index b541e9ca4..000000000 --- a/Examples/Image/Detection/FastRCNN/install_fastrcnn.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -# Licensed under the MIT license. See LICENSE.md file in the project root -# for full license information. 
-# ============================================================================== - -from __future__ import print_function -import zipfile -import os, sys - -base_folder = os.path.dirname(os.path.abspath(__file__)) - -sys.path.append(os.path.join(base_folder, "..", "..", "DataSets", "Grocery")) -from install_grocery import download_grocery_data -download_grocery_data() - -sys.path.append(os.path.join(base_folder, "..", "..", "..", "..", "PretrainedModels")) -from download_model import download_model_by_name -download_model_by_name("AlexNet_ImageNet_CNTK") diff --git a/Examples/Image/Detection/FastRCNN/run_fast_rcnn.py b/Examples/Image/Detection/FastRCNN/run_fast_rcnn.py index 905d7c912..287513702 100644 --- a/Examples/Image/Detection/FastRCNN/run_fast_rcnn.py +++ b/Examples/Image/Detection/FastRCNN/run_fast_rcnn.py @@ -13,7 +13,7 @@ from utils.plot_helpers import plot_test_set_results def get_configuration(): # load configs for detector, base network and data set - from config import cfg as detector_cfg + from FastRCNN_config import cfg as detector_cfg # for VGG16 base model use: from utils.configs.VGG16_config import cfg as network_cfg # for AlexNet base model use: from utils.configs.AlexNet_config import cfg as network_cfg from utils.configs.AlexNet_config import cfg as network_cfg diff --git a/Examples/Image/Detection/FasterRCNN/FasterRCNN.py b/Examples/Image/Detection/FasterRCNN/FasterRCNN.py deleted file mode 100644 index 8071a3309..000000000 --- a/Examples/Image/Detection/FasterRCNN/FasterRCNN.py +++ /dev/null @@ -1,750 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -# Licensed under the MIT license. See LICENSE.md file in the project root -# for full license information. -# ============================================================================== - -from __future__ import print_function -import numpy as np -import os, sys -import argparse -import yaml # pip install pyyaml -import easydict # pip install easydict -import cntk -import easydict -from cntk import Trainer, UnitType, load_model, Axis, input_variable, parameter, times, combine, \ - softmax, roipooling, plus, element_times, CloneMethod, alias, Communicator, reduce_sum -from cntk.core import Value -from cntk.io import MinibatchData -from cntk.initializer import normal -from cntk.layers import placeholder, Constant, Sequential -from cntk.learners import momentum_sgd, learning_rate_schedule, momentum_schedule -from cntk.logging import log_number_of_parameters, ProgressPrinter -from cntk.logging.graph import find_by_name, plot -from cntk.losses import cross_entropy_with_softmax -from cntk.metrics import classification_error -from _cntk_py import force_deterministic_algorithms - -abs_path = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(os.path.join(abs_path, "..")) -from utils.rpn.rpn_helpers import create_rpn, create_proposal_target_layer -from utils.rpn.cntk_smoothL1_loss import SmoothL1Loss -from utils.map.map_helpers import evaluate_detections -from utils.annotations.annotations_helper import parse_class_map_file -from config import cfg -from od_mb_source import ObjectDetectionMinibatchSource -from cntk_helpers import regress_rois - -############################################################### -############################################################### -mb_size = 1 -image_width = cfg["CNTK"].IMAGE_WIDTH -image_height = cfg["CNTK"].IMAGE_HEIGHT -num_channels = 3 - -# dims_input -- (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height) 
-dims_input_const = MinibatchData(Value(batch=np.asarray( - [image_width, image_height, image_width, image_height, image_width, image_height], dtype=np.float32)), 1, 1, False) - -# Color used for padding and normalization (Caffe model uses [102.98010, 115.94650, 122.77170]) -img_pad_value = [103, 116, 123] if cfg["CNTK"].BASE_MODEL == "VGG16" else [114, 114, 114] -normalization_const = Constant([[[103]], [[116]], [[123]]]) if cfg["CNTK"].BASE_MODEL == "VGG16" else Constant([[[114]], [[114]], [[114]]]) - -globalvars = {} -globalvars['output_path'] = os.path.join(abs_path, "Output") - -# dataset specific parameters -map_file_path = os.path.join(abs_path, cfg["CNTK"].MAP_FILE_PATH) -globalvars['class_map_file'] = cfg["CNTK"].CLASS_MAP_FILE -globalvars['train_map_file'] = cfg["CNTK"].TRAIN_MAP_FILE -globalvars['test_map_file'] = cfg["CNTK"].TEST_MAP_FILE -globalvars['train_roi_file'] = cfg["CNTK"].TRAIN_ROI_FILE -globalvars['test_roi_file'] = cfg["CNTK"].TEST_ROI_FILE -epoch_size = cfg["CNTK"].NUM_TRAIN_IMAGES -num_test_images = cfg["CNTK"].NUM_TEST_IMAGES - -# model specific parameters -model_folder = os.path.join(abs_path, "..", "..", "..", "..", "PretrainedModels") -base_model_file = os.path.join(model_folder, cfg["CNTK"].BASE_MODEL_FILE) -feature_node_name = cfg["CNTK"].FEATURE_NODE_NAME -last_conv_node_name = cfg["CNTK"].LAST_CONV_NODE_NAME -start_train_conv_node_name = cfg["CNTK"].START_TRAIN_CONV_NODE_NAME -pool_node_name = cfg["CNTK"].POOL_NODE_NAME -last_hidden_node_name = cfg["CNTK"].LAST_HIDDEN_NODE_NAME -roi_dim = cfg["CNTK"].ROI_DIM -############################################################### -############################################################### - -def set_global_vars(use_arg_parser = True): - data_path = map_file_path - - # set and overwrite learning parameters - globalvars['rpn_lr_factor'] = cfg["CNTK"].RPN_LR_FACTOR - globalvars['frcn_lr_factor'] = cfg["CNTK"].FRCN_LR_FACTOR - globalvars['e2e_lr_factor'] = cfg["CNTK"].E2E_LR_FACTOR - globalvars['momentum_per_mb'] = cfg["CNTK"].MOMENTUM_PER_MB - globalvars['e2e_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].E2E_MAX_EPOCHS - globalvars['rpn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].RPN_EPOCHS - globalvars['frcn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].FRCN_EPOCHS - globalvars['rnd_seed'] = cfg.RNG_SEED - globalvars['train_conv'] = cfg["CNTK"].TRAIN_CONV_LAYERS - globalvars['train_e2e'] = cfg["CNTK"].TRAIN_E2E - - - if use_arg_parser: - parser = argparse.ArgumentParser() - parser.add_argument('-datadir', '--datadir', help='Data directory where the ImageNet dataset is located', - required=False, default=data_path) - parser.add_argument('-outputdir', '--outputdir', help='Output directory for checkpoints and models', - required=False, default=None) - parser.add_argument('-logdir', '--logdir', help='Log file', - required=False, default=None) - parser.add_argument('-n', '--num_epochs', help='Total number of epochs to train', type=int, - required=False, default=cfg["CNTK"].E2E_MAX_EPOCHS) - parser.add_argument('-m', '--minibatch_size', help='Minibatch size', type=int, - required=False, default=mb_size) - parser.add_argument('-e', '--epoch_size', help='Epoch size', type=int, - required=False, default=epoch_size) - parser.add_argument('-q', '--quantized_bits', help='Number of quantized bits used for gradient aggregation', type=int, - required=False, default='32') - parser.add_argument('-r', '--restart', - help='Indicating whether to restart from scratch (instead of restart from checkpoint 
file by default)', - action='store_true') - parser.add_argument('-device', '--device', type=int, help="Force to run the script on a specified device", - required=False, default=None) - parser.add_argument('-rpnLrFactor', '--rpnLrFactor', type=float, help="Scale factor for rpn lr schedule", required=False) - parser.add_argument('-frcnLrFactor', '--frcnLrFactor', type=float, help="Scale factor for frcn lr schedule", required=False) - parser.add_argument('-e2eLrFactor', '--e2eLrFactor', type=float, help="Scale factor for e2e lr schedule", required=False) - parser.add_argument('-momentumPerMb', '--momentumPerMb', type=float, help="momentum per minibatch", required=False) - parser.add_argument('-e2eEpochs', '--e2eEpochs', type=int, help="number of epochs for e2e training", required=False) - parser.add_argument('-rpnEpochs', '--rpnEpochs', type=int, help="number of epochs for rpn training", required=False) - parser.add_argument('-frcnEpochs', '--frcnEpochs', type=int, help="number of epochs for frcn training", required=False) - parser.add_argument('-rndSeed', '--rndSeed', type=int, help="the random seed", required=False) - parser.add_argument('-trainConv', '--trainConv', type=int, help="whether to train conv layers", required=False) - parser.add_argument('-trainE2E', '--trainE2E', type=int, help="whether to train e2e (otherwise 4 stage)", required=False) - - args = vars(parser.parse_args()) - - if args['rpnLrFactor'] is not None: - globalvars['rpn_lr_factor'] = args['rpnLrFactor'] - if args['frcnLrFactor'] is not None: - globalvars['frcn_lr_factor'] = args['frcnLrFactor'] - if args['e2eLrFactor'] is not None: - globalvars['e2e_lr_factor'] = args['e2eLrFactor'] - if args['momentumPerMb'] is not None: - globalvars['momentum_per_mb'] = args['momentumPerMb'] - if args['e2eEpochs'] is not None: - globalvars['e2e_epochs'] = args['e2eEpochs'] - if args['rpnEpochs'] is not None: - globalvars['rpn_epochs'] = args['rpnEpochs'] - if args['frcnEpochs'] is not None: - globalvars['frcn_epochs'] = args['frcnEpochs'] - if args['rndSeed'] is not None: - globalvars['rnd_seed'] = args['rndSeed'] - if args['trainConv'] is not None: - globalvars['train_conv'] = True if args['trainConv']==1 else False - if args['trainE2E'] is not None: - globalvars['train_e2e'] = True if args['trainE2E']==1 else False - - if args['outputdir'] is not None: - globalvars['output_path'] = args['outputdir'] - if args['logdir'] is not None: - log_dir = args['logdir'] - if args['device'] is not None: - # Setting one worker on GPU and one worker on CPU. Otherwise memory consumption is too high for a single GPU. 
- if Communicator.rank() == 0: - cntk.device.try_set_default_device(cntk.device.gpu(args['device'])) - else: - cntk.device.try_set_default_device(cntk.device.cpu()) - - if args['datadir'] is not None: - data_path = args['datadir'] - - if not os.path.isdir(data_path): - raise RuntimeError("Directory %s does not exist" % data_path) - - globalvars['class_map_file'] = os.path.join(data_path, globalvars['class_map_file']) - globalvars['train_map_file'] = os.path.join(data_path, globalvars['train_map_file']) - globalvars['test_map_file'] = os.path.join(data_path, globalvars['test_map_file']) - globalvars['train_roi_file'] = os.path.join(data_path, globalvars['train_roi_file']) - globalvars['test_roi_file'] = os.path.join(data_path, globalvars['test_roi_file']) - - if cfg["CNTK"].FORCE_DETERMINISTIC: - force_deterministic_algorithms() - np.random.seed(seed=globalvars['rnd_seed']) - globalvars['classes'] = parse_class_map_file(globalvars['class_map_file']) - globalvars['num_classes'] = len(globalvars['classes']) - - if cfg["CNTK"].DEBUG_OUTPUT: - # report args - print("Using the following parameters:") - print("Flip image : {}".format(cfg["TRAIN"].USE_FLIPPED)) - print("Train conv layers: {}".format(globalvars['train_conv'])) - print("Random seed : {}".format(globalvars['rnd_seed'])) - print("Momentum per MB : {}".format(globalvars['momentum_per_mb'])) - if globalvars['train_e2e']: - print("E2E epochs : {}".format(globalvars['e2e_epochs'])) - else: - print("RPN lr factor : {}".format(globalvars['rpn_lr_factor'])) - print("RPN epochs : {}".format(globalvars['rpn_epochs'])) - print("FRCN lr factor : {}".format(globalvars['frcn_lr_factor'])) - print("FRCN epochs : {}".format(globalvars['frcn_epochs'])) - -############################################################### -############################################################### - -def clone_model(base_model, from_node_names, to_node_names, clone_method): - from_nodes = [find_by_name(base_model, node_name) for node_name in from_node_names] - if None in from_nodes: - print("Error: could not find all specified 'from_nodes' in clone. Looking for {}, found {}" - .format(from_node_names, from_nodes)) - to_nodes = [find_by_name(base_model, node_name) for node_name in to_node_names] - if None in to_nodes: - print("Error: could not find all specified 'to_nodes' in clone. 
Looking for {}, found {}" - .format(to_node_names, to_nodes)) - input_placeholders = dict(zip(from_nodes, [placeholder() for x in from_nodes])) - cloned_net = combine(to_nodes).clone(clone_method, input_placeholders) - return cloned_net - -def clone_conv_layers(base_model): - if not globalvars['train_conv']: - conv_layers = clone_model(base_model, [feature_node_name], [last_conv_node_name], CloneMethod.freeze) - elif feature_node_name == start_train_conv_node_name: - conv_layers = clone_model(base_model, [feature_node_name], [last_conv_node_name], CloneMethod.clone) - else: - fixed_conv_layers = clone_model(base_model, [feature_node_name], [start_train_conv_node_name], - CloneMethod.freeze) - train_conv_layers = clone_model(base_model, [start_train_conv_node_name], [last_conv_node_name], - CloneMethod.clone) - conv_layers = Sequential([fixed_conv_layers, train_conv_layers]) - return conv_layers - -# Please keep in sync with Readme.md -def create_fast_rcnn_predictor(conv_out, rois, fc_layers): - # RCNN - roi_out = roipooling(conv_out, rois, cntk.MAX_POOLING, (roi_dim, roi_dim), spatial_scale=1/16.0) - fc_out = fc_layers(roi_out) - - # prediction head - W_pred = parameter(shape=(4096, globalvars['num_classes']), init=normal(scale=0.01), name="cls_score.W") - b_pred = parameter(shape=globalvars['num_classes'], init=0, name="cls_score.b") - cls_score = plus(times(fc_out, W_pred), b_pred, name='cls_score') - - # regression head - W_regr = parameter(shape=(4096, globalvars['num_classes']*4), init=normal(scale=0.001), name="bbox_regr.W") - b_regr = parameter(shape=globalvars['num_classes']*4, init=0, name="bbox_regr.b") - bbox_pred = plus(times(fc_out, W_regr), b_regr, name='bbox_regr') - - return cls_score, bbox_pred - -# Please keep in sync with Readme.md -# Defines the Faster R-CNN network model for detecting objects in images -def create_faster_rcnn_predictor(base_model_file_name, features, scaled_gt_boxes, dims_input): - # Load the pre-trained classification net and clone layers - base_model = load_model(base_model_file_name) - conv_layers = clone_conv_layers(base_model) - fc_layers = clone_model(base_model, [pool_node_name], [last_hidden_node_name], clone_method=CloneMethod.clone) - - # Normalization and conv layers - feat_norm = features - normalization_const - conv_out = conv_layers(feat_norm) - - # RPN and prediction targets - rpn_rois, rpn_losses = \ - create_rpn(conv_out, scaled_gt_boxes, dims_input, proposal_layer_param_string=cfg["CNTK"].PROPOSAL_LAYER_PARAMS) - rois, label_targets, bbox_targets, bbox_inside_weights = \ - create_proposal_target_layer(rpn_rois, scaled_gt_boxes, num_classes=globalvars['num_classes']) - - # Fast RCNN and losses - cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers) - detection_losses = create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights) - loss = rpn_losses + detection_losses - pred_error = classification_error(cls_score, label_targets, axis=1) - - return loss, pred_error - -def create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights): - # classification loss - cls_loss = cross_entropy_with_softmax(cls_score, label_targets, axis=1) - - p_cls_loss = placeholder() - p_rois = placeholder() - # The terms that are accounted for in the cls loss are those that correspond to an actual roi proposal --> do not count no-op (all-zero) rois - roi_indicator = reduce_sum(p_rois, axis=1) - cls_num_terms = reduce_sum(cntk.greater_equal(roi_indicator, 
0.0)) - cls_normalization_factor = 1.0 / cls_num_terms - normalized_cls_loss = reduce_sum(p_cls_loss) * cls_normalization_factor - - reduced_cls_loss = cntk.as_block(normalized_cls_loss, - [(p_cls_loss, cls_loss), (p_rois, rois)], - 'Normalize', 'norm_cls_loss') - - # regression loss - p_bbox_pred = placeholder() - p_bbox_targets = placeholder() - p_bbox_inside_weights = placeholder() - bbox_loss = SmoothL1Loss(cfg["CNTK"].SIGMA_DET_L1, p_bbox_pred, p_bbox_targets, p_bbox_inside_weights, 1.0) - # The bbox loss is normalized by the batch size - bbox_normalization_factor = 1.0 / cfg["TRAIN"].BATCH_SIZE - normalized_bbox_loss = reduce_sum(bbox_loss) * bbox_normalization_factor - - reduced_bbox_loss = cntk.as_block(normalized_bbox_loss, - [(p_bbox_pred, bbox_pred), (p_bbox_targets, bbox_targets), (p_bbox_inside_weights, bbox_inside_weights)], - 'SmoothL1Loss', 'norm_bbox_loss') - - detection_losses = plus(reduced_cls_loss, reduced_bbox_loss, name="detection_losses") - - return detection_losses - -def create_eval_model(model, image_input, dims_input, rpn_model=None): - print("creating eval model") - conv_layers = clone_model(model, [feature_node_name], [last_conv_node_name], CloneMethod.freeze) - conv_out = conv_layers(image_input) - - model_with_rpn = model if rpn_model is None else rpn_model - rpn = clone_model(model_with_rpn, [last_conv_node_name, "dims_input"], ["rpn_rois"], CloneMethod.freeze) - rpn_rois = rpn(conv_out, dims_input) - - roi_fc_layers = clone_model(model, [last_conv_node_name, "rpn_target_rois"], ["cls_score", "bbox_regr"], CloneMethod.freeze) - pred_net = roi_fc_layers(conv_out, rpn_rois) - cls_score = pred_net.outputs[0] - bbox_regr = pred_net.outputs[1] - - if cfg["TRAIN"].BBOX_NORMALIZE_TARGETS and cfg["TRAIN"].BBOX_NORMALIZE_TARGETS_PRECOMPUTED: - num_boxes = int(bbox_regr.shape[1] / 4) - bbox_normalize_means = np.array(cfg["TRAIN"].BBOX_NORMALIZE_MEANS * num_boxes) - bbox_normalize_stds = np.array(cfg["TRAIN"].BBOX_NORMALIZE_STDS * num_boxes) - bbox_regr = plus(element_times(bbox_regr, bbox_normalize_stds), bbox_normalize_means, name='bbox_regr') - - cls_pred = softmax(cls_score, axis=1, name='cls_pred') - eval_model = combine([cls_pred, rpn_rois, bbox_regr]) - - return eval_model - -def train_model(image_input, roi_input, dims_input, loss, pred_error, - lr_per_sample, mm_schedule, l2_reg_weight, epochs_to_train, - rpn_rois_input=None, buffered_rpn_proposals=None): - if isinstance(loss, cntk.Variable): - loss = combine([loss]) - - params = loss.parameters - biases = [p for p in params if '.b' in p.name or 'b' == p.name] - others = [p for p in params if not p in biases] - bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT - - if cfg["CNTK"].DEBUG_OUTPUT: - print("biases") - for p in biases: print(p) - print("others") - for p in others: print(p) - print("bias_lr_mult: {}".format(bias_lr_mult)) - - # Instantiate the learners and the trainer object - lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample) - learner = momentum_sgd(others, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight, - unit_gain=False, use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT) - - bias_lr_per_sample = [v * bias_lr_mult for v in lr_per_sample] - bias_lr_schedule = learning_rate_schedule(bias_lr_per_sample, unit=UnitType.sample) - bias_learner = momentum_sgd(biases, bias_lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight, - unit_gain=False, use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT) - trainer = Trainer(None, (loss, pred_error), [learner, bias_learner]) - - 
# Get minibatches of images and perform model training - print("Training model for %s epochs." % epochs_to_train) - log_number_of_parameters(loss) - - # Create the minibatch source - od_minibatch_source = ObjectDetectionMinibatchSource( - globalvars['train_map_file'], globalvars['train_roi_file'], - max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE, - pad_width=image_width, pad_height=image_height, pad_value=img_pad_value, - randomize=True, use_flipping=cfg["TRAIN"].USE_FLIPPED, - max_images=cfg["CNTK"].NUM_TRAIN_IMAGES, - buffered_rpn_proposals=buffered_rpn_proposals) - - # define mapping from reader streams to network inputs - input_map = { - od_minibatch_source.image_si: image_input, - od_minibatch_source.roi_si: roi_input, - od_minibatch_source.dims_si: dims_input - } - - use_buffered_proposals = buffered_rpn_proposals is not None - progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs_to_train, gen_heartbeat=True) - for epoch in range(epochs_to_train): # loop over epochs - sample_count = 0 - while sample_count < epoch_size: # loop over minibatches in the epoch - data, proposals = od_minibatch_source.next_minibatch_with_proposals(min(mb_size, epoch_size-sample_count), input_map=input_map) - if use_buffered_proposals: - data[rpn_rois_input] = MinibatchData(Value(batch=np.asarray(proposals, dtype=np.float32)), 1, 1, False) - # remove dims input if no rpn is required to avoid warnings - del data[[k for k in data if '[6]' in str(k)][0]] - - trainer.train_minibatch(data) # update model with it - sample_count += trainer.previous_minibatch_sample_count # count samples processed so far - progress_printer.update_with_trainer(trainer, with_metric=True) # log progress - if sample_count % 100 == 0: - print("Processed {} samples".format(sample_count)) - - progress_printer.epoch_summary(with_metric=True) - -def compute_rpn_proposals(rpn_model, image_input, roi_input, dims_input): - num_images = cfg["CNTK"].NUM_TRAIN_IMAGES - # Create the minibatch source - od_minibatch_source = ObjectDetectionMinibatchSource( - globalvars['train_map_file'], globalvars['train_roi_file'], - max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE, - pad_width=image_width, pad_height=image_height, pad_value=img_pad_value, - max_images=num_images, - randomize=False, use_flipping=False) - - # define mapping from reader streams to network inputs - input_map = { - od_minibatch_source.image_si: image_input, - od_minibatch_source.roi_si: roi_input, - od_minibatch_source.dims_si: dims_input - } - - # setting pre- and post-nms top N to training values since buffered proposals are used for further training - test_pre = cfg["TEST"].RPN_PRE_NMS_TOP_N - test_post = cfg["TEST"].RPN_POST_NMS_TOP_N - cfg["TEST"].RPN_PRE_NMS_TOP_N = cfg["TRAIN"].RPN_PRE_NMS_TOP_N - cfg["TEST"].RPN_POST_NMS_TOP_N = cfg["TRAIN"].RPN_POST_NMS_TOP_N - - buffered_proposals = [None for _ in range(num_images)] - sample_count = 0 - while sample_count < num_images: - data = od_minibatch_source.next_minibatch(1, input_map=input_map) - output = rpn_model.eval(data) - out_dict = dict([(k.name, k) for k in output]) - out_rpn_rois = output[out_dict['rpn_rois']][0] - buffered_proposals[sample_count] = np.round(out_rpn_rois).astype(np.int16) - sample_count += 1 - if sample_count % 500 == 0: - print("Buffered proposals for {} samples".format(sample_count)) - - # resetting config values to original test values - cfg["TEST"].RPN_PRE_NMS_TOP_N = test_pre - cfg["TEST"].RPN_POST_NMS_TOP_N = test_post - - return buffered_proposals - -# Trains a 
Faster R-CNN model end-to-end -def train_faster_rcnn_e2e(base_model_file_name, debug_output=False): - # Input variables denoting features and labeled ground truth rois (as 5-tuples per roi) - image_input = input_variable((num_channels, image_height, image_width), dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name) - roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()]) - dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()]) - dims_node = alias(dims_input, name='dims_input') - - # Instantiate the Faster R-CNN prediction model and loss function - loss, pred_error = create_faster_rcnn_predictor(base_model_file_name, image_input, roi_input, dims_node) - - if debug_output: - print("Storing graphs and models to %s." % globalvars['output_path']) - plot(loss, os.path.join(globalvars['output_path'], "graph_frcn_train_e2e." + cfg["CNTK"].GRAPH_TYPE)) - - # Set learning parameters - e2e_lr_factor = globalvars['e2e_lr_factor'] - e2e_lr_per_sample_scaled = [x * e2e_lr_factor for x in cfg["CNTK"].E2E_LR_PER_SAMPLE] - mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB) - - print("Using base model: {}".format(cfg["CNTK"].BASE_MODEL)) - print("lr_per_sample: {}".format(e2e_lr_per_sample_scaled)) - - train_model(image_input, roi_input, dims_input, loss, pred_error, - e2e_lr_per_sample_scaled, mm_schedule, cfg["CNTK"].L2_REG_WEIGHT, globalvars['e2e_epochs']) - - return create_eval_model(loss, image_input, dims_input) - -# Trains a Faster R-CNN model using 4-stage alternating training -def train_faster_rcnn_alternating(base_model_file_name, debug_output=False): - ''' - 4-Step Alternating Training scheme from the Faster R-CNN paper: - - # Create initial network, only rpn, without detection network - # --> train only the rpn (and conv3_1 and up for VGG16) - # buffer region proposals from rpn - # Create full network, initialize conv layers with imagenet, use buffered proposals - # --> train only detection network (and conv3_1 and up for VGG16) - # Keep conv weights from detection network and fix them - # --> train only rpn - # buffer region proposals from rpn - # Keep conv and rpn weights from step 3 and fix them - # --> train only detection network - ''' - - # Learning parameters - rpn_lr_factor = globalvars['rpn_lr_factor'] - rpn_lr_per_sample_scaled = [x * rpn_lr_factor for x in cfg["CNTK"].RPN_LR_PER_SAMPLE] - frcn_lr_factor = globalvars['frcn_lr_factor'] - frcn_lr_per_sample_scaled = [x * frcn_lr_factor for x in cfg["CNTK"].FRCN_LR_PER_SAMPLE] - - l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT - mm_schedule = momentum_schedule(globalvars['momentum_per_mb']) - rpn_epochs = globalvars['rpn_epochs'] - frcn_epochs = globalvars['frcn_epochs'] - - print("Using base model: {}".format(cfg["CNTK"].BASE_MODEL)) - print("rpn_lr_per_sample: {}".format(rpn_lr_per_sample_scaled)) - print("frcn_lr_per_sample: {}".format(frcn_lr_per_sample_scaled)) - if debug_output: - print("Storing graphs and models to %s." 
% globalvars['output_path']) - - # Input variables denoting features, labeled ground truth rois (as 5-tuples per roi) and image dimensions - image_input = input_variable((num_channels, image_height, image_width), dynamic_axes=[Axis.default_batch_axis()], - name=feature_node_name) - feat_norm = image_input - normalization_const - roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()]) - scaled_gt_boxes = alias(roi_input, name='roi_input') - dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()]) - dims_node = alias(dims_input, name='dims_input') - rpn_rois_input = input_variable((cfg["TRAIN"].RPN_POST_NMS_TOP_N, 4), dynamic_axes=[Axis.default_batch_axis()]) - rpn_rois_buf = alias(rpn_rois_input, name='rpn_rois') - - # base image classification model (e.g. VGG16 or AlexNet) - base_model = load_model(base_model_file_name) - - print("stage 1a - rpn") - if True: - # Create initial network, only rpn, without detection network - # initial weights train? - # conv: base_model only conv3_1 and up - # rpn: init new yes - # frcn: - - - - # conv layers - conv_layers = clone_conv_layers(base_model) - conv_out = conv_layers(feat_norm) - - # RPN and losses - rpn_rois, rpn_losses = create_rpn(conv_out, scaled_gt_boxes, dims_node, proposal_layer_param_string=cfg["CNTK"].PROPOSAL_LAYER_PARAMS) - stage1_rpn_network = combine([rpn_rois, rpn_losses]) - - # train - if debug_output: plot(stage1_rpn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage1a_rpn." + cfg["CNTK"].GRAPH_TYPE)) - train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses, - rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=rpn_epochs) - - print("stage 1a - buffering rpn proposals") - buffered_proposals_s1 = compute_rpn_proposals(stage1_rpn_network, image_input, roi_input, dims_input) - - print("stage 1b - frcn") - if True: - # Create full network, initialize conv layers with imagenet, fix rpn weights - # initial weights train? - # conv: base_model only conv3_1 and up - # rpn: stage1a rpn model no --> use buffered proposals - # frcn: base_model + new yes - - # conv_layers - conv_layers = clone_conv_layers(base_model) - conv_out = conv_layers(feat_norm) - - # use buffered proposals in target layer - rois, label_targets, bbox_targets, bbox_inside_weights = \ - create_proposal_target_layer(rpn_rois_buf, scaled_gt_boxes, num_classes=globalvars['num_classes']) - - # Fast RCNN and losses - fc_layers = clone_model(base_model, [pool_node_name], [last_hidden_node_name], CloneMethod.clone) - cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers) - detection_losses = create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights) - pred_error = classification_error(cls_score, label_targets, axis=1, name="pred_error") - stage1_frcn_network = combine([rois, cls_score, bbox_pred, detection_losses, pred_error]) - - # train - if debug_output: plot(stage1_frcn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage1b_frcn." + cfg["CNTK"].GRAPH_TYPE)) - train_model(image_input, roi_input, dims_input, detection_losses, pred_error, - frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=frcn_epochs, - rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s1) - buffered_proposals_s1 = None - - print("stage 2a - rpn") - if True: - # Keep conv weights from detection network and fix them - # initial weights train? 
- # conv: stage1b frcn model no - # rpn: stage1a rpn model yes - # frcn: - - - - # conv_layers - conv_layers = clone_model(stage1_frcn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze) - conv_out = conv_layers(image_input) - - # RPN and losses - rpn = clone_model(stage1_rpn_network, [last_conv_node_name, "roi_input", "dims_input"], ["rpn_rois", "rpn_losses"], CloneMethod.clone) - rpn_net = rpn(conv_out, dims_node, scaled_gt_boxes) - rpn_rois = rpn_net.outputs[0] - rpn_losses = rpn_net.outputs[1] - stage2_rpn_network = combine([rpn_rois, rpn_losses]) - - # train - if debug_output: plot(stage2_rpn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage2a_rpn." + cfg["CNTK"].GRAPH_TYPE)) - train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses, - rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=rpn_epochs) - - print("stage 2a - buffering rpn proposals") - buffered_proposals_s2 = compute_rpn_proposals(stage2_rpn_network, image_input, roi_input, dims_input) - - print("stage 2b - frcn") - if True: - # Keep conv and rpn weights from step 3 and fix them - # initial weights train? - # conv: stage2a rpn model no - # rpn: stage2a rpn model no --> use buffered proposals - # frcn: stage1b frcn model yes - - - # conv_layers - conv_layers = clone_model(stage2_rpn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze) - conv_out = conv_layers(image_input) - - # Fast RCNN and losses - frcn = clone_model(stage1_frcn_network, [last_conv_node_name, "rpn_rois", "roi_input"], - ["cls_score", "bbox_regr", "rpn_target_rois", "detection_losses", "pred_error"], CloneMethod.clone) - stage2_frcn_network = frcn(conv_out, rpn_rois_buf, scaled_gt_boxes) - detection_losses = stage2_frcn_network.outputs[3] - pred_error = stage2_frcn_network.outputs[4] - - # train - if debug_output: plot(stage2_frcn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage2b_frcn." 
+ cfg["CNTK"].GRAPH_TYPE)) - train_model(image_input, roi_input, dims_input, detection_losses, pred_error, - frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=frcn_epochs, - rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s2) - buffered_proposals_s2 = None - - return create_eval_model(stage2_frcn_network, image_input, dims_input, rpn_model=stage2_rpn_network) - -def eval_faster_rcnn_mAP(eval_model): - img_map_file = globalvars['test_map_file'] - roi_map_file = globalvars['test_roi_file'] - classes = globalvars['classes'] - image_input = input_variable((num_channels, image_height, image_width), dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name) - roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()]) - dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()]) - frcn_eval = eval_model(image_input, dims_input) - - # Create the minibatch source - minibatch_source = ObjectDetectionMinibatchSource( - img_map_file, roi_map_file, - max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE, - pad_width=image_width, pad_height=image_height, pad_value=img_pad_value, - randomize=False, use_flipping=False, - max_images=cfg["CNTK"].NUM_TEST_IMAGES) - - # define mapping from reader streams to network inputs - input_map = { - minibatch_source.image_si: image_input, - minibatch_source.roi_si: roi_input, - minibatch_source.dims_si: dims_input - } - - # all detections are collected into: - # all_boxes[cls][image] = N x 5 array of detections in - # (x1, y1, x2, y2, score) - all_boxes = [[[] for _ in range(num_test_images)] for _ in range(globalvars['num_classes'])] - - # evaluate test images and write netwrok output to file - print("Evaluating Faster R-CNN model for %s images." % num_test_images) - all_gt_infos = {key: [] for key in classes} - for img_i in range(0, num_test_images): - mb_data = minibatch_source.next_minibatch(1, input_map=input_map) - - gt_row = mb_data[roi_input].asarray() - gt_row = gt_row.reshape((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5)) - all_gt_boxes = gt_row[np.where(gt_row[:,-1] > 0)] - - for cls_index, cls_name in enumerate(classes): - if cls_index == 0: continue - cls_gt_boxes = all_gt_boxes[np.where(all_gt_boxes[:,-1] == cls_index)] - all_gt_infos[cls_name].append({'bbox': np.array(cls_gt_boxes), - 'difficult': [False] * len(cls_gt_boxes), - 'det': [False] * len(cls_gt_boxes)}) - - output = frcn_eval.eval({image_input: mb_data[image_input], dims_input: mb_data[dims_input]}) - out_dict = dict([(k.name, k) for k in output]) - out_cls_pred = output[out_dict['cls_pred']][0] - out_rpn_rois = output[out_dict['rpn_rois']][0] - out_bbox_regr = output[out_dict['bbox_regr']][0] - - labels = out_cls_pred.argmax(axis=1) - scores = out_cls_pred.max(axis=1) - regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, mb_data[dims_input].asarray()) - - labels.shape = labels.shape + (1,) - scores.shape = scores.shape + (1,) - coords_score_label = np.hstack((regressed_rois, scores, labels)) - - # shape of all_boxes: e.g. 
21 classes x 4952 images x 58 rois x 5 coords+score - for cls_j in range(1, globalvars['num_classes']): - coords_score_label_for_cls = coords_score_label[np.where(coords_score_label[:,-1] == cls_j)] - all_boxes[cls_j][img_i] = coords_score_label_for_cls[:,:-1].astype(np.float32, copy=False) - - if (img_i+1) % 100 == 0: - print("Processed {} samples".format(img_i+1)) - - # calculate mAP - aps = evaluate_detections(all_boxes, all_gt_infos, classes, - nms_threshold=cfg["CNTK"].RESULTS_NMS_THRESHOLD, - conf_threshold = cfg["CNTK"].RESULTS_NMS_CONF_THRESHOLD) - ap_list = [] - for class_name in aps: - ap_list += [aps[class_name]] - print('AP for {:>15} = {:.4f}'.format(class_name, aps[class_name])) - meanAP = np.nanmean(ap_list) - print('Mean AP = {:.4f}'.format(meanAP)) - return meanAP - -# The main method trains and evaluates a Fast R-CNN model. -# If a trained model is already available it is loaded an no training will be performed (if MAKE_MODE=True). -if __name__ == '__main__': - running_locally = os.path.exists(map_file_path) - if running_locally: - os.chdir(map_file_path) - if not os.path.exists(os.path.join(abs_path, "Output")): - os.makedirs(os.path.join(abs_path, "Output")) - if not os.path.exists(os.path.join(abs_path, "Output", cfg["CNTK"].DATASET)): - os.makedirs(os.path.join(abs_path, "Output", cfg["CNTK"].DATASET)) - else: - # disable debug and plot outputs when running on GPU cluster - cfg["CNTK"].DEBUG_OUTPUT = False - cfg["CNTK"].VISUALIZE_RESULTS = False - - set_global_vars() - model_path = os.path.join(globalvars['output_path'], "faster_rcnn_eval_{}_{}.model" - .format(cfg["CNTK"].BASE_MODEL, "e2e" if globalvars['train_e2e'] else "4stage")) - - # Train only if no model exists yet - if os.path.exists(model_path) and cfg["CNTK"].MAKE_MODE: - print("Loading existing model from %s" % model_path) - eval_model = load_model(model_path) - else: - if globalvars['train_e2e']: - eval_model = train_faster_rcnn_e2e(base_model_file, debug_output=cfg["CNTK"].DEBUG_OUTPUT) - else: - eval_model = train_faster_rcnn_alternating(base_model_file, debug_output=cfg["CNTK"].DEBUG_OUTPUT) - - eval_model.save(model_path) - if cfg["CNTK"].DEBUG_OUTPUT: - plot(eval_model, os.path.join(globalvars['output_path'], "graph_frcn_eval_{}_{}.{}" - .format(cfg["CNTK"].BASE_MODEL, "e2e" if globalvars['train_e2e'] else "4stage", cfg["CNTK"].GRAPH_TYPE))) - - print("Stored eval model at %s" % model_path) - - # Compute mean average precision on test set - eval_faster_rcnn_mAP(eval_model) - - # Plot results on test set - if cfg["CNTK"].VISUALIZE_RESULTS: - from plot_helpers import eval_and_plot_faster_rcnn - num_eval = min(num_test_images, 100) - img_shape = (num_channels, image_height, image_width) - results_folder = os.path.join(globalvars['output_path'], cfg["CNTK"].DATASET) - eval_and_plot_faster_rcnn(eval_model, num_eval, globalvars['test_map_file'], img_shape, - results_folder, feature_node_name, globalvars['classes'], - drawUnregressedRois=cfg["CNTK"].DRAW_UNREGRESSED_ROIS, - drawNegativeRois=cfg["CNTK"].DRAW_NEGATIVE_ROIS, - nmsThreshold=cfg["CNTK"].RESULTS_NMS_THRESHOLD, - nmsConfThreshold=cfg["CNTK"].RESULTS_NMS_CONF_THRESHOLD, - bgrPlotThreshold=cfg["CNTK"].RESULTS_BGR_PLOT_THRESHOLD) - diff --git a/Examples/Image/Detection/FasterRCNN/config.py b/Examples/Image/Detection/FasterRCNN/FasterRCNN_config.py similarity index 94% rename from Examples/Image/Detection/FasterRCNN/config.py rename to Examples/Image/Detection/FasterRCNN/FasterRCNN_config.py index 91d5f7109..128a52c52 100644 --- 
a/Examples/Image/Detection/FasterRCNN/config.py +++ b/Examples/Image/Detection/FasterRCNN/FasterRCNN_config.py @@ -23,13 +23,15 @@ __C.CNTK = edict() __C.CNTK.MAKE_MODE = False # E2E or 4-stage training __C.CNTK.TRAIN_E2E = True -# set to 'True' to use determininistic algorithms +# set to 'True' to use deterministic algorithms __C.CNTK.FORCE_DETERMINISTIC = False # set to 'True' to run only a single epoch -__C.CNTK.FAST_MODE = True +__C.CNTK.FAST_MODE = False # Debug parameters __C.CNTK.DEBUG_OUTPUT = False __C.CNTK.GRAPH_TYPE = "png" # "png" or "pdf" +# Set to True if you want to store an eval model with native UDFs (e.g. for inference using C++ or C#) +__C.STORE_EVAL_MODEL_WITH_NATIVE_UDF = False # Learning parameters __C.CNTK.L2_REG_WEIGHT = 0.0005 @@ -62,7 +64,7 @@ __C.RESULTS_NMS_THRESHOLD = 0.5 __C.RESULTS_NMS_CONF_THRESHOLD = 0.0 # Enable plotting of results generally / also plot background boxes / also plot unregressed boxes -__C.VISUALIZE_RESULTS = True +__C.VISUALIZE_RESULTS = False __C.DRAW_NEGATIVE_ROIS = False __C.DRAW_UNREGRESSED_ROIS = False # only for plotting results: boxes with a score lower than this threshold will be considered background diff --git a/Examples/Image/Detection/FasterRCNN/FasterRCNN_eval.py b/Examples/Image/Detection/FasterRCNN/FasterRCNN_eval.py index a82161859..5a585baa2 100644 --- a/Examples/Image/Detection/FasterRCNN/FasterRCNN_eval.py +++ b/Examples/Image/Detection/FasterRCNN/FasterRCNN_eval.py @@ -6,64 +6,12 @@ import os import numpy as np -from matplotlib.pyplot import imsave -import cv2 import cntk from cntk import input_variable, Axis -from utils.nms_wrapper import apply_nms_to_single_image_results from utils.map_helpers import evaluate_detections -from utils.plot_helpers import load_resize_and_pad, resize_and_pad, visualize_detections +from utils.plot_helpers import load_resize_and_pad from utils.rpn.bbox_transform import regress_rois from utils.od_mb_source import ObjectDetectionMinibatchSource -from utils.proposal_helpers import ProposalProvider, compute_proposals, compute_image_stats - -class FastRCNN_Evaluator: - def __init__(self, eval_model, cfg): - # load model once in constructor and push images through the model in 'process_image()' - self._img_shape = (cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH) - image_input = input_variable(shape=self._img_shape, - dynamic_axes=[Axis.default_batch_axis()], - name=cfg["MODEL"].FEATURE_NODE_NAME) - roi_proposals = input_variable((cfg.NUM_ROI_PROPOSALS, 4), dynamic_axes=[Axis.default_batch_axis()], - name="roi_proposals") - self._eval_model = eval_model(image_input, roi_proposals) - self._min_w = cfg['PROPOSALS_MIN_W'] - self._min_h = cfg['PROPOSALS_MIN_H'] - self._num_proposals = cfg['NUM_ROI_PROPOSALS'] - - def process_image(self, img_path): - out_cls_pred, out_rpn_rois, out_bbox_regr, dims = self.process_image_detailed(img_path) - labels = out_cls_pred.argmax(axis=1) - regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims) - - return regressed_rois, out_cls_pred - - def process_image_detailed(self, img_path): - img = cv2.imread(img_path) - _, cntk_img_input, dims = resize_and_pad(img, self._img_shape[2], self._img_shape[1]) - - #import pdb; pdb.set_trace() - - # compute ROI proposals and apply scaling and padding to them - # [target_w, target_h, img_width, img_height, top, bottom, left, right, scale_factor] - img_stats = compute_image_stats(len(img[0]), len(img), self._img_shape[2], self._img_shape[1]) - scale_factor = img_stats[-1] - top = img_stats[4] - left = 
img_stats[6] - - proposals = compute_proposals(img, self._num_proposals, self._min_w, self._min_h) - proposals = proposals * scale_factor - proposals += (left, top, left, top) - - output = self._eval_model.eval({self._eval_model.arguments[0]: [cntk_img_input], - self._eval_model.arguments[1]: np.array(proposals, dtype=np.float32)}) - - out_dict = dict([(k.name, k) for k in output]) - out_cls_pred = output[out_dict['cls_pred']][0] - out_rpn_rois = proposals - out_bbox_regr = output[out_dict['bbox_regr']][0] - - return out_cls_pred, out_rpn_rois, out_bbox_regr, dims class FasterRCNN_Evaluator: def __init__(self, eval_model, cfg): @@ -178,133 +126,3 @@ def compute_test_set_aps(eval_model, cfg): conf_threshold = cfg.RESULTS_NMS_CONF_THRESHOLD) return aps - -def plot_test_set_results(evaluator, num_images_to_plot, results_base_path, cfg): - # get image paths - with open(cfg["DATA"].TEST_MAP_FILE) as f: - content = f.readlines() - img_base_path = os.path.dirname(os.path.abspath(cfg["DATA"].TEST_MAP_FILE)) - img_file_names = [os.path.join(img_base_path, x.split('\t')[1]) for x in content] - img_shape = (cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH) - - print("Plotting results from Faster R-CNN model for %s images." % num_images_to_plot) - for i in range(0, num_images_to_plot): - img_path = img_file_names[i] - out_cls_pred, out_rpn_rois, out_bbox_regr, dims = evaluator.process_image_detailed(img_path) - labels = out_cls_pred.argmax(axis=1) - scores = out_cls_pred.max(axis=1) - - if cfg.DRAW_UNREGRESSED_ROIS: - # plot results without final regression - imgDebug = visualize_detections(img_path, out_rpn_rois, labels, scores, - img_shape[2], img_shape[1], - classes=cfg["DATA"].CLASSES, - draw_negative_rois=cfg.DRAW_NEGATIVE_ROIS, - decision_threshold=cfg.RESULTS_BGR_PLOT_THRESHOLD) - imsave("{}/{}_{}".format(results_base_path, i, os.path.basename(img_path)), imgDebug) - - # apply regression and nms to bbox coordinates - regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims) - nmsKeepIndices = apply_nms_to_single_image_results(regressed_rois, labels, scores, - use_gpu_nms=cfg.USE_GPU_NMS, - device_id=cfg.GPU_ID, - nms_threshold=cfg.RESULTS_NMS_THRESHOLD, - conf_threshold=cfg.RESULTS_NMS_CONF_THRESHOLD) - - filtered_bboxes = regressed_rois[nmsKeepIndices] - filtered_labels = labels[nmsKeepIndices] - filtered_scores = scores[nmsKeepIndices] - - img = visualize_detections(img_path, filtered_bboxes, filtered_labels, filtered_scores, - img_shape[2], img_shape[1], - classes=cfg["DATA"].CLASSES, - draw_negative_rois=cfg.DRAW_NEGATIVE_ROIS, - decision_threshold=cfg.RESULTS_BGR_PLOT_THRESHOLD) - imsave("{}/{}_regr_{}".format(results_base_path, i, os.path.basename(img_path)), img) - -def compute_test_set_aps_fast_rcnn(eval_model, cfg): - num_test_images = cfg["DATA"].NUM_TEST_IMAGES - classes = cfg["DATA"].CLASSES - image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH), - dynamic_axes=[Axis.default_batch_axis()], - name=cfg["MODEL"].FEATURE_NODE_NAME) - roi_input = input_variable((cfg.INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()]) - roi_proposals = input_variable((cfg.NUM_ROI_PROPOSALS, 4), dynamic_axes=[Axis.default_batch_axis()], name="roi_proposals") - dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()]) - frcn_eval = eval_model(image_input, roi_proposals) - - # Create the minibatch source - proposal_provider = ProposalProvider.fromconfig(cfg) - minibatch_source = ObjectDetectionMinibatchSource( - 
cfg["DATA"].TEST_MAP_FILE, - cfg["DATA"].TEST_ROI_FILE, - max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE, - pad_width=cfg.IMAGE_WIDTH, - pad_height=cfg.IMAGE_HEIGHT, - pad_value=cfg["MODEL"].IMG_PAD_COLOR, - randomize=False, use_flipping=False, - max_images=cfg["DATA"].NUM_TEST_IMAGES, - num_classes=cfg["DATA"].NUM_CLASSES, - proposal_provider=proposal_provider, - provide_targets=False) - - # define mapping from reader streams to network inputs - input_map = { - minibatch_source.image_si: image_input, - minibatch_source.roi_si: roi_input, - minibatch_source.proposals_si: roi_proposals, - minibatch_source.dims_si: dims_input - } - - # all detections are collected into: - # all_boxes[cls][image] = N x 5 array of detections in (x1, y1, x2, y2, score) - all_boxes = [[[] for _ in range(num_test_images)] for _ in range(cfg["DATA"].NUM_CLASSES)] - - # evaluate test images and write netwrok output to file - print("Evaluating Fast R-CNN model for %s images." % num_test_images) - all_gt_infos = {key: [] for key in classes} - for img_i in range(0, num_test_images): - mb_data = minibatch_source.next_minibatch(1, input_map=input_map) - - gt_row = mb_data[roi_input].asarray() - gt_row = gt_row.reshape((cfg.INPUT_ROIS_PER_IMAGE, 5)) - all_gt_boxes = gt_row[np.where(gt_row[:,-1] > 0)] - - for cls_index, cls_name in enumerate(classes): - if cls_index == 0: continue - cls_gt_boxes = all_gt_boxes[np.where(all_gt_boxes[:,-1] == cls_index)] - all_gt_infos[cls_name].append({'bbox': np.array(cls_gt_boxes), - 'difficult': [False] * len(cls_gt_boxes), - 'det': [False] * len(cls_gt_boxes)}) - - output = frcn_eval.eval({image_input: mb_data[image_input], roi_proposals: mb_data[roi_proposals]}) - out_dict = dict([(k.name, k) for k in output]) - out_cls_pred = output[out_dict['cls_pred']][0] - out_rpn_rois = mb_data[roi_proposals].data.asarray() - out_bbox_regr = output[out_dict['bbox_regr']][0] - - labels = out_cls_pred.argmax(axis=1) - scores = out_cls_pred.max(axis=1) - regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, mb_data[dims_input].asarray()) - - labels.shape = labels.shape + (1,) - scores.shape = scores.shape + (1,) - coords_score_label = np.hstack((regressed_rois, scores, labels)) - - # shape of all_boxes: e.g. 
21 classes x 4952 images x 58 rois x 5 coords+score - for cls_j in range(1, cfg["DATA"].NUM_CLASSES): - coords_score_label_for_cls = coords_score_label[np.where(coords_score_label[:,-1] == cls_j)] - all_boxes[cls_j][img_i] = coords_score_label_for_cls[:,:-1].astype(np.float32, copy=False) - - if (img_i+1) % 100 == 0: - print("Processed {} samples".format(img_i+1)) - - # calculate mAP - aps = evaluate_detections(all_boxes, all_gt_infos, classes, - use_gpu_nms = cfg.USE_GPU_NMS, - device_id = cfg.GPU_ID, - nms_threshold=cfg.RESULTS_NMS_THRESHOLD, - conf_threshold = cfg.RESULTS_NMS_CONF_THRESHOLD) - - return aps - diff --git a/Examples/Image/Detection/FasterRCNN/FasterRCNN_train.py b/Examples/Image/Detection/FasterRCNN/FasterRCNN_train.py index f0545fab6..0822826c0 100644 --- a/Examples/Image/Detection/FasterRCNN/FasterRCNN_train.py +++ b/Examples/Image/Detection/FasterRCNN/FasterRCNN_train.py @@ -25,7 +25,7 @@ from _cntk_py import force_deterministic_algorithms abs_path = os.path.dirname(os.path.abspath(__file__)) sys.path.append(os.path.join(abs_path, "..")) -from utils.rpn.rpn_helpers import create_rpn, create_proposal_target_layer, add_proposal_layer +from utils.rpn.rpn_helpers import create_rpn, create_proposal_target_layer, create_proposal_layer from utils.annotations.annotations_helper import parse_class_map_file from utils.od_mb_source import ObjectDetectionMinibatchSource from utils.proposal_helpers import ProposalProvider @@ -64,13 +64,11 @@ def prepare(cfg, use_arg_parser=True): cfg['MODEL_PATH'] = os.path.join(cfg.OUTPUT_PATH, "faster_rcnn_eval_{}_{}.model" .format(cfg["MODEL"].BASE_MODEL, "e2e" if cfg["CNTK"].TRAIN_E2E else "4stage")) - cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "PretrainedModels", + cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "..", "PretrainedModels", cfg["MODEL"].BASE_MODEL_FILE) cfg["DATA"].CLASSES = parse_class_map_file(cfg["DATA"].CLASS_MAP_FILE) cfg["DATA"].NUM_CLASSES = len(cfg["DATA"].CLASSES) - cfg.PROPOSAL_LAYER_PARAMS = "'feat_stride': {}\n'scales':\n - {}".\ - format(cfg["MODEL"].FEATURE_STRIDE, "\n - ".join([str(v) for v in cfg["DATA"].PROPOSAL_LAYER_SCALES])) if cfg["CNTK"].FAST_MODE: cfg["CNTK"].E2E_MAX_EPOCHS = 1 @@ -207,7 +205,7 @@ def create_faster_rcnn_eval_model(model, image_input, dims_input, cfg, rpn_model rpn = clone_model(model_with_rpn, [last_conv_node_name], ["rpn_cls_prob_reshape", "rpn_bbox_pred"], CloneMethod.freeze) rpn_out = rpn(conv_out) # we need to add the proposal layer anew to account for changing configs when buffering proposals in 4-stage training - rpn_rois = add_proposal_layer(rpn_out.outputs[0], rpn_out.outputs[1], dims_input, cfg) + rpn_rois = create_proposal_layer(rpn_out.outputs[0], rpn_out.outputs[1], dims_input, cfg) roi_fc_layers = clone_model(model, [last_conv_node_name, "rpn_target_rois"], ["cls_score", "bbox_regr"], CloneMethod.freeze) pred_net = roi_fc_layers(conv_out, rpn_rois) @@ -225,6 +223,27 @@ def create_faster_rcnn_eval_model(model, image_input, dims_input, cfg, rpn_model return eval_model +def store_eval_model_with_native_udf(eval_model, cfg): + import copy + sys.path.append(os.path.join(abs_path, "..", "..", "Extensibility", "ProposalLayer")) + cntk.ops.register_native_user_function('ProposalLayerOp', + 'Cntk.ProposalLayerLib-' + cntk.__version__.rstrip('+'), + 'CreateProposalLayer') + + def filter(x): + return type(x) == cntk.Function and x.op_name == 'UserFunction' and x.name == 'ProposalLayer' + 
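+    # Note: 'filter' (above) matches the Python-side ProposalLayer user function nodes
+    # (op_name 'UserFunction', node name 'ProposalLayer'); 'converter' (below) swaps each
+    # match for the registered native 'ProposalLayerOp', passing the node's serialized
+    # attributes (feat_stride, scales, train/test NMS settings) on as the native layer's
+    # configuration.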
+ def converter(x): + layer_config = copy.deepcopy(x.attributes) + return cntk.ops.native_user_function('ProposalLayerOp', list(x.inputs), layer_config, 'native_proposal_layer') + + + model_w_native_udf = cntk.misc.convert(eval_model, filter, converter) + model_path = cfg['MODEL_PATH'] + new_model_path = model_path[:-6] + '_native.model' + model_w_native_udf.save(new_model_path) + print("Stored eval model with native UDF to {}".format(new_model_path)) + def compute_rpn_proposals(rpn_model, image_input, roi_input, dims_input, cfg): num_images = cfg["DATA"].NUM_TRAIN_IMAGES # Create the minibatch source diff --git a/Examples/Image/Detection/FasterRCNN/README.md b/Examples/Image/Detection/FasterRCNN/README.md index 4dfe76a88..c93f5000f 100644 --- a/Examples/Image/Detection/FasterRCNN/README.md +++ b/Examples/Image/Detection/FasterRCNN/README.md @@ -5,7 +5,7 @@ This folder contains an end-to-end solution for using Faster R-CNN to perform object detection. The original research paper for Faster R-CNN can be found at [https://arxiv.org/abs/1506.01497](https://arxiv.org/abs/1506.01497). Base models that are supported by the current configuration are AlexNet and VGG16. -Two image set that are preconfigured are Pascal VOC 2007 and Grocery. +Two image sets that are preconfigured are Pascal VOC 2007 and Grocery. Other base models or image sets can be used by adding a configuration file similar to the examples in `utils/configs` and importing it in `run_faster_rcnn.py`. ## Running the example @@ -15,7 +15,7 @@ Other base models or image sets can be used by adding a configuration file simil To run Faster R-CNN you need a CNTK Python environment. Install the following additional packages: ``` -pip install opencv-python easydict pyyaml future +pip install opencv-python easydict pyyaml ``` The code uses prebuild Cython modules for parts of the region proposal network (see `Examples/Image/Detection/utils/cython_modules`). @@ -112,7 +112,7 @@ and run `python run_faster_rcnn.py` to train and evaluate Faster R-CNN on your d ### Parameters -All options and parameters are in `config.py` in the `FasterRCNN` folder and all of them are explained there. These include +All options and parameters are in `FasterRCNN_config.py` in the `FasterRCNN` folder and all of them are explained there. These include ``` # E2E or 4-stage training @@ -134,4 +134,4 @@ Most of the code is in `FasterRCNN_train.py` and `FasterRCNN_eval.py` (and `Exam ### Algorithm -All details regarding the Faster R-CNN algortihm can be found in the original research paper: [https://arxiv.org/abs/1506.01497](https://arxiv.org/abs/1506.01497). +All details regarding the Faster R-CNN algorithm can be found in the original research paper: [https://arxiv.org/abs/1506.01497](https://arxiv.org/abs/1506.01497). 
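For reference, a minimal usage sketch (not part of this patch) of how the renamed modules compose after this change, mirroring `run_faster_rcnn.py` below; it assumes the script is started from the `FasterRCNN` folder with the Grocery data set and base model in place:

```
# Sketch: train and evaluate Faster R-CNN with the renamed config modules.
import numpy as np
from FasterRCNN_config import cfg as detector_cfg   # formerly: from config import cfg
from utils.configs.AlexNet_config import cfg as network_cfg
from utils.configs.Grocery_config import cfg as dataset_cfg
from utils.config_helpers import merge_configs
from FasterRCNN_train import prepare, train_faster_rcnn, store_eval_model_with_native_udf
from FasterRCNN_eval import compute_test_set_aps

cfg = merge_configs([detector_cfg, network_cfg, dataset_cfg])
prepare(cfg, use_arg_parser=False)
trained_model = train_faster_rcnn(cfg)

eval_results = compute_test_set_aps(trained_model, cfg)
print('Mean AP = {:.4f}'.format(np.nanmean(list(eval_results.values()))))

# New switch in FasterRCNN_config.py: additionally store a copy of the eval model
# in which the Python ProposalLayer is replaced by the native UDF (for C++/C# inference).
if cfg.STORE_EVAL_MODEL_WITH_NATIVE_UDF:
    store_eval_model_with_native_udf(trained_model, cfg)
```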
diff --git a/Examples/Image/Detection/FasterRCNN/run_faster_rcnn.py b/Examples/Image/Detection/FasterRCNN/run_faster_rcnn.py index d8df5e0cf..f5239e3f3 100644 --- a/Examples/Image/Detection/FasterRCNN/run_faster_rcnn.py +++ b/Examples/Image/Detection/FasterRCNN/run_faster_rcnn.py @@ -6,14 +6,15 @@ import os import numpy as np -from FasterRCNN_train import prepare, train_faster_rcnn +import cntk +from FasterRCNN_train import prepare, train_faster_rcnn, store_eval_model_with_native_udf from FasterRCNN_eval import compute_test_set_aps, FasterRCNN_Evaluator from utils.config_helpers import merge_configs from utils.plot_helpers import plot_test_set_results def get_configuration(): # load configs for detector, base network and data set - from config import cfg as detector_cfg + from FasterRCNN_config import cfg as detector_cfg # for VGG16 base model use: from utils.configs.VGG16_config import cfg as network_cfg # for AlexNet base model use: from utils.configs.AlexNet_config import cfg as network_cfg from utils.configs.AlexNet_config import cfg as network_cfg @@ -42,3 +43,8 @@ if __name__ == '__main__': results_folder = os.path.join(cfg.OUTPUT_PATH, cfg["DATA"].DATASET) evaluator = FasterRCNN_Evaluator(trained_model, cfg) plot_test_set_results(evaluator, num_eval, results_folder, cfg) + + if cfg.STORE_EVAL_MODEL_WITH_NATIVE_UDF: + store_eval_model_with_native_udf(trained_model, cfg) + + diff --git a/Examples/Image/Detection/README.md b/Examples/Image/Detection/README.md index ce030a7ab..17a3d8174 100644 --- a/Examples/Image/Detection/README.md +++ b/Examples/Image/Detection/README.md @@ -13,7 +13,7 @@ This folder contains an end-to-end demo to try different object detectors, base ### Setup -To run Fast R-CNN you need a CNTK Python environment. Install the following additional packages: +To run the object detection demo you need a CNTK Python environment. Install the following additional packages: ``` pip install opencv-python easydict pyyaml future diff --git a/Examples/Image/Detection/utils/Readme.md b/Examples/Image/Detection/utils/Readme.md index 1fafa32c7..0dec95795 100644 --- a/Examples/Image/Detection/utils/Readme.md +++ b/Examples/Image/Detection/utils/Readme.md @@ -19,10 +19,6 @@ python setup.py build_ext --inplace ``` Copy the compiled `.pyd` (Windows) or `.so` (Linux) files into the `cython_modules` subfolder of this utils folder. -##### `default_config` - -Contains all required parameters for using a region proposal network in training or evaluation. You can overwrite these parameters by specifying a `config.py` file of the same format inside your working directory. - ### `rpn` module overview The rpn module contains helper methods and required layers to generate region proposal networks for object detection. @@ -48,7 +44,3 @@ Bbox regression targets are specified when the classification label is > 0. Generates training targets/labels for each object proposal: classification labels 0 - K (bg or object class 1, ... , K) and bbox regression targets in that case that the label is > 0. - -##### `generate.py` - -Generate object detection proposals from an imdb using an RPN. 
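As a companion note for the proposal-layer rework further down (`utils/rpn/rpn_helpers.py` and `utils/rpn/proposal_layer.py`): the old YAML `param_str` plus eight keyword arguments are folded into a single `layer_config` dictionary. A sketch of its shape, using the former keyword defaults as illustrative values (the real values come from `cfg["MODEL"]`, `cfg["DATA"]`, `cfg["TRAIN"]` and `cfg["TEST"]`):

```
# Sketch: layer_config as assembled by create_proposal_layer (values illustrative).
layer_config = {
    'feat_stride': 16,            # cfg["MODEL"].FEATURE_STRIDE
    'scales': [8, 16, 32],        # cfg["DATA"].PROPOSAL_LAYER_SCALES
    'train_pre_nms_topN': 12000,  # cfg["TRAIN"].RPN_PRE_NMS_TOP_N
    'train_post_nms_topN': 2000,  # cfg["TRAIN"].RPN_POST_NMS_TOP_N
    'train_nms_thresh': 0.7,      # float(cfg["TRAIN"].RPN_NMS_THRESH)
    'train_min_size': 16.0,       # float(cfg["TRAIN"].RPN_MIN_SIZE)
    'test_pre_nms_topN': 6000,    # cfg["TEST"].RPN_PRE_NMS_TOP_N
    'test_post_nms_topN': 300,    # cfg["TEST"].RPN_POST_NMS_TOP_N
    'test_nms_thresh': 0.7,       # float(cfg["TEST"].RPN_NMS_THRESH)
    'test_min_size': 16.0,        # float(cfg["TEST"].RPN_MIN_SIZE)
}
# The same dictionary feeds both code paths in create_proposal_layer (the Python
# ProposalLayer user function and the native 'ProposalLayerOp' UDF), and it is what
# store_eval_model_with_native_udf later copies out of a node's attributes.
```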
diff --git a/Examples/Image/Detection/utils/configs/AlexNet_config.py b/Examples/Image/Detection/utils/configs/AlexNet_config.py
index b1ea52650..99ed66d6b 100644
--- a/Examples/Image/Detection/utils/configs/AlexNet_config.py
+++ b/Examples/Image/Detection/utils/configs/AlexNet_config.py
@@ -13,13 +13,13 @@ cfg = __C
 
 # model config
 __C.MODEL.BASE_MODEL = "AlexNet"
-__C.MODEL.BASE_MODEL_FILE = "AlexNet.model"
+__C.MODEL.BASE_MODEL_FILE = "AlexNet_ImageNet_Caffe.model"
 __C.MODEL.IMG_PAD_COLOR = [114, 114, 114]
-__C.MODEL.FEATURE_NODE_NAME = "features"
-__C.MODEL.LAST_CONV_NODE_NAME = "conv5.y"
+__C.MODEL.FEATURE_NODE_NAME = "data"
+__C.MODEL.LAST_CONV_NODE_NAME = "relu5"
 __C.MODEL.START_TRAIN_CONV_NODE_NAME = __C.MODEL.FEATURE_NODE_NAME
-__C.MODEL.POOL_NODE_NAME = "pool3"
-__C.MODEL.LAST_HIDDEN_NODE_NAME = "h2_d"
+__C.MODEL.POOL_NODE_NAME = "pool5"
+__C.MODEL.LAST_HIDDEN_NODE_NAME = "drop7"
 __C.MODEL.FEATURE_STRIDE = 16
 __C.MODEL.RPN_NUM_CHANNELS = 256
 __C.MODEL.ROI_DIM = 6
diff --git a/Examples/Image/Detection/utils/configs/Grocery_config.py b/Examples/Image/Detection/utils/configs/Grocery_config.py
index 18651ccd3..3e1fc20a2 100644
--- a/Examples/Image/Detection/utils/configs/Grocery_config.py
+++ b/Examples/Image/Detection/utils/configs/Grocery_config.py
@@ -22,3 +22,18 @@ __C.DATA.TEST_ROI_FILE = "test_roi_file.txt"
 __C.DATA.NUM_TRAIN_IMAGES = 20
 __C.DATA.NUM_TEST_IMAGES = 5
 __C.DATA.PROPOSAL_LAYER_SCALES = [4, 8, 12]
+
+# overwriting proposal parameters for Fast R-CNN
+# minimum relative width/height of an ROI
+__C.roi_min_side_rel = 0.04
+# maximum relative width/height of an ROI
+__C.roi_max_side_rel = 0.4
+# minimum relative area of an ROI
+__C.roi_min_area_rel = 2 * __C.roi_min_side_rel * __C.roi_min_side_rel
+# maximum relative area of an ROI
+__C.roi_max_area_rel = 0.33 * __C.roi_max_side_rel * __C.roi_max_side_rel
+# maximum aspect ratio of an ROI vertically and horizontally
+__C.roi_max_aspect_ratio = 4.0
+
+# For this data set use the following lr factor for Fast R-CNN:
+# __C.CNTK.LR_FACTOR = 10.0
diff --git a/Examples/Image/Detection/utils/configs/Pascal_config.py b/Examples/Image/Detection/utils/configs/Pascal_config.py
index 2306cfd6f..30ddba349 100644
--- a/Examples/Image/Detection/utils/configs/Pascal_config.py
+++ b/Examples/Image/Detection/utils/configs/Pascal_config.py
@@ -22,3 +22,6 @@ __C.DATA.TEST_ROI_FILE = "test2007_rois_abs-xyxy_noPad_skipDif.txt"
 __C.DATA.NUM_TRAIN_IMAGES = 5010
 __C.DATA.NUM_TEST_IMAGES = 4952
 __C.DATA.PROPOSAL_LAYER_SCALES = [8, 16, 32]
+
+__C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE = "trainval2007_proposals.txt"
+__C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE = "test2007_proposals.txt"
diff --git a/Examples/Image/Detection/utils/od_reader.py b/Examples/Image/Detection/utils/od_reader.py
index 5d4e496c8..8f65698b7 100644
--- a/Examples/Image/Detection/utils/od_reader.py
+++ b/Examples/Image/Detection/utils/od_reader.py
@@ -222,7 +222,7 @@ class ObjectDetectionReader:
         if self._flip_image:
             resized_with_pad = cv2.flip(resized_with_pad, 1)
 
-        # transpose(2,0,1) converts the image to the HWC format which CNTK accepts
+        # transpose(2,0,1) converts the image to the CHW format which CNTK expects
         model_arg_rep = np.ascontiguousarray(np.array(resized_with_pad, dtype=np.float32).transpose(2, 0, 1))
 
         # dims = pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height
diff --git a/Examples/Image/Detection/utils/plot_helpers.py b/Examples/Image/Detection/utils/plot_helpers.py
index 2286c306c..7383467e4 100644
---
a/Examples/Image/Detection/utils/plot_helpers.py +++ b/Examples/Image/Detection/utils/plot_helpers.py @@ -12,7 +12,6 @@ from builtins import range import copy, textwrap from PIL import Image, ImageFont, ImageDraw from PIL.ExifTags import TAGS -from matplotlib.pyplot import imsave import cntk from cntk import input_variable, Axis from utils.nms_wrapper import apply_nms_to_single_image_results @@ -121,7 +120,10 @@ def visualize_detections(img_path, roi_coords, roi_labels, roi_scores, thickness = 4 drawRectangles(result_img, [rect], color=color, thickness=thickness) elif iter == 2 and label > 0: - font = ImageFont.truetype(available_font, 18) + try: + font = ImageFont.truetype(available_font, 18) + except: + font = ImageFont.load_default() text = classes[label] if roi_scores is not None: text += "(" + str(round(score, 2)) + ")" @@ -129,6 +131,8 @@ def visualize_detections(img_path, roi_coords, roi_labels, roi_scores, return result_img def plot_test_set_results(evaluator, num_images_to_plot, results_base_path, cfg): + from matplotlib.pyplot import imsave + # get image paths with open(cfg["DATA"].TEST_MAP_FILE) as f: content = f.readlines() @@ -284,12 +288,12 @@ def ptClip(pt, maxWidth, maxHeight): pt[1] = min(pt[1], maxHeight) return pt -def drawText(img, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = ImageFont.truetype("arial.ttf", 16)): +def drawText(img, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = None): pilImg = imconvertCv2Pil(img) pilImg = pilDrawText(pilImg, pt, text, textWidth, color, colorBackground, font) return imconvertPil2Cv(pilImg) -def pilDrawText(pilImg, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = ImageFont.truetype("arial.ttf", 16)): +def pilDrawText(pilImg, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = None): textY = pt[1] draw = ImageDraw.Draw(pilImg) if textWidth == None: diff --git a/Examples/Image/Detection/utils/proposal_helpers.py b/Examples/Image/Detection/utils/proposal_helpers.py index 54f79abd4..079504580 100644 --- a/Examples/Image/Detection/utils/proposal_helpers.py +++ b/Examples/Image/Detection/utils/proposal_helpers.py @@ -1,9 +1,29 @@ +# Copyright (c) Microsoft. All rights reserved. + +# Licensed under the MIT license. See LICENSE.md file in the project root +# for full license information. 
+# ==============================================================================
+
+import os, sys
+abs_path = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(os.path.join(abs_path, ".."))
+
 import numpy as np
-from dlib import find_candidate_object_locations
+import cv2
 from utils.rpn.bbox_transform import bbox_transform
 from utils.cython_modules.cython_bbox import bbox_overlaps
 
 random_seed = 23
+global ss_lib_loaded, find_candidate_object_locations
+ss_lib_loaded = False
+
+def load_selective_search_lib():
+    global find_candidate_object_locations
+    from dlib import find_candidate_object_locations as algo
+    find_candidate_object_locations = algo
+
+    global ss_lib_loaded
+    ss_lib_loaded = True
 
 def compute_image_stats(img_width, img_height, pad_width, pad_height):
     do_scale_w = img_width > img_height
@@ -23,63 +43,117 @@ def compute_image_stats(img_width, img_height, pad_width, pad_height):
     right = pad_width - left - target_w
 
     return [target_w, target_h, img_width, img_height, top, bottom, left, right, scale_factor]
-
-def compute_proposals(img, num_proposals, min_w, min_h):
-    all_rects = []
-    min_size = min_w * min_h
-    find_candidate_object_locations(img, all_rects, min_size=min_size)
-
-    rects = []
-    for k, d in enumerate(all_rects):
-        w = d.right() - d.left()
-        h = d.bottom() - d.top()
-        if w < min_w or h < min_h:
+def filterRois(rects, img_w, img_h, roi_min_area, roi_max_area, roi_min_side, roi_max_side, roi_max_aspect_ratio):
+    filteredRects = []
+    filteredRectsSet = set()
+    for rect in rects:
+        if tuple(rect) in filteredRectsSet: # excluding rectangles with same co-ordinates
             continue
-        rects.append([d.left(), d.top(), d.right(), d.bottom()])
 
-    np_rects = np.array(rects)
-    num_rects = np_rects.shape[0]
+        x, y, x2, y2 = rect
+        w = x2 - x
+        h = y2 - y
+        assert(w>=0 and h>=0)
+
+        # apply filters
+        if h == 0 or w == 0 or \
+           x2 > img_w or y2 > img_h or \
+           w < roi_min_side or h < roi_min_side or \
+           w > roi_max_side or h > roi_max_side or \
+           w * h < roi_min_area or w * h > roi_max_area or \
+           w / h > roi_max_aspect_ratio or h / w > roi_max_aspect_ratio:
+            continue
+        filteredRects.append(rect)
+        filteredRectsSet.add(tuple(rect))
+
+    # could combine rectangles using non-maximum suppression or with similar co-ordinates
+    # groupedRectangles, weights = cv2.groupRectangles(np.asanyarray(rectsInput, np.float).tolist(), 1, 0.3)
+    # groupedRectangles = nms_python(np.asarray(rectsInput, np.float), 0.5)
+    assert(len(filteredRects) > 0)
+    return filteredRects
+
+def compute_proposals(img, num_proposals, cfg):
+    img_w = len(img[0])
+    img_h = len(img)
+
+    if cfg is None: cfg = {}
+    roi_ss_kvals = (10, 500, 5) if 'roi_ss_kvals' not in cfg else tuple(cfg['roi_ss_kvals'])
+    roi_ss_mm_iterations = 30 if 'roi_ss_mm_iterations' not in cfg else cfg['roi_ss_mm_iterations']
+    roi_ss_min_size = 9 if 'roi_ss_min_size' not in cfg else cfg['roi_ss_min_size']
+    roi_ss_img_size = 200 if 'roi_ss_img_size' not in cfg else cfg['roi_ss_img_size']
+    roi_min_side_rel = 0.04 if 'roi_min_side_rel' not in cfg else cfg['roi_min_side_rel']
+    roi_max_side_rel = 0.4 if 'roi_max_side_rel' not in cfg else cfg['roi_max_side_rel']
+    roi_min_area_rel = 2 * roi_min_side_rel * roi_min_side_rel if 'roi_min_area_rel' not in cfg else cfg['roi_min_area_rel']
+    roi_max_area_rel = 0.33 * roi_max_side_rel * roi_max_side_rel if 'roi_max_area_rel' not in cfg else cfg['roi_max_area_rel']
+    roi_max_aspect_ratio = 4.0 if 'roi_max_aspect_ratio' not in cfg else cfg['roi_max_aspect_ratio']
+    roi_grid_aspect_ratios = [1.0, 2.0, 0.5] if
'roi_grid_aspect_ratios' not in cfg else cfg['roi_grid_aspect_ratios'] + debug_output = False if not ('CNTK' in cfg and 'DEBUG_OUTPUT' in cfg.CNTK) else cfg.CNTK.DEBUG_OUTPUT + + scale = 1.0 * roi_ss_img_size / max(img.shape[:2]) + img = cv2.resize(img, (0,0), fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR) + + roi_min_side = roi_min_side_rel * roi_ss_img_size + roi_max_side = roi_max_side_rel * roi_ss_img_size + roi_min_area = roi_min_area_rel * roi_ss_img_size * roi_ss_img_size + roi_max_area = roi_max_area_rel * roi_ss_img_size * roi_ss_img_size + + if not ss_lib_loaded: load_selective_search_lib() + rects = [] + tmp = [] + find_candidate_object_locations(img, tmp, kvals=roi_ss_kvals, min_size=roi_ss_min_size, max_merging_iterations=roi_ss_mm_iterations) + for k, d in enumerate(tmp): + rects.append([d.left(), d.top(), d.right(), d.bottom()]) + filtered_rects = filterRois(rects, img_w, img_h, roi_min_area, roi_max_area, roi_min_side, roi_max_side, roi_max_aspect_ratio) + scaled_rects = np.array(filtered_rects) * (1/scale) + if debug_output: + print("selective search rois before | after filtering: {} | {}. Requested: {}".format(len(rects), len(filtered_rects), num_proposals)) + + num_rects = scaled_rects.shape[0] np.random.seed(random_seed) if num_rects < num_proposals: - img_w = len(img[0]) - img_h = len(img) - grid_proposals = compute_grid_proposals(num_proposals - len(rects), img_w, img_h, min_w, min_h) - np_rects = np.vstack([np_rects, grid_proposals]) - elif len(rects) > num_proposals: + try: + shuffle = not cfg.CNTK.FORCE_DETERMINISTIC + except: + shuffle = True + + roi_min_side = roi_min_side_rel * min(img_w, img_h) + roi_max_side = roi_max_side_rel * max(img_w, img_h) + grid_proposals = compute_grid_proposals(num_proposals - num_rects, img_w, img_h, roi_min_side, roi_max_side, roi_grid_aspect_ratios, shuffle) + scaled_rects = np.vstack([scaled_rects, grid_proposals]) + elif num_rects > num_proposals: keep_inds = range(num_rects) keep_inds = np.random.choice(keep_inds, size=num_proposals, replace=False) - np_rects = np_rects[keep_inds] + scaled_rects = scaled_rects[keep_inds] - return np_rects - -def compute_grid_proposals(num_proposals, img_w, img_h, min_w, min_h, max_w=None, max_h=None, aspect_ratios = [1.0], shuffle=True): - min_wh = max(min_w, min_h) - max_wh = min(img_h, img_w) / 2 - if max_w is not None: max_wh = min(max_wh, max_w) - if max_h is not None: max_wh = min(max_wh, max_h) + return scaled_rects +def compute_grid_proposals(num_proposals, img_w, img_h, min_wh, max_wh, aspect_ratios = [1.0, 2.0, 0.5], shuffle=True): rects = [] iter = 0 while len(rects) < num_proposals: - new_ar = [] - for ar in aspect_ratios: - new_ar.append(ar * (0.9 ** iter)) - new_ar.append(ar * (1.1 ** iter)) + if iter == 0: + new_ar = aspect_ratios + else: + new_ar = [] + for ar in aspect_ratios: + new_ar.append(ar * (0.9 ** iter)) + new_ar.append(ar * (1.1 ** iter)) - new_rects = _compute_grid_proposals(img_w, img_h, min_wh, max_wh, new_ar) + new_rects = np.array(_compute_grid_proposals(img_w, img_h, min_wh, max_wh, new_ar)) take = min(num_proposals - len(rects), len(new_rects)) - new_rects = new_rects[:take] + + if shuffle and take < len(new_rects): + keep_inds = range(len(new_rects)) + keep_inds = np.random.choice(keep_inds, size=take, replace=False) + new_rects = new_rects[keep_inds] + else: + new_rects = new_rects[:take] + rects.extend(new_rects) + iter = iter + 1 np_rects = np.array(rects) - num_rects = np_rects.shape[0] - if shuffle and num_proposals < num_rects: - keep_inds = 
range(num_rects)
-        keep_inds = np.random.choice(keep_inds, size=num_proposals, replace=False)
-        np_rects = np_rects[keep_inds]
-    else:
-        np_rects = np_rects[:num_proposals]
-
+    assert np_rects.shape[0] == num_proposals
     return np_rects
 
 def _compute_grid_proposals(img_w, img_h, min_wh, max_wh, aspect_ratios):
@@ -152,17 +226,30 @@ class ProposalProvider:
         self._requires_scaling = requires_scaling
 
     @classmethod
-    def fromfile(cls, filename):
+    def fromfile(cls, filename, max_num_proposals):
+        print('Reading proposals from file ({}) ...'.format(filename))
         with open(filename) as f:
             lines = f.readlines()
         proposal_list = [[] for _ in lines]
+        index = 0
+        cut_counter = 0
         for line in lines:
-            # TODO: parse line
-            index = 0
-            rects = np.zeros((4, 200))
+            # parse line
+            numbers = line[line.find('|') + 11:]
+            parsed_numbers = np.fromstring(numbers, dtype=int, sep=' ')
+            parsed_rects = parsed_numbers.reshape((int(parsed_numbers.shape[0] / 4), 4))
+            num_rects = parsed_rects.shape[0]
+            if num_rects > max_num_proposals:
+                rects = parsed_rects[:max_num_proposals,:]
+                cut_counter += 1
+            else:
+                pad_rects = np.zeros((max_num_proposals - num_rects, 4))
+                rects = np.vstack([parsed_rects, pad_rects])
             proposal_list[index] = rects
+            index += 1
 
+        print('Done. {} images had more than {} proposals.'.format(cut_counter, max_num_proposals))
         return cls(proposal_list)
 
     @classmethod
@@ -183,33 +270,25 @@ class ProposalProvider:
         return self._proposal_cfg['NUM_ROI_PROPOSALS']
 
     def get_proposals(self, index, img=None):
-        #import pdb; pdb.set_trace()
         if index in self._proposal_dict:
             return self._proposal_dict[index]
         else:
-            return self._compute_proposals(img)
-
-    def _compute_proposals(self, img):
-        min_w = self._proposal_cfg['PROPOSALS_MIN_W']
-        min_h = self._proposal_cfg['PROPOSALS_MIN_H']
-        num_proposals = self._proposal_cfg.NUM_ROI_PROPOSALS
-        return compute_proposals(img, num_proposals, min_w, min_h)
+            num_proposals = self._proposal_cfg.NUM_ROI_PROPOSALS
+            return compute_proposals(img, num_proposals, self._proposal_cfg)
 
 if __name__ == '__main__':
     import cv2
-    image_file = r"C:\src\CNTK\Examples\Image\DataSets\Pascal\VOCdevkit\VOC2007\JPEGImages\000015.jpg"
+    image_file = os.path.join(abs_path, r"..\..\DataSets\Pascal\VOCdevkit\VOC2007\JPEGImages\000015.jpg")
     img = cv2.imread(image_file)
 
-    # 0.18 sec for 4000
-    # 0.15 sec for 2000
-    # 0.13 sec for 1000
     num_proposals = 2000
-    num_runs = 100
+    num_runs = 500
+    proposals = compute_proposals(img, num_proposals, cfg=None)
 
     import time
     start = int(time.time())
     for i in range(num_runs):
-        proposals = compute_proposals(img, num_proposals, 20, 20)
+        proposals = compute_proposals(img, num_proposals, cfg=None)
     total = int(time.time() - start)
-    print ("time: {}".format(total / (1.0 * num_runs)))
+    print ("time for {} proposals: {} (total time for {} runs: {})".format(num_proposals, total / (1.0 * num_runs), num_runs, total))
 
     assert len(proposals) == num_proposals, "{} != {}".format(len(proposals), num_proposals)
diff --git a/Examples/Image/Detection/utils/rpn/proposal_layer.py b/Examples/Image/Detection/utils/rpn/proposal_layer.py
index f4929f51c..2defa8c53 100644
--- a/Examples/Image/Detection/utils/rpn/proposal_layer.py
+++ b/Examples/Image/Detection/utils/rpn/proposal_layer.py
@@ -20,39 +20,17 @@ class ProposalLayer(UserFunction):
     transformations to a set of regular boxes (called "anchors").
''' - def __init__(self, arg1, arg2, arg3, - train_pre_nms_topN=12000, - train_post_nms_topN=2000, - train_nms_thresh=0.7, - train_min_size=16, - test_pre_nms_topN=6000, - test_post_nms_topN=300, - test_nms_thresh=0.7, - test_min_size=16, - param_str = None, - name='ProposalLayer'): - super(ProposalLayer, self).__init__([arg1, arg2, arg3], name=name) - self._train_pre_nms_topN = train_pre_nms_topN - self._train_post_nms_topN = train_post_nms_topN - self._train_nms_thresh = train_nms_thresh - self._train_min_size = train_min_size - self._test_pre_nms_topN = test_pre_nms_topN - self._test_post_nms_topN = test_post_nms_topN - self._test_nms_thresh = test_nms_thresh - self._test_min_size = test_min_size - self._param_str = param_str if param_str is not None else "'feat_stride': 16\n'scales':\n - 8 \n - 16 \n - 32" + def __init__(self, arg1, arg2, arg3, layer_config, name='ProposalLayer'): + super(ProposalLayer, self).__init__([arg1, arg2, arg3], attributes=layer_config, name=name) + + self._layer_config = layer_config + self._feat_stride = 16 if 'feat_stride' not in layer_config else layer_config['feat_stride'] + anchor_scales = [8, 16, 32] if 'scales' not in layer_config else layer_config['scales'] # parse the layer parameter string, which must be valid YAML - layer_params = yaml.load(self._param_str) - self._feat_stride = layer_params['feat_stride'] - anchor_scales = layer_params.get('scales', (8, 16, 32)) self._anchors = generate_anchors(scales=np.array(anchor_scales)) self._num_anchors = self._anchors.shape[0] - attributes = {'feat_stride' : self._feat_stride, 'scales' : anchor_scales} - - super(ProposalLayer, self).__init__([arg1, arg2, arg3], attributes=attributes, name=name) - if DEBUG: print ('feat_stride: {}'.format(self._feat_stride)) print ('anchors:') @@ -85,15 +63,15 @@ class ProposalLayer(UserFunction): # use potentially different number of proposals for training vs evaluation if len(outputs_to_retain) == 0: # print("EVAL") - pre_nms_topN = self._test_pre_nms_topN - post_nms_topN = self._test_post_nms_topN - nms_thresh = self._test_nms_thresh - min_size = self._test_min_size + pre_nms_topN = self._layer_config['test_pre_nms_topN'] + post_nms_topN = self._layer_config['test_post_nms_topN'] + nms_thresh = self._layer_config['test_nms_thresh'] + min_size = self._layer_config['test_min_size'] else: - pre_nms_topN = self._train_pre_nms_topN - post_nms_topN = self._train_post_nms_topN - nms_thresh = self._train_nms_thresh - min_size = self._train_min_size + pre_nms_topN = self._layer_config['train_pre_nms_topN'] + post_nms_topN = self._layer_config['train_post_nms_topN'] + nms_thresh = self._layer_config['train_nms_thresh'] + min_size = self._layer_config['train_min_size'] bottom = arguments assert bottom[0].shape[0] == 1, \ @@ -205,44 +183,16 @@ class ProposalLayer(UserFunction): pass def clone(self, cloned_inputs): - return ProposalLayer(cloned_inputs[0], cloned_inputs[1], cloned_inputs[2], - train_pre_nms_topN=self._train_pre_nms_topN, - train_post_nms_topN=self._train_post_nms_topN, - train_nms_thresh=self._train_nms_thresh, - train_min_size=self._train_min_size, - test_pre_nms_topN=self._test_pre_nms_topN, - test_post_nms_topN=self._test_post_nms_topN, - test_nms_thresh=self._test_nms_thresh, - test_min_size=self._test_min_size, - param_str=self._param_str) + return ProposalLayer(cloned_inputs[0], cloned_inputs[1], cloned_inputs[2], layer_config=self._layer_config) def serialize(self): internal_state = {} - internal_state['param_str'] = self._param_str - 
internal_state['train_pre_nms_topN'] = self._train_pre_nms_topN - internal_state['train_post_nms_topN'] = self._train_post_nms_topN - internal_state['train_nms_thresh'] = self._train_nms_thresh - internal_state['train_min_size'] = self._train_min_size - internal_state['test_pre_nms_topN'] = self._test_pre_nms_topN - internal_state['test_post_nms_topN'] = self._test_post_nms_topN - internal_state['test_nms_thresh'] = self._test_nms_thresh - internal_state['test_min_size'] = self._test_min_size - + internal_state['layer_config'] = self._layer_config return internal_state @staticmethod def deserialize(inputs, name, state): - return ProposalLayer(inputs[0], inputs[1], inputs[2], - train_pre_nms_topN=state['train_pre_nms_topN'], - train_post_nms_topN=state['train_post_nms_topN'], - train_nms_thresh=state['train_nms_thresh'], - train_min_size=state['train_min_size'], - test_pre_nms_topN=state['test_pre_nms_topN'], - test_post_nms_topN=state['test_post_nms_topN'], - test_nms_thresh=state['test_nms_thresh'], - test_min_size=state['test_min_size'], - param_str=state['param_str'], - name=name) + return ProposalLayer(inputs[0], inputs[1], inputs[2], layer_config=state['layer_config'], name=name) def _filter_boxes(boxes, min_size): """Remove all boxes with any side smaller than min_size.""" diff --git a/Examples/Image/Detection/utils/rpn/rpn_helpers.py b/Examples/Image/Detection/utils/rpn/rpn_helpers.py index ab5799c32..13c3ec972 100644 --- a/Examples/Image/Detection/utils/rpn/rpn_helpers.py +++ b/Examples/Image/Detection/utils/rpn/rpn_helpers.py @@ -6,7 +6,7 @@ import numpy as np import cntk -from cntk import reduce_sum +from cntk import reduce_sum, ops from cntk import user_function, relu, softmax, slice, splice, reshape, element_times, plus, minus, alias, classification_error from cntk.initializer import glorot_uniform, normal from cntk.layers import Convolution @@ -16,7 +16,6 @@ from utils.rpn.proposal_layer import ProposalLayer from utils.rpn.proposal_target_layer import ProposalTargetLayer from utils.rpn.cntk_smoothL1_loss import SmoothL1Loss -# Please keep in sync with Readme.md def create_rpn(conv_out, scaled_gt_boxes, im_info, cfg, add_loss_functions=True): ''' Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper: @@ -59,19 +58,21 @@ def create_rpn(conv_out, scaled_gt_boxes, im_info, cfg, add_loss_functions=True) rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape, name="rpn_cls_prob_reshape") # proposal layer - rpn_rois = add_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg) + rpn_rois = create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg) rpn_losses = None if(add_loss_functions): # RPN targets # Comment: rpn_cls_score is only passed vvv to get width and height of the conv feature map ... + proposal_layer_params = "'feat_stride': {}\n'scales':\n - {}". 
\ + format(cfg["MODEL"].FEATURE_STRIDE, "\n - ".join([str(v) for v in cfg["DATA"].PROPOSAL_LAYER_SCALES])) atl = user_function(AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info, rpn_batch_size=cfg["TRAIN"].RPN_BATCHSIZE, rpn_fg_fraction=cfg["TRAIN"].RPN_FG_FRACTION, clobber_positives=cfg["TRAIN"].RPN_CLOBBER_POSITIVES, positive_overlap=cfg["TRAIN"].RPN_POSITIVE_OVERLAP, negative_overlap=cfg["TRAIN"].RPN_NEGATIVE_OVERLAP, - param_str=cfg.PROPOSAL_LAYER_PARAMS)) + param_str=proposal_layer_params)) rpn_labels = atl.outputs[0] rpn_bbox_targets = atl.outputs[1] rpn_bbox_inside_weights = atl.outputs[2] @@ -114,17 +115,30 @@ def create_rpn(conv_out, scaled_gt_boxes, im_info, cfg, add_loss_functions=True) return rpn_rois, rpn_losses -def add_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg): - rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, - train_pre_nms_topN=cfg["TRAIN"].RPN_PRE_NMS_TOP_N, - train_post_nms_topN=cfg["TRAIN"].RPN_POST_NMS_TOP_N, - train_nms_thresh=cfg["TRAIN"].RPN_NMS_THRESH, - train_min_size=cfg["TRAIN"].RPN_MIN_SIZE, - test_pre_nms_topN=cfg["TEST"].RPN_PRE_NMS_TOP_N, - test_post_nms_topN=cfg["TEST"].RPN_POST_NMS_TOP_N, - test_nms_thresh=cfg["TEST"].RPN_NMS_THRESH, - test_min_size=cfg["TEST"].RPN_MIN_SIZE, - param_str=cfg.PROPOSAL_LAYER_PARAMS)) +def create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg, use_native_proposal_layer=False): + layer_config = {} + layer_config["feat_stride"] = cfg["MODEL"].FEATURE_STRIDE + layer_config["scales"] = cfg["DATA"].PROPOSAL_LAYER_SCALES + + layer_config["train_pre_nms_topN"] = cfg["TRAIN"].RPN_PRE_NMS_TOP_N + layer_config["train_post_nms_topN"] = cfg["TRAIN"].RPN_POST_NMS_TOP_N + layer_config["train_nms_thresh"] = float(cfg["TRAIN"].RPN_NMS_THRESH) + layer_config["train_min_size"] = float(cfg["TRAIN"].RPN_MIN_SIZE) + + layer_config["test_pre_nms_topN"] = cfg["TEST"].RPN_PRE_NMS_TOP_N + layer_config["test_post_nms_topN"] = cfg["TEST"].RPN_POST_NMS_TOP_N + layer_config["test_nms_thresh"] = float(cfg["TEST"].RPN_NMS_THRESH) + layer_config["test_min_size"] = float(cfg["TEST"].RPN_MIN_SIZE) + + if use_native_proposal_layer: + cntk.ops.register_native_user_function('ProposalLayerOp', + 'Cntk.ProposalLayerLib-' + cntk.__version__.rstrip('+'), + 'CreateProposalLayer') + rpn_rois_raw = ops.native_user_function('ProposalLayerOp', [rpn_cls_prob_reshape, rpn_bbox_pred, im_info], + layer_config, 'native_proposal_layer') + else: + rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, layer_config)) + return alias(rpn_rois_raw, name='rpn_rois') def create_proposal_target_layer(rpn_rois, scaled_gt_boxes, cfg): diff --git a/PretrainedModels/download_model.py b/PretrainedModels/download_model.py index 08d1a976c..d1f507b51 100644 --- a/PretrainedModels/download_model.py +++ b/PretrainedModels/download_model.py @@ -14,7 +14,7 @@ except ImportError: # Add models here like this: (category, model_name, model_url) models = (('Image Classification', 'AlexNet_ImageNet_CNTK', 'https://www.cntk.ai/Models/CNTK_Pretrained/AlexNet_ImageNet_CNTK.model'), - ('Image Classification', 'AlexNet_ImageNet_Caffe', 'https://www.cntk.ai/Models/CNTK_Pretrained/AlexNet_ImageNet_Caffe.model'), + ('Image Classification', 'AlexNet_ImageNet_Caffe', 'https://www.cntk.ai/Models/Caffe_Converted/AlexNet_ImageNet_Caffe.model'), ('Image Classification', 'InceptionV3_ImageNet_CNTK', 'https://www.cntk.ai/Models/CNTK_Pretrained/InceptionV3_ImageNet_CNTK.model'), ('Image 
Classification', 'BNInception_ImageNet_Caffe', 'https://www.cntk.ai/Models/Caffe_Converted/BNInception_ImageNet_Caffe.model'), ('Image Classification', 'ResNet18_ImageNet_CNTK', 'https://www.cntk.ai/Models/CNTK_Pretrained/ResNet18_ImageNet_CNTK.model'), diff --git a/Scripts/install/linux/conda-linux-cntk-py34-environment.yml b/Scripts/install/linux/conda-linux-cntk-py34-environment.yml index 80326ea17..c6d86d89a 100644 --- a/Scripts/install/linux/conda-linux-cntk-py34-environment.yml +++ b/Scripts/install/linux/conda-linux-cntk-py34-environment.yml @@ -23,6 +23,7 @@ dependencies: - setuptools=27.2.0=py34_0 - six=1.10.0=py34_0 - wheel=0.29.0=py34_0 +- dlib=19.0=np111py34_blas_openblas_200 - pip: - easydict==1.6.0 - future==0.16.0 @@ -35,3 +36,4 @@ dependencies: - sphinx==1.5.4 - twine==1.8.1 - protobuf==3.2.0 + \ No newline at end of file diff --git a/Scripts/install/windows/conda-windows-cntk-py35-environment.yml b/Scripts/install/windows/conda-windows-cntk-py35-environment.yml index 7af289da5..4de63a4f7 100644 --- a/Scripts/install/windows/conda-windows-cntk-py35-environment.yml +++ b/Scripts/install/windows/conda-windows-cntk-py35-environment.yml @@ -23,6 +23,7 @@ dependencies: - six=1.10.0=py35_0 - wheel=0.29.0=py35_0 - opencv=3.1.0=np111py35_1 +- dlib=19.0=np111py35_200 - pip: - gym==0.5.2 - keras==2.0.6 diff --git a/Tests/EndToEndTests/CNTKv2Python/Examples/DetectionDemo_test.py b/Tests/EndToEndTests/CNTKv2Python/Examples/DetectionDemo_test.py new file mode 100644 index 000000000..d41aeadd9 --- /dev/null +++ b/Tests/EndToEndTests/CNTKv2Python/Examples/DetectionDemo_test.py @@ -0,0 +1,77 @@ +# Copyright (c) Microsoft. All rights reserved. + +# Licensed under the MIT license. See LICENSE.md file in the project root +# for full license information. 
+# ============================================================================== + +import numpy as np +import os +import pytest +import sys +from cntk import load_model +from cntk.cntk_py import DeviceKind_GPU +from cntk.device import try_set_default_device, gpu +from cntk.logging.graph import get_node_outputs +from cntk.ops.tests.ops_test_utils import cntk_device +from _cntk_py import force_deterministic_algorithms +force_deterministic_algorithms() + +abs_path = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(abs_path) +sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection")) + +win35_linux34 = pytest.mark.skipif(not ((sys.platform == 'win32' and sys.version_info[:2] == (3,5)) or + (sys.platform != 'win32' and sys.version_info[:2] == (3,4))), + reason="it runs currently only in windows-py35 and linux-py34 due to precompiled cython modules") + +@win35_linux34 +def test_detection_demo(device_id): + if cntk_device(device_id).type() != DeviceKind_GPU: + pytest.skip('test only runs on GPU') # it runs very slow in CPU + try_set_default_device(cntk_device(device_id)) + + from prepare_test_data import prepare_Grocery_data, prepare_alexnet_v0_model + grocery_path = prepare_Grocery_data() + prepare_alexnet_v0_model() + + from FastRCNN.install_data_and_model import create_grocery_mappings + create_grocery_mappings(grocery_path) + + from DetectionDemo import get_configuration + import utils.od_utils as od + + cfg = get_configuration('FasterRCNN') + cfg["CNTK"].FORCE_DETERMINISTIC = True + cfg["CNTK"].DEBUG_OUTPUT = False + cfg["CNTK"].MAKE_MODE = False + cfg["CNTK"].FAST_MODE = False + cfg.CNTK.E2E_MAX_EPOCHS = 3 + cfg.CNTK.RPN_EPOCHS = 2 + cfg.CNTK.FRCN_EPOCHS = 2 + cfg.IMAGE_WIDTH = 400 + cfg.IMAGE_HEIGHT = 400 + cfg["CNTK"].TRAIN_E2E = True + cfg.USE_GPU_NMS = False + cfg.VISUALIZE_RESULTS = False + cfg["DATA"].MAP_FILE_PATH = grocery_path + + externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ + if externalData: + extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'] + cfg['BASE_MODEL_PATH'] = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v1", "AlexNet_ImageNet_Caffe.model") + else: + cfg['BASE_MODEL_PATH'] = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet_ImageNet_Caffe.model".split("/")) + + # train and test + eval_model = od.train_object_detector(cfg) + eval_results = od.evaluate_test_set(eval_model, cfg) + + meanAP = np.nanmean(list(eval_results.values())) + print('meanAP={}'.format(meanAP)) + assert meanAP > 0.01 + + # detect objects in single image + img_path = os.path.join(grocery_path, "testImages", "WIN_20160803_11_28_42_Pro.jpg") + regressed_rois, cls_probs = od.evaluate_single_image(eval_model, img_path, cfg) + bboxes, labels, scores = od.filter_results(regressed_rois, cls_probs, cfg) + assert bboxes.shape[0] == labels.shape[0] diff --git a/Tests/EndToEndTests/CNTKv2Python/Examples/FastRCNNBS_test.py b/Tests/EndToEndTests/CNTKv2Python/Examples/FastRCNNBS_test.py index 258921043..bf997dc06 100644 --- a/Tests/EndToEndTests/CNTKv2Python/Examples/FastRCNNBS_test.py +++ b/Tests/EndToEndTests/CNTKv2Python/Examples/FastRCNNBS_test.py @@ -69,7 +69,7 @@ def test_fastrcnn_grocery_training(device_id): extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'] model_file = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v0", "AlexNet.model") else: - model_file = os.path.join(abs_path, *"../../../../Examples/Image/PretrainedModels/AlexNet.model".split("/")) + model_file = 
+        model_file = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet.model".split("/"))
 
     from A2_RunWithPyModel import train_fast_rcnn, evaluate_fast_rcnn
     trained_model = train_fast_rcnn(model_path=model_file)
diff --git a/Tests/EndToEndTests/CNTKv2Python/Examples/FastRCNN_test.py b/Tests/EndToEndTests/CNTKv2Python/Examples/FastRCNN_test.py
index eb1ec9832..2217e3d49 100644
--- a/Tests/EndToEndTests/CNTKv2Python/Examples/FastRCNN_test.py
+++ b/Tests/EndToEndTests/CNTKv2Python/Examples/FastRCNN_test.py
@@ -19,14 +19,14 @@ force_deterministic_algorithms()
 abs_path = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(abs_path)
 sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection"))
+sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection", "FastRCNN"))
 
 from prepare_test_data import prepare_Grocery_data, prepare_alexnet_v0_model
 grocery_path = prepare_Grocery_data()
 prepare_alexnet_v0_model()
 
-from FastRCNN.install_data_and_model import create_grocery_mappings
+from install_data_and_model import create_grocery_mappings
 create_grocery_mappings(grocery_path)
-from utils.config_helpers import merge_configs
 
 win35_linux34 = pytest.mark.skipif(not ((sys.platform == 'win32' and sys.version_info[:2] == (3,5)) or
                                         (sys.platform != 'win32' and sys.version_info[:2] == (3,4))),
@@ -34,7 +34,12 @@ win35_linux34 = pytest.mark.skipif(not ((sys.platform == 'win32' and sys.version
 
 @win35_linux34
 def test_fastrcnnpy_grocery_training(device_id):
-    from FastRCNN.config import cfg as detector_cfg
+    if cntk_device(device_id).type() != DeviceKind_GPU:
+        pytest.skip('test only runs on GPU')  # it runs very slowly on CPU
+    try_set_default_device(cntk_device(device_id))
+
+    from utils.config_helpers import merge_configs
+    from FastRCNN_config import cfg as detector_cfg
     from utils.configs.AlexNet_config import cfg as network_cfg
     from utils.configs.Grocery_config import cfg as dataset_cfg
 
@@ -43,27 +48,25 @@ def test_fastrcnnpy_grocery_training(device_id):
     cfg["CNTK"].DEBUG_OUTPUT = False
     cfg["CNTK"].MAKE_MODE = False
     cfg["CNTK"].FAST_MODE = False
-    cfg["CNTK"].MAX_EPOCHS = 2
-    cfg.NUM_ROI_PROPOSALS = 100
-    cfg.USE_GPU_NMS = True
+    cfg["CNTK"].MAX_EPOCHS = 4
+    cfg.IMAGE_WIDTH = 600
+    cfg.IMAGE_HEIGHT = 600
+    cfg.NUM_ROI_PROPOSALS = 200
+    cfg.USE_GPU_NMS = False
     cfg.VISUALIZE_RESULTS = False
     cfg["DATA"].MAP_FILE_PATH = grocery_path
 
     externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ
     if externalData:
         extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
-        cfg['BASE_MODEL_PATH'] = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v0", "AlexNet.model")
+        cfg['BASE_MODEL_PATH'] = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v1", "AlexNet_ImageNet_Caffe.model")
     else:
-        cfg['BASE_MODEL_PATH'] = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet.model".split("/"))
+        cfg['BASE_MODEL_PATH'] = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet_ImageNet_Caffe.model".split("/"))
 
-    from FastRCNN.FastRCNN_train import prepare, train_fast_rcnn
-    from FastRCNN.FastRCNN_eval import compute_test_set_aps
+    from FastRCNN_train import prepare, train_fast_rcnn
+    from FastRCNN_eval import compute_test_set_aps
     prepare(cfg, False)
-    if cntk_device(device_id).type() != DeviceKind_GPU:
-        pytest.skip('test only runs on GPU') # it runs very slow in CPU
-    try_set_default_device(cntk_device(device_id))
-
     np.random.seed(seed=3)
     trained_model = train_fast_rcnn(cfg)
     eval_results = compute_test_set_aps(trained_model, cfg)
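
The Fast R-CNN test above and the Faster R-CNN test that follows now share one configuration recipe: merge the detector, network, and dataset configs, override individual fields, then run prepare/train/evaluate. A condensed sketch of that flow using the same imports as the test (the override values here are placeholders, not the test's exact settings):

    # Sketch of the shared config flow; override values are placeholders.
    from utils.config_helpers import merge_configs
    from FastRCNN_config import cfg as detector_cfg
    from utils.configs.AlexNet_config import cfg as network_cfg
    from utils.configs.Grocery_config import cfg as dataset_cfg

    cfg = merge_configs([detector_cfg, network_cfg, dataset_cfg])
    cfg["CNTK"].MAKE_MODE = False  # do not reuse a previously trained model
    cfg.USE_GPU_NMS = False        # fall back to the pure-Python NMS path

    from FastRCNN_train import prepare, train_fast_rcnn
    from FastRCNN_eval import compute_test_set_aps
    prepare(cfg, False)                     # set up paths and derived config
    model = train_fast_rcnn(cfg)
    aps = compute_test_set_aps(model, cfg)  # dict: class name -> average precision
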
diff --git a/Tests/EndToEndTests/CNTKv2Python/Examples/FasterRCNN_test.py b/Tests/EndToEndTests/CNTKv2Python/Examples/FasterRCNN_test.py
index aaa3f730f..0075285ec 100644
--- a/Tests/EndToEndTests/CNTKv2Python/Examples/FasterRCNN_test.py
+++ b/Tests/EndToEndTests/CNTKv2Python/Examples/FasterRCNN_test.py
@@ -18,22 +18,24 @@ force_deterministic_algorithms()
 abs_path = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(abs_path)
 sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection"))
-sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Extensibility", "ProposalLayer"))
+sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection", "FastRCNN"))
+sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection", "FasterRCNN"))
 
 from prepare_test_data import prepare_Grocery_data, prepare_alexnet_v0_model
 grocery_path = prepare_Grocery_data()
 prepare_alexnet_v0_model()
 
-from FastRCNN.install_data_and_model import create_grocery_mappings
+from install_data_and_model import create_grocery_mappings
 create_grocery_mappings(grocery_path)
-from utils.config_helpers import merge_configs
 
 win35_linux34 = pytest.mark.skipif(not ((sys.platform == 'win32' and sys.version_info[:2] == (3,5)) or
                                         (sys.platform != 'win32' and sys.version_info[:2] == (3,4))),
                                    reason="it runs currently only in windows-py35 and linux-py34 due to precompiled cython modules")
 
-def run_fasterrcnn_grocery_training(device_id, e2e):
-    from FasterRCNN.config import cfg as detector_cfg
+def run_fasterrcnn_grocery_training(e2e):
+    from FasterRCNN_eval import compute_test_set_aps
+    from utils.config_helpers import merge_configs
+    from FasterRCNN_config import cfg as detector_cfg
     from utils.configs.AlexNet_config import cfg as network_cfg
     from utils.configs.Grocery_config import cfg as dataset_cfg
 
@@ -41,101 +43,68 @@ def run_fasterrcnn_grocery_training(device_id, e2e):
     cfg["CNTK"].FORCE_DETERMINISTIC = True
     cfg["CNTK"].DEBUG_OUTPUT = False
     cfg["CNTK"].MAKE_MODE = False
-    cfg["CNTK"].FAST_MODE = True
+    cfg["CNTK"].FAST_MODE = False
+    cfg.CNTK.E2E_MAX_EPOCHS = 3
+    cfg.CNTK.RPN_EPOCHS = 2
+    cfg.CNTK.FRCN_EPOCHS = 2
+    cfg.IMAGE_WIDTH = 400
+    cfg.IMAGE_HEIGHT = 400
     cfg["CNTK"].TRAIN_E2E = e2e
-    cfg.USE_GPU_NMS = True
+    cfg.USE_GPU_NMS = False
     cfg.VISUALIZE_RESULTS = False
     cfg["DATA"].MAP_FILE_PATH = grocery_path
 
     externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ
     if externalData:
         extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
-        cfg['BASE_MODEL_PATH'] = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v0", "AlexNet.model")
+        model_file = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v1", "AlexNet_ImageNet_Caffe.model")
     else:
-        model_file = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet.model".split("/"))
+        model_file = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet_ImageNet_Caffe.model".split("/"))
 
-    from FasterRCNN import train_faster_rcnn_e2e, eval_faster_rcnn_mAP
+    from FasterRCNN_train import prepare, train_faster_rcnn
 
     np.random.seed(seed=3)
-    eval_model = train_faster_rcnn_e2e(model_file, debug_output=False)
-    meanAP = eval_faster_rcnn_mAP(eval_model)
-    assert meanAP > 0.01
-
-@win35_linux34
-def test_native_fasterrcnn_eval(tmpdir, device_id):
-    from config import cfg
-    cfg["CNTK"].FORCE_DETERMINISTIC = True
-    cfg["CNTK"].DEBUG_OUTPUT = False
-    cfg["CNTK"].VISUALIZE_RESULTS = False
-    cfg["CNTK"].FAST_MODE = True
cfg["CNTK"].MAP_FILE_PATH = grocery_path - - from FasterRCNN import set_global_vars - set_global_vars(False) - - if cntk_device(device_id).type() != DeviceKind_GPU: - pytest.skip('test only runs on GPU') # it runs very slow in CPU - try_set_default_device(cntk_device(device_id)) - - # since we do not use a reader for evaluation we need unzipped data - externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ - - if externalData: - extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'] - model_file = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v0", "AlexNet.model") - else: - model_file = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet.model".split("/")) - - from FasterRCNN import train_faster_rcnn_e2e, eval_faster_rcnn_mAP - - np.random.seed(seed=3) - - eval_model = train_faster_rcnn_e2e(model_file, debug_output=False) - - meanAP_python = eval_faster_rcnn_mAP(eval_model) - - cntk_py.always_allow_setting_default_device() - - try_set_default_device(cpu()) - - from native_proposal_layer import clone_with_native_proposal_layer - - model_with_native_pl = clone_with_native_proposal_layer(eval_model) - meanAP_native = eval_faster_rcnn_mAP(model_with_native_pl) - - # 0.2067 (python) vs 0.2251 (native) -- the difference stems - # from different sorting algorithms: quicksort in python and - # heapsort in c++ (both are not stable). - assert abs(meanAP_python - meanAP_native) < 0.1 - -@win35_linux34 -def test_fasterrcnn_grocery_training_4stage(device_id): - from config import cfg - cfg["CNTK"].FORCE_DETERMINISTIC = True - cfg["CNTK"].DEBUG_OUTPUT = False - cfg["CNTK"].VISUALIZE_RESULTS = False - cfg["CNTK"].FAST_MODE = True - cfg["CNTK"].MAP_FILE_PATH = grocery_path - - from FasterRCNN.FasterRCNN_train import prepare, train_faster_rcnn - from FasterRCNN.FasterRCNN_eval import compute_test_set_aps prepare(cfg, False) - - if cntk_device(device_id).type() != DeviceKind_GPU: - pytest.skip('test only runs on GPU') # it runs very slow in CPU - try_set_default_device(cntk_device(device_id)) - - np.random.seed(seed=3) + cfg['BASE_MODEL_PATH'] = model_file trained_model = train_faster_rcnn(cfg) eval_results = compute_test_set_aps(trained_model, cfg) meanAP = np.nanmean(list(eval_results.values())) print('meanAP={}'.format(meanAP)) assert meanAP > 0.01 + return trained_model, meanAP, cfg + +@win35_linux34 +def reenable_once_sorting_is_stable_test_native_fasterrcnn_eval(device_id): + if cntk_device(device_id).type() != DeviceKind_GPU: + pytest.skip('test only runs on GPU') # it runs very slow in CPU + try_set_default_device(cntk_device(device_id)) + + from FasterRCNN_eval import compute_test_set_aps + eval_model, meanAP_python, cfg = run_fasterrcnn_grocery_training(True) + + cntk_py.always_allow_setting_default_device() + try_set_default_device(cpu()) + + sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Extensibility", "ProposalLayer")) + from native_proposal_layer import clone_with_native_proposal_layer + model_with_native_pl = clone_with_native_proposal_layer(eval_model) + eval_results = compute_test_set_aps(model_with_native_pl, cfg) + meanAP_native = np.nanmean(list(eval_results.values())) + + # 0.2067 (python) vs 0.2251 (native) -- the difference stems + # from different sorting algorithms: quicksort in python and + # heapsort in c++ (both are not stable). 
+ print("Python: {}, native: {}".format(meanAP_python, meanAP_native)) + assert abs(meanAP_python - meanAP_native) < 0.1 + +@win35_linux34 +def test_fasterrcnn_grocery_training_e2e(device_id): + try_set_default_device(cntk_device(device_id)) + _, _, _ = run_fasterrcnn_grocery_training(e2e = True) @win35_linux34 def test_fasterrcnn_grocery_training_4stage(device_id): - run_fasterrcnn_grocery_training(device_id, e2e = False) - -@win35_linux34 -def test_fasterrcnn_grocery_training_e2e(device_id, e2e=True): - run_fasterrcnn_grocery_training(device_id, e2e = True) + if cntk_device(device_id).type() != DeviceKind_GPU: + pytest.skip('test only runs on GPU') # it runs very slow in CPU + try_set_default_device(cntk_device(device_id)) + _, _, _ = run_fasterrcnn_grocery_training(e2e = False) diff --git a/Tests/EndToEndTests/CNTKv2Python/Examples/prepare_test_data.py b/Tests/EndToEndTests/CNTKv2Python/Examples/prepare_test_data.py index b353ed62d..3f592fab5 100644 --- a/Tests/EndToEndTests/CNTKv2Python/Examples/prepare_test_data.py +++ b/Tests/EndToEndTests/CNTKv2Python/Examples/prepare_test_data.py @@ -152,11 +152,18 @@ def prepare_alexnet_v0_model(): *"../../../../PretrainedModels".split("/")) local_base_path = os.path.normpath(local_base_path) + # v0 model: model_file = os.path.join(local_base_path, "AlexNet.model") - if not os.path.isfile(model_file): external_model_path = os.path.join(os.environ[envvar], "PreTrainedModels", "AlexNet", "v0", "AlexNet.model") copyfile(external_model_path, model_file) + + # v1 model: + model_file = os.path.join(local_base_path, "AlexNet_ImageNet_Caffe.model") + if not os.path.isfile(model_file): + external_model_path = os.path.join(os.environ[envvar], "PreTrainedModels", "AlexNet", "v1", "AlexNet_ImageNet_Caffe.model") + copyfile(external_model_path, model_file) + return local_base_path def prepare_UCF11_data(): diff --git a/Tests/EndToEndTests/CNTKv2Python/Examples/rpn_unit_test.py b/Tests/EndToEndTests/CNTKv2Python/Examples/rpn_unit_test.py index d937cd758..0a95428b9 100644 --- a/Tests/EndToEndTests/CNTKv2Python/Examples/rpn_unit_test.py +++ b/Tests/EndToEndTests/CNTKv2Python/Examples/rpn_unit_test.py @@ -5,23 +5,24 @@ # ============================================================================== import os, sys +import pytest +import numpy as np +from cntk import user_function +from cntk.ops import input_variable abs_path = os.path.dirname(os.path.abspath(__file__)) sys.path.append(os.path.join(abs_path)) sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection")) -import pytest -import numpy as np -import cntk -from cntk import user_function -from cntk.ops import input_variable -from utils.rpn.proposal_layer import ProposalLayer as CntkProposalLayer -from utils.rpn.proposal_target_layer import ProposalTargetLayer as CntkProposalTargetLayer -from utils.rpn.anchor_target_layer import AnchorTargetLayer as CntkAnchorTargetLayer -from utils.caffe_layers.proposal_layer import ProposalLayer as CaffeProposalLayer -from utils.caffe_layers.proposal_target_layer import ProposalTargetLayer as CaffeProposalTargetLayer -from utils.caffe_layers.anchor_target_layer import AnchorTargetLayer as CaffeAnchorTargetLayer +win35_linux34 = pytest.mark.skipif(not ((sys.platform == 'win32' and sys.version_info[:2] == (3,5)) or + (sys.platform != 'win32' and sys.version_info[:2] == (3,4))), + reason="it runs currently only in windows-py35 and linux-py34 due to precompiled cython modules") +@win35_linux34 def test_proposal_layer(): + from 
+    from utils.rpn.proposal_layer import ProposalLayer as CntkProposalLayer
+    from utils.caffe_layers.proposal_layer import ProposalLayer as CaffeProposalLayer
+    from FasterRCNN.FasterRCNN_config import cfg
+
     cls_prob_shape_cntk = (18,61,61)
     cls_prob_shape_caffe = (18,61,61)
     rpn_bbox_shape = (36, 61, 61)
@@ -38,7 +39,21 @@ def test_proposal_layer():
     rpn_bbox_var = input_variable(rpn_bbox_shape)
     dims_info_var = input_variable(dims_info_shape)
 
-    cntk_layer = user_function(CntkProposalLayer(cls_prob_var, rpn_bbox_var, dims_info_var))
+    layer_config = {}
+    layer_config["feat_stride"] = 16
+    layer_config["scales"] = [8, 16, 32]
+
+    layer_config["train_pre_nms_topN"] = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
+    layer_config["train_post_nms_topN"] = cfg["TRAIN"].RPN_POST_NMS_TOP_N
+    layer_config["train_nms_thresh"] = float(cfg["TRAIN"].RPN_NMS_THRESH)
+    layer_config["train_min_size"] = float(cfg["TRAIN"].RPN_MIN_SIZE)
+
+    layer_config["test_pre_nms_topN"] = cfg["TEST"].RPN_PRE_NMS_TOP_N
+    layer_config["test_post_nms_topN"] = cfg["TEST"].RPN_POST_NMS_TOP_N
+    layer_config["test_nms_thresh"] = float(cfg["TEST"].RPN_NMS_THRESH)
+    layer_config["test_min_size"] = float(cfg["TEST"].RPN_MIN_SIZE)
+
+    cntk_layer = user_function(CntkProposalLayer(cls_prob_var, rpn_bbox_var, dims_info_var, layer_config))
     state, cntk_output = cntk_layer.forward({cls_prob_var: [cls_prob], rpn_bbox_var: [rpn_bbox_pred], dims_info_var: dims_input})
     cntk_proposals = cntk_output[next(iter(cntk_output))][0]
@@ -59,7 +74,11 @@ def test_proposal_layer():
     assert np.allclose(cntk_proposals, caffe_proposals, rtol=0.0, atol=0.0)
     print("Verified ProposalLayer")
 
+@win35_linux34
 def test_proposal_target_layer():
+    from utils.rpn.proposal_target_layer import ProposalTargetLayer as CntkProposalTargetLayer
+    from utils.caffe_layers.proposal_target_layer import ProposalTargetLayer as CaffeProposalTargetLayer
+
     num_rois = 400
     all_rois_shape_cntk = (num_rois,4)
     num_gt_boxes = 50
@@ -147,7 +166,11 @@ def test_proposal_target_layer():
     assert np.allclose(cntk_bbox_inside_weights, caffe_bbox_inside_weights, rtol=0.0, atol=0.0)
     print("Verified ProposalTargetLayer")
 
+@win35_linux34
 def test_anchor_target_layer():
+    from utils.rpn.anchor_target_layer import AnchorTargetLayer as CntkAnchorTargetLayer
+    from utils.caffe_layers.anchor_target_layer import AnchorTargetLayer as CaffeAnchorTargetLayer
+
     rpn_cls_score_shape_cntk = (1, 18, 61, 61)
     num_gt_boxes = 50
     gt_boxes_shape_cntk = (num_gt_boxes,5)
diff --git a/Tests/EndToEndTests/CNTKv2Python/Tutorials/CNTK_FastRCNNEval_test.py b/Tests/EndToEndTests/CNTKv2Python/Tutorials/CNTK_FastRCNNEval_test.py
index fccfcf0a6..fec1a015b 100644
--- a/Tests/EndToEndTests/CNTKv2Python/Tutorials/CNTK_FastRCNNEval_test.py
+++ b/Tests/EndToEndTests/CNTKv2Python/Tutorials/CNTK_FastRCNNEval_test.py
@@ -10,7 +10,7 @@ import sys
 import pytest
 
 abs_path = os.path.dirname(os.path.abspath(__file__))
-notebook = os.path.join(abs_path, "..", "..", "..", "..", "Examples","Image","Detection","FastRCNN", "CNTK_FastRCNN_Eval.ipynb")
+notebook = os.path.join(abs_path, "..", "..", "..", "..", "Examples","Image","Detection","FastRCNN", "BrainScript", "CNTK_FastRCNN_Eval.ipynb")
 
 sys.path.append(abs_path)
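
A closing note on the rpn_unit_test change above: the ProposalLayer now receives its NMS parameters as an explicit layer_config dictionary instead of reading a global config. A hypothetical helper (not part of this patch) condensing the dictionary that test_proposal_layer builds by hand:

    # Hypothetical helper mirroring the layer_config assembled in rpn_unit_test.py.
    def make_proposal_layer_config(cfg, feat_stride=16, scales=(8, 16, 32)):
        layer_config = {"feat_stride": feat_stride, "scales": list(scales)}
        for phase in ("TRAIN", "TEST"):
            key = phase.lower()  # "train" / "test" key prefixes
            layer_config[key + "_pre_nms_topN"] = cfg[phase].RPN_PRE_NMS_TOP_N
            layer_config[key + "_post_nms_topN"] = cfg[phase].RPN_POST_NMS_TOP_N
            layer_config[key + "_nms_thresh"] = float(cfg[phase].RPN_NMS_THRESH)
            layer_config[key + "_min_size"] = float(cfg[phase].RPN_MIN_SIZE)
        return layer_config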