enabling native proposal layer and dlib selective search

This commit is contained in:
Parent: 7329e94aa3
Commit: 63488568fe
@@ -146,8 +146,8 @@ Examples/Text/LightRNN/test/word-0.location text
*.vsdm binary
*.zip binary
*.dnn binary
Examples/Image/Detection/FastRCNN/fastRCNN/*/*.pyd binary
Examples/Image/Detection/FastRCNN/fastRCNN/*/*.so binary
Examples/Image/Detection/FastRCNN/BrainScript/fastRCNN/*/*.pyd binary
Examples/Image/Detection/FastRCNN/BrainScript/fastRCNN/*/*.so binary
Examples/Image/Detection/utils/cython_modules/*.pyd binary
Examples/Image/Detection/utils/cython_modules/*.so binary
Tests/UnitTests/V2LibraryTests/data/*.bin binary
@@ -291,6 +291,7 @@ Examples/Image/DataSets/grocery/positive/
Examples/Image/DataSets/grocery/testImages/
Examples/Image/DataSets/grocery/*.txt
PretrainedModels/*.model
Examples/Image/Detection/FastRCNN/BrainScript/Output/
Examples/Image/Detection/FastRCNN/BrainScript/proc/
Examples/Image/Detection/FastRCNN/Output/
Examples/Image/Detection/FasterRCNN/Output/
@@ -13,9 +13,6 @@ sys.path.append(os.path.join(abs_path, "..", "..", "Image", "Detection", "Faster

C.device.try_set_default_device(C.device.cpu())

from FasterRCNN import eval_faster_rcnn_mAP, set_global_vars
from config import cfg

ops.register_native_user_function('ProposalLayerOp', 'Cntk.ProposalLayerLib-' + C.__version__.rstrip('+'), 'CreateProposalLayer')

def clone_with_native_proposal_layer(model):

@@ -26,16 +23,6 @@ def clone_with_native_proposal_layer(model):

    def converter(x):
        layer_config = copy.deepcopy(x.attributes)
        layer_config["test_pre_nms_topN"] = cfg["TEST"].RPN_PRE_NMS_TOP_N
        layer_config["test_post_nms_topN"] = cfg["TEST"].RPN_POST_NMS_TOP_N
        layer_config["test_nms_thresh"] = float(cfg["TEST"].RPN_NMS_THRESH)
        layer_config["test_min_size"] = float(cfg["TEST"].RPN_MIN_SIZE)

        layer_config["train_pre_nms_topN"] = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
        layer_config["train_post_nms_topN"] = cfg["TRAIN"].RPN_POST_NMS_TOP_N
        layer_config["train_nms_thresh"] = float(cfg["TRAIN"].RPN_NMS_THRESH)
        layer_config["train_min_size"] = float(cfg["TRAIN"].RPN_MIN_SIZE)

        return ops.native_user_function('ProposalLayerOp', list(x.inputs), layer_config, 'native_proposal_layer')

    return C.misc.convert(model, filter, converter)
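The `filter` predicate passed to `C.misc.convert` above is defined outside this hunk. For orientation only, a minimal sketch of what such a predicate could look like; the matching condition here is an assumption, not the test's actual code:

```python
# Illustrative only: a predicate for C.misc.convert that selects the Python
# ProposalLayer user-function nodes which `converter` then swaps for the
# registered native 'ProposalLayerOp'. The real filter in this test may
# identify the nodes differently.
def filter(x):
    return x.op_name == 'UserFunction' and 'ProposalLayer' in str(x.name)
```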
@@ -52,8 +39,21 @@ def evaluate(model_path):
    # ProposalLayer currently only runs on the CPU
    eval_device = C.cpu()
    model = C.Function.load(model_path, device=eval_device)
    set_global_vars(False)
    return eval_faster_rcnn_mAP(model)

    from FasterRCNN.FasterRCNN_config import cfg as detector_cfg
    from utils.configs.AlexNet_config import cfg as network_cfg
    from utils.configs.Grocery_config import cfg as dataset_cfg
    from utils.config_helpers import merge_configs
    from FasterRCNN.FasterRCNN_train import prepare
    from FasterRCNN.FasterRCNN_eval import compute_test_set_aps

    cfg = merge_configs([detector_cfg, network_cfg, dataset_cfg])
    cfg["CNTK"].FORCE_DETERMINISTIC = True

    prepare(cfg, False)
    eval_results = compute_test_set_aps(model, cfg)
    meanAP = np.nanmean(list(eval_results.values()))
    return meanAP

#############################
# main function boilerplate #
@@ -7,6 +7,7 @@

import sys, os
import numpy as np
import scipy.io as sio
import future
import xml.etree.ElementTree
from xml.etree import ElementTree
@@ -21,9 +22,10 @@ use_pad_scale = False
pad_width = 850
pad_height = 850

pascal_voc2007_jpgimg_rel_path = ".../VOCdevkit/VOC2007/JPEGImages/"
pascal_voc2007_imgsets_rel_path = ".../VOCdevkit/VOC2007/ImageSets/Main/"
pascal_voc2007_annotations_rel_path = ".../VOCdevkit/VOC2007/Annotations/"
pascal_voc2007_jpgimg_rel_path = "../VOCdevkit/VOC2007/JPEGImages/"
pascal_voc2007_imgsets_rel_path = "../VOCdevkit/VOC2007/ImageSets/Main/"
pascal_voc2007_annotations_rel_path = "../VOCdevkit/VOC2007/Annotations/"
pascal_voc2007_proposals_rel_path = "../selective_search_data/"

abs_path = os.path.dirname(os.path.abspath(__file__))
cls_file_path = os.path.join(abs_path, "class_map.txt")
@@ -47,9 +49,6 @@ def format_roi(cls_index, xmin, ymin, xmax, ymax, img_file_path):
        scale_y = (1.0 * pad_height) / img_height

        min_scale = min(scale_x, scale_y)
        if round(img_width * min_scale) != pad_width and round(img_height * min_scale) != pad_height:
            import pdb; pdb.set_trace()

        new_width = round(img_width * min_scale)
        new_height = round(img_height * min_scale)
        assert(new_width == pad_width or new_height == pad_height)
@@ -87,7 +86,7 @@ def format_roi(cls_index, xmin, ymin, xmax, ymax, img_file_path):

def create_mappings(train, skip_difficult):
    file_prefix = "trainval" if train else "test"
    img_map_input = "../VOCdevkit/VOC2007/ImageSets/Main/{}.txt".format(file_prefix)
    img_map_input = "{}.txt".format(file_prefix)
    img_map_output = "{}2007.txt".format(file_prefix)
    roi_map_output = "{}2007_rois_{}_{}{}.txt".format(
        file_prefix,
@@ -95,11 +94,13 @@ def create_mappings(train, skip_difficult):
        "pad" if use_pad_scale else "noPad",
        "_skipDif" if skip_difficult else "")
    size_map_output = "{}_size_file2007.txt".format(file_prefix)
    proposals_output = "{}2007_proposals.txt".format(file_prefix)

    in_map_file_path = os.path.join(abs_path, img_map_input)
    in_map_file_path = os.path.join(abs_path, pascal_voc2007_imgsets_rel_path, img_map_input)
    out_map_file_path = os.path.join(abs_path, img_map_output)
    roi_file_path = os.path.join(abs_path, roi_map_output)
    size_file_path = os.path.join(abs_path, size_map_output)
    proposals_file_path = os.path.join(abs_path, proposals_output)
    class_map_file_path = os.path.join(abs_path, "class_map.txt")

    # write class map file
@@ -115,11 +116,13 @@ def create_mappings(train, skip_difficult):
    input_lines = input_file.readlines()

    counter = 0
    img_numbers = []
    with open(out_map_file_path, 'w') as img_file:
        with open(roi_file_path, 'w') as roi_file:
            with open(size_file_path, 'w') as size_file:
                for in_line in input_lines:
                    img_number = in_line.strip()
                    img_numbers.append(img_number)
                    img_file_path = "{}{}.jpg".format(pascal_voc2007_jpgimg_rel_path, img_number)
                    img_line = "{}\t{}\t0\n".format(counter, img_file_path)
                    img_file.write(img_line)
@@ -164,6 +167,31 @@ def create_mappings(train, skip_difficult):
        for cls in classes:
            cls_file.write("{}\t{}\n".format(cls, class_dict[cls]))

    if not skip_difficult: # proposals are the same and need to be processed only once
        try:
            # convert selective search proposals from matlab to CNTK text format
            print("Converting matlab proposal file to CNTK format ({})".format(proposals_file_path))
            proposal_input = 'voc_2007_{}.mat'.format(file_prefix)
            in_ss_file_path = os.path.join(abs_path, pascal_voc2007_proposals_rel_path, proposal_input)
            raw = sio.loadmat(in_ss_file_path)
            boxes = raw['boxes'][0]
            images = raw['images']

            with open(proposals_file_path, 'w') as prop_file:
                for i in range(len(img_numbers)):
                    img_number = img_numbers[i]
                    img_name = images[i,0][0]
                    assert img_number == img_name

                    box_coords = boxes[i]
                    prop_line = "{} |proposals ".format(i)
                    for c in range(box_coords.shape[0]):
                        prop_line += ' ' + ' '.join(str(x) for x in box_coords[c])

                    prop_file.write(prop_line + '\n')
        except:
            print("Warning: error converting selective search proposals from matlab to CNTK text format")

if __name__ == '__main__':
    create_mappings(True, skip_difficult=True)
    create_mappings(False, skip_difficult=True)
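Each line written by the loop above follows the CNTK text-format reader convention: a sequence id followed by a `|proposals` stream holding four coordinates per box, with all of an image's boxes on one line. An illustrative line (the coordinates are made up):

```
0 |proposals  18 25 240 310 4 4 120 96
```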
@@ -4,17 +4,39 @@
# for full license information.
# ==============================================================================

import os
import os, sys
import numpy as np
import utils.od_utils as od
from utils.config_helpers import merge_configs

available_detectors = ['FastRCNN', 'FasterRCNN']

def get_detector_name(args):
    detector_name = None
    default_detector = 'FasterRCNN'
    if len(args) != 2:
        print("Please provide a detector name as the single argument. Usage:")
        print("    python DetectionDemo.py <detector_name>")
        print("Available detectors: {}".format(available_detectors))
    else:
        detector_name = args[1]
        if not any(detector_name == x for x in available_detectors):
            print("Unknown detector: {}.".format(detector_name))
            print("Available detectors: {}".format(available_detectors))
            detector_name = None

    if detector_name is None:
        print("Using default detector: {}".format(default_detector))
        return default_detector
    else:
        return detector_name

def get_configuration(detector_name):
    # load configs for detector, base network and data set
    if detector_name == "FastRCNN":
        from FastRCNN.config import cfg as detector_cfg
        from FastRCNN.FastRCNN_config import cfg as detector_cfg
    elif detector_name == "FasterRCNN":
        from FasterRCNN.config import cfg as detector_cfg
        from FasterRCNN.FasterRCNN_config import cfg as detector_cfg
    else:
        print('Unknown detector: {}'.format(detector_name))
@@ -29,7 +51,9 @@ def get_configuration(detector_name):

if __name__ == '__main__':
    # Currently supported detectors: 'FastRCNN', 'FasterRCNN'
    cfg = get_configuration('FasterRCNN')
    args = sys.argv
    detector_name = get_detector_name(args)
    cfg = get_configuration(detector_name)

    # train and test
    eval_model = od.train_object_detector(cfg)

@@ -40,7 +64,7 @@ if __name__ == '__main__':
    print('Mean AP = {:.4f}'.format(np.nanmean(list(eval_results.values()))))

    # detect objects in single image
    img_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), r"..\DataSets\Grocery\testImages\WIN_20160803_11_28_42_Pro.jpg")
    img_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), r"../DataSets/Grocery/testImages/WIN_20160803_11_28_42_Pro.jpg")
    regressed_rois, cls_probs = od.evaluate_single_image(eval_model, img_path, cfg)
    bboxes, labels, scores = od.filter_results(regressed_rois, cls_probs, cfg)
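With this change the demo picks the detector from the command line instead of the hard-coded 'FasterRCNN', e.g.:

```
python DetectionDemo.py FastRCNN
```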
@@ -48,7 +48,7 @@ momentum_time_constant = p.cntk_momentum_time_constant
# model specific variables (only AlexNet for now)
base_model = "AlexNet"
if base_model == "AlexNet":
    model_file = "../../../../../../../PretrainedModels/AlexNet_ImageNet_CNTK.model"
    model_file = "../../../../../../../../PretrainedModels/AlexNet.model"
    feature_node_name = "features"
    last_conv_node_name = "conv5.y"
    pool_node_name = "pool3"
@@ -73,22 +73,22 @@
"        cntk.device.try_set_default_device(cntk.device.cpu()) \n",
"    else:\n",
"        cntk.device.try_set_default_device(cntk.device.gpu(0))\n",
"    sys.path.append(os.path.join(*\"../../../../Tests/EndToEndTests/CNTKv2Python/Examples\".split(\"/\")))\n",
"    sys.path.append(os.path.join(*\"../../../../../Tests/EndToEndTests/CNTKv2Python/Examples\".split(\"/\")))\n",
"    import prepare_test_data as T\n",
"    T.prepare_Grocery_data()\n",
"    T.prepare_fastrcnn_grocery_100_model()\n",
"\n",
"#Make sure the grocery dataset is installed \n",
"sys.path.append('../../DataSets/Grocery')\n",
"sys.path.append('../../../DataSets/Grocery')\n",
"from install_grocery import download_grocery_data\n",
"download_grocery_data()\n",
"\n",
"# Make sure the FRCNN model exists - check if the model was trained and exists, if not - download the existing model\n",
"\n",
"sys.path.append('../../../../PretrainedModels')\n",
"sys.path.append('../../../../../PretrainedModels')\n",
"from download_model import download_model_by_name\n",
"download_model_by_name(\"Fast-RCNN_grocery100\")\n",
"model_path = '../../../../PretrainedModels/Fast-RCNN_grocery100.model'\n"
"model_path = '../../../../../PretrainedModels/Fast-RCNN_grocery100.model'\n"
]
},
{

@@ -233,7 +233,7 @@
"    img = cv2.imread(image_path)\n",
"    return resize_and_pad(img, width, height, pad_value), img\n",
"\n",
"test_image_path = r\"../../DataSets/Grocery/testImages/WIN_20160803_11_28_42_Pro.jpg\"\n",
"test_image_path = r\"../../../DataSets/Grocery/testImages/WIN_20160803_11_28_42_Pro.jpg\"\n",
"(test_img, test_img_model_arg), original_img = load_image_and_scale(test_image_path, image_width, image_height)\n",
"\n",
"plt.imshow(cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB))\n",
@@ -6,7 +6,7 @@

from __future__ import print_function
from builtins import str
import pdb, sys, os, time
import sys, os, time
import numpy as np
import selectivesearch
from easydict import EasyDict
@@ -17,7 +17,7 @@ __C.TRAIN = edict()

# If set to 'True' training will be skipped if a trained model exists already
__C.CNTK.MAKE_MODE = True
# set to 'True' to use determininistic algorithms
# set to 'True' to use deterministic algorithms
__C.CNTK.FORCE_DETERMINISTIC = False
# set to 'True' to run only a single epoch
__C.CNTK.FAST_MODE = False
@@ -28,17 +28,14 @@ __C.CNTK.GRAPH_TYPE = "png" # "png" or "pdf"
# Learning parameters
__C.CNTK.L2_REG_WEIGHT = 0.0005
__C.CNTK.MOMENTUM_PER_MB = 0.9
__C.CNTK.MAX_EPOCHS = 15 # use more epochs and more ROIs (NUM_ROI_PROPOSALS) for better results
__C.CNTK.LR_FACTOR = 1.0
__C.CNTK.MAX_EPOCHS = 20
__C.CNTK.LR_FACTOR = 10.0 # 10.0 is used for the Grocery example data. Start with 1.0 for other data sets.
__C.CNTK.LR_PER_SAMPLE = [0.001] * 10 + [0.0001] * 10 + [0.00001]
# The learning rate multiplier for all bias weights
__C.CNTK.BIAS_LR_MULT = 2.0

# Number of regions of interest [ROIs] proposals
__C.NUM_ROI_PROPOSALS = 500 # use 2000 or more for good results
# minimum width and height for proposals in pixels
__C.PROPOSALS_MIN_W = 20
__C.PROPOSALS_MIN_H = 20
__C.NUM_ROI_PROPOSALS = 200 # use 2000 or more for good results
# the minimum IoU (overlap) of a proposal to qualify for training regression targets
__C.BBOX_THRESH = 0.5
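For orientation, `LR_FACTOR` scales the per-sample schedule multiplicatively, mirroring the `[x * lr_factor for x in ...]` pattern used in the Faster R-CNN training code further down in this commit. A sketch of the effect (the scaling expression is an assumption based on that pattern):

```python
# Assumed from the scaling pattern in FasterRCNN.py below:
lr_per_sample_scaled = [x * __C.CNTK.LR_FACTOR for x in __C.CNTK.LR_PER_SAMPLE]
# With LR_FACTOR = 10.0, [0.001]*10 + [0.0001]*10 + [0.00001]
# becomes [0.01]*10 + [0.001]*10 + [0.0001].
```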
@@ -53,7 +50,7 @@ __C.IMAGE_WIDTH = 850
__C.IMAGE_HEIGHT = 850

# Use horizontally-flipped images during training?
__C.TRAIN.USE_FLIPPED = False
__C.TRAIN.USE_FLIPPED = True
# If set to 'True' conv layers weights from the base model will be trained, too
__C.TRAIN_CONV_LAYERS = True
# Sigma parameter for smooth L1 loss in the RPN and the detector (DET)
@@ -65,13 +62,52 @@ __C.RESULTS_NMS_THRESHOLD = 0.5
__C.RESULTS_NMS_CONF_THRESHOLD = 0.0

# Enable plotting of results generally / also plot background boxes / also plot unregressed boxes
__C.VISUALIZE_RESULTS = True
__C.VISUALIZE_RESULTS = False
__C.DRAW_NEGATIVE_ROIS = False
__C.DRAW_UNREGRESSED_ROIS = False
# only for plotting results: boxes with a score lower than this threshold will be considered background
__C.RESULTS_BGR_PLOT_THRESHOLD = 0.1


# If set to True the following two parameters need to point to the corresponding files that contain the proposals:
# __C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE
# __C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE
__C.USE_PRECOMPUTED_PROPOSALS = False

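# Example (illustrative only; the file names below are assumptions and must
# match the proposal files that the mapping scripts actually wrote, e.g.
# "{trainval,test}2007_proposals.txt"):
#   __C.USE_PRECOMPUTED_PROPOSALS = True
#   __C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE = "trainval2007_proposals.txt"
#   __C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE = "test2007_proposals.txt"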
# roi proposal parameters for selective search, grid and filtering
# The first three parameters are for dlib's selective search. For details see
# http://dlib.net/dlib/image_transforms/segment_image_abstract.h.html#find_candidate_object_locations
#
# The basic segmentation is performed kvals.size() times. The k parameter is set (from, to, step_size)
__C.roi_ss_kvals = (10, 500, 5)
# When doing the basic segmentations prior to any box merging, all
# rectangles that have an area < min_size are discarded. Therefore, all outputs and
# subsequent merged rectangles are built out of rectangles that contain at
# least min_size pixels. Note that setting min_size to a smaller value than
# you might otherwise be interested in using can be useful since it allows a
# larger number of possible merged boxes to be created
__C.roi_ss_min_size = 9
# There are max_merging_iterations rounds of neighboring blob merging.
# Therefore, this parameter has some effect on the number of output rectangles
# you get, with larger values of the parameter giving more output rectangles.
# Hint: set __C.CNTK.DEBUG_OUTPUT=True to see the number of ROIs from selective search
__C.roi_ss_mm_iterations = 30
#
# image size used for ROI generation
__C.roi_ss_img_size = 200
# minimum relative width/height of an ROI
__C.roi_min_side_rel = 0.01
# maximum relative width/height of an ROI
__C.roi_max_side_rel = 1.0
# minimum relative area of an ROI
__C.roi_min_area_rel = 0.0001
# maximum relative area of an ROI
__C.roi_max_area_rel = 0.9
# maximum aspect ratio of an ROI vertically and horizontally
__C.roi_max_aspect_ratio = 4.0
# aspect ratios of ROIs for uniform grid ROIs
__C.roi_grid_aspect_ratios = [1.0, 2.0, 0.5]

# For reproducibility
__C.RND_SEED = 3
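For context, the `roi_ss_*` values above feed dlib's selective search. A minimal sketch of the call, assuming the stock dlib Python binding; the actual call site lives in `utils/proposal_helpers.py` and may differ in detail:

```python
import dlib

# Run dlib selective search with the configured values; purely illustrative.
img = dlib.load_rgb_image("testImage.jpg")   # hypothetical input image
rects = []
dlib.find_candidate_object_locations(img, rects,
                                     kvals=(10, 500, 5),         # __C.roi_ss_kvals
                                     min_size=9,                 # __C.roi_ss_min_size
                                     max_merging_iterations=30)  # __C.roi_ss_mm_iterations
print("selective search returned {} candidate boxes".format(len(rects)))
```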
@@ -5,16 +5,15 @@
# ==============================================================================

import os
import numpy as np
from matplotlib.pyplot import imsave
import cv2
import numpy as np
import cntk
from cntk import input_variable, Axis
from utils.map_helpers import evaluate_detections
from utils.plot_helpers import resize_and_pad
from utils.rpn.bbox_transform import regress_rois
from utils.od_mb_source import ObjectDetectionMinibatchSource
from utils.proposal_helpers import ProposalProvider, compute_proposals, compute_image_stats
from utils.proposal_helpers import ProposalProvider, compute_image_stats, compute_proposals

class FastRCNN_Evaluator:
    def __init__(self, eval_model, cfg):

@@ -26,9 +25,7 @@ class FastRCNN_Evaluator:
        roi_proposals = input_variable((cfg.NUM_ROI_PROPOSALS, 4), dynamic_axes=[Axis.default_batch_axis()],
                                       name="roi_proposals")
        self._eval_model = eval_model(image_input, roi_proposals)
        self._min_w = cfg['PROPOSALS_MIN_W']
        self._min_h = cfg['PROPOSALS_MIN_H']
        self._num_proposals = cfg['NUM_ROI_PROPOSALS']
        self._cfg = cfg

    def process_image(self, img_path):
        out_cls_pred, out_rpn_rois, out_bbox_regr, dims = self.process_image_detailed(img_path)

@@ -41,8 +38,6 @@ class FastRCNN_Evaluator:
        img = cv2.imread(img_path)
        _, cntk_img_input, dims = resize_and_pad(img, self._img_shape[2], self._img_shape[1])

        #import pdb; pdb.set_trace()

        # compute ROI proposals and apply scaling and padding to them
        # [target_w, target_h, img_width, img_height, top, bottom, left, right, scale_factor]
        img_stats = compute_image_stats(len(img[0]), len(img), self._img_shape[2], self._img_shape[1])

@@ -50,7 +45,8 @@ class FastRCNN_Evaluator:
        top = img_stats[4]
        left = img_stats[6]

        proposals = compute_proposals(img, self._num_proposals, self._min_w, self._min_h)
        num_proposals = self._cfg['NUM_ROI_PROPOSALS']
        proposals = compute_proposals(img, num_proposals, self._cfg)
        proposals = proposals * scale_factor
        proposals += (left, top, left, top)

@@ -76,7 +72,18 @@ def compute_test_set_aps(eval_model, cfg):
    frcn_eval = eval_model(image_input, roi_proposals)

    # Create the minibatch source
    proposal_provider = ProposalProvider.fromconfig(cfg)
    if cfg.USE_PRECOMPUTED_PROPOSALS:
        try:
            cfg["DATA"].TEST_PRECOMPUTED_PROPOSALS_FILE = os.path.join(cfg["DATA"].MAP_FILE_PATH, cfg["DATA"].TEST_PRECOMPUTED_PROPOSALS_FILE)
            proposal_provider = ProposalProvider.fromfile(cfg["DATA"].TEST_PRECOMPUTED_PROPOSALS_FILE, cfg.NUM_ROI_PROPOSALS)
        except:
            print("To use precomputed proposals please specify the following parameters in your configuration:\n"
                  "__C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE\n"
                  "__C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE")
            exit(-1)
    else:
        proposal_provider = ProposalProvider.fromconfig(cfg)

    minibatch_source = ObjectDetectionMinibatchSource(
        cfg["DATA"].TEST_MAP_FILE,
        cfg["DATA"].TEST_ROI_FILE,
@@ -58,9 +58,17 @@ def prepare(cfg, use_arg_parser=True):
    cfg["DATA"].TEST_MAP_FILE = os.path.join(data_path, cfg["DATA"].TEST_MAP_FILE)
    cfg["DATA"].TRAIN_ROI_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_ROI_FILE)
    cfg["DATA"].TEST_ROI_FILE = os.path.join(data_path, cfg["DATA"].TEST_ROI_FILE)
    if cfg.USE_PRECOMPUTED_PROPOSALS:
        try:
            cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE)
        except:
            print("To use precomputed proposals please specify the following parameters in your configuration:\n"
                  "__C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE\n"
                  "__C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE")
            exit(-1)

    cfg['MODEL_PATH'] = os.path.join(cfg.OUTPUT_PATH, "fast_rcnn_eval_{}.model".format(cfg["MODEL"].BASE_MODEL))
    cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "PretrainedModels",
    cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "..", "PretrainedModels",
                                          cfg["MODEL"].BASE_MODEL_FILE)

    cfg["DATA"].CLASSES = parse_class_map_file(cfg["DATA"].CLASS_MAP_FILE)

@@ -300,7 +308,11 @@ def train_fast_rcnn(cfg):
    log_number_of_parameters(loss)

    # Create the minibatch source
    proposal_provider = ProposalProvider.fromconfig(cfg)
    if cfg.USE_PRECOMPUTED_PROPOSALS:
        proposal_provider = ProposalProvider.fromfile(cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE, cfg.NUM_ROI_PROPOSALS)
    else:
        proposal_provider = ProposalProvider.fromconfig(cfg)

    od_minibatch_source = ObjectDetectionMinibatchSource(
        cfg["DATA"].TRAIN_MAP_FILE, cfg["DATA"].TRAIN_ROI_FILE,
        max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE,
@@ -5,7 +5,7 @@
This folder contains an end-to-end solution for using Fast R-CNN to perform object detection.
The original research paper for Fast R-CNN can be found at [https://arxiv.org/abs/1504.08083](https://arxiv.org/abs/1504.08083).
Base models that are supported by the current configuration are AlexNet and VGG16.
Two image set that are preconfigured are Pascal VOC 2007 and Grocery.
Two image sets that are preconfigured are Pascal VOC 2007 and Grocery.
Other base models or image sets can be used by adding a configuration file similar to the examples in `utils/configs` and importing it in `run_fast_rcnn.py`.

## Running the example
@@ -15,14 +15,14 @@ Other base models or image sets can be used by adding a configuration file simil
To run Fast R-CNN you need a CNTK Python environment. Install the following additional packages:

```
pip install opencv-python easydict pyyaml future
pip install opencv-python easydict pyyaml dlib
```

The code uses prebuilt Cython modules for parts of the region proposal network (see `Examples/Image/Detection/utils/cython_modules`).
These binaries are contained in the repository for Python 3.5 under Windows and Python 3.4 under Linux.
If you require other versions please follow the instructions at [https://github.com/rbgirshick/py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn#installation-sufficient-for-the-demo).

If you want to use the debug output you need to run ' pip install pydot_ng) ([website](https://pypi.python.org/pypi/pydot-ng)) and install [graphviz](http://graphviz.org/) (GraphViz executable has to be in the system’s PATH) to be able to plot the CNTK graphs.
If you want to use the debug output you need to run `pip install pydot_ng` ([website](https://pypi.python.org/pypi/pydot-ng)) and install [graphviz](http://graphviz.org/) (GraphViz executable has to be in the system’s PATH) to be able to plot the CNTK graphs.

### Getting the data and AlexNet model
@@ -90,7 +90,7 @@ and run `python run_fast_rcnn.py` to train and evaluate Fast R-CNN on your data.

### Parameters

All options and parameters are in `config.py` in the `FastRCNN` folder and all of them are explained there. These include
All options and parameters are in `FastRCNN_config.py` in the `FastRCNN` folder and all of them are explained there. These include

```
# learning parameters

@@ -99,9 +99,10 @@ __C.CNTK.LR_PER_SAMPLE = [0.001] * 10 + [0.0001] * 10 + [0.00001]

# Number of regions of interest [ROIs] proposals
__C.NUM_ROI_PROPOSALS = 1000
# minimum width and height for proposals in pixels
__C.PROPOSALS_MIN_W = 20
__C.PROPOSALS_MIN_H = 20
# minimum relative width/height of an ROI
__C.roi_min_side_rel = 0.01
# maximum relative width/height of an ROI
__C.roi_max_side_rel = 1.0
```

### Fast R-CNN CNTK code

@@ -110,4 +111,4 @@ Most of the code is in `FastRCNN_train.py` and `FastRCNN_eval.py` (and `Examples

### Algorithm

All details regarding the Fast R-CNN algortihm can be found in the original research paper: [https://arxiv.org/abs/1504.08083](https://arxiv.org/abs/1504.08083).
All details regarding the Fast R-CNN algorithm can be found in the original research paper: [https://arxiv.org/abs/1504.08083](https://arxiv.org/abs/1504.08083).
@@ -26,7 +26,7 @@ if __name__ == '__main__':

    sys.path.append(os.path.join(base_folder, "..", "..", "..", "..", "PretrainedModels"))
    from download_model import download_model_by_name
    download_model_by_name("AlexNet_ImageNet_CNTK")
    download_model_by_name("AlexNet_ImageNet_Caffe")

    print("Creating mapping files for Grocery data set..")
    create_grocery_mappings(base_folder)
@@ -1,19 +0,0 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================

from __future__ import print_function
import zipfile
import os, sys

base_folder = os.path.dirname(os.path.abspath(__file__))

sys.path.append(os.path.join(base_folder, "..", "..", "DataSets", "Grocery"))
from install_grocery import download_grocery_data
download_grocery_data()

sys.path.append(os.path.join(base_folder, "..", "..", "..", "..", "PretrainedModels"))
from download_model import download_model_by_name
download_model_by_name("AlexNet_ImageNet_CNTK")
@@ -13,7 +13,7 @@ from utils.plot_helpers import plot_test_set_results

def get_configuration():
    # load configs for detector, base network and data set
    from config import cfg as detector_cfg
    from FastRCNN_config import cfg as detector_cfg
    # for VGG16 base model use: from utils.configs.VGG16_config import cfg as network_cfg
    # for AlexNet base model use: from utils.configs.AlexNet_config import cfg as network_cfg
    from utils.configs.AlexNet_config import cfg as network_cfg
@@ -1,750 +0,0 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================

from __future__ import print_function
import numpy as np
import os, sys
import argparse
import yaml # pip install pyyaml
import easydict # pip install easydict
import cntk
import easydict
from cntk import Trainer, UnitType, load_model, Axis, input_variable, parameter, times, combine, \
    softmax, roipooling, plus, element_times, CloneMethod, alias, Communicator, reduce_sum
from cntk.core import Value
from cntk.io import MinibatchData
from cntk.initializer import normal
from cntk.layers import placeholder, Constant, Sequential
from cntk.learners import momentum_sgd, learning_rate_schedule, momentum_schedule
from cntk.logging import log_number_of_parameters, ProgressPrinter
from cntk.logging.graph import find_by_name, plot
from cntk.losses import cross_entropy_with_softmax
from cntk.metrics import classification_error
from _cntk_py import force_deterministic_algorithms

abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path, ".."))
from utils.rpn.rpn_helpers import create_rpn, create_proposal_target_layer
from utils.rpn.cntk_smoothL1_loss import SmoothL1Loss
from utils.map.map_helpers import evaluate_detections
from utils.annotations.annotations_helper import parse_class_map_file
from config import cfg
from od_mb_source import ObjectDetectionMinibatchSource
from cntk_helpers import regress_rois

###############################################################
###############################################################
mb_size = 1
image_width = cfg["CNTK"].IMAGE_WIDTH
image_height = cfg["CNTK"].IMAGE_HEIGHT
num_channels = 3

# dims_input -- (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
dims_input_const = MinibatchData(Value(batch=np.asarray(
    [image_width, image_height, image_width, image_height, image_width, image_height], dtype=np.float32)), 1, 1, False)

# Color used for padding and normalization (Caffe model uses [102.98010, 115.94650, 122.77170])
img_pad_value = [103, 116, 123] if cfg["CNTK"].BASE_MODEL == "VGG16" else [114, 114, 114]
normalization_const = Constant([[[103]], [[116]], [[123]]]) if cfg["CNTK"].BASE_MODEL == "VGG16" else Constant([[[114]], [[114]], [[114]]])

globalvars = {}
globalvars['output_path'] = os.path.join(abs_path, "Output")

# dataset specific parameters
map_file_path = os.path.join(abs_path, cfg["CNTK"].MAP_FILE_PATH)
globalvars['class_map_file'] = cfg["CNTK"].CLASS_MAP_FILE
globalvars['train_map_file'] = cfg["CNTK"].TRAIN_MAP_FILE
globalvars['test_map_file'] = cfg["CNTK"].TEST_MAP_FILE
globalvars['train_roi_file'] = cfg["CNTK"].TRAIN_ROI_FILE
globalvars['test_roi_file'] = cfg["CNTK"].TEST_ROI_FILE
epoch_size = cfg["CNTK"].NUM_TRAIN_IMAGES
num_test_images = cfg["CNTK"].NUM_TEST_IMAGES

# model specific parameters
model_folder = os.path.join(abs_path, "..", "..", "..", "..", "PretrainedModels")
base_model_file = os.path.join(model_folder, cfg["CNTK"].BASE_MODEL_FILE)
feature_node_name = cfg["CNTK"].FEATURE_NODE_NAME
last_conv_node_name = cfg["CNTK"].LAST_CONV_NODE_NAME
start_train_conv_node_name = cfg["CNTK"].START_TRAIN_CONV_NODE_NAME
pool_node_name = cfg["CNTK"].POOL_NODE_NAME
last_hidden_node_name = cfg["CNTK"].LAST_HIDDEN_NODE_NAME
roi_dim = cfg["CNTK"].ROI_DIM
###############################################################
###############################################################
def set_global_vars(use_arg_parser = True):
    data_path = map_file_path

    # set and overwrite learning parameters
    globalvars['rpn_lr_factor'] = cfg["CNTK"].RPN_LR_FACTOR
    globalvars['frcn_lr_factor'] = cfg["CNTK"].FRCN_LR_FACTOR
    globalvars['e2e_lr_factor'] = cfg["CNTK"].E2E_LR_FACTOR
    globalvars['momentum_per_mb'] = cfg["CNTK"].MOMENTUM_PER_MB
    globalvars['e2e_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].E2E_MAX_EPOCHS
    globalvars['rpn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].RPN_EPOCHS
    globalvars['frcn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].FRCN_EPOCHS
    globalvars['rnd_seed'] = cfg.RNG_SEED
    globalvars['train_conv'] = cfg["CNTK"].TRAIN_CONV_LAYERS
    globalvars['train_e2e'] = cfg["CNTK"].TRAIN_E2E

    if use_arg_parser:
        parser = argparse.ArgumentParser()
        parser.add_argument('-datadir', '--datadir', help='Data directory where the ImageNet dataset is located',
                            required=False, default=data_path)
        parser.add_argument('-outputdir', '--outputdir', help='Output directory for checkpoints and models',
                            required=False, default=None)
        parser.add_argument('-logdir', '--logdir', help='Log file',
                            required=False, default=None)
        parser.add_argument('-n', '--num_epochs', help='Total number of epochs to train', type=int,
                            required=False, default=cfg["CNTK"].E2E_MAX_EPOCHS)
        parser.add_argument('-m', '--minibatch_size', help='Minibatch size', type=int,
                            required=False, default=mb_size)
        parser.add_argument('-e', '--epoch_size', help='Epoch size', type=int,
                            required=False, default=epoch_size)
        parser.add_argument('-q', '--quantized_bits', help='Number of quantized bits used for gradient aggregation', type=int,
                            required=False, default='32')
        parser.add_argument('-r', '--restart',
                            help='Indicating whether to restart from scratch (instead of restart from checkpoint file by default)',
                            action='store_true')
        parser.add_argument('-device', '--device', type=int, help="Force to run the script on a specified device",
                            required=False, default=None)
        parser.add_argument('-rpnLrFactor', '--rpnLrFactor', type=float, help="Scale factor for rpn lr schedule", required=False)
        parser.add_argument('-frcnLrFactor', '--frcnLrFactor', type=float, help="Scale factor for frcn lr schedule", required=False)
        parser.add_argument('-e2eLrFactor', '--e2eLrFactor', type=float, help="Scale factor for e2e lr schedule", required=False)
        parser.add_argument('-momentumPerMb', '--momentumPerMb', type=float, help="momentum per minibatch", required=False)
        parser.add_argument('-e2eEpochs', '--e2eEpochs', type=int, help="number of epochs for e2e training", required=False)
        parser.add_argument('-rpnEpochs', '--rpnEpochs', type=int, help="number of epochs for rpn training", required=False)
        parser.add_argument('-frcnEpochs', '--frcnEpochs', type=int, help="number of epochs for frcn training", required=False)
        parser.add_argument('-rndSeed', '--rndSeed', type=int, help="the random seed", required=False)
        parser.add_argument('-trainConv', '--trainConv', type=int, help="whether to train conv layers", required=False)
        parser.add_argument('-trainE2E', '--trainE2E', type=int, help="whether to train e2e (otherwise 4 stage)", required=False)

        args = vars(parser.parse_args())

        if args['rpnLrFactor'] is not None:
            globalvars['rpn_lr_factor'] = args['rpnLrFactor']
        if args['frcnLrFactor'] is not None:
            globalvars['frcn_lr_factor'] = args['frcnLrFactor']
        if args['e2eLrFactor'] is not None:
            globalvars['e2e_lr_factor'] = args['e2eLrFactor']
        if args['momentumPerMb'] is not None:
            globalvars['momentum_per_mb'] = args['momentumPerMb']
        if args['e2eEpochs'] is not None:
            globalvars['e2e_epochs'] = args['e2eEpochs']
        if args['rpnEpochs'] is not None:
            globalvars['rpn_epochs'] = args['rpnEpochs']
        if args['frcnEpochs'] is not None:
            globalvars['frcn_epochs'] = args['frcnEpochs']
        if args['rndSeed'] is not None:
            globalvars['rnd_seed'] = args['rndSeed']
        if args['trainConv'] is not None:
            globalvars['train_conv'] = True if args['trainConv']==1 else False
        if args['trainE2E'] is not None:
            globalvars['train_e2e'] = True if args['trainE2E']==1 else False

        if args['outputdir'] is not None:
            globalvars['output_path'] = args['outputdir']
        if args['logdir'] is not None:
            log_dir = args['logdir']
        if args['device'] is not None:
            # Setting one worker on GPU and one worker on CPU. Otherwise memory consumption is too high for a single GPU.
            if Communicator.rank() == 0:
                cntk.device.try_set_default_device(cntk.device.gpu(args['device']))
            else:
                cntk.device.try_set_default_device(cntk.device.cpu())

        if args['datadir'] is not None:
            data_path = args['datadir']

    if not os.path.isdir(data_path):
        raise RuntimeError("Directory %s does not exist" % data_path)

    globalvars['class_map_file'] = os.path.join(data_path, globalvars['class_map_file'])
    globalvars['train_map_file'] = os.path.join(data_path, globalvars['train_map_file'])
    globalvars['test_map_file'] = os.path.join(data_path, globalvars['test_map_file'])
    globalvars['train_roi_file'] = os.path.join(data_path, globalvars['train_roi_file'])
    globalvars['test_roi_file'] = os.path.join(data_path, globalvars['test_roi_file'])

    if cfg["CNTK"].FORCE_DETERMINISTIC:
        force_deterministic_algorithms()
    np.random.seed(seed=globalvars['rnd_seed'])
    globalvars['classes'] = parse_class_map_file(globalvars['class_map_file'])
    globalvars['num_classes'] = len(globalvars['classes'])

    if cfg["CNTK"].DEBUG_OUTPUT:
        # report args
        print("Using the following parameters:")
        print("Flip image       : {}".format(cfg["TRAIN"].USE_FLIPPED))
        print("Train conv layers: {}".format(globalvars['train_conv']))
        print("Random seed      : {}".format(globalvars['rnd_seed']))
        print("Momentum per MB  : {}".format(globalvars['momentum_per_mb']))
        if globalvars['train_e2e']:
            print("E2E epochs       : {}".format(globalvars['e2e_epochs']))
        else:
            print("RPN lr factor    : {}".format(globalvars['rpn_lr_factor']))
            print("RPN epochs       : {}".format(globalvars['rpn_epochs']))
            print("FRCN lr factor   : {}".format(globalvars['frcn_lr_factor']))
            print("FRCN epochs      : {}".format(globalvars['frcn_epochs']))

###############################################################
###############################################################
def clone_model(base_model, from_node_names, to_node_names, clone_method):
    from_nodes = [find_by_name(base_model, node_name) for node_name in from_node_names]
    if None in from_nodes:
        print("Error: could not find all specified 'from_nodes' in clone. Looking for {}, found {}"
              .format(from_node_names, from_nodes))
    to_nodes = [find_by_name(base_model, node_name) for node_name in to_node_names]
    if None in to_nodes:
        print("Error: could not find all specified 'to_nodes' in clone. Looking for {}, found {}"
              .format(to_node_names, to_nodes))
    input_placeholders = dict(zip(from_nodes, [placeholder() for x in from_nodes]))
    cloned_net = combine(to_nodes).clone(clone_method, input_placeholders)
    return cloned_net

def clone_conv_layers(base_model):
    if not globalvars['train_conv']:
        conv_layers = clone_model(base_model, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
    elif feature_node_name == start_train_conv_node_name:
        conv_layers = clone_model(base_model, [feature_node_name], [last_conv_node_name], CloneMethod.clone)
    else:
        fixed_conv_layers = clone_model(base_model, [feature_node_name], [start_train_conv_node_name],
                                        CloneMethod.freeze)
        train_conv_layers = clone_model(base_model, [start_train_conv_node_name], [last_conv_node_name],
                                        CloneMethod.clone)
        conv_layers = Sequential([fixed_conv_layers, train_conv_layers])
    return conv_layers

# Please keep in sync with Readme.md
def create_fast_rcnn_predictor(conv_out, rois, fc_layers):
    # RCNN
    roi_out = roipooling(conv_out, rois, cntk.MAX_POOLING, (roi_dim, roi_dim), spatial_scale=1/16.0)
    fc_out = fc_layers(roi_out)

    # prediction head
    W_pred = parameter(shape=(4096, globalvars['num_classes']), init=normal(scale=0.01), name="cls_score.W")
    b_pred = parameter(shape=globalvars['num_classes'], init=0, name="cls_score.b")
    cls_score = plus(times(fc_out, W_pred), b_pred, name='cls_score')

    # regression head
    W_regr = parameter(shape=(4096, globalvars['num_classes']*4), init=normal(scale=0.001), name="bbox_regr.W")
    b_regr = parameter(shape=globalvars['num_classes']*4, init=0, name="bbox_regr.b")
    bbox_pred = plus(times(fc_out, W_regr), b_regr, name='bbox_regr')

    return cls_score, bbox_pred

# Please keep in sync with Readme.md
# Defines the Faster R-CNN network model for detecting objects in images
def create_faster_rcnn_predictor(base_model_file_name, features, scaled_gt_boxes, dims_input):
    # Load the pre-trained classification net and clone layers
    base_model = load_model(base_model_file_name)
    conv_layers = clone_conv_layers(base_model)
    fc_layers = clone_model(base_model, [pool_node_name], [last_hidden_node_name], clone_method=CloneMethod.clone)

    # Normalization and conv layers
    feat_norm = features - normalization_const
    conv_out = conv_layers(feat_norm)

    # RPN and prediction targets
    rpn_rois, rpn_losses = \
        create_rpn(conv_out, scaled_gt_boxes, dims_input, proposal_layer_param_string=cfg["CNTK"].PROPOSAL_LAYER_PARAMS)
    rois, label_targets, bbox_targets, bbox_inside_weights = \
        create_proposal_target_layer(rpn_rois, scaled_gt_boxes, num_classes=globalvars['num_classes'])

    # Fast RCNN and losses
    cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers)
    detection_losses = create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights)
    loss = rpn_losses + detection_losses
    pred_error = classification_error(cls_score, label_targets, axis=1)

    return loss, pred_error

def create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights):
    # classification loss
    cls_loss = cross_entropy_with_softmax(cls_score, label_targets, axis=1)

    p_cls_loss = placeholder()
    p_rois = placeholder()
    # The terms that are accounted for in the cls loss are those that correspond to an actual roi proposal --> do not count no-op (all-zero) rois
    roi_indicator = reduce_sum(p_rois, axis=1)
    cls_num_terms = reduce_sum(cntk.greater_equal(roi_indicator, 0.0))
    cls_normalization_factor = 1.0 / cls_num_terms
    normalized_cls_loss = reduce_sum(p_cls_loss) * cls_normalization_factor

    reduced_cls_loss = cntk.as_block(normalized_cls_loss,
                                     [(p_cls_loss, cls_loss), (p_rois, rois)],
                                     'Normalize', 'norm_cls_loss')

    # regression loss
    p_bbox_pred = placeholder()
    p_bbox_targets = placeholder()
    p_bbox_inside_weights = placeholder()
    bbox_loss = SmoothL1Loss(cfg["CNTK"].SIGMA_DET_L1, p_bbox_pred, p_bbox_targets, p_bbox_inside_weights, 1.0)
    # The bbox loss is normalized by the batch size
    bbox_normalization_factor = 1.0 / cfg["TRAIN"].BATCH_SIZE
    normalized_bbox_loss = reduce_sum(bbox_loss) * bbox_normalization_factor

    reduced_bbox_loss = cntk.as_block(normalized_bbox_loss,
                                      [(p_bbox_pred, bbox_pred), (p_bbox_targets, bbox_targets), (p_bbox_inside_weights, bbox_inside_weights)],
                                      'SmoothL1Loss', 'norm_bbox_loss')

    detection_losses = plus(reduced_cls_loss, reduced_bbox_loss, name="detection_losses")

    return detection_losses
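# Note on SmoothL1Loss above: it implements the robust regression loss from
# the Fast R-CNN paper (https://arxiv.org/abs/1504.08083), scaled by sigma:
#     smooth_L1(x) = 0.5 * (sigma * x)^2    if |x| < 1 / sigma^2
#                  = |x| - 0.5 / sigma^2    otherwise
# where sigma is cfg["CNTK"].SIGMA_DET_L1 for the detector head.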
def create_eval_model(model, image_input, dims_input, rpn_model=None):
    print("creating eval model")
    conv_layers = clone_model(model, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
    conv_out = conv_layers(image_input)

    model_with_rpn = model if rpn_model is None else rpn_model
    rpn = clone_model(model_with_rpn, [last_conv_node_name, "dims_input"], ["rpn_rois"], CloneMethod.freeze)
    rpn_rois = rpn(conv_out, dims_input)

    roi_fc_layers = clone_model(model, [last_conv_node_name, "rpn_target_rois"], ["cls_score", "bbox_regr"], CloneMethod.freeze)
    pred_net = roi_fc_layers(conv_out, rpn_rois)
    cls_score = pred_net.outputs[0]
    bbox_regr = pred_net.outputs[1]

    if cfg["TRAIN"].BBOX_NORMALIZE_TARGETS and cfg["TRAIN"].BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        num_boxes = int(bbox_regr.shape[1] / 4)
        bbox_normalize_means = np.array(cfg["TRAIN"].BBOX_NORMALIZE_MEANS * num_boxes)
        bbox_normalize_stds = np.array(cfg["TRAIN"].BBOX_NORMALIZE_STDS * num_boxes)
        bbox_regr = plus(element_times(bbox_regr, bbox_normalize_stds), bbox_normalize_means, name='bbox_regr')

    cls_pred = softmax(cls_score, axis=1, name='cls_pred')
    eval_model = combine([cls_pred, rpn_rois, bbox_regr])

    return eval_model

def train_model(image_input, roi_input, dims_input, loss, pred_error,
                lr_per_sample, mm_schedule, l2_reg_weight, epochs_to_train,
                rpn_rois_input=None, buffered_rpn_proposals=None):
    if isinstance(loss, cntk.Variable):
        loss = combine([loss])

    params = loss.parameters
    biases = [p for p in params if '.b' in p.name or 'b' == p.name]
    others = [p for p in params if not p in biases]
    bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT

    if cfg["CNTK"].DEBUG_OUTPUT:
        print("biases")
        for p in biases: print(p)
        print("others")
        for p in others: print(p)
        print("bias_lr_mult: {}".format(bias_lr_mult))

    # Instantiate the learners and the trainer object
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    learner = momentum_sgd(others, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight,
                           unit_gain=False, use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT)

    bias_lr_per_sample = [v * bias_lr_mult for v in lr_per_sample]
    bias_lr_schedule = learning_rate_schedule(bias_lr_per_sample, unit=UnitType.sample)
    bias_learner = momentum_sgd(biases, bias_lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight,
                                unit_gain=False, use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT)
    trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

    # Get minibatches of images and perform model training
    print("Training model for %s epochs." % epochs_to_train)
    log_number_of_parameters(loss)

    # Create the minibatch source
    od_minibatch_source = ObjectDetectionMinibatchSource(
        globalvars['train_map_file'], globalvars['train_roi_file'],
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width, pad_height=image_height, pad_value=img_pad_value,
        randomize=True, use_flipping=cfg["TRAIN"].USE_FLIPPED,
        max_images=cfg["CNTK"].NUM_TRAIN_IMAGES,
        buffered_rpn_proposals=buffered_rpn_proposals)

    # define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.roi_si: roi_input,
        od_minibatch_source.dims_si: dims_input
    }

    use_buffered_proposals = buffered_rpn_proposals is not None
    progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs_to_train, gen_heartbeat=True)
    for epoch in range(epochs_to_train):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data, proposals = od_minibatch_source.next_minibatch_with_proposals(min(mb_size, epoch_size-sample_count), input_map=input_map)
            if use_buffered_proposals:
                data[rpn_rois_input] = MinibatchData(Value(batch=np.asarray(proposals, dtype=np.float32)), 1, 1, False)
                # remove dims input if no rpn is required to avoid warnings
                del data[[k for k in data if '[6]' in str(k)][0]]

            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
            if sample_count % 100 == 0:
                print("Processed {} samples".format(sample_count))

        progress_printer.epoch_summary(with_metric=True)
def compute_rpn_proposals(rpn_model, image_input, roi_input, dims_input):
    num_images = cfg["CNTK"].NUM_TRAIN_IMAGES
    # Create the minibatch source
    od_minibatch_source = ObjectDetectionMinibatchSource(
        globalvars['train_map_file'], globalvars['train_roi_file'],
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width, pad_height=image_height, pad_value=img_pad_value,
        max_images=num_images,
        randomize=False, use_flipping=False)

    # define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.roi_si: roi_input,
        od_minibatch_source.dims_si: dims_input
    }

    # setting pre- and post-nms top N to training values since buffered proposals are used for further training
    test_pre = cfg["TEST"].RPN_PRE_NMS_TOP_N
    test_post = cfg["TEST"].RPN_POST_NMS_TOP_N
    cfg["TEST"].RPN_PRE_NMS_TOP_N = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
    cfg["TEST"].RPN_POST_NMS_TOP_N = cfg["TRAIN"].RPN_POST_NMS_TOP_N

    buffered_proposals = [None for _ in range(num_images)]
    sample_count = 0
    while sample_count < num_images:
        data = od_minibatch_source.next_minibatch(1, input_map=input_map)
        output = rpn_model.eval(data)
        out_dict = dict([(k.name, k) for k in output])
        out_rpn_rois = output[out_dict['rpn_rois']][0]
        buffered_proposals[sample_count] = np.round(out_rpn_rois).astype(np.int16)
        sample_count += 1
        if sample_count % 500 == 0:
            print("Buffered proposals for {} samples".format(sample_count))

    # resetting config values to original test values
    cfg["TEST"].RPN_PRE_NMS_TOP_N = test_pre
    cfg["TEST"].RPN_POST_NMS_TOP_N = test_post

    return buffered_proposals
# Trains a Faster R-CNN model end-to-end
def train_faster_rcnn_e2e(base_model_file_name, debug_output=False):
    # Input variables denoting features and labeled ground truth rois (as 5-tuples per roi)
    image_input = input_variable((num_channels, image_height, image_width), dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name)
    roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    dims_node = alias(dims_input, name='dims_input')

    # Instantiate the Faster R-CNN prediction model and loss function
    loss, pred_error = create_faster_rcnn_predictor(base_model_file_name, image_input, roi_input, dims_node)

    if debug_output:
        print("Storing graphs and models to %s." % globalvars['output_path'])
        plot(loss, os.path.join(globalvars['output_path'], "graph_frcn_train_e2e." + cfg["CNTK"].GRAPH_TYPE))

    # Set learning parameters
    e2e_lr_factor = globalvars['e2e_lr_factor']
    e2e_lr_per_sample_scaled = [x * e2e_lr_factor for x in cfg["CNTK"].E2E_LR_PER_SAMPLE]
    mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)

    print("Using base model: {}".format(cfg["CNTK"].BASE_MODEL))
    print("lr_per_sample: {}".format(e2e_lr_per_sample_scaled))

    train_model(image_input, roi_input, dims_input, loss, pred_error,
                e2e_lr_per_sample_scaled, mm_schedule, cfg["CNTK"].L2_REG_WEIGHT, globalvars['e2e_epochs'])

    return create_eval_model(loss, image_input, dims_input)
# Trains a Faster R-CNN model using 4-stage alternating training
|
||||
def train_faster_rcnn_alternating(base_model_file_name, debug_output=False):
|
||||
'''
|
||||
4-Step Alternating Training scheme from the Faster R-CNN paper:
|
||||
|
||||
# Create initial network, only rpn, without detection network
|
||||
# --> train only the rpn (and conv3_1 and up for VGG16)
|
||||
# buffer region proposals from rpn
|
||||
# Create full network, initialize conv layers with imagenet, use buffered proposals
|
||||
# --> train only detection network (and conv3_1 and up for VGG16)
|
||||
# Keep conv weights from detection network and fix them
|
||||
# --> train only rpn
|
||||
# buffer region proposals from rpn
|
||||
# Keep conv and rpn weights from step 3 and fix them
|
||||
# --> train only detection network
|
||||
'''
|
||||
|
||||
    # Learning parameters
    rpn_lr_factor = globalvars['rpn_lr_factor']
    rpn_lr_per_sample_scaled = [x * rpn_lr_factor for x in cfg["CNTK"].RPN_LR_PER_SAMPLE]
    frcn_lr_factor = globalvars['frcn_lr_factor']
    frcn_lr_per_sample_scaled = [x * frcn_lr_factor for x in cfg["CNTK"].FRCN_LR_PER_SAMPLE]

    l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
    mm_schedule = momentum_schedule(globalvars['momentum_per_mb'])
    rpn_epochs = globalvars['rpn_epochs']
    frcn_epochs = globalvars['frcn_epochs']

    print("Using base model: {}".format(cfg["CNTK"].BASE_MODEL))
    print("rpn_lr_per_sample: {}".format(rpn_lr_per_sample_scaled))
    print("frcn_lr_per_sample: {}".format(frcn_lr_per_sample_scaled))
    if debug_output:
        print("Storing graphs and models to %s." % globalvars['output_path'])

    # Input variables denoting features, labeled ground truth rois (as 5-tuples per roi) and image dimensions
    image_input = input_variable((num_channels, image_height, image_width), dynamic_axes=[Axis.default_batch_axis()],
                                 name=feature_node_name)
    feat_norm = image_input - normalization_const
    roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
    scaled_gt_boxes = alias(roi_input, name='roi_input')
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    dims_node = alias(dims_input, name='dims_input')
    rpn_rois_input = input_variable((cfg["TRAIN"].RPN_POST_NMS_TOP_N, 4), dynamic_axes=[Axis.default_batch_axis()])
    rpn_rois_buf = alias(rpn_rois_input, name='rpn_rois')

    # base image classification model (e.g. VGG16 or AlexNet)
    base_model = load_model(base_model_file_name)

    print("stage 1a - rpn")
    if True:
        # Create initial network, only rpn, without detection network
        #           initial weights     train?
        # conv:     base_model          only conv3_1 and up
        # rpn:      init new            yes
        # frcn:     -                   -

        # conv layers
        conv_layers = clone_conv_layers(base_model)
        conv_out = conv_layers(feat_norm)

        # RPN and losses
        rpn_rois, rpn_losses = create_rpn(conv_out, scaled_gt_boxes, dims_node, proposal_layer_param_string=cfg["CNTK"].PROPOSAL_LAYER_PARAMS)
        stage1_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output: plot(stage1_rpn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage1a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=rpn_epochs)

    print("stage 1a - buffering rpn proposals")
    buffered_proposals_s1 = compute_rpn_proposals(stage1_rpn_network, image_input, roi_input, dims_input)

    print("stage 1b - frcn")
    if True:
        # Create full network, initialize conv layers with imagenet, fix rpn weights
        #           initial weights     train?
        # conv:     base_model          only conv3_1 and up
        # rpn:      stage1a rpn model   no --> use buffered proposals
        # frcn:     base_model + new    yes

        # conv_layers
        conv_layers = clone_conv_layers(base_model)
        conv_out = conv_layers(feat_norm)

        # use buffered proposals in target layer
        rois, label_targets, bbox_targets, bbox_inside_weights = \
            create_proposal_target_layer(rpn_rois_buf, scaled_gt_boxes, num_classes=globalvars['num_classes'])

        # Fast RCNN and losses
        fc_layers = clone_model(base_model, [pool_node_name], [last_hidden_node_name], CloneMethod.clone)
        cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers)
        detection_losses = create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights)
        pred_error = classification_error(cls_score, label_targets, axis=1, name="pred_error")
        stage1_frcn_network = combine([rois, cls_score, bbox_pred, detection_losses, pred_error])

        # train
        if debug_output: plot(stage1_frcn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage1b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=frcn_epochs,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s1)
        buffered_proposals_s1 = None

    print("stage 2a - rpn")
    if True:
        # Keep conv weights from detection network and fix them
        #           initial weights     train?
        # conv:     stage1b frcn model  no
        # rpn:      stage1a rpn model   yes
        # frcn:     -                   -

        # conv_layers
        conv_layers = clone_model(stage1_frcn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # RPN and losses
        rpn = clone_model(stage1_rpn_network, [last_conv_node_name, "roi_input", "dims_input"], ["rpn_rois", "rpn_losses"], CloneMethod.clone)
        rpn_net = rpn(conv_out, dims_node, scaled_gt_boxes)
        rpn_rois = rpn_net.outputs[0]
        rpn_losses = rpn_net.outputs[1]
        stage2_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output: plot(stage2_rpn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage2a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=rpn_epochs)

    print("stage 2a - buffering rpn proposals")
    buffered_proposals_s2 = compute_rpn_proposals(stage2_rpn_network, image_input, roi_input, dims_input)

    print("stage 2b - frcn")
    if True:
        # Keep conv and rpn weights from step 3 and fix them
        #           initial weights     train?
        # conv:     stage2a rpn model   no
        # rpn:      stage2a rpn model   no --> use buffered proposals
        # frcn:     stage1b frcn model  yes

        # conv_layers
        conv_layers = clone_model(stage2_rpn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # Fast RCNN and losses
        frcn = clone_model(stage1_frcn_network, [last_conv_node_name, "rpn_rois", "roi_input"],
                           ["cls_score", "bbox_regr", "rpn_target_rois", "detection_losses", "pred_error"], CloneMethod.clone)
        stage2_frcn_network = frcn(conv_out, rpn_rois_buf, scaled_gt_boxes)
        detection_losses = stage2_frcn_network.outputs[3]
        pred_error = stage2_frcn_network.outputs[4]

        # train
        if debug_output: plot(stage2_frcn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage2b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=frcn_epochs,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s2)
        buffered_proposals_s2 = None

    return create_eval_model(stage2_frcn_network, image_input, dims_input, rpn_model=stage2_rpn_network)

def eval_faster_rcnn_mAP(eval_model):
    img_map_file = globalvars['test_map_file']
    roi_map_file = globalvars['test_roi_file']
    classes = globalvars['classes']
    image_input = input_variable((num_channels, image_height, image_width), dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name)
    roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    frcn_eval = eval_model(image_input, dims_input)

    # Create the minibatch source
    minibatch_source = ObjectDetectionMinibatchSource(
        img_map_file, roi_map_file,
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width, pad_height=image_height, pad_value=img_pad_value,
        randomize=False, use_flipping=False,
        max_images=cfg["CNTK"].NUM_TEST_IMAGES)

    # define mapping from reader streams to network inputs
    input_map = {
        minibatch_source.image_si: image_input,
        minibatch_source.roi_si: roi_input,
        minibatch_source.dims_si: dims_input
    }

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_test_images)] for _ in range(globalvars['num_classes'])]

    # evaluate test images and write network output to file
    print("Evaluating Faster R-CNN model for %s images." % num_test_images)
    all_gt_infos = {key: [] for key in classes}
    for img_i in range(0, num_test_images):
        mb_data = minibatch_source.next_minibatch(1, input_map=input_map)

        gt_row = mb_data[roi_input].asarray()
        gt_row = gt_row.reshape((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5))
        all_gt_boxes = gt_row[np.where(gt_row[:,-1] > 0)]

        for cls_index, cls_name in enumerate(classes):
            if cls_index == 0: continue
            cls_gt_boxes = all_gt_boxes[np.where(all_gt_boxes[:,-1] == cls_index)]
            all_gt_infos[cls_name].append({'bbox': np.array(cls_gt_boxes),
                                           'difficult': [False] * len(cls_gt_boxes),
                                           'det': [False] * len(cls_gt_boxes)})

        output = frcn_eval.eval({image_input: mb_data[image_input], dims_input: mb_data[dims_input]})
        out_dict = dict([(k.name, k) for k in output])
        out_cls_pred = output[out_dict['cls_pred']][0]
        out_rpn_rois = output[out_dict['rpn_rois']][0]
        out_bbox_regr = output[out_dict['bbox_regr']][0]

        labels = out_cls_pred.argmax(axis=1)
        scores = out_cls_pred.max(axis=1)
        regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, mb_data[dims_input].asarray())

        labels.shape = labels.shape + (1,)
        scores.shape = scores.shape + (1,)
        coords_score_label = np.hstack((regressed_rois, scores, labels))

        # shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
        for cls_j in range(1, globalvars['num_classes']):
            coords_score_label_for_cls = coords_score_label[np.where(coords_score_label[:,-1] == cls_j)]
            all_boxes[cls_j][img_i] = coords_score_label_for_cls[:,:-1].astype(np.float32, copy=False)

        if (img_i+1) % 100 == 0:
            print("Processed {} samples".format(img_i+1))

    # calculate mAP
    aps = evaluate_detections(all_boxes, all_gt_infos, classes,
                              nms_threshold=cfg["CNTK"].RESULTS_NMS_THRESHOLD,
                              conf_threshold = cfg["CNTK"].RESULTS_NMS_CONF_THRESHOLD)
    ap_list = []
    for class_name in aps:
        ap_list += [aps[class_name]]
        print('AP for {:>15} = {:.4f}'.format(class_name, aps[class_name]))
    meanAP = np.nanmean(ap_list)
    print('Mean AP = {:.4f}'.format(meanAP))
    return meanAP

# The main method trains and evaluates a Faster R-CNN model.
# If a trained model is already available it is loaded and no training will be performed (if MAKE_MODE=True).
if __name__ == '__main__':
    running_locally = os.path.exists(map_file_path)
    if running_locally:
        os.chdir(map_file_path)
        if not os.path.exists(os.path.join(abs_path, "Output")):
            os.makedirs(os.path.join(abs_path, "Output"))
        if not os.path.exists(os.path.join(abs_path, "Output", cfg["CNTK"].DATASET)):
            os.makedirs(os.path.join(abs_path, "Output", cfg["CNTK"].DATASET))
    else:
        # disable debug and plot outputs when running on GPU cluster
        cfg["CNTK"].DEBUG_OUTPUT = False
        cfg["CNTK"].VISUALIZE_RESULTS = False

    set_global_vars()
    model_path = os.path.join(globalvars['output_path'], "faster_rcnn_eval_{}_{}.model"
                              .format(cfg["CNTK"].BASE_MODEL, "e2e" if globalvars['train_e2e'] else "4stage"))

    # Train only if no model exists yet
    if os.path.exists(model_path) and cfg["CNTK"].MAKE_MODE:
        print("Loading existing model from %s" % model_path)
        eval_model = load_model(model_path)
    else:
        if globalvars['train_e2e']:
            eval_model = train_faster_rcnn_e2e(base_model_file, debug_output=cfg["CNTK"].DEBUG_OUTPUT)
        else:
            eval_model = train_faster_rcnn_alternating(base_model_file, debug_output=cfg["CNTK"].DEBUG_OUTPUT)

        eval_model.save(model_path)
        if cfg["CNTK"].DEBUG_OUTPUT:
            plot(eval_model, os.path.join(globalvars['output_path'], "graph_frcn_eval_{}_{}.{}"
                                          .format(cfg["CNTK"].BASE_MODEL, "e2e" if globalvars['train_e2e'] else "4stage", cfg["CNTK"].GRAPH_TYPE)))

        print("Stored eval model at %s" % model_path)

    # Compute mean average precision on test set
    eval_faster_rcnn_mAP(eval_model)

    # Plot results on test set
    if cfg["CNTK"].VISUALIZE_RESULTS:
        from plot_helpers import eval_and_plot_faster_rcnn
        num_eval = min(num_test_images, 100)
        img_shape = (num_channels, image_height, image_width)
        results_folder = os.path.join(globalvars['output_path'], cfg["CNTK"].DATASET)
        eval_and_plot_faster_rcnn(eval_model, num_eval, globalvars['test_map_file'], img_shape,
                                  results_folder, feature_node_name, globalvars['classes'],
                                  drawUnregressedRois=cfg["CNTK"].DRAW_UNREGRESSED_ROIS,
                                  drawNegativeRois=cfg["CNTK"].DRAW_NEGATIVE_ROIS,
                                  nmsThreshold=cfg["CNTK"].RESULTS_NMS_THRESHOLD,
                                  nmsConfThreshold=cfg["CNTK"].RESULTS_NMS_CONF_THRESHOLD,
                                  bgrPlotThreshold=cfg["CNTK"].RESULTS_BGR_PLOT_THRESHOLD)

@ -23,13 +23,15 @@ __C.CNTK = edict()
__C.CNTK.MAKE_MODE = False
# E2E or 4-stage training
__C.CNTK.TRAIN_E2E = True
# set to 'True' to use determininistic algorithms
# set to 'True' to use deterministic algorithms
__C.CNTK.FORCE_DETERMINISTIC = False
# set to 'True' to run only a single epoch
__C.CNTK.FAST_MODE = True
__C.CNTK.FAST_MODE = False
# Debug parameters
__C.CNTK.DEBUG_OUTPUT = False
__C.CNTK.GRAPH_TYPE = "png" # "png" or "pdf"
# Set to True if you want to store an eval model with native UDFs (e.g. for inference using C++ or C#)
__C.STORE_EVAL_MODEL_WITH_NATIVE_UDF = False

# Learning parameters
__C.CNTK.L2_REG_WEIGHT = 0.0005

@ -62,7 +64,7 @@ __C.RESULTS_NMS_THRESHOLD = 0.5
__C.RESULTS_NMS_CONF_THRESHOLD = 0.0

# Enable plotting of results generally / also plot background boxes / also plot unregressed boxes
__C.VISUALIZE_RESULTS = True
__C.VISUALIZE_RESULTS = False
__C.DRAW_NEGATIVE_ROIS = False
__C.DRAW_UNREGRESSED_ROIS = False
# only for plotting results: boxes with a score lower than this threshold will be considered background

@ -6,64 +6,12 @@

import os
import numpy as np
from matplotlib.pyplot import imsave
import cv2
import cntk
from cntk import input_variable, Axis
from utils.nms_wrapper import apply_nms_to_single_image_results
from utils.map_helpers import evaluate_detections
from utils.plot_helpers import load_resize_and_pad, resize_and_pad, visualize_detections
from utils.plot_helpers import load_resize_and_pad
from utils.rpn.bbox_transform import regress_rois
from utils.od_mb_source import ObjectDetectionMinibatchSource
from utils.proposal_helpers import ProposalProvider, compute_proposals, compute_image_stats

class FastRCNN_Evaluator:
    def __init__(self, eval_model, cfg):
        # load model once in constructor and push images through the model in 'process_image()'
        self._img_shape = (cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH)
        image_input = input_variable(shape=self._img_shape,
                                     dynamic_axes=[Axis.default_batch_axis()],
                                     name=cfg["MODEL"].FEATURE_NODE_NAME)
        roi_proposals = input_variable((cfg.NUM_ROI_PROPOSALS, 4), dynamic_axes=[Axis.default_batch_axis()],
                                       name="roi_proposals")
        self._eval_model = eval_model(image_input, roi_proposals)
        self._min_w = cfg['PROPOSALS_MIN_W']
        self._min_h = cfg['PROPOSALS_MIN_H']
        self._num_proposals = cfg['NUM_ROI_PROPOSALS']

    def process_image(self, img_path):
        out_cls_pred, out_rpn_rois, out_bbox_regr, dims = self.process_image_detailed(img_path)
        labels = out_cls_pred.argmax(axis=1)
        regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims)

        return regressed_rois, out_cls_pred

    def process_image_detailed(self, img_path):
        img = cv2.imread(img_path)
        _, cntk_img_input, dims = resize_and_pad(img, self._img_shape[2], self._img_shape[1])

        #import pdb; pdb.set_trace()

        # compute ROI proposals and apply scaling and padding to them
        # [target_w, target_h, img_width, img_height, top, bottom, left, right, scale_factor]
        img_stats = compute_image_stats(len(img[0]), len(img), self._img_shape[2], self._img_shape[1])
        scale_factor = img_stats[-1]
        top = img_stats[4]
        left = img_stats[6]

        proposals = compute_proposals(img, self._num_proposals, self._min_w, self._min_h)
        proposals = proposals * scale_factor
        proposals += (left, top, left, top)

        output = self._eval_model.eval({self._eval_model.arguments[0]: [cntk_img_input],
                                        self._eval_model.arguments[1]: np.array(proposals, dtype=np.float32)})

        out_dict = dict([(k.name, k) for k in output])
        out_cls_pred = output[out_dict['cls_pred']][0]
        out_rpn_rois = proposals
        out_bbox_regr = output[out_dict['bbox_regr']][0]

        return out_cls_pred, out_rpn_rois, out_bbox_regr, dims

class FasterRCNN_Evaluator:
    def __init__(self, eval_model, cfg):

@ -178,133 +126,3 @@ def compute_test_set_aps(eval_model, cfg):
                              conf_threshold = cfg.RESULTS_NMS_CONF_THRESHOLD)

    return aps

def plot_test_set_results(evaluator, num_images_to_plot, results_base_path, cfg):
    # get image paths
    with open(cfg["DATA"].TEST_MAP_FILE) as f:
        content = f.readlines()
    img_base_path = os.path.dirname(os.path.abspath(cfg["DATA"].TEST_MAP_FILE))
    img_file_names = [os.path.join(img_base_path, x.split('\t')[1]) for x in content]
    img_shape = (cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH)

    print("Plotting results from Faster R-CNN model for %s images." % num_images_to_plot)
    for i in range(0, num_images_to_plot):
        img_path = img_file_names[i]
        out_cls_pred, out_rpn_rois, out_bbox_regr, dims = evaluator.process_image_detailed(img_path)
        labels = out_cls_pred.argmax(axis=1)
        scores = out_cls_pred.max(axis=1)

        if cfg.DRAW_UNREGRESSED_ROIS:
            # plot results without final regression
            imgDebug = visualize_detections(img_path, out_rpn_rois, labels, scores,
                                            img_shape[2], img_shape[1],
                                            classes=cfg["DATA"].CLASSES,
                                            draw_negative_rois=cfg.DRAW_NEGATIVE_ROIS,
                                            decision_threshold=cfg.RESULTS_BGR_PLOT_THRESHOLD)
            imsave("{}/{}_{}".format(results_base_path, i, os.path.basename(img_path)), imgDebug)

        # apply regression and nms to bbox coordinates
        regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims)
        nmsKeepIndices = apply_nms_to_single_image_results(regressed_rois, labels, scores,
                                                           use_gpu_nms=cfg.USE_GPU_NMS,
                                                           device_id=cfg.GPU_ID,
                                                           nms_threshold=cfg.RESULTS_NMS_THRESHOLD,
                                                           conf_threshold=cfg.RESULTS_NMS_CONF_THRESHOLD)

        filtered_bboxes = regressed_rois[nmsKeepIndices]
        filtered_labels = labels[nmsKeepIndices]
        filtered_scores = scores[nmsKeepIndices]

        img = visualize_detections(img_path, filtered_bboxes, filtered_labels, filtered_scores,
                                   img_shape[2], img_shape[1],
                                   classes=cfg["DATA"].CLASSES,
                                   draw_negative_rois=cfg.DRAW_NEGATIVE_ROIS,
                                   decision_threshold=cfg.RESULTS_BGR_PLOT_THRESHOLD)
        imsave("{}/{}_regr_{}".format(results_base_path, i, os.path.basename(img_path)), img)

def compute_test_set_aps_fast_rcnn(eval_model, cfg):
    num_test_images = cfg["DATA"].NUM_TEST_IMAGES
    classes = cfg["DATA"].CLASSES
    image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH),
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=cfg["MODEL"].FEATURE_NODE_NAME)
    roi_input = input_variable((cfg.INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
    roi_proposals = input_variable((cfg.NUM_ROI_PROPOSALS, 4), dynamic_axes=[Axis.default_batch_axis()], name="roi_proposals")
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    frcn_eval = eval_model(image_input, roi_proposals)

    # Create the minibatch source
    proposal_provider = ProposalProvider.fromconfig(cfg)
    minibatch_source = ObjectDetectionMinibatchSource(
        cfg["DATA"].TEST_MAP_FILE,
        cfg["DATA"].TEST_ROI_FILE,
        max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE,
        pad_width=cfg.IMAGE_WIDTH,
        pad_height=cfg.IMAGE_HEIGHT,
        pad_value=cfg["MODEL"].IMG_PAD_COLOR,
        randomize=False, use_flipping=False,
        max_images=cfg["DATA"].NUM_TEST_IMAGES,
        num_classes=cfg["DATA"].NUM_CLASSES,
        proposal_provider=proposal_provider,
        provide_targets=False)

    # define mapping from reader streams to network inputs
    input_map = {
        minibatch_source.image_si: image_input,
        minibatch_source.roi_si: roi_input,
        minibatch_source.proposals_si: roi_proposals,
        minibatch_source.dims_si: dims_input
    }

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_test_images)] for _ in range(cfg["DATA"].NUM_CLASSES)]

    # evaluate test images and write network output to file
    print("Evaluating Fast R-CNN model for %s images." % num_test_images)
    all_gt_infos = {key: [] for key in classes}
    for img_i in range(0, num_test_images):
        mb_data = minibatch_source.next_minibatch(1, input_map=input_map)

        gt_row = mb_data[roi_input].asarray()
        gt_row = gt_row.reshape((cfg.INPUT_ROIS_PER_IMAGE, 5))
        all_gt_boxes = gt_row[np.where(gt_row[:,-1] > 0)]

        for cls_index, cls_name in enumerate(classes):
            if cls_index == 0: continue
            cls_gt_boxes = all_gt_boxes[np.where(all_gt_boxes[:,-1] == cls_index)]
            all_gt_infos[cls_name].append({'bbox': np.array(cls_gt_boxes),
                                           'difficult': [False] * len(cls_gt_boxes),
                                           'det': [False] * len(cls_gt_boxes)})

        output = frcn_eval.eval({image_input: mb_data[image_input], roi_proposals: mb_data[roi_proposals]})
        out_dict = dict([(k.name, k) for k in output])
        out_cls_pred = output[out_dict['cls_pred']][0]
        out_rpn_rois = mb_data[roi_proposals].data.asarray()
        out_bbox_regr = output[out_dict['bbox_regr']][0]

        labels = out_cls_pred.argmax(axis=1)
        scores = out_cls_pred.max(axis=1)
        regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, mb_data[dims_input].asarray())

        labels.shape = labels.shape + (1,)
        scores.shape = scores.shape + (1,)
        coords_score_label = np.hstack((regressed_rois, scores, labels))

        # shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
        for cls_j in range(1, cfg["DATA"].NUM_CLASSES):
            coords_score_label_for_cls = coords_score_label[np.where(coords_score_label[:,-1] == cls_j)]
            all_boxes[cls_j][img_i] = coords_score_label_for_cls[:,:-1].astype(np.float32, copy=False)

        if (img_i+1) % 100 == 0:
            print("Processed {} samples".format(img_i+1))

    # calculate mAP
    aps = evaluate_detections(all_boxes, all_gt_infos, classes,
                              use_gpu_nms = cfg.USE_GPU_NMS,
                              device_id = cfg.GPU_ID,
                              nms_threshold=cfg.RESULTS_NMS_THRESHOLD,
                              conf_threshold = cfg.RESULTS_NMS_CONF_THRESHOLD)

    return aps

@ -25,7 +25,7 @@ from _cntk_py import force_deterministic_algorithms

abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path, ".."))
from utils.rpn.rpn_helpers import create_rpn, create_proposal_target_layer, add_proposal_layer
from utils.rpn.rpn_helpers import create_rpn, create_proposal_target_layer, create_proposal_layer
from utils.annotations.annotations_helper import parse_class_map_file
from utils.od_mb_source import ObjectDetectionMinibatchSource
from utils.proposal_helpers import ProposalProvider

@ -64,13 +64,11 @@ def prepare(cfg, use_arg_parser=True):

    cfg['MODEL_PATH'] = os.path.join(cfg.OUTPUT_PATH, "faster_rcnn_eval_{}_{}.model"
                                     .format(cfg["MODEL"].BASE_MODEL, "e2e" if cfg["CNTK"].TRAIN_E2E else "4stage"))
    cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "PretrainedModels",
    cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "..", "PretrainedModels",
                                          cfg["MODEL"].BASE_MODEL_FILE)

    cfg["DATA"].CLASSES = parse_class_map_file(cfg["DATA"].CLASS_MAP_FILE)
    cfg["DATA"].NUM_CLASSES = len(cfg["DATA"].CLASSES)
    cfg.PROPOSAL_LAYER_PARAMS = "'feat_stride': {}\n'scales':\n - {}".\
        format(cfg["MODEL"].FEATURE_STRIDE, "\n - ".join([str(v) for v in cfg["DATA"].PROPOSAL_LAYER_SCALES]))
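    # Editor's note: with the default AlexNet/Grocery settings (FEATURE_STRIDE = 16,
    # PROPOSAL_LAYER_SCALES = [4, 8, 12]) the string above renders as the YAML
    #   'feat_stride': 16
    #   'scales':
    #    - 4
    #    - 8
    #    - 12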

    if cfg["CNTK"].FAST_MODE:
        cfg["CNTK"].E2E_MAX_EPOCHS = 1

@ -207,7 +205,7 @@ def create_faster_rcnn_eval_model(model, image_input, dims_input, cfg, rpn_model
    rpn = clone_model(model_with_rpn, [last_conv_node_name], ["rpn_cls_prob_reshape", "rpn_bbox_pred"], CloneMethod.freeze)
    rpn_out = rpn(conv_out)
    # we need to add the proposal layer anew to account for changing configs when buffering proposals in 4-stage training
    rpn_rois = add_proposal_layer(rpn_out.outputs[0], rpn_out.outputs[1], dims_input, cfg)
    rpn_rois = create_proposal_layer(rpn_out.outputs[0], rpn_out.outputs[1], dims_input, cfg)

    roi_fc_layers = clone_model(model, [last_conv_node_name, "rpn_target_rois"], ["cls_score", "bbox_regr"], CloneMethod.freeze)
    pred_net = roi_fc_layers(conv_out, rpn_rois)

@ -225,6 +223,27 @@ def create_faster_rcnn_eval_model(model, image_input, dims_input, cfg, rpn_model

    return eval_model

def store_eval_model_with_native_udf(eval_model, cfg):
    import copy
    sys.path.append(os.path.join(abs_path, "..", "..", "Extensibility", "ProposalLayer"))
    cntk.ops.register_native_user_function('ProposalLayerOp',
                                           'Cntk.ProposalLayerLib-' + cntk.__version__.rstrip('+'),
                                           'CreateProposalLayer')

    def filter(x):
        return type(x) == cntk.Function and x.op_name == 'UserFunction' and x.name == 'ProposalLayer'

    def converter(x):
        layer_config = copy.deepcopy(x.attributes)
        return cntk.ops.native_user_function('ProposalLayerOp', list(x.inputs), layer_config, 'native_proposal_layer')

    model_w_native_udf = cntk.misc.convert(eval_model, filter, converter)
    model_path = cfg['MODEL_PATH']
    new_model_path = model_path[:-6] + '_native.model'
    model_w_native_udf.save(new_model_path)
    print("Stored eval model with native UDF to {}".format(new_model_path))

def compute_rpn_proposals(rpn_model, image_input, roi_input, dims_input, cfg):
    num_images = cfg["DATA"].NUM_TRAIN_IMAGES
    # Create the minibatch source

@ -5,7 +5,7 @@
This folder contains an end-to-end solution for using Faster R-CNN to perform object detection.
The original research paper for Faster R-CNN can be found at [https://arxiv.org/abs/1506.01497](https://arxiv.org/abs/1506.01497).
Base models that are supported by the current configuration are AlexNet and VGG16.
Two image set that are preconfigured are Pascal VOC 2007 and Grocery.
Two image sets that are preconfigured are Pascal VOC 2007 and Grocery.
Other base models or image sets can be used by adding a configuration file similar to the examples in `utils/configs` and importing it in `run_faster_rcnn.py`.

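A minimal sketch of such a custom setup (the `MyDataSet_config` module is a hypothetical name for your new file; the other imports exist in this repo):

```
from utils.config_helpers import merge_configs
from FasterRCNN_config import cfg as detector_cfg
from utils.configs.AlexNet_config import cfg as network_cfg
from utils.configs.MyDataSet_config import cfg as dataset_cfg  # hypothetical custom config

cfg = merge_configs([detector_cfg, network_cfg, dataset_cfg])
```
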
## Running the example

@ -15,7 +15,7 @@ Other base models or image sets can be used by adding a configuration file simil
To run Faster R-CNN you need a CNTK Python environment. Install the following additional packages:

```
pip install opencv-python easydict pyyaml future
pip install opencv-python easydict pyyaml
```

The code uses prebuilt Cython modules for parts of the region proposal network (see `Examples/Image/Detection/utils/cython_modules`).

@ -112,7 +112,7 @@ and run `python run_faster_rcnn.py` to train and evaluate Faster R-CNN on your d

### Parameters

All options and parameters are in `config.py` in the `FasterRCNN` folder and all of them are explained there. These include
All options and parameters are in `FasterRCNN_config.py` in the `FasterRCNN` folder and all of them are explained there. These include

```
# E2E or 4-stage training
...
```

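Since the merged configuration behaves like a dictionary, individual values can also be overridden in code after merging. A hedged example (`TRAIN_E2E` and `FORCE_DETERMINISTIC` are parameters defined in `FasterRCNN_config.py`):

```
cfg = get_configuration()
cfg["CNTK"].TRAIN_E2E = False            # switch to 4-stage alternating training
cfg["CNTK"].FORCE_DETERMINISTIC = True   # trade speed for reproducible results
```
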
@ -134,4 +134,4 @@ Most of the code is in `FasterRCNN_train.py` and `FasterRCNN_eval.py` (and `Exam

### Algorithm

All details regarding the Faster R-CNN algortihm can be found in the original research paper: [https://arxiv.org/abs/1506.01497](https://arxiv.org/abs/1506.01497).
All details regarding the Faster R-CNN algorithm can be found in the original research paper: [https://arxiv.org/abs/1506.01497](https://arxiv.org/abs/1506.01497).

@ -6,14 +6,15 @@

import os
import numpy as np
from FasterRCNN_train import prepare, train_faster_rcnn
import cntk
from FasterRCNN_train import prepare, train_faster_rcnn, store_eval_model_with_native_udf
from FasterRCNN_eval import compute_test_set_aps, FasterRCNN_Evaluator
from utils.config_helpers import merge_configs
from utils.plot_helpers import plot_test_set_results

def get_configuration():
    # load configs for detector, base network and data set
    from config import cfg as detector_cfg
    from FasterRCNN_config import cfg as detector_cfg
    # for VGG16 base model use:   from utils.configs.VGG16_config import cfg as network_cfg
    # for AlexNet base model use: from utils.configs.AlexNet_config import cfg as network_cfg
    from utils.configs.AlexNet_config import cfg as network_cfg

@ -42,3 +43,8 @@ if __name__ == '__main__':
        results_folder = os.path.join(cfg.OUTPUT_PATH, cfg["DATA"].DATASET)
        evaluator = FasterRCNN_Evaluator(trained_model, cfg)
        plot_test_set_results(evaluator, num_eval, results_folder, cfg)

    if cfg.STORE_EVAL_MODEL_WITH_NATIVE_UDF:
        store_eval_model_with_native_udf(trained_model, cfg)

@ -13,7 +13,7 @@ This folder contains an end-to-end demo to try different object detectors, base

### Setup

To run Fast R-CNN you need a CNTK Python environment. Install the following additional packages:
To run the object detection demo you need a CNTK Python environment. Install the following additional packages:

```
pip install opencv-python easydict pyyaml future
```

@ -19,10 +19,6 @@ python setup.py build_ext --inplace
```
Copy the compiled `.pyd` (Windows) or `.so` (Linux) files into the `cython_modules` subfolder of this utils folder.

##### `default_config`

Contains all required parameters for using a region proposal network in training or evaluation. You can overwrite these parameters by specifying a `config.py` file of the same format inside your working directory.

### `rpn` module overview

The rpn module contains helper methods and required layers to generate region proposal networks for object detection.

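For orientation, a sketched call site (assuming `conv_out`, `scaled_gt_boxes` and `im_info` are existing CNTK nodes and `cfg` is a merged configuration as used in the detector scripts):

```
from utils.rpn.rpn_helpers import create_rpn, create_proposal_target_layer

rpn_rois, rpn_losses = create_rpn(conv_out, scaled_gt_boxes, im_info, cfg)
rois, label_targets, bbox_targets, bbox_inside_weights = \
    create_proposal_target_layer(rpn_rois, scaled_gt_boxes, cfg)
```
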
@ -48,7 +44,3 @@ Bbox regression targets are specified when the classification label is > 0.
Generates training targets/labels for each object proposal: classification labels 0 - K (bg or object class 1, ... , K)
and bbox regression targets in case the label is > 0.

##### `generate.py`

Generate object detection proposals from an imdb using an RPN.

@ -13,13 +13,13 @@ cfg = __C

# model config
__C.MODEL.BASE_MODEL = "AlexNet"
__C.MODEL.BASE_MODEL_FILE = "AlexNet.model"
__C.MODEL.BASE_MODEL_FILE = "AlexNet_ImageNet_Caffe.model"
__C.MODEL.IMG_PAD_COLOR = [114, 114, 114]
__C.MODEL.FEATURE_NODE_NAME = "features"
__C.MODEL.LAST_CONV_NODE_NAME = "conv5.y"
__C.MODEL.FEATURE_NODE_NAME = "data"
__C.MODEL.LAST_CONV_NODE_NAME = "relu5"
__C.MODEL.START_TRAIN_CONV_NODE_NAME = __C.MODEL.FEATURE_NODE_NAME
__C.MODEL.POOL_NODE_NAME = "pool3"
__C.MODEL.LAST_HIDDEN_NODE_NAME = "h2_d"
__C.MODEL.POOL_NODE_NAME = "pool5"
__C.MODEL.LAST_HIDDEN_NODE_NAME = "drop7"
__C.MODEL.FEATURE_STRIDE = 16
__C.MODEL.RPN_NUM_CHANNELS = 256
__C.MODEL.ROI_DIM = 6

@ -22,3 +22,18 @@ __C.DATA.TEST_ROI_FILE = "test_roi_file.txt"
__C.DATA.NUM_TRAIN_IMAGES = 20
__C.DATA.NUM_TEST_IMAGES = 5
__C.DATA.PROPOSAL_LAYER_SCALES = [4, 8, 12]

# overwriting proposal parameters for Fast R-CNN
# minimum relative width/height of an ROI
__C.roi_min_side_rel = 0.04
# maximum relative width/height of an ROI
__C.roi_max_side_rel = 0.4
# minimum relative area of an ROI
__C.roi_min_area_rel = 2 * __C.roi_min_side_rel * __C.roi_min_side_rel
# maximum relative area of an ROI
__C.roi_max_area_rel = 0.33 * __C.roi_max_side_rel * __C.roi_max_side_rel
# maximum aspect ratio of an ROI vertically and horizontally
__C.roi_max_aspect_ratio = 4.0

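# Editor's note, a quick worked example (assuming the default selective search
# image size of 200 pixels from utils/proposal_helpers.py): the relative values
# above translate to absolute filters of
#   roi_min_side = 0.04 * 200 = 8 pixels
#   roi_max_side = 0.4  * 200 = 80 pixels
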
# For this data set use the following lr factor for Fast R-CNN:
# __C.CNTK.LR_FACTOR = 10.0

@ -22,3 +22,6 @@ __C.DATA.TEST_ROI_FILE = "test2007_rois_abs-xyxy_noPad_skipDif.txt"
__C.DATA.NUM_TRAIN_IMAGES = 5010
__C.DATA.NUM_TEST_IMAGES = 4952
__C.DATA.PROPOSAL_LAYER_SCALES = [8, 16, 32]

__C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE = "trainval2007_proposals.txt"
__C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE = "test2007_proposals.txt"

@ -222,7 +222,7 @@ class ObjectDetectionReader:
        if self._flip_image:
            resized_with_pad = cv2.flip(resized_with_pad, 1)

        # transpose(2,0,1) converts the image to the HWC format which CNTK accepts
        # transpose(2,0,1) converts the image from HWC to the CHW format which CNTK expects
        model_arg_rep = np.ascontiguousarray(np.array(resized_with_pad, dtype=np.float32).transpose(2, 0, 1))

        # dims = pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height

@ -12,7 +12,6 @@ from builtins import range
import copy, textwrap
from PIL import Image, ImageFont, ImageDraw
from PIL.ExifTags import TAGS
from matplotlib.pyplot import imsave
import cntk
from cntk import input_variable, Axis
from utils.nms_wrapper import apply_nms_to_single_image_results

@ -121,7 +120,10 @@ def visualize_detections(img_path, roi_coords, roi_labels, roi_scores,
                thickness = 4
            drawRectangles(result_img, [rect], color=color, thickness=thickness)
        elif iter == 2 and label > 0:
            font = ImageFont.truetype(available_font, 18)
            try:
                font = ImageFont.truetype(available_font, 18)
            except:
                font = ImageFont.load_default()
            text = classes[label]
            if roi_scores is not None:
                text += "(" + str(round(score, 2)) + ")"

@ -129,6 +131,8 @@ def visualize_detections(img_path, roi_coords, roi_labels, roi_scores,
    return result_img

def plot_test_set_results(evaluator, num_images_to_plot, results_base_path, cfg):
    from matplotlib.pyplot import imsave

    # get image paths
    with open(cfg["DATA"].TEST_MAP_FILE) as f:
        content = f.readlines()

@ -284,12 +288,12 @@ def ptClip(pt, maxWidth, maxHeight):
    pt[1] = min(pt[1], maxHeight)
    return pt

def drawText(img, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = ImageFont.truetype("arial.ttf", 16)):
def drawText(img, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = None):
    pilImg = imconvertCv2Pil(img)
    pilImg = pilDrawText(pilImg, pt, text, textWidth, color, colorBackground, font)
    return imconvertPil2Cv(pilImg)

def pilDrawText(pilImg, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = ImageFont.truetype("arial.ttf", 16)):
def pilDrawText(pilImg, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = None):
    textY = pt[1]
    draw = ImageDraw.Draw(pilImg)
    if textWidth == None:

@ -1,9 +1,29 @@
# Copyright (c) Microsoft. All rights reserved.

# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================

import os, sys
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path, ".."))

import numpy as np
from dlib import find_candidate_object_locations
import cv2
from utils.rpn.bbox_transform import bbox_transform
from utils.cython_modules.cython_bbox import bbox_overlaps

random_seed = 23
global ss_lib_loaded, find_candidate_object_locations
ss_lib_loaded = False

def load_selective_search_lib():
    global find_candidate_object_locations
    from dlib import find_candidate_object_locations as algo
    find_candidate_object_locations = algo

    global ss_lib_loaded
    ss_lib_loaded = True
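# Editor's note: importing dlib lazily keeps this module usable when dlib is not
# installed, as long as selective search is never invoked; compute_proposals()
# below checks the ss_lib_loaded flag and calls load_selective_search_lib() on
# first use.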

def compute_image_stats(img_width, img_height, pad_width, pad_height):
    do_scale_w = img_width > img_height

@ -23,63 +43,117 @@ def compute_image_stats(img_width, img_height, pad_width, pad_height):
    right = pad_width - left - target_w
    return [target_w, target_h, img_width, img_height, top, bottom, left, right, scale_factor]


def compute_proposals(img, num_proposals, min_w, min_h):
    all_rects = []
    min_size = min_w * min_h
    find_candidate_object_locations(img, all_rects, min_size=min_size)

    rects = []
    for k, d in enumerate(all_rects):
        w = d.right() - d.left()
        h = d.bottom() - d.top()
        if w < min_w or h < min_h:
def filterRois(rects, img_w, img_h, roi_min_area, roi_max_area, roi_min_side, roi_max_side, roi_max_aspect_ratio):
    filteredRects = []
    filteredRectsSet = set()
    for rect in rects:
        if tuple(rect) in filteredRectsSet: # excluding rectangles with same co-ordinates
            continue
        rects.append([d.left(), d.top(), d.right(), d.bottom()])

    np_rects = np.array(rects)
    num_rects = np_rects.shape[0]
        x, y, x2, y2 = rect
        w = x2 - x
        h = y2 - y
        assert(w>=0 and h>=0)

        # apply filters
        if h == 0 or w == 0 or \
           x2 > img_w or y2 > img_h or \
           w < roi_min_side or h < roi_min_side or \
           w > roi_max_side or h > roi_max_side or \
           w * h < roi_min_area or w * h > roi_max_area or \
           w / h > roi_max_aspect_ratio or h / w > roi_max_aspect_ratio:
            continue
        filteredRects.append(rect)
        filteredRectsSet.add(tuple(rect))

    # could combine rectangles using non-maximum suppression or with similar co-ordinates
    # groupedRectangles, weights = cv2.groupRectangles(np.asanyarray(rectsInput, np.float).tolist(), 1, 0.3)
    # groupedRectangles = nms_python(np.asarray(rectsInput, np.float), 0.5)
    assert(len(filteredRects) > 0)
    return filteredRects

def compute_proposals(img, num_proposals, cfg):
    img_w = len(img[0])
    img_h = len(img)

    if cfg is None: cfg = {}
    roi_ss_kvals = (10, 500, 5) if 'roi_ss_kvals' not in cfg else tuple(cfg['roi_ss_kvals'])
    roi_ss_mm_iterations = 30 if 'roi_ss_mm_iterations' not in cfg else cfg['roi_ss_mm_iterations']
    roi_ss_min_size = 9 if 'roi_ss_min_size' not in cfg else cfg['roi_ss_min_size']
    roi_ss_img_size = 200 if 'roi_ss_img_size' not in cfg else cfg['roi_ss_img_size']
    roi_min_side_rel = 0.04 if 'roi_min_side_rel' not in cfg else cfg['roi_min_side_rel']
    roi_max_side_rel = 0.4 if 'roi_max_side_rel' not in cfg else cfg['roi_max_side_rel']
    roi_min_area_rel = 2 * roi_min_side_rel * roi_min_side_rel if 'roi_min_area_rel' not in cfg else cfg['roi_min_area_rel']
    roi_max_area_rel = 0.33 * roi_max_side_rel * roi_max_side_rel if 'roi_max_area_rel' not in cfg else cfg['roi_max_area_rel']
    roi_max_aspect_ratio = 4.0 if 'roi_max_aspect_ratio' not in cfg else cfg['roi_max_aspect_ratio']
    roi_grid_aspect_ratios = [1.0, 2.0, 0.5] if 'roi_grid_aspect_ratios' not in cfg else cfg['roi_grid_aspect_ratios']
    debug_output = False if not ('CNTK' in cfg and 'DEBUG_OUTPUT' in cfg.CNTK) else cfg.CNTK.DEBUG_OUTPUT

    scale = 1.0 * roi_ss_img_size / max(img.shape[:2])
    img = cv2.resize(img, (0,0), fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)

    roi_min_side = roi_min_side_rel * roi_ss_img_size
    roi_max_side = roi_max_side_rel * roi_ss_img_size
    roi_min_area = roi_min_area_rel * roi_ss_img_size * roi_ss_img_size
    roi_max_area = roi_max_area_rel * roi_ss_img_size * roi_ss_img_size

    if not ss_lib_loaded: load_selective_search_lib()
    rects = []
    tmp = []
    find_candidate_object_locations(img, tmp, kvals=roi_ss_kvals, min_size=roi_ss_min_size, max_merging_iterations=roi_ss_mm_iterations)
    for k, d in enumerate(tmp):
        rects.append([d.left(), d.top(), d.right(), d.bottom()])
    filtered_rects = filterRois(rects, img_w, img_h, roi_min_area, roi_max_area, roi_min_side, roi_max_side, roi_max_aspect_ratio)
    scaled_rects = np.array(filtered_rects) * (1/scale)
    if debug_output:
        print("selective search rois before | after filtering: {} | {}. Requested: {}".format(len(rects), len(filtered_rects), num_proposals))

    num_rects = scaled_rects.shape[0]
    np.random.seed(random_seed)
    if num_rects < num_proposals:
        img_w = len(img[0])
        img_h = len(img)
        grid_proposals = compute_grid_proposals(num_proposals - len(rects), img_w, img_h, min_w, min_h)
        np_rects = np.vstack([np_rects, grid_proposals])
    elif len(rects) > num_proposals:
        try:
            shuffle = not cfg.CNTK.FORCE_DETERMINISTIC
        except:
            shuffle = True

        roi_min_side = roi_min_side_rel * min(img_w, img_h)
        roi_max_side = roi_max_side_rel * max(img_w, img_h)
        grid_proposals = compute_grid_proposals(num_proposals - num_rects, img_w, img_h, roi_min_side, roi_max_side, roi_grid_aspect_ratios, shuffle)
        scaled_rects = np.vstack([scaled_rects, grid_proposals])
    elif num_rects > num_proposals:
        keep_inds = range(num_rects)
        keep_inds = np.random.choice(keep_inds, size=num_proposals, replace=False)
        np_rects = np_rects[keep_inds]
        scaled_rects = scaled_rects[keep_inds]

    return np_rects

def compute_grid_proposals(num_proposals, img_w, img_h, min_w, min_h, max_w=None, max_h=None, aspect_ratios = [1.0], shuffle=True):
    min_wh = max(min_w, min_h)
    max_wh = min(img_h, img_w) / 2
    if max_w is not None: max_wh = min(max_wh, max_w)
    if max_h is not None: max_wh = min(max_wh, max_h)
    return scaled_rects

def compute_grid_proposals(num_proposals, img_w, img_h, min_wh, max_wh, aspect_ratios = [1.0, 2.0, 0.5], shuffle=True):
    rects = []
    iter = 0
    while len(rects) < num_proposals:
        new_ar = []
        for ar in aspect_ratios:
            new_ar.append(ar * (0.9 ** iter))
            new_ar.append(ar * (1.1 ** iter))
        if iter == 0:
            new_ar = aspect_ratios
        else:
            new_ar = []
            for ar in aspect_ratios:
                new_ar.append(ar * (0.9 ** iter))
                new_ar.append(ar * (1.1 ** iter))

        new_rects = _compute_grid_proposals(img_w, img_h, min_wh, max_wh, new_ar)
        new_rects = np.array(_compute_grid_proposals(img_w, img_h, min_wh, max_wh, new_ar))
        take = min(num_proposals - len(rects), len(new_rects))
        new_rects = new_rects[:take]

        if shuffle and take < len(new_rects):
            keep_inds = range(len(new_rects))
            keep_inds = np.random.choice(keep_inds, size=take, replace=False)
            new_rects = new_rects[keep_inds]
        else:
            new_rects = new_rects[:take]

        rects.extend(new_rects)
        iter = iter + 1

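    # Editor's note on the loop above: every retry widens the aspect-ratio set
    # geometrically, e.g. for aspect_ratios = [1.0, 2.0, 0.5] the pass with
    # iter == 1 tries 0.9, 1.1, 1.8, 2.2, 0.45 and 0.55, so the grid keeps
    # yielding new, slightly different boxes until num_proposals rects have
    # been collected.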
    np_rects = np.array(rects)
    num_rects = np_rects.shape[0]
    if shuffle and num_proposals < num_rects:
        keep_inds = range(num_rects)
        keep_inds = np.random.choice(keep_inds, size=num_proposals, replace=False)
        np_rects = np_rects[keep_inds]
    else:
        np_rects = np_rects[:num_proposals]

    assert np_rects.shape[0] == num_proposals
    return np_rects

def _compute_grid_proposals(img_w, img_h, min_wh, max_wh, aspect_ratios):

@ -152,17 +226,30 @@ class ProposalProvider:
        self._requires_scaling = requires_scaling

    @classmethod
    def fromfile(cls, filename):
    def fromfile(cls, filename, max_num_proposals):
        print('Reading proposals from file ({}) ...'.format(filename))
        with open(filename) as f:
            lines = f.readlines()

        proposal_list = [[] for _ in lines]
        index = 0
        cut_counter = 0
        for line in lines:
            # TODO: parse line
            index = 0
            rects = np.zeros((4, 200))
            # parse line
            numbers = line[line.find('|') + 11:]
            parsed_numbers = np.fromstring(numbers, dtype=int, sep=' ')
            parsed_rects = parsed_numbers.reshape((int(parsed_numbers.shape[0] / 4), 4))
            num_rects = parsed_rects.shape[0]
            if num_rects > max_num_proposals:
                rects = parsed_rects[:max_num_proposals,:]
                cut_counter += 1
            else:
                pad_rects = np.zeros((max_num_proposals - num_rects, 4))
                rects = np.vstack([parsed_rects, pad_rects])
            proposal_list[index] = rects
            index += 1

        print('Done. {} images had more than {} proposals.'.format(cut_counter, max_num_proposals))
        return cls(proposal_list)
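    # Editor's note (inferred from the parsing above, not verified): each line is
    # expected to carry its proposals 11 characters after the first '|', e.g. in a
    # field like "|proposals 10 20 110 120 30 40 130 140 ...", i.e. absolute pixel
    # coordinates in groups of four (x1 y1 x2 y2).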

    @classmethod

@ -183,33 +270,25 @@ class ProposalProvider:
        return self._proposal_cfg['NUM_ROI_PROPOSALS']

    def get_proposals(self, index, img=None):
        #import pdb; pdb.set_trace()
        if index in self._proposal_dict:
            return self._proposal_dict[index]
        else:
            return self._compute_proposals(img)

    def _compute_proposals(self, img):
        min_w = self._proposal_cfg['PROPOSALS_MIN_W']
        min_h = self._proposal_cfg['PROPOSALS_MIN_H']
        num_proposals = self._proposal_cfg.NUM_ROI_PROPOSALS
        return compute_proposals(img, num_proposals, min_w, min_h)
        num_proposals = self._proposal_cfg.NUM_ROI_PROPOSALS
        return compute_proposals(img, num_proposals, self._proposal_cfg)

if __name__ == '__main__':
    import cv2
    image_file = r"C:\src\CNTK\Examples\Image\DataSets\Pascal\VOCdevkit\VOC2007\JPEGImages\000015.jpg"
    image_file = os.path.join(abs_path, r"..\..\DataSets\Pascal\VOCdevkit\VOC2007\JPEGImages\000015.jpg")
    img = cv2.imread(image_file)

    # 0.18 sec for 4000
    # 0.15 sec for 2000
    # 0.13 sec for 1000
    num_proposals = 2000
    num_runs = 100
    num_runs = 500
    proposals = compute_proposals(img, num_proposals, cfg=None)
    import time
    start = int(time.time())
    for i in range(num_runs):
        proposals = compute_proposals(img, num_proposals, 20, 20)
        proposals = compute_proposals(img, num_proposals, cfg=None)
    total = int(time.time() - start)
    print ("time: {}".format(total / (1.0 * num_runs)))
    print ("time for {} proposals: {} (total time for {} runs: {}".format(num_proposals, total / (1.0 * num_runs), num_runs, total))

    assert len(proposals) == num_proposals, "{} != {}".format(len(proposals), num_proposals)

@ -20,39 +20,17 @@ class ProposalLayer(UserFunction):
    transformations to a set of regular boxes (called "anchors").
    '''

    def __init__(self, arg1, arg2, arg3,
                 train_pre_nms_topN=12000,
                 train_post_nms_topN=2000,
                 train_nms_thresh=0.7,
                 train_min_size=16,
                 test_pre_nms_topN=6000,
                 test_post_nms_topN=300,
                 test_nms_thresh=0.7,
                 test_min_size=16,
                 param_str = None,
                 name='ProposalLayer'):
        super(ProposalLayer, self).__init__([arg1, arg2, arg3], name=name)
        self._train_pre_nms_topN = train_pre_nms_topN
        self._train_post_nms_topN = train_post_nms_topN
        self._train_nms_thresh = train_nms_thresh
        self._train_min_size = train_min_size
        self._test_pre_nms_topN = test_pre_nms_topN
        self._test_post_nms_topN = test_post_nms_topN
        self._test_nms_thresh = test_nms_thresh
        self._test_min_size = test_min_size
        self._param_str = param_str if param_str is not None else "'feat_stride': 16\n'scales':\n - 8 \n - 16 \n - 32"
    def __init__(self, arg1, arg2, arg3, layer_config, name='ProposalLayer'):
        super(ProposalLayer, self).__init__([arg1, arg2, arg3], attributes=layer_config, name=name)

        self._layer_config = layer_config
        self._feat_stride = 16 if 'feat_stride' not in layer_config else layer_config['feat_stride']
        anchor_scales = [8, 16, 32] if 'scales' not in layer_config else layer_config['scales']

        # parse the layer parameter string, which must be valid YAML
        layer_params = yaml.load(self._param_str)
        self._feat_stride = layer_params['feat_stride']
        anchor_scales = layer_params.get('scales', (8, 16, 32))
        self._anchors = generate_anchors(scales=np.array(anchor_scales))
        self._num_anchors = self._anchors.shape[0]

        attributes = {'feat_stride' : self._feat_stride, 'scales' : anchor_scales}

        super(ProposalLayer, self).__init__([arg1, arg2, arg3], attributes=attributes, name=name)

        if DEBUG:
            print ('feat_stride: {}'.format(self._feat_stride))
            print ('anchors:')

@ -85,15 +63,15 @@ class ProposalLayer(UserFunction):
        # use potentially different number of proposals for training vs evaluation
        if len(outputs_to_retain) == 0:
            # print("EVAL")
            pre_nms_topN = self._test_pre_nms_topN
            post_nms_topN = self._test_post_nms_topN
            nms_thresh = self._test_nms_thresh
            min_size = self._test_min_size
            pre_nms_topN = self._layer_config['test_pre_nms_topN']
            post_nms_topN = self._layer_config['test_post_nms_topN']
            nms_thresh = self._layer_config['test_nms_thresh']
            min_size = self._layer_config['test_min_size']
        else:
            pre_nms_topN = self._train_pre_nms_topN
            post_nms_topN = self._train_post_nms_topN
            nms_thresh = self._train_nms_thresh
            min_size = self._train_min_size
            pre_nms_topN = self._layer_config['train_pre_nms_topN']
            post_nms_topN = self._layer_config['train_post_nms_topN']
            nms_thresh = self._layer_config['train_nms_thresh']
            min_size = self._layer_config['train_min_size']

        bottom = arguments
        assert bottom[0].shape[0] == 1, \

@ -205,44 +183,16 @@ class ProposalLayer(UserFunction):
        pass

    def clone(self, cloned_inputs):
        return ProposalLayer(cloned_inputs[0], cloned_inputs[1], cloned_inputs[2],
                             train_pre_nms_topN=self._train_pre_nms_topN,
                             train_post_nms_topN=self._train_post_nms_topN,
                             train_nms_thresh=self._train_nms_thresh,
                             train_min_size=self._train_min_size,
                             test_pre_nms_topN=self._test_pre_nms_topN,
                             test_post_nms_topN=self._test_post_nms_topN,
                             test_nms_thresh=self._test_nms_thresh,
                             test_min_size=self._test_min_size,
                             param_str=self._param_str)
        return ProposalLayer(cloned_inputs[0], cloned_inputs[1], cloned_inputs[2], layer_config=self._layer_config)

    def serialize(self):
        internal_state = {}
        internal_state['param_str'] = self._param_str
        internal_state['train_pre_nms_topN'] = self._train_pre_nms_topN
        internal_state['train_post_nms_topN'] = self._train_post_nms_topN
        internal_state['train_nms_thresh'] = self._train_nms_thresh
        internal_state['train_min_size'] = self._train_min_size
        internal_state['test_pre_nms_topN'] = self._test_pre_nms_topN
        internal_state['test_post_nms_topN'] = self._test_post_nms_topN
        internal_state['test_nms_thresh'] = self._test_nms_thresh
        internal_state['test_min_size'] = self._test_min_size

        internal_state['layer_config'] = self._layer_config
        return internal_state

    @staticmethod
    def deserialize(inputs, name, state):
        return ProposalLayer(inputs[0], inputs[1], inputs[2],
                             train_pre_nms_topN=state['train_pre_nms_topN'],
                             train_post_nms_topN=state['train_post_nms_topN'],
                             train_nms_thresh=state['train_nms_thresh'],
                             train_min_size=state['train_min_size'],
                             test_pre_nms_topN=state['test_pre_nms_topN'],
                             test_post_nms_topN=state['test_post_nms_topN'],
                             test_nms_thresh=state['test_nms_thresh'],
                             test_min_size=state['test_min_size'],
                             param_str=state['param_str'],
                             name=name)
        return ProposalLayer(inputs[0], inputs[1], inputs[2], layer_config=state['layer_config'], name=name)

def _filter_boxes(boxes, min_size):
    """Remove all boxes with any side smaller than min_size."""

@ -6,7 +6,7 @@

import numpy as np
import cntk
from cntk import reduce_sum
from cntk import reduce_sum, ops
from cntk import user_function, relu, softmax, slice, splice, reshape, element_times, plus, minus, alias, classification_error
from cntk.initializer import glorot_uniform, normal
from cntk.layers import Convolution

@ -16,7 +16,6 @@ from utils.rpn.proposal_layer import ProposalLayer
from utils.rpn.proposal_target_layer import ProposalTargetLayer
from utils.rpn.cntk_smoothL1_loss import SmoothL1Loss

# Please keep in sync with Readme.md
def create_rpn(conv_out, scaled_gt_boxes, im_info, cfg, add_loss_functions=True):
    '''
    Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:

@ -59,19 +58,21 @@ def create_rpn(conv_out, scaled_gt_boxes, im_info, cfg, add_loss_functions=True)
    rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape, name="rpn_cls_prob_reshape")

    # proposal layer
    rpn_rois = add_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg)
    rpn_rois = create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg)

    rpn_losses = None
    if(add_loss_functions):
        # RPN targets
        # Comment: rpn_cls_score is only passed   vvv   to get width and height of the conv feature map ...
        proposal_layer_params = "'feat_stride': {}\n'scales':\n - {}". \
            format(cfg["MODEL"].FEATURE_STRIDE, "\n - ".join([str(v) for v in cfg["DATA"].PROPOSAL_LAYER_SCALES]))
        atl = user_function(AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info,
                                              rpn_batch_size=cfg["TRAIN"].RPN_BATCHSIZE,
                                              rpn_fg_fraction=cfg["TRAIN"].RPN_FG_FRACTION,
                                              clobber_positives=cfg["TRAIN"].RPN_CLOBBER_POSITIVES,
                                              positive_overlap=cfg["TRAIN"].RPN_POSITIVE_OVERLAP,
                                              negative_overlap=cfg["TRAIN"].RPN_NEGATIVE_OVERLAP,
                                              param_str=cfg.PROPOSAL_LAYER_PARAMS))
                                              param_str=proposal_layer_params))
        rpn_labels = atl.outputs[0]
        rpn_bbox_targets = atl.outputs[1]
        rpn_bbox_inside_weights = atl.outputs[2]

@ -114,17 +115,30 @@ def create_rpn(conv_out, scaled_gt_boxes, im_info, cfg, add_loss_functions=True)

    return rpn_rois, rpn_losses

def add_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg):
    rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info,
                                               train_pre_nms_topN=cfg["TRAIN"].RPN_PRE_NMS_TOP_N,
                                               train_post_nms_topN=cfg["TRAIN"].RPN_POST_NMS_TOP_N,
                                               train_nms_thresh=cfg["TRAIN"].RPN_NMS_THRESH,
                                               train_min_size=cfg["TRAIN"].RPN_MIN_SIZE,
                                               test_pre_nms_topN=cfg["TEST"].RPN_PRE_NMS_TOP_N,
                                               test_post_nms_topN=cfg["TEST"].RPN_POST_NMS_TOP_N,
                                               test_nms_thresh=cfg["TEST"].RPN_NMS_THRESH,
                                               test_min_size=cfg["TEST"].RPN_MIN_SIZE,
                                               param_str=cfg.PROPOSAL_LAYER_PARAMS))
def create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg, use_native_proposal_layer=False):
    layer_config = {}
    layer_config["feat_stride"] = cfg["MODEL"].FEATURE_STRIDE
    layer_config["scales"] = cfg["DATA"].PROPOSAL_LAYER_SCALES

    layer_config["train_pre_nms_topN"] = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
    layer_config["train_post_nms_topN"] = cfg["TRAIN"].RPN_POST_NMS_TOP_N
    layer_config["train_nms_thresh"] = float(cfg["TRAIN"].RPN_NMS_THRESH)
    layer_config["train_min_size"] = float(cfg["TRAIN"].RPN_MIN_SIZE)

    layer_config["test_pre_nms_topN"] = cfg["TEST"].RPN_PRE_NMS_TOP_N
    layer_config["test_post_nms_topN"] = cfg["TEST"].RPN_POST_NMS_TOP_N
    layer_config["test_nms_thresh"] = float(cfg["TEST"].RPN_NMS_THRESH)
    layer_config["test_min_size"] = float(cfg["TEST"].RPN_MIN_SIZE)

    if use_native_proposal_layer:
        cntk.ops.register_native_user_function('ProposalLayerOp',
                                               'Cntk.ProposalLayerLib-' + cntk.__version__.rstrip('+'),
                                               'CreateProposalLayer')
        rpn_rois_raw = ops.native_user_function('ProposalLayerOp', [rpn_cls_prob_reshape, rpn_bbox_pred, im_info],
                                                layer_config, 'native_proposal_layer')
    else:
        rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, layer_config))

    return alias(rpn_rois_raw, name='rpn_rois')

def create_proposal_target_layer(rpn_rois, scaled_gt_boxes, cfg):
|
||||
|
|
|
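A hedged usage sketch of the new helper: the Python ProposalLayer stays the default, and the native ProposalLayerOp is opted into per call. Here cfg stands for any merged config exposing the MODEL/DATA/TRAIN/TEST fields read above:

# default path: Python user function
rpn_rois = create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg)

# opt-in path: registers Cntk.ProposalLayerLib and builds the native op
# from the very same layer_config dict (CPU-only at this point)
rpn_rois_native = create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg,
                                        use_native_proposal_layer=True)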
@ -14,7 +14,7 @@ except ImportError:
# Add models here like this: (category, model_name, model_url)
models = (('Image Classification', 'AlexNet_ImageNet_CNTK', 'https://www.cntk.ai/Models/CNTK_Pretrained/AlexNet_ImageNet_CNTK.model'),
('Image Classification', 'AlexNet_ImageNet_Caffe', 'https://www.cntk.ai/Models/CNTK_Pretrained/AlexNet_ImageNet_Caffe.model'),
('Image Classification', 'AlexNet_ImageNet_Caffe', 'https://www.cntk.ai/Models/Caffe_Converted/AlexNet_ImageNet_Caffe.model'),
('Image Classification', 'InceptionV3_ImageNet_CNTK', 'https://www.cntk.ai/Models/CNTK_Pretrained/InceptionV3_ImageNet_CNTK.model'),
('Image Classification', 'BNInception_ImageNet_Caffe', 'https://www.cntk.ai/Models/Caffe_Converted/BNInception_ImageNet_Caffe.model'),
('Image Classification', 'ResNet18_ImageNet_CNTK', 'https://www.cntk.ai/Models/CNTK_Pretrained/ResNet18_ImageNet_CNTK.model'),
@ -23,6 +23,7 @@ dependencies:
- setuptools=27.2.0=py34_0
- six=1.10.0=py34_0
- wheel=0.29.0=py34_0
- dlib=19.0=np111py34_blas_openblas_200
- pip:
- easydict==1.6.0
- future==0.16.0

@ -35,3 +36,4 @@ dependencies:
- sphinx==1.5.4
- twine==1.8.1
- protobuf==3.2.0

@ -23,6 +23,7 @@ dependencies:
- six=1.10.0=py35_0
- wheel=0.29.0=py35_0
- opencv=3.1.0=np111py35_1
- dlib=19.0=np111py35_200
- pip:
- gym==0.5.2
- keras==2.0.6
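The dlib pins added to both environments back the selective-search proposal path this commit enables. A minimal sketch of generating candidate ROIs with dlib's built-in selective search (the min_size value and the zero image are illustrative stand-ins):

import dlib
import numpy as np

img = np.zeros((400, 400, 3), dtype=np.uint8)  # stand-in for a real RGB image

rects = []
dlib.find_candidate_object_locations(img, rects, min_size=500)  # selective search
rois = [(r.left(), r.top(), r.right(), r.bottom()) for r in rects]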
@ -0,0 +1,77 @@
# Copyright (c) Microsoft. All rights reserved.

# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================

import numpy as np
import os
import pytest
import sys
from cntk import load_model
from cntk.cntk_py import DeviceKind_GPU
from cntk.device import try_set_default_device, gpu
from cntk.logging.graph import get_node_outputs
from cntk.ops.tests.ops_test_utils import cntk_device
from _cntk_py import force_deterministic_algorithms
force_deterministic_algorithms()

abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(abs_path)
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection"))

win35_linux34 = pytest.mark.skipif(not ((sys.platform == 'win32' and sys.version_info[:2] == (3,5)) or
(sys.platform != 'win32' and sys.version_info[:2] == (3,4))),
reason="it runs currently only in windows-py35 and linux-py34 due to precompiled cython modules")

@win35_linux34
def test_detection_demo(device_id):
if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slow in CPU
try_set_default_device(cntk_device(device_id))

from prepare_test_data import prepare_Grocery_data, prepare_alexnet_v0_model
grocery_path = prepare_Grocery_data()
prepare_alexnet_v0_model()

from FastRCNN.install_data_and_model import create_grocery_mappings
create_grocery_mappings(grocery_path)

from DetectionDemo import get_configuration
import utils.od_utils as od

cfg = get_configuration('FasterRCNN')
cfg["CNTK"].FORCE_DETERMINISTIC = True
cfg["CNTK"].DEBUG_OUTPUT = False
cfg["CNTK"].MAKE_MODE = False
cfg["CNTK"].FAST_MODE = False
cfg.CNTK.E2E_MAX_EPOCHS = 3
cfg.CNTK.RPN_EPOCHS = 2
cfg.CNTK.FRCN_EPOCHS = 2
cfg.IMAGE_WIDTH = 400
cfg.IMAGE_HEIGHT = 400
cfg["CNTK"].TRAIN_E2E = True
cfg.USE_GPU_NMS = False
cfg.VISUALIZE_RESULTS = False
cfg["DATA"].MAP_FILE_PATH = grocery_path

externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ
if externalData:
extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
cfg['BASE_MODEL_PATH'] = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v1", "AlexNet_ImageNet_Caffe.model")
else:
cfg['BASE_MODEL_PATH'] = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet_ImageNet_Caffe.model".split("/"))

# train and test
eval_model = od.train_object_detector(cfg)
eval_results = od.evaluate_test_set(eval_model, cfg)

meanAP = np.nanmean(list(eval_results.values()))
print('meanAP={}'.format(meanAP))
assert meanAP > 0.01

# detect objects in single image
img_path = os.path.join(grocery_path, "testImages", "WIN_20160803_11_28_42_Pro.jpg")
regressed_rois, cls_probs = od.evaluate_single_image(eval_model, img_path, cfg)
bboxes, labels, scores = od.filter_results(regressed_rois, cls_probs, cfg)
assert bboxes.shape[0] == labels.shape[0]
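On the mAP aggregation at the end of the test: the evaluation returns a mapping from class name to average precision, and classes without ground-truth boxes come back as NaN, which np.nanmean skips. A tiny illustration with made-up AP values:

import numpy as np

eval_results = {'gerkins': 0.53, 'orange': 0.61, 'butter': float('nan')}  # illustrative APs
meanAP = np.nanmean(list(eval_results.values()))
print(meanAP)  # ~0.57: the NaN class is excluded from the mean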
@ -69,7 +69,7 @@ def test_fastrcnn_grocery_training(device_id):
extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
model_file = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v0", "AlexNet.model")
else:
model_file = os.path.join(abs_path, *"../../../../Examples/Image/PretrainedModels/AlexNet.model".split("/"))
model_file = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet.model".split("/"))

from A2_RunWithPyModel import train_fast_rcnn, evaluate_fast_rcnn
trained_model = train_fast_rcnn(model_path=model_file)
@ -19,14 +19,14 @@ force_deterministic_algorithms()
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(abs_path)
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection"))
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection", "FastRCNN"))

from prepare_test_data import prepare_Grocery_data, prepare_alexnet_v0_model
grocery_path = prepare_Grocery_data()
prepare_alexnet_v0_model()

from FastRCNN.install_data_and_model import create_grocery_mappings
from install_data_and_model import create_grocery_mappings
create_grocery_mappings(grocery_path)
from utils.config_helpers import merge_configs

win35_linux34 = pytest.mark.skipif(not ((sys.platform == 'win32' and sys.version_info[:2] == (3,5)) or
(sys.platform != 'win32' and sys.version_info[:2] == (3,4))),
@ -34,7 +34,12 @@ win35_linux34 = pytest.mark.skipif(not ((sys.platform == 'win32' and sys.version

@win35_linux34
def test_fastrcnnpy_grocery_training(device_id):
from FastRCNN.config import cfg as detector_cfg
if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slow in CPU
try_set_default_device(cntk_device(device_id))

from utils.config_helpers import merge_configs
from FastRCNN_config import cfg as detector_cfg
from utils.configs.AlexNet_config import cfg as network_cfg
from utils.configs.Grocery_config import cfg as dataset_cfg
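The three config objects imported above are combined with merge_configs before the per-test overrides below; a compact sketch of the pattern (the merge order shown is assumed, with later entries layered over earlier ones):

cfg = merge_configs([detector_cfg, network_cfg, dataset_cfg])
cfg["CNTK"].FORCE_DETERMINISTIC = True  # then apply test-specific overrides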
@ -43,27 +48,25 @@ def test_fastrcnnpy_grocery_training(device_id):
cfg["CNTK"].DEBUG_OUTPUT = False
cfg["CNTK"].MAKE_MODE = False
cfg["CNTK"].FAST_MODE = False
cfg["CNTK"].MAX_EPOCHS = 2
cfg.NUM_ROI_PROPOSALS = 100
cfg.USE_GPU_NMS = True
cfg["CNTK"].MAX_EPOCHS = 4
cfg.IMAGE_WIDTH = 600
cfg.IMAGE_HEIGHT = 600
cfg.NUM_ROI_PROPOSALS = 200
cfg.USE_GPU_NMS = False
cfg.VISUALIZE_RESULTS = False
cfg["DATA"].MAP_FILE_PATH = grocery_path

externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ
if externalData:
extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
cfg['BASE_MODEL_PATH'] = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v0", "AlexNet.model")
cfg['BASE_MODEL_PATH'] = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v1", "AlexNet_ImageNet_Caffe.model")
else:
cfg['BASE_MODEL_PATH'] = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet.model".split("/"))
cfg['BASE_MODEL_PATH'] = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet_ImageNet_Caffe.model".split("/"))

from FastRCNN.FastRCNN_train import prepare, train_fast_rcnn
from FastRCNN.FastRCNN_eval import compute_test_set_aps
from FastRCNN_train import prepare, train_fast_rcnn
from FastRCNN_eval import compute_test_set_aps
prepare(cfg, False)

if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slow in CPU
try_set_default_device(cntk_device(device_id))

np.random.seed(seed=3)
trained_model = train_fast_rcnn(cfg)
eval_results = compute_test_set_aps(trained_model, cfg)
@ -18,22 +18,24 @@ force_deterministic_algorithms()
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(abs_path)
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection"))
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Extensibility", "ProposalLayer"))
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection", "FastRCNN"))
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection", "FasterRCNN"))

from prepare_test_data import prepare_Grocery_data, prepare_alexnet_v0_model
grocery_path = prepare_Grocery_data()
prepare_alexnet_v0_model()

from FastRCNN.install_data_and_model import create_grocery_mappings
from install_data_and_model import create_grocery_mappings
create_grocery_mappings(grocery_path)
from utils.config_helpers import merge_configs

win35_linux34 = pytest.mark.skipif(not ((sys.platform == 'win32' and sys.version_info[:2] == (3,5)) or
(sys.platform != 'win32' and sys.version_info[:2] == (3,4))),
reason="it runs currently only in windows-py35 and linux-py34 due to precompiled cython modules")

def run_fasterrcnn_grocery_training(device_id, e2e):
from FasterRCNN.config import cfg as detector_cfg
def run_fasterrcnn_grocery_training(e2e):
from FasterRCNN_eval import compute_test_set_aps
from utils.config_helpers import merge_configs
from FasterRCNN_config import cfg as detector_cfg
from utils.configs.AlexNet_config import cfg as network_cfg
from utils.configs.Grocery_config import cfg as dataset_cfg
@ -41,101 +43,68 @@ def run_fasterrcnn_grocery_training(device_id, e2e):
cfg["CNTK"].FORCE_DETERMINISTIC = True
cfg["CNTK"].DEBUG_OUTPUT = False
cfg["CNTK"].MAKE_MODE = False
cfg["CNTK"].FAST_MODE = True
cfg["CNTK"].FAST_MODE = False
cfg.CNTK.E2E_MAX_EPOCHS = 3
cfg.CNTK.RPN_EPOCHS = 2
cfg.CNTK.FRCN_EPOCHS = 2
cfg.IMAGE_WIDTH = 400
cfg.IMAGE_HEIGHT = 400
cfg["CNTK"].TRAIN_E2E = e2e
cfg.USE_GPU_NMS = True
cfg.USE_GPU_NMS = False
cfg.VISUALIZE_RESULTS = False
cfg["DATA"].MAP_FILE_PATH = grocery_path

externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ
if externalData:
extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
cfg['BASE_MODEL_PATH'] = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v0", "AlexNet.model")
model_file = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v1", "AlexNet_ImageNet_Caffe.model")
else:
model_file = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet.model".split("/"))
model_file = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet_ImageNet_Caffe.model".split("/"))

from FasterRCNN import train_faster_rcnn_e2e, eval_faster_rcnn_mAP
from FasterRCNN_train import prepare, train_faster_rcnn

np.random.seed(seed=3)
eval_model = train_faster_rcnn_e2e(model_file, debug_output=False)
meanAP = eval_faster_rcnn_mAP(eval_model)
assert meanAP > 0.01

@win35_linux34
def test_native_fasterrcnn_eval(tmpdir, device_id):
from config import cfg
cfg["CNTK"].FORCE_DETERMINISTIC = True
cfg["CNTK"].DEBUG_OUTPUT = False
cfg["CNTK"].VISUALIZE_RESULTS = False
cfg["CNTK"].FAST_MODE = True
cfg["CNTK"].MAP_FILE_PATH = grocery_path

from FasterRCNN import set_global_vars
set_global_vars(False)

if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slow in CPU
try_set_default_device(cntk_device(device_id))

# since we do not use a reader for evaluation we need unzipped data
externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ

if externalData:
extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
model_file = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v0", "AlexNet.model")
else:
model_file = os.path.join(abs_path, *"../../../../PretrainedModels/AlexNet.model".split("/"))

from FasterRCNN import train_faster_rcnn_e2e, eval_faster_rcnn_mAP

np.random.seed(seed=3)

eval_model = train_faster_rcnn_e2e(model_file, debug_output=False)

meanAP_python = eval_faster_rcnn_mAP(eval_model)

cntk_py.always_allow_setting_default_device()

try_set_default_device(cpu())

from native_proposal_layer import clone_with_native_proposal_layer

model_with_native_pl = clone_with_native_proposal_layer(eval_model)
meanAP_native = eval_faster_rcnn_mAP(model_with_native_pl)

# 0.2067 (python) vs 0.2251 (native) -- the difference stems
# from different sorting algorithms: quicksort in python and
# heapsort in c++ (both are not stable).
assert abs(meanAP_python - meanAP_native) < 0.1

@win35_linux34
def test_fasterrcnn_grocery_training_4stage(device_id):
from config import cfg
cfg["CNTK"].FORCE_DETERMINISTIC = True
cfg["CNTK"].DEBUG_OUTPUT = False
cfg["CNTK"].VISUALIZE_RESULTS = False
cfg["CNTK"].FAST_MODE = True
cfg["CNTK"].MAP_FILE_PATH = grocery_path

from FasterRCNN.FasterRCNN_train import prepare, train_faster_rcnn
from FasterRCNN.FasterRCNN_eval import compute_test_set_aps
prepare(cfg, False)

if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slow in CPU
try_set_default_device(cntk_device(device_id))

np.random.seed(seed=3)
cfg['BASE_MODEL_PATH'] = model_file
trained_model = train_faster_rcnn(cfg)
eval_results = compute_test_set_aps(trained_model, cfg)
meanAP = np.nanmean(list(eval_results.values()))
print('meanAP={}'.format(meanAP))
assert meanAP > 0.01
return trained_model, meanAP, cfg

@win35_linux34
def reenable_once_sorting_is_stable_test_native_fasterrcnn_eval(device_id):
if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slow in CPU
try_set_default_device(cntk_device(device_id))

from FasterRCNN_eval import compute_test_set_aps
eval_model, meanAP_python, cfg = run_fasterrcnn_grocery_training(True)

cntk_py.always_allow_setting_default_device()
try_set_default_device(cpu())

sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Extensibility", "ProposalLayer"))
from native_proposal_layer import clone_with_native_proposal_layer
model_with_native_pl = clone_with_native_proposal_layer(eval_model)
eval_results = compute_test_set_aps(model_with_native_pl, cfg)
meanAP_native = np.nanmean(list(eval_results.values()))

# 0.2067 (python) vs 0.2251 (native) -- the difference stems
# from different sorting algorithms: quicksort in python and
# heapsort in c++ (both are not stable).
print("Python: {}, native: {}".format(meanAP_python, meanAP_native))
assert abs(meanAP_python - meanAP_native) < 0.1
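The 0.1 tolerance above is needed because neither sort is stable: quicksort on the Python side and heapsort on the C++ side may order equal-scoring proposals differently, so NMS can keep different boxes. A tiny illustration with arbitrary scores:

import numpy as np

scores = np.array([0.9, 0.5, 0.5, 0.5, 0.1])
order_quick = np.argsort(-scores, kind='quicksort')
order_heap = np.argsort(-scores, kind='heapsort')

# the sorted score values always agree ...
assert np.array_equal(scores[order_quick], scores[order_heap])
# ... but the three tied indices may come back in a different relative
# order, which is enough to change the NMS survivors downstream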
@win35_linux34
def test_fasterrcnn_grocery_training_e2e(device_id):
try_set_default_device(cntk_device(device_id))
_, _, _ = run_fasterrcnn_grocery_training(e2e = True)

@win35_linux34
def test_fasterrcnn_grocery_training_4stage(device_id):
run_fasterrcnn_grocery_training(device_id, e2e = False)

@win35_linux34
def test_fasterrcnn_grocery_training_e2e(device_id, e2e=True):
run_fasterrcnn_grocery_training(device_id, e2e = True)
if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slow in CPU
try_set_default_device(cntk_device(device_id))
_, _, _ = run_fasterrcnn_grocery_training(e2e = False)
@ -152,11 +152,18 @@ def prepare_alexnet_v0_model():
*"../../../../PretrainedModels".split("/"))
local_base_path = os.path.normpath(local_base_path)

# v0 model:
model_file = os.path.join(local_base_path, "AlexNet.model")

if not os.path.isfile(model_file):
external_model_path = os.path.join(os.environ[envvar], "PreTrainedModels", "AlexNet", "v0", "AlexNet.model")
copyfile(external_model_path, model_file)

# v1 model:
model_file = os.path.join(local_base_path, "AlexNet_ImageNet_Caffe.model")
if not os.path.isfile(model_file):
external_model_path = os.path.join(os.environ[envvar], "PreTrainedModels", "AlexNet", "v1", "AlexNet_ImageNet_Caffe.model")
copyfile(external_model_path, model_file)

return local_base_path

def prepare_UCF11_data():
@ -5,23 +5,24 @@
# ==============================================================================

import os, sys
import pytest
import numpy as np
from cntk import user_function
from cntk.ops import input_variable
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path))
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection"))

import pytest
import numpy as np
import cntk
from cntk import user_function
from cntk.ops import input_variable
from utils.rpn.proposal_layer import ProposalLayer as CntkProposalLayer
from utils.rpn.proposal_target_layer import ProposalTargetLayer as CntkProposalTargetLayer
from utils.rpn.anchor_target_layer import AnchorTargetLayer as CntkAnchorTargetLayer
from utils.caffe_layers.proposal_layer import ProposalLayer as CaffeProposalLayer
from utils.caffe_layers.proposal_target_layer import ProposalTargetLayer as CaffeProposalTargetLayer
from utils.caffe_layers.anchor_target_layer import AnchorTargetLayer as CaffeAnchorTargetLayer
win35_linux34 = pytest.mark.skipif(not ((sys.platform == 'win32' and sys.version_info[:2] == (3,5)) or
(sys.platform != 'win32' and sys.version_info[:2] == (3,4))),
reason="it runs currently only in windows-py35 and linux-py34 due to precompiled cython modules")

@win35_linux34
def test_proposal_layer():
from utils.rpn.proposal_layer import ProposalLayer as CntkProposalLayer
from utils.caffe_layers.proposal_layer import ProposalLayer as CaffeProposalLayer
from FasterRCNN.FasterRCNN_config import cfg

cls_prob_shape_cntk = (18,61,61)
cls_prob_shape_caffe = (18,61,61)
rpn_bbox_shape = (36, 61, 61)
@ -38,7 +39,21 @@ def test_proposal_layer():
rpn_bbox_var = input_variable(rpn_bbox_shape)
dims_info_var = input_variable(dims_info_shape)

cntk_layer = user_function(CntkProposalLayer(cls_prob_var, rpn_bbox_var, dims_info_var))
layer_config = {}
layer_config["feat_stride"] = 16
layer_config["scales"] = [8, 16, 32]

layer_config["train_pre_nms_topN"] = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
layer_config["train_post_nms_topN"] = cfg["TRAIN"].RPN_POST_NMS_TOP_N
layer_config["train_nms_thresh"] = float(cfg["TRAIN"].RPN_NMS_THRESH)
layer_config["train_min_size"] = float(cfg["TRAIN"].RPN_MIN_SIZE)

layer_config["test_pre_nms_topN"] = cfg["TEST"].RPN_PRE_NMS_TOP_N
layer_config["test_post_nms_topN"] = cfg["TEST"].RPN_POST_NMS_TOP_N
layer_config["test_nms_thresh"] = float(cfg["TEST"].RPN_NMS_THRESH)
layer_config["test_min_size"] = float(cfg["TEST"].RPN_MIN_SIZE)

cntk_layer = user_function(CntkProposalLayer(cls_prob_var, rpn_bbox_var, dims_info_var, layer_config))
state, cntk_output = cntk_layer.forward({cls_prob_var: [cls_prob], rpn_bbox_var: [rpn_bbox_pred], dims_info_var: dims_input})
cntk_proposals = cntk_output[next(iter(cntk_output))][0]
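Since the test now builds layer_config explicitly, the same dict could in principle drive the native op for a three-way comparison; a hedged sketch mirroring the registration call from rpn_helpers above (not part of the committed test):

import cntk
cntk.ops.register_native_user_function('ProposalLayerOp',
                                       'Cntk.ProposalLayerLib-' + cntk.__version__.rstrip('+'),
                                       'CreateProposalLayer')
native_layer = cntk.ops.native_user_function('ProposalLayerOp',
                                             [cls_prob_var, rpn_bbox_var, dims_info_var],
                                             layer_config, 'native_proposal_layer')
_, native_out = native_layer.forward({cls_prob_var: [cls_prob],
                                      rpn_bbox_var: [rpn_bbox_pred],
                                      dims_info_var: dims_input})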
@ -59,7 +74,11 @@ def test_proposal_layer():
assert np.allclose(cntk_proposals, caffe_proposals, rtol=0.0, atol=0.0)
print("Verified ProposalLayer")

@win35_linux34
def test_proposal_target_layer():
from utils.rpn.proposal_target_layer import ProposalTargetLayer as CntkProposalTargetLayer
from utils.caffe_layers.proposal_target_layer import ProposalTargetLayer as CaffeProposalTargetLayer

num_rois = 400
all_rois_shape_cntk = (num_rois,4)
num_gt_boxes = 50
@ -147,7 +166,11 @@ def test_proposal_target_layer():
assert np.allclose(cntk_bbox_inside_weights, caffe_bbox_inside_weights, rtol=0.0, atol=0.0)
print("Verified ProposalTargetLayer")

@win35_linux34
def test_anchor_target_layer():
from utils.rpn.anchor_target_layer import AnchorTargetLayer as CntkAnchorTargetLayer
from utils.caffe_layers.anchor_target_layer import AnchorTargetLayer as CaffeAnchorTargetLayer

rpn_cls_score_shape_cntk = (1, 18, 61, 61)
num_gt_boxes = 50
gt_boxes_shape_cntk = (num_gt_boxes,5)
@ -10,7 +10,7 @@ import sys
import pytest

abs_path = os.path.dirname(os.path.abspath(__file__))
notebook = os.path.join(abs_path, "..", "..", "..", "..", "Examples","Image","Detection","FastRCNN", "CNTK_FastRCNN_Eval.ipynb")
notebook = os.path.join(abs_path, "..", "..", "..", "..", "Examples","Image","Detection","FastRCNN", "BrainScript", "CNTK_FastRCNN_Eval.ipynb")

sys.path.append(abs_path)