Merge branch 'staging' into byod

maxkazmsft 2020-07-01 21:22:47 -04:00 committed by GitHub
Parents ba1e15c473 2126250423
Commit 358217d873
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
5 changed files: 97 additions and 121 deletions

View file

@@ -415,9 +415,7 @@ class TestSectionLoaderWithDepth(TestSectionLoader):
# dump images and labels to disk after augmentation
if self.debug:
outdir = (
f"debug/test/testSectionLoaderWithDepth_{self.split}_{'aug' if self.augmentations is not None else 'noaug'}"
)
outdir = f"debug/test/testSectionLoaderWithDepth_{self.split}_{'aug' if self.augmentations is not None else 'noaug'}"
generate_path(outdir)
path_prefix = f"{outdir}/index_{index}_section_{section_name}"
image_to_disk(np.array(im[0, :, :]), path_prefix + "_img.png", self.MIN, self.MAX)
@@ -441,11 +439,10 @@ class PatchLoader(data.Dataset):
:param bool debug: enable debugging output
"""
def __init__(
self, config, is_transform=True, augmentations=None, debug=False,
):
def __init__(self, config, split="train", is_transform=True, augmentations=None, debug=False):
self.data_dir = config.DATASET.ROOT
self.n_classes = config.DATASET.NUM_CLASSES
self.split = split
self.MIN = config.DATASET.MIN
self.MAX = config.DATASET.MAX
self.patch_size = config.TRAIN.PATCH_SIZE
@@ -455,11 +452,21 @@ class PatchLoader(data.Dataset):
self.patches = list()
self.debug = debug
def pad_volume(self, volume):
def pad_volume(self, volume, value):
"""
Only used for train/val, not test.
Pads a 3D numpy array with a constant value along the depth direction only.
Args:
volume (numpy ndarray): numpy array containing the seismic amplitude or labels.
value (int): value to pad the array with.
"""
return np.pad(volume, pad_width=self.patch_size, mode="constant", constant_values=255)
return np.pad(
volume,
pad_width=[(0, 0), (0, 0), (self.patch_size, self.patch_size)],
mode="constant",
constant_values=value,
)
def __len__(self):
return len(self.patches)
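For reference, a minimal standalone sketch of what the reworked pad_volume does: padding is applied only along the last (depth) axis, with a caller-supplied fill value. The array shape and patch size below are illustrative, not taken from this diff.

import numpy as np

volume = np.zeros((10, 10, 10))   # (inline, crossline, depth)
patch_size = 3
padded = np.pad(
    volume,
    pad_width=[(0, 0), (0, 0), (patch_size, patch_size)],
    mode="constant",
    constant_values=255,
)
print(padded.shape)               # (10, 10, 16): only the depth axis grows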
@@ -468,12 +475,7 @@ class PatchLoader(data.Dataset):
patch_name = self.patches[index]
direction, idx, xdx, ddx = patch_name.split(sep="_")
# Shift offsets the padding that is added in training
# shift = self.patch_size if "test" not in self.split else 0
# Remember we are cancelling the shift since we no longer pad
shift = 0
idx, xdx, ddx = int(idx) + shift, int(xdx) + shift, int(ddx) + shift
idx, xdx, ddx = int(idx), int(xdx), int(ddx)
if direction == "i":
im = self.seismic[idx, xdx : xdx + self.patch_size, ddx : ddx + self.patch_size]
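For context, a hedged sketch of the patch-name convention consumed here (the example name is made up); with the shift removed, the indices are used as-is because the volumes are already padded when they are loaded.

patch_name = "i_12_40_80"                      # direction_idx_xdx_ddx
direction, idx, xdx, ddx = patch_name.split(sep="_")
idx, xdx, ddx = int(idx), int(xdx), int(ddx)   # no shift applied any more
print(direction, idx, xdx, ddx)                # i 12 40 80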
@@ -525,34 +527,6 @@ class PatchLoader(data.Dataset):
return torch.from_numpy(img).float(), torch.from_numpy(lbl).long()
class TestPatchLoader(PatchLoader):
"""
Test Data loader for the patch-based deconvnet
:param config: configuration object to define other attributes in loaders
:param bool is_transform: Transform patch to dimensions expected by PyTorch
:param list augmentations: Data augmentations to apply to patches
:param bool debug: enable debugging output
"""
def __init__(
self, config, is_transform=True, augmentations=None, debug=False
):
super(TestPatchLoader, self).__init__(
config,
is_transform=is_transform,
augmentations=augmentations,
debug=debug,
)
## Warning: this is not used or tested
raise NotImplementedError("This class is not correctly implemented.")
self.seismic = np.load(_train_data_for(self.data_dir))
self.labels = np.load(_train_labels_for(self.data_dir))
patch_list = tuple(open(txt_path, "r"))
patch_list = [id_.rstrip() for id_ in patch_list]
self.patches = patch_list
class TrainPatchLoader(PatchLoader):
"""
Train data loader for the patch-based deconvnet
@@ -574,13 +548,9 @@ class TrainPatchLoader(PatchLoader):
debug=False,
):
super(TrainPatchLoader, self).__init__(
config,
is_transform=is_transform,
augmentations=augmentations,
debug=debug,
config, is_transform=is_transform, augmentations=augmentations, debug=debug,
)
warnings.warn("This no longer pads the volume")
if seismic_path is not None and label_path is not None:
# Load npy files (seismic and corresponding labels) from provided
# location (path)
@@ -593,8 +563,11 @@ class TrainPatchLoader(PatchLoader):
else:
self.seismic = np.load(_train_data_for(self.data_dir))
self.labels = np.load(_train_labels_for(self.data_dir))
# We are in train/val mode. Most likely the test splits are not saved yet,
# so don't attempt to load them.
# pad the data:
self.seismic = self.pad_volume(self.seismic, value=0)
self.labels = self.pad_volume(self.labels, value=255)
self.split = split
# reading the file names for split
txt_path = path.join(self.data_dir, "splits", "patch_" + split + ".txt")
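A small illustrative sketch of the two fill values used above: seismic is padded with 0, labels with 255, which presumably acts as an out-of-range sentinel that downstream code can mask out. That interpretation, and the toy shapes below, are assumptions rather than part of this diff.

import numpy as np

labels = np.random.randint(0, 6, size=(4, 4, 6))    # toy label volume with 6 hypothetical classes
patch_size = 2
padded = np.pad(labels, [(0, 0), (0, 0), (patch_size, patch_size)],
                mode="constant", constant_values=255)
mask = padded != 255                                 # 255 marks padded voxels, so they are easy to exclude
print(mask[:, :, :patch_size].any())                 # False: the new depth slabs hold only the sentinel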
@@ -637,12 +610,7 @@ class TrainPatchLoaderWithDepth(TrainPatchLoader):
patch_name = self.patches[index]
direction, idx, xdx, ddx = patch_name.split(sep="_")
# Shift offsets the padding that is added in training
# shift = self.patch_size if "test" not in self.split else 0
# Remember we are cancelling the shift since we no longer pad
shift = 0
idx, xdx, ddx = int(idx) + shift, int(xdx) + shift, int(ddx) + shift
idx, xdx, ddx = int(idx), int(xdx), int(ddx)
if direction == "i":
im = self.seismic[idx, xdx : xdx + self.patch_size, ddx : ddx + self.patch_size]
@@ -708,12 +676,7 @@ class TrainPatchLoaderWithSectionDepth(TrainPatchLoader):
patch_name = self.patches[index]
direction, idx, xdx, ddx = patch_name.split(sep="_")
# Shift offsets the padding that is added in training
# shift = self.patch_size if "test" not in self.split else 0
# Remember we are cancelling the shift since we no longer pad
shift = 0
idx, xdx, ddx = int(idx) + shift, int(xdx) + shift, int(ddx) + shift
idx, xdx, ddx = int(idx), int(xdx), int(ddx)
if direction == "i":
im = self.seismic[idx, :, xdx : xdx + self.patch_size, ddx : ddx + self.patch_size]
@@ -773,6 +736,7 @@ _TRAIN_PATCH_LOADERS = {
"patch": TrainPatchLoaderWithDepth,
}
def get_patch_loader(cfg):
assert str(cfg.TRAIN.DEPTH).lower() in [
"section",
@@ -782,8 +746,10 @@ def get_patch_loader(cfg):
Valid values: section, patch, none."
return _TRAIN_PATCH_LOADERS.get(cfg.TRAIN.DEPTH, TrainPatchLoader)
_TRAIN_SECTION_LOADERS = {"section": TrainSectionLoaderWithDepth}
def get_section_loader(cfg):
assert str(cfg.TRAIN.DEPTH).lower() in [
"section",
@@ -795,6 +761,7 @@ def get_section_loader(cfg):
_TEST_LOADERS = {"section": TestSectionLoaderWithDepth}
def get_test_loader(cfg):
logger = logging.getLogger(__name__)
logger.info(f"Test loader {cfg.TRAIN.DEPTH}")

View file

@@ -208,5 +208,5 @@ def test_TrainPatchLoaderWithDepth_should_load_with_one_train_and_label_file(tmp
label_path=os.path.join(tmpdir, "volume_name", "labels.npy"),
)
assert train_set.labels.shape == (IL, XL, D)
assert train_set.seismic.shape == (IL, XL, D)
assert train_set.labels.shape == (IL, XL, D + 2 * config.TRAIN.PATCH_SIZE)
assert train_set.seismic.shape == (IL, XL, D + 2 * config.TRAIN.PATCH_SIZE)
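The arithmetic behind the updated expectation, with made-up fixture sizes (the real IL, XL, D, and PATCH_SIZE come from the test config):

IL, XL, D, PATCH_SIZE = 10, 10, 8, 2   # hypothetical fixture dimensions
expected_depth = D + 2 * PATCH_SIZE    # pad_volume adds PATCH_SIZE slices on each side of the depth axis
print((IL, XL, expected_depth))        # (10, 10, 12)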

View file

@@ -39,8 +39,7 @@ def _write_split_files(splits_path, train_list, val_list, loader_type):
file_object = open(path.join(splits_path, loader_type + "_train_val.txt"), "w")
file_object.write("\n".join(train_list + val_list))
file_object.close()
file_object = open(path.join(splits_path,
loader_type + "_train.txt"), "w")
file_object = open(path.join(splits_path, loader_type + "_train.txt"), "w")
file_object.write("\n".join(train_list))
file_object.close()
file_object = open(path.join(splits_path, loader_type + "_val.txt"), "w")
@@ -149,6 +148,10 @@ def split_patch_train_val(
iline, xline, depth = labels.shape
# Since the locations we will save reference the padded volume, we will increase
# the depth of the volume by the padding amount (2*patch_size).
depth += 2 * patch_size
split_direction = split_direction.lower()
if split_direction == "inline":
num_sections, section_length = iline, xline
@@ -158,8 +161,10 @@ def split_patch_train_val(
raise ValueError(f"Unknown split_direction: {split_direction}")
train_range, val_range = _get_aline_range(num_sections, per_val, section_stride)
vert_locations = range(0, depth, patch_stride)
buffer = patch_size // 2
vert_locations = range(buffer, depth - patch_size - buffer, patch_stride)
horz_locations = range(0, section_length, patch_stride)
logger.debug(vert_locations)
logger.debug(horz_locations)
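A hedged worked example of the new vertical patch locations (the numbers are illustrative, not defaults from this repo):

patch_size, patch_stride, depth = 100, 50, 200

depth += 2 * patch_size                # locations reference the padded volume: 400
buffer = patch_size // 2               # 50
vert_locations = range(buffer, depth - patch_size - buffer, patch_stride)
print(list(vert_locations))            # [50, 100, 150, 200]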

View file

@@ -41,6 +41,8 @@ def _copy_files(files_iter, new_dir):
def _split_train_val_test(partition, val_ratio, test_ratio):
logger = logging.getLogger("__name__")
logger.warning(f"prepare_penobscot.py does not support padding. Results might be incorrect. ")
total_samples = len(partition)
val_samples = math.floor(val_ratio * total_samples)
test_samples = math.floor(test_ratio * total_samples)

View file

@@ -36,134 +36,132 @@ def main(args):
with open(args.infile, "r") as fp:
data = json.load(fp)
# Note: these are specific to the setup in
# main_build.yml for train.py
# Note: these are specific to the setup in
# main_build.yml for train.py
# and get_data_for_builds.sh and prepare_dutchf3.py and prepare_dutchf3.py
if args.step=='test':
if args.step == "test":
for test_key in data.keys():
if args.train_depth=='none':
if args.train_depth == "none":
expected_test_input_shape = (200, 200, 200)
expected_img = (1, 1, 200, 200)
elif args.train_depth=='section':
elif args.train_depth == "section":
expected_test_input_shape = (200, 3, 200, 200)
expected_img = (1, 3, 200, 200)
elif args.train_depth=='patch':
expected_test_input_shape = 'TBD'
expected_img = 'TBD'
raise Exception('Must be added')
elif args.train_depth == "patch":
expected_test_input_shape = "TBD"
expected_img = "TBD"
raise Exception("Must be added")
msg = f"Expected {expected_test_input_shape} for shape, received {tuple(data[test_key]['test_input_shape'])} instead, in {args.infile.split('.')[0]}"
assert tuple(data[test_key]['test_input_shape'])==expected_test_input_shape, msg
assert tuple(data[test_key]["test_input_shape"]) == expected_test_input_shape, msg
expected_test_label_shape = (200, 200, 200)
msg = f"Expected {expected_test_label_shape} for shape, received {tuple(data[test_key]['test_label_shape'])} instead, in {args.infile.split('.')[0]}"
assert tuple(data[test_key]['test_label_shape'])==expected_test_label_shape, msg
assert tuple(data[test_key]["test_label_shape"]) == expected_test_label_shape, msg
for img in data[test_key]["img_shape"]:
msg = f"Expected {expected_img} for shape, received {tuple(img)} instead, in {args.infile.split('.')[0]}"
assert tuple(img)==expected_img, msg
msg = (
f"Expected {expected_img} for shape, received {tuple(img)} instead, in {args.infile.split('.')[0]}"
)
assert tuple(img) == expected_img, msg
# -----------------------------------------------
exp_n_section = data[test_key]["take_n_sections"]
pred_shape_len = len(data[test_key]["pred_shape"])
msg = f"Expected {exp_n_section} number of items, received {pred_shape_len} instead, in {args.infile.split('.')[0]}"
assert pred_shape_len==exp_n_section, msg
assert pred_shape_len == exp_n_section, msg
gt_shape_len = len(data[test_key]["gt_shape"])
msg = f"Expected {exp_n_section} number of items, received {gt_shape_len} instead, in {args.infile.split('.')[0]}"
assert gt_shape_len==exp_n_section, msg
assert gt_shape_len == exp_n_section, msg
img_shape_len = len(data[test_key]["img_shape"])
msg = f"Expected {exp_n_section} number of items, received {img_shape_len} instead, in {args.infile.split('.')[0]}"
assert img_shape_len==exp_n_section, msg
assert img_shape_len == exp_n_section, msg
expected_len = 400
lhs_assertion = data[test_key]["test_section_loader_length"]
msg = f"Expected {expected_len} for test section loader length, received {lhs_assertion} instead, in {args.infile.split('.')[0]}"
assert lhs_assertion==expected_len, msg
assert lhs_assertion == expected_len, msg
lhs_assertion = data[test_key]["test_loader_length"]
msg = f"Expected {expected_len} for test loader length, received {lhs_assertion} instead, in {args.infile.split('.')[0]}"
assert lhs_assertion==expected_len, msg
assert lhs_assertion == expected_len, msg
expected_n_classes = 2
lhs_assertion = data[test_key]["n_classes"]
msg = f"Expected {expected_n_classes} for test loader length, received {lhs_assertion} instead, in {args.infile.split('.')[0]}"
assert lhs_assertion==expected_n_classes, msg
assert lhs_assertion == expected_n_classes, msg
expected_pred = (1, 200, 200)
expected_gt = (1, 1, 200, 200)
for pred, gt in zip(data[test_key]["pred_shape"], data[test_key]["gt_shape"]):
#dimension
# dimension
msg = f"Expected {expected_pred} for prediction shape, received {tuple(pred[0])} instead, in {args.infile.split('.')[0]}"
assert tuple(pred[0])==expected_pred, msg
assert tuple(pred[0]) == expected_pred, msg
# unique classes
msg = f"Expected up to {expected_n_classes} unique prediction classes, received {pred[1]} instead, in {args.infile.split('.')[0]}"
assert pred[1]<=expected_n_classes, msg
assert pred[1] <= expected_n_classes, msg
#dimension
# dimension
msg = f"Expected {expected_gt} for ground truth mask shape, received {tuple(gt[0])} instead, in {args.infile.split('.')[0]}"
assert tuple(gt[0])==expected_gt, msg
assert tuple(gt[0]) == expected_gt, msg
# unique classes
msg = f"Expected up to {expected_n_classes} unique ground truth classes, received {gt[1]} instead, in {args.infile.split('.')[0]}"
assert gt[1]<=expected_n_classes, msg
assert gt[1] <= expected_n_classes, msg
elif args.step == "train":
if args.train_depth == "none":
expected_shape_in = (200, 200, 400)
elif args.train_depth == "section":
expected_shape_in = (200, 3, 200, 400)
elif args.train_depth == "patch":
expected_shape_in = "TBD"
raise Exception("Must be added")
elif args.step=='train':
if args.train_depth=='none':
expected_shape_in = (200, 200, 200)
elif args.train_depth=='section':
expected_shape_in = (200, 3, 200, 200)
elif args.train_depth=='patch':
expected_shape_in = 'TBD'
raise Exception('Must be added')
msg = f"Expected {expected_shape_in} for shape, received {tuple(data['train_input_shape'])} instead, in {args.infile.split('.')[0]}"
assert tuple(data['train_input_shape'])==expected_shape_in, msg
assert tuple(data["train_input_shape"]) == expected_shape_in, msg
expected_shape_label = (200, 200, 200)
expected_shape_label = (200, 200, 400)
msg = f"Expected {expected_shape_label} for shape, received {tuple(data['train_label_shape'])} instead, in {args.infile.split('.')[0]}"
assert tuple(data['train_label_shape'])==expected_shape_label, msg
assert tuple(data["train_label_shape"]) == expected_shape_label, msg
expected_len = 64
msg = f"Expected {expected_len} for train patch loader length, received {data['train_patch_loader_length']} instead, in {args.infile.split('.')[0]}"
assert data['train_patch_loader_length']==expected_len, msg
assert data["train_patch_loader_length"] == expected_len, msg
expected_len = 1280
msg = f"Expected {expected_len} for validation patch loader length, received {data['validation_patch_loader_length']} instead, in {args.infile.split('.')[0]}"
assert data['validation_patch_loader_length']==expected_len, msg
assert data["validation_patch_loader_length"] == expected_len, msg
expected_len = 64
msg = f"Expected {expected_len} for train subset length, received {data['train_length_subset']} instead, in {args.infile.split('.')[0]}"
assert data['train_length_subset']==expected_len, msg
assert data["train_length_subset"] == expected_len, msg
expected_len = 32
msg = f"Expected {expected_len} for validation subset length, received {data['validation_length_subset']} instead, in {args.infile.split('.')[0]}"
assert data['validation_length_subset']==expected_len, msg
assert data["validation_length_subset"] == expected_len, msg
expected_len = 4
msg = f"Expected {expected_len} for train loader length, received {data['train_loader_length']} instead, in {args.infile.split('.')[0]}"
assert data['train_loader_length']==expected_len, msg
assert data["train_loader_length"] == expected_len, msg
expected_len = 1
msg = f"Expected {expected_len} for train loader length, received {data['train_loader_length']} instead, in {args.infile.split('.')[0]}"
assert data['validation_loader_length']==expected_len, msg
assert data["validation_loader_length"] == expected_len, msg
expected_n_classes = 2
msg = f"Expected {expected_n_classes} for number of classes, received {data['n_classes']} instead, in {args.infile.split('.')[0]}"
assert data['n_classes']==expected_n_classes, msg
logging.info("all done")
assert data["n_classes"] == expected_n_classes, msg
logging.info("all done")
""" cmd-line arguments """
@@ -171,8 +169,12 @@ STEPS = ["test", "train"]
TRAIN_DEPTH = ["none", "patch", "section"]
parser.add_argument("--infile", help="Location of the file which has the metrics", type=str, required=True)
parser.add_argument("--step", choices=STEPS, type=str, required=True, help="Data flow checks for test or training pipeline")
parser.add_argument("--train_depth", choices=TRAIN_DEPTH, type=str, required=True, help="Train depth flag, to check the dimensions")
parser.add_argument(
"--step", choices=STEPS, type=str, required=True, help="Data flow checks for test or training pipeline"
)
parser.add_argument(
"--train_depth", choices=TRAIN_DEPTH, type=str, required=True, help="Train depth flag, to check the dimensions"
)
""" main wrapper with profiler """
if __name__ == "__main__":
main(parser.parse_args())