Merge branch 'staging' into byod

maxkazmsft 2020-07-01 21:22:47 -04:00 committed by GitHub
Parents ba1e15c473 2126250423
Commit 358217d873
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
5 changed files: 97 additions and 121 deletions

View file

@@ -415,9 +415,7 @@ class TestSectionLoaderWithDepth(TestSectionLoader):
# dump images and labels to disk after augmentation
if self.debug:
outdir = (
f"debug/test/testSectionLoaderWithDepth_{self.split}_{'aug' if self.augmentations is not None else 'noaug'}"
)
outdir = f"debug/test/testSectionLoaderWithDepth_{self.split}_{'aug' if self.augmentations is not None else 'noaug'}"
generate_path(outdir)
path_prefix = f"{outdir}/index_{index}_section_{section_name}"
image_to_disk(np.array(im[0, :, :]), path_prefix + "_img.png", self.MIN, self.MAX)
@@ -441,11 +439,10 @@ class PatchLoader(data.Dataset):
:param bool debug: enable debugging output
"""
def __init__(
self, config, is_transform=True, augmentations=None, debug=False,
):
def __init__(self, config, split="train", is_transform=True, augmentations=None, debug=False):
self.data_dir = config.DATASET.ROOT
self.n_classes = config.DATASET.NUM_CLASSES
self.split = split
self.MIN = config.DATASET.MIN
self.MAX = config.DATASET.MAX
self.patch_size = config.TRAIN.PATCH_SIZE
@@ -455,11 +452,21 @@ class PatchLoader(data.Dataset):
self.patches = list()
self.debug = debug
def pad_volume(self, volume):
def pad_volume(self, volume, value):
"""
Only used for train/val, not test.
Pads a 3D numpy array with a constant value along the depth direction only.
Args:
volume (numpy ndarray): numpy array containing the seismic amplitude or labels.
value (int): value to pad the array with.
"""
return np.pad(volume, pad_width=self.patch_size, mode="constant", constant_values=255)
return np.pad(
volume,
pad_width=[(0, 0), (0, 0), (self.patch_size, self.patch_size)],
mode="constant",
constant_values=value,
)
def __len__(self):
return len(self.patches)
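For reference, a minimal standalone sketch of what the reworked pad_volume does: padding is applied only along the last (depth) axis, with a caller-supplied fill value. The array shape and patch size below are illustrative, not taken from this diff.

import numpy as np

volume = np.zeros((10, 10, 10))   # (inline, crossline, depth)
patch_size = 3
padded = np.pad(
    volume,
    pad_width=[(0, 0), (0, 0), (patch_size, patch_size)],
    mode="constant",
    constant_values=255,
)
print(padded.shape)               # (10, 10, 16): only the depth axis grows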
@@ -468,12 +475,7 @@ class PatchLoader(data.Dataset):
patch_name = self.patches[index]
direction, idx, xdx, ddx = patch_name.split(sep="_")
# Shift offsets the padding that is added in training
# shift = self.patch_size if "test" not in self.split else 0
# Remember we are cancelling the shift since we no longer pad
shift = 0
idx, xdx, ddx = int(idx) + shift, int(xdx) + shift, int(ddx) + shift
idx, xdx, ddx = int(idx), int(xdx), int(ddx)
if direction == "i":
im = self.seismic[idx, xdx : xdx + self.patch_size, ddx : ddx + self.patch_size]
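For context, a hedged sketch of the patch-name convention consumed here (the example name is made up); with the shift removed, the indices are used as-is because the volumes are already padded when they are loaded.

patch_name = "i_12_40_80"                      # direction_idx_xdx_ddx
direction, idx, xdx, ddx = patch_name.split(sep="_")
idx, xdx, ddx = int(idx), int(xdx), int(ddx)   # no shift applied any more
print(direction, idx, xdx, ddx)                # i 12 40 80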
@@ -525,34 +527,6 @@ class PatchLoader(data.Dataset):
return torch.from_numpy(img).float(), torch.from_numpy(lbl).long()
class TestPatchLoader(PatchLoader):
"""
Test Data loader for the patch-based deconvnet
:param config: configuration object to define other attributes in loaders
:param bool is_transform: Transform patch to dimensions expected by PyTorch
:param list augmentations: Data augmentations to apply to patches
:param bool debug: enable debugging output
"""
def __init__(
self, config, is_transform=True, augmentations=None, debug=False
):
super(TestPatchLoader, self).__init__(
config,
is_transform=is_transform,
augmentations=augmentations,
debug=debug,
)
## Warning: this is not used or tested
raise NotImplementedError("This class is not correctly implemented.")
self.seismic = np.load(_train_data_for(self.data_dir))
self.labels = np.load(_train_labels_for(self.data_dir))
patch_list = tuple(open(txt_path, "r"))
patch_list = [id_.rstrip() for id_ in patch_list]
self.patches = patch_list
class TrainPatchLoader(PatchLoader):
"""
Train data loader for the patch-based deconvnet
@@ -574,13 +548,9 @@ class TrainPatchLoader(PatchLoader):
debug=False,
):
super(TrainPatchLoader, self).__init__(
config,
is_transform=is_transform,
augmentations=augmentations,
debug=debug,
config, is_transform=is_transform, augmentations=augmentations, debug=debug,
)
warnings.warn("This no longer pads the volume")
if seismic_path is not None and label_path is not None:
# Load npy files (seismic and corresponding labels) from provided
# location (path)
@@ -593,8 +563,11 @@ class TrainPatchLoader(PatchLoader):
else:
self.seismic = np.load(_train_data_for(self.data_dir))
self.labels = np.load(_train_labels_for(self.data_dir))
# We are in train/val mode. Most likely the test splits are not saved yet,
# so don't attempt to load them.
# pad the data:
self.seismic = self.pad_volume(self.seismic, value=0)
self.labels = self.pad_volume(self.labels, value=255)
self.split = split
# reading the file names for split
txt_path = path.join(self.data_dir, "splits", "patch_" + split + ".txt")
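A small illustrative sketch of the two fill values used above: seismic is padded with 0, labels with 255, which presumably acts as an out-of-range sentinel that downstream code can mask out. That interpretation, and the toy shapes below, are assumptions rather than part of this diff.

import numpy as np

labels = np.random.randint(0, 6, size=(4, 4, 6))    # toy label volume with 6 hypothetical classes
patch_size = 2
padded = np.pad(labels, [(0, 0), (0, 0), (patch_size, patch_size)],
                mode="constant", constant_values=255)
mask = padded != 255                                 # 255 marks padded voxels, so they are easy to exclude
print(mask[:, :, :patch_size].any())                 # False: the new depth slabs hold only the sentinel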
@@ -637,12 +610,7 @@ class TrainPatchLoaderWithDepth(TrainPatchLoader):
patch_name = self.patches[index]
direction, idx, xdx, ddx = patch_name.split(sep="_")
# Shift offsets the padding that is added in training
# shift = self.patch_size if "test" not in self.split else 0
# Remember we are cancelling the shift since we no longer pad
shift = 0
idx, xdx, ddx = int(idx) + shift, int(xdx) + shift, int(ddx) + shift
idx, xdx, ddx = int(idx), int(xdx), int(ddx)
if direction == "i":
im = self.seismic[idx, xdx : xdx + self.patch_size, ddx : ddx + self.patch_size]
@@ -708,12 +676,7 @@ class TrainPatchLoaderWithSectionDepth(TrainPatchLoader):
patch_name = self.patches[index]
direction, idx, xdx, ddx = patch_name.split(sep="_")
# Shift offsets the padding that is added in training
# shift = self.patch_size if "test" not in self.split else 0
# Remember we are cancelling the shift since we no longer pad
shift = 0
idx, xdx, ddx = int(idx) + shift, int(xdx) + shift, int(ddx) + shift
idx, xdx, ddx = int(idx), int(xdx), int(ddx)
if direction == "i":
im = self.seismic[idx, :, xdx : xdx + self.patch_size, ddx : ddx + self.patch_size]
@@ -773,6 +736,7 @@ _TRAIN_PATCH_LOADERS = {
"patch": TrainPatchLoaderWithDepth,
}
def get_patch_loader(cfg):
assert str(cfg.TRAIN.DEPTH).lower() in [
"section",
@@ -782,8 +746,10 @@ def get_patch_loader(cfg):
Valid values: section, patch, none."
return _TRAIN_PATCH_LOADERS.get(cfg.TRAIN.DEPTH, TrainPatchLoader)
_TRAIN_SECTION_LOADERS = {"section": TrainSectionLoaderWithDepth}
def get_section_loader(cfg):
assert str(cfg.TRAIN.DEPTH).lower() in [
"section",
@@ -795,6 +761,7 @@ def get_section_loader(cfg):
_TEST_LOADERS = {"section": TestSectionLoaderWithDepth}
def get_test_loader(cfg):
logger = logging.getLogger(__name__)
logger.info(f"Test loader {cfg.TRAIN.DEPTH}")

View file

@@ -208,5 +208,5 @@ def test_TrainPatchLoaderWithDepth_should_load_with_one_train_and_label_file(tmp
label_path=os.path.join(tmpdir, "volume_name", "labels.npy"),
)
assert train_set.labels.shape == (IL, XL, D)
assert train_set.seismic.shape == (IL, XL, D)
assert train_set.labels.shape == (IL, XL, D + 2 * config.TRAIN.PATCH_SIZE)
assert train_set.seismic.shape == (IL, XL, D + 2 * config.TRAIN.PATCH_SIZE)
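The arithmetic behind the updated expectation, with made-up fixture sizes (the real IL, XL, D, and PATCH_SIZE come from the test config):

IL, XL, D, PATCH_SIZE = 10, 10, 8, 2   # hypothetical fixture dimensions
expected_depth = D + 2 * PATCH_SIZE    # pad_volume adds PATCH_SIZE slices on each side of the depth axis
print((IL, XL, expected_depth))        # (10, 10, 12)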

View file

@@ -39,8 +39,7 @@ def _write_split_files(splits_path, train_list, val_list, loader_type):
file_object = open(path.join(splits_path, loader_type + "_train_val.txt"), "w")
file_object.write("\n".join(train_list + val_list))
file_object.close()
file_object = open(path.join(splits_path,
loader_type + "_train.txt"), "w")
file_object = open(path.join(splits_path, loader_type + "_train.txt"), "w")
file_object.write("\n".join(train_list))
file_object.close()
file_object = open(path.join(splits_path, loader_type + "_val.txt"), "w")
@@ -149,6 +148,10 @@ def split_patch_train_val(
iline, xline, depth = labels.shape
# Since the locations we will save reference the padded volume, we will increase
# the depth of the volume by the padding amount (2*patch_size).
depth += 2 * patch_size
split_direction = split_direction.lower()
if split_direction == "inline":
num_sections, section_length = iline, xline
@@ -158,8 +161,10 @@ def split_patch_train_val(
raise ValueError(f"Unknown split_direction: {split_direction}")
train_range, val_range = _get_aline_range(num_sections, per_val, section_stride)
vert_locations = range(0, depth, patch_stride)
buffer = patch_size // 2
vert_locations = range(buffer, depth - patch_size - buffer, patch_stride)
horz_locations = range(0, section_length, patch_stride)
logger.debug(vert_locations)
logger.debug(horz_locations)
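A hedged worked example of the new vertical patch locations (the numbers are illustrative, not defaults from this repo):

patch_size, patch_stride, depth = 100, 50, 200

depth += 2 * patch_size                # locations reference the padded volume: 400
buffer = patch_size // 2               # 50
vert_locations = range(buffer, depth - patch_size - buffer, patch_stride)
print(list(vert_locations))            # [50, 100, 150, 200]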

View file

@@ -41,6 +41,8 @@ def _copy_files(files_iter, new_dir):
def _split_train_val_test(partition, val_ratio, test_ratio):
logger = logging.getLogger("__name__")
logger.warning(f"prepare_penobscot.py does not support padding. Results might be incorrect. ")
total_samples = len(partition)
val_samples = math.floor(val_ratio * total_samples)
test_samples = math.floor(test_ratio * total_samples)

View file

@@ -36,134 +36,132 @@ def main(args):
with open(args.infile, "r") as fp:
data = json.load(fp)
# Note: these are specific to the setup in
# main_build.yml for train.py
# Note: these are specific to the setup in
# main_build.yml for train.py
# and get_data_for_builds.sh and prepare_dutchf3.py and prepare_dutchf3.py
if args.step=='test':
if args.step == "test":
for test_key in data.keys():
if args.train_depth=='none':
if args.train_depth == "none":
expected_test_input_shape = (200, 200, 200)
expected_img = (1, 1, 200, 200)
elif args.train_depth=='section':
elif args.train_depth == "section":
expected_test_input_shape = (200, 3, 200, 200)
expected_img = (1, 3, 200, 200)
elif args.train_depth=='patch':
expected_test_input_shape = 'TBD'
expected_img = 'TBD'
raise Exception('Must be added')
elif args.train_depth == "patch":
expected_test_input_shape = "TBD"
expected_img = "TBD"
raise Exception("Must be added")
msg = f"Expected {expected_test_input_shape} for shape, received {tuple(data[test_key]['test_input_shape'])} instead, in {args.infile.split('.')[0]}"
assert tuple(data[test_key]['test_input_shape'])==expected_test_input_shape, msg
assert tuple(data[test_key]["test_input_shape"]) == expected_test_input_shape, msg
expected_test_label_shape = (200, 200, 200)
msg = f"Expected {expected_test_label_shape} for shape, received {tuple(data[test_key]['test_label_shape'])} instead, in {args.infile.split('.')[0]}"
assert tuple(data[test_key]['test_label_shape'])==expected_test_label_shape, msg
assert tuple(data[test_key]["test_label_shape"]) == expected_test_label_shape, msg
for img in data[test_key]["img_shape"]:
msg = f"Expected {expected_img} for shape, received {tuple(img)} instead, in {args.infile.split('.')[0]}"
assert tuple(img)==expected_img, msg
msg = (
f"Expected {expected_img} for shape, received {tuple(img)} instead, in {args.infile.split('.')[0]}"
)
assert tuple(img) == expected_img, msg
# -----------------------------------------------
exp_n_section = data[test_key]["take_n_sections"]
pred_shape_len = len(data[test_key]["pred_shape"])
msg = f"Expected {exp_n_section} number of items, received {pred_shape_len} instead, in {args.infile.split('.')[0]}"
assert pred_shape_len==exp_n_section, msg
assert pred_shape_len == exp_n_section, msg
gt_shape_len = len(data[test_key]["gt_shape"])
msg = f"Expected {exp_n_section} number of items, received {gt_shape_len} instead, in {args.infile.split('.')[0]}"
assert gt_shape_len==exp_n_section, msg
assert gt_shape_len == exp_n_section, msg
img_shape_len = len(data[test_key]["img_shape"])
msg = f"Expected {exp_n_section} number of items, received {img_shape_len} instead, in {args.infile.split('.')[0]}"
assert img_shape_len==exp_n_section, msg
assert img_shape_len == exp_n_section, msg
expected_len = 400
lhs_assertion = data[test_key]["test_section_loader_length"]
msg = f"Expected {expected_len} for test section loader length, received {lhs_assertion} instead, in {args.infile.split('.')[0]}"
assert lhs_assertion==expected_len, msg
assert lhs_assertion == expected_len, msg
lhs_assertion = data[test_key]["test_loader_length"]
msg = f"Expected {expected_len} for test loader length, received {lhs_assertion} instead, in {args.infile.split('.')[0]}"
assert lhs_assertion==expected_len, msg
assert lhs_assertion == expected_len, msg
expected_n_classes = 2
lhs_assertion = data[test_key]["n_classes"]
msg = f"Expected {expected_n_classes} for test loader length, received {lhs_assertion} instead, in {args.infile.split('.')[0]}"
assert lhs_assertion==expected_n_classes, msg
assert lhs_assertion == expected_n_classes, msg
expected_pred = (1, 200, 200)
expected_gt = (1, 1, 200, 200)
for pred, gt in zip(data[test_key]["pred_shape"], data[test_key]["gt_shape"]):
#dimension
# dimension
msg = f"Expected {expected_pred} for prediction shape, received {tuple(pred[0])} instead, in {args.infile.split('.')[0]}"
assert tuple(pred[0])==expected_pred, msg
assert tuple(pred[0]) == expected_pred, msg
# unique classes
msg = f"Expected up to {expected_n_classes} unique prediction classes, received {pred[1]} instead, in {args.infile.split('.')[0]}"
assert pred[1]<=expected_n_classes, msg
assert pred[1] <= expected_n_classes, msg
#dimension
# dimension
msg = f"Expected {expected_gt} for ground truth mask shape, received {tuple(gt[0])} instead, in {args.infile.split('.')[0]}"
assert tuple(gt[0])==expected_gt, msg
assert tuple(gt[0]) == expected_gt, msg
# unique classes
msg = f"Expected up to {expected_n_classes} unique ground truth classes, received {gt[1]} instead, in {args.infile.split('.')[0]}"
assert gt[1]<=expected_n_classes, msg
assert gt[1] <= expected_n_classes, msg
elif args.step == "train":
if args.train_depth == "none":
expected_shape_in = (200, 200, 400)
elif args.train_depth == "section":
expected_shape_in = (200, 3, 200, 400)
elif args.train_depth == "patch":
expected_shape_in = "TBD"
raise Exception("Must be added")
elif args.step=='train':
if args.train_depth=='none':
expected_shape_in = (200, 200, 200)
elif args.train_depth=='section':
expected_shape_in = (200, 3, 200, 200)
elif args.train_depth=='patch':
expected_shape_in = 'TBD'
raise Exception('Must be added')
msg = f"Expected {expected_shape_in} for shape, received {tuple(data['train_input_shape'])} instead, in {args.infile.split('.')[0]}"
assert tuple(data['train_input_shape'])==expected_shape_in, msg
assert tuple(data["train_input_shape"]) == expected_shape_in, msg
expected_shape_label = (200, 200, 200)
expected_shape_label = (200, 200, 400)
msg = f"Expected {expected_shape_label} for shape, received {tuple(data['train_label_shape'])} instead, in {args.infile.split('.')[0]}"
assert tuple(data['train_label_shape'])==expected_shape_label, msg
assert tuple(data["train_label_shape"]) == expected_shape_label, msg
expected_len = 64
msg = f"Expected {expected_len} for train patch loader length, received {data['train_patch_loader_length']} instead, in {args.infile.split('.')[0]}"
assert data['train_patch_loader_length']==expected_len, msg
assert data["train_patch_loader_length"] == expected_len, msg
expected_len = 1280
msg = f"Expected {expected_len} for validation patch loader length, received {data['validation_patch_loader_length']} instead, in {args.infile.split('.')[0]}"
assert data['validation_patch_loader_length']==expected_len, msg
assert data["validation_patch_loader_length"] == expected_len, msg
expected_len = 64
msg = f"Expected {expected_len} for train subset length, received {data['train_length_subset']} instead, in {args.infile.split('.')[0]}"
assert data['train_length_subset']==expected_len, msg
assert data["train_length_subset"] == expected_len, msg
expected_len = 32
msg = f"Expected {expected_len} for validation subset length, received {data['validation_length_subset']} instead, in {args.infile.split('.')[0]}"
assert data['validation_length_subset']==expected_len, msg
assert data["validation_length_subset"] == expected_len, msg
expected_len = 4
msg = f"Expected {expected_len} for train loader length, received {data['train_loader_length']} instead, in {args.infile.split('.')[0]}"
assert data['train_loader_length']==expected_len, msg
assert data["train_loader_length"] == expected_len, msg
expected_len = 1
msg = f"Expected {expected_len} for train loader length, received {data['train_loader_length']} instead, in {args.infile.split('.')[0]}"
assert data['validation_loader_length']==expected_len, msg
assert data["validation_loader_length"] == expected_len, msg
expected_n_classes = 2
msg = f"Expected {expected_n_classes} for number of classes, received {data['n_classes']} instead, in {args.infile.split('.')[0]}"
assert data['n_classes']==expected_n_classes, msg
logging.info("all done")
assert data["n_classes"] == expected_n_classes, msg
logging.info("all done")
""" cmd-line arguments """
@@ -171,8 +169,12 @@ STEPS = ["test", "train"]
TRAIN_DEPTH = ["none", "patch", "section"]
parser.add_argument("--infile", help="Location of the file which has the metrics", type=str, required=True)
parser.add_argument("--step", choices=STEPS, type=str, required=True, help="Data flow checks for test or training pipeline")
parser.add_argument("--train_depth", choices=TRAIN_DEPTH, type=str, required=True, help="Train depth flag, to check the dimensions")
parser.add_argument(
"--step", choices=STEPS, type=str, required=True, help="Data flow checks for test or training pipeline"
)
parser.add_argument(
"--train_depth", choices=TRAIN_DEPTH, type=str, required=True, help="Train depth flag, to check the dimensions"
)
""" main wrapper with profiler """
if __name__ == "__main__":
main(parser.parse_args())