From 03777cef4ec833a9e3de8297da5c031e33273eb6 Mon Sep 17 00:00:00 2001 From: Caleb Robinson Date: Thu, 13 Aug 2020 00:45:26 +0000 Subject: [PATCH] Added a comment --- geospatial/data/StreamingDatasets.py | 1 + 1 file changed, 1 insertion(+) diff --git a/geospatial/data/StreamingDatasets.py b/geospatial/data/StreamingDatasets.py index d410a5f..360bbd1 100644 --- a/geospatial/data/StreamingDatasets.py +++ b/geospatial/data/StreamingDatasets.py @@ -61,6 +61,7 @@ class StreamingGeospatialDataset(IterableDataset): if self.verbose: print("Creating a filename stream for worker %d" % (worker_id)) + # This logic splits up the list of filenames into `num_workers` chunks. Each worker will receive ceil(num_filenames / num_workers) filenames to generate chips from. If the number of workers doesn't divide the number of filenames evenly then the last worker will have fewer filenames. N = len(self.fns) num_files_per_worker = int(np.ceil(N / num_workers)) lower_idx = worker_id * num_files_per_worker