Merge branch 'dev' of github.com:microsoft/landcover into dev

This commit is contained in:
Caleb Robinson 2020-08-12 21:26:18 +00:00
Parents 9bf50b13f8 a6910b09d6
Commit 8d12809355
11 changed files with 217 additions and 73 deletions

.gitignore vendored
View File

@@ -12,3 +12,6 @@ tmp/
web_tool/endpoints.mine.js
web_tool/datasets.mine.json
web_tool/models.mine.json
# Mac file system
.DS_Store

View File

@@ -372,7 +372,7 @@ indent-after-paren=4
indent-string=' '
# Maximum number of characters on a single line.
max-line-length=100
max-line-length=120
# Maximum number of lines in a module.
max-module-lines=1000

View File

@@ -50,7 +50,7 @@ cd ..
A last step is required to configure the _backend_ server with the demo models/data.
Create and edit `web_tool/endpoints.mine.js`. Replace "localhost" with the address of your machine (or leave it alone it you are running locally), and choose the port you will use (defaults to 8080). Note: make sure this port is open to your machine if you are using a remote sever (e.g. with a DSVM on Azure, use the Networking tab to open port 8080).
Create and edit `web_tool/endpoints.mine.js`. Replace "localhost" with the address of your machine (or leave it alone if you are running locally), and choose the port you will use (defaults to 8080). Note: make sure this port is open to your machine if you are using a remote server (e.g. with a DSVM on Azure, use the Networking tab to open port 8080).
```bash
cp landcover/web_tool/endpoints.js landcover/web_tool/endpoints.mine.js
@@ -61,15 +61,19 @@ nano landcover/web_tool/endpoints.mine.js
The _backend_ server looks for dataset definitions in two places: `web_tool/datasets.json` and `web_tool/datasets.mine.json`. The latter is included in `.gitignore` and is where you can add custom datasets following the template of the default datasets in `web_tool/datasets.json`.
For a dataset entry, the `dataLayer` can point to a .tif file or a .vrt file (GDAL Virtual Format) uniting a set of .tif files - anything that `rasterio` loads. The `dataLayer` needs to contain all the channels required by the model.
Paths to data and shapefiles in this section, including the `url` of the `basemapLayers`, need to point to a location in the current directory so that the web server can serve the resources. If your data is mounted or stored elsewhere, you can create symbolic links to them from this directory.
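For concreteness, the sketch below shows one way a custom entry could be written into `web_tool/datasets.mine.json`. This is a hedged illustration only: the key `my_naip_dataset`, every path, and the exact set of optional keys are placeholders, so copy the real schema from an entry in `web_tool/datasets.json`.
```python
# Hedged sketch only: field names follow the default entries in web_tool/datasets.json;
# the key "my_naip_dataset" and every path below are illustrative placeholders.
import json

entry = {
    "my_naip_dataset": {
        "metadata": {"displayName": "My NAIP dataset"},
        "dataLayer": {
            "type": "CUSTOM",            # a .tif or .vrt both work - anything rasterio can load
            "path": "data/my_area.vrt"   # must contain all the channels the model expects
        },
        "basemapLayers": [{
            "layerName": "My NAIP imagery",
            "initialZoom": 11,
            "url": "data/basemaps/my_area_tiles/{z}/{x}/{y}.png",  # served from the current directory
            "initialLocation": [38.477018, -75.402312],
            "args": {"attribution": "Georeferenced Image", "tms": True}
        }]
    }
}

with open("web_tool/datasets.mine.json", "w") as f:
    json.dump(entry, f, indent=4)
```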
### Adding new models
Similar to datasets, the _backend_ server looks for model definitions in two places: `web_tool/models.json` and `web_tool/models.mine.json`. The latter is included in `.gitignore` and is where you can add custom models following the template of the default datasets in `web_tool/models.json`.
Similar to datasets, the _backend_ server looks for model definitions in two places: `web_tool/models.json` and `web_tool/models.mine.json`. The latter is included in `.gitignore` and is where you can add custom models following the template of the default models in `web_tool/models.json`. The only required field is `fn`, a path pointing to a model checkpoint.
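As a hedged illustration, a minimal entry could be generated like this; the key `my_model` and the checkpoint path are placeholders, and any keys beyond `fn` depend on the `ModelSession` implementation you pair the model with.
```python
# Hedged sketch: only `fn` is required per the README; the key name and path are placeholders.
import json

entry = {
    "my_model": {
        "fn": "web_tool/data/my_checkpoint.pt"  # path to the model checkpoint
    }
}

with open("web_tool/models.mine.json", "w") as f:
    json.dump(entry, f, indent=4)
```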
The additional step you need to take for adding custom models is creating a class that extends `ModelSession` (from `web_tool/ModelSessionAbstract.py`) to wrap your custom model, then create a constructor in `worker.py` to handle your custom class type. Note: we have included implementations of `ModelSession` that handle standard use cases of Keras and PyTorch based models. The `ModelSession` interface exists to allow for easy customization of retraining and inference logic.
### Using GPU workers
- Edit `self._WORKERS` of the SessionHandler class in SessionHandler.py to include the GPU resources you want to use on your machine. By default this is set to use GPU IDs 0 through 4.
- Edit `self._WORKERS` of the `SessionHandler` class in `SessionHandler.py` to include the GPU resources you want to use on your machine. By default this is set to use GPU IDs 0 through 4.
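As a sketch (the GPU count here is a hypothetical example), the list assigned to `self._WORKERS` follows this shape:
```python
# Sketch of the worker list SessionHandler expects: one entry per local GPU worker.
# The range of 4 GPU IDs below is an assumption; set it to the devices on your machine.
WORKERS = [{"type": "local", "gpu_id": gpu_id} for gpu_id in range(4)]
print(WORKERS)  # [{'type': 'local', 'gpu_id': 0}, ..., {'type': 'local', 'gpu_id': 3}]
```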
## Running an instance of the web-tool

View File

@@ -7,7 +7,7 @@ channels:
dependencies:
- python>=3.6
- gdal>=2.0
- gdal>=2.0 # if solving the environment takes too long, pin the gdal version to proceed faster (gdal==2.4.3)
- rasterio
- shapely
- rtree

View File

@@ -2,39 +2,30 @@
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
# pylint: disable=E1137,E1136,E0110,E1101
import sys
import os
import time
import datetime
import collections
import argparse
import base64
import json
import uuid
import threading
import logging
import os
import sys
import time
import numpy as np
import cv2
import fiona
import fiona.transform
import numpy as np
import rasterio
import rasterio.warp
import pickle
import joblib
import logging
LOGGER = logging.getLogger("server")
from web_tool.DataLoader import warp_data_to_3857, crop_data_by_extent, crop_data_by_geometry
from web_tool.Datasets import load_datasets, get_area_from_geometry
DATASETS = load_datasets()
from web_tool.Utils import setup_logging, get_random_string, class_prediction_to_img, get_shape_layer_by_name, AtomicCounter
from web_tool.Utils import setup_logging, get_random_string, class_prediction_to_img
from web_tool import ROOT_DIR
from web_tool.Session import Session, manage_session_folders, SESSION_FOLDER
from web_tool.Session import manage_session_folders, SESSION_FOLDER
from web_tool.SessionHandler import SessionHandler
from web_tool.Checkpoints import Checkpoints
SESSION_HANDLER = None

View File

@@ -0,0 +1,111 @@
'''Script for creating a XYZ style basemap for all NAIP imagery for a given (state, year).

This goes really fast on Azure VMs in US East with large number of cores.
'''
import sys
import os
import time
import subprocess
import tempfile
import urllib.request
from multiprocessing import Pool

import numpy as np

NAIP_BLOB_ROOT = 'https://naipblobs.blob.core.windows.net/naip'

temp_dir = os.path.join(tempfile.gettempdir(), 'naip')
os.makedirs(temp_dir, exist_ok=True)
NAIP_INDEX_FN = os.path.join(temp_dir, "naip_v002_index.csv")

OUTPUT_DIR = "/home/caleb/data/oh_2017_naip/"
OUTPUT_TILE_DIR = "/home/caleb/data/oh_2017_naip_tiles/"
NUM_WORKERS = 64
STATE = "oh"  # use state code
YEAR = 2017

os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(OUTPUT_TILE_DIR, exist_ok=True)


def download_url(url, output_dir, force_download=False, verbose=False):
    """
    Download a URL
    """
    parsed_url = urllib.parse.urlparse(url)
    url_as_filename = os.path.basename(parsed_url.path)
    destination_filename = os.path.join(output_dir, url_as_filename)
    if (not force_download) and (os.path.isfile(destination_filename)):
        if verbose: print('Bypassing download of already-downloaded file {}'.format(os.path.basename(url)))
        return destination_filename
    if verbose: print('Downloading file {} to {}'.format(os.path.basename(url), destination_filename), end='')
    urllib.request.urlretrieve(url, destination_filename)
    assert(os.path.isfile(destination_filename))
    nBytes = os.path.getsize(destination_filename)
    if verbose: print('...done, {} bytes.'.format(nBytes))
    return destination_filename


if not os.path.exists(NAIP_INDEX_FN):
    download_url("https://naipblobs.blob.core.windows.net/naip-index/naip_v002_index.csv", temp_dir)

fns = []
with open(NAIP_INDEX_FN, "r") as f:
    for line in f:
        line = line.strip()
        if line != "":
            if line.endswith(".tif"):
                if ("/%s/" % (STATE)) in line and ("/%d/" % (YEAR)) in line:
                    fns.append(line)
print("Working on %d files" % (len(fns)))


def do_work(fn):
    time.sleep(np.random.random() * 2)
    url = NAIP_BLOB_ROOT + "/" + fn
    output_fn = fn.split("/")[-1]
    output_tmp_fn = output_fn[:-4] + "_tmp.tif"

    command = [
        "GDAL_SKIP=DODS",
        "gdalwarp",
        "-t_srs", "epsg:3857",
        "'%s'" % (url),
        OUTPUT_DIR + output_tmp_fn
    ]
    subprocess.call(" ".join(command), shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    command = [
        "gdal_translate",
        "-b", "1", "-b", "2", "-b", "3",
        OUTPUT_DIR + output_tmp_fn,
        OUTPUT_DIR + output_fn
    ]
    subprocess.call(" ".join(command), shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    os.remove(OUTPUT_DIR + output_tmp_fn)


p = Pool(NUM_WORKERS)
_ = p.map(do_work, fns)

command = [
    "gdalbuildvrt", "-srcnodata", "\"0 0 0\"", "basemap.vrt", "%s*.tif" % (OUTPUT_DIR)
]
subprocess.call(" ".join(command), shell=True)

# We run gdal2tiles once for each zoom level that we want as output as the multithreaded part of gdal2tiles.py _only_ works for the largest zoom level you select.
# E.g. if we run `gdal2tiles.py -z 8-16 --processes=32 basemap.vrt OUTPUT_DIR/` then level 16 would be built with 32 threads, however levels 8 through 15 would be built with a single thread.
# This is OK if you are making a basemap for a relatively small area, however for large areas it is (much) faster to generate all the levels with multiple threads.
for zoom_level in range(8, 17):
    print("Running zoom level %d" % (zoom_level))
    command = [
        "gdal2tiles.py", "-z", str(zoom_level), "--processes=%d" % (NUM_WORKERS), "basemap.vrt", OUTPUT_TILE_DIR
    ]
    subprocess.call(" ".join(command), shell=True)

os.remove("basemap.vrt")

View File

@@ -64,12 +64,15 @@ def _load_dataset(dataset):
shape_layer["crs"] = crs["init"] # TODO: will this break with fiona version; I think `.crs` will turn into a PyProj object
shape_layers[shape_layer["name"]] = shape_layer
else:
LOGGER.warning("Step 1 failed in loading dataset {}".format(dataset["metadata"]["displayName"]))
# TODO: check that the displayName field is present
return False # TODO: maybe we should make these errors more descriptive (explain why we can't load a dataset)
# Step 2: make sure the dataLayer exists
if dataset["dataLayer"]["type"] == "CUSTOM":
fn = dataset["dataLayer"]["path"]
if not os.path.exists(fn):
LOGGER.warning("Step 2 failed in loading dataset {}".format(dataset["metadata"]["displayName"]))
return False # TODO: maybe we should make these errors more descriptive (explain why we can't load a dataset)
# Step 3: setup the appropriate DatasetLoader
@@ -80,6 +83,7 @@ def _load_dataset(dataset):
elif dataset["dataLayer"]["type"] == "BASEMAP":
data_loader = DataLoaderBasemap(dataset["dataLayer"]["path"], dataset["dataLayer"]["padding"])
else:
LOGGER.warning("Step 3 failed in loading dataset {}".format(dataset["metadata"]["displayName"]))
return False # TODO: maybe we should make these errors more descriptive (explain why we can't load a dataset)
return {
@@ -117,7 +121,8 @@ def load_datasets():
def is_valid_dataset(dataset_key):
dataset_json = json.load(open(os.path.join(ROOT_DIR, "datasets.json"),"r"))
dataset_mine_json = json.load(open(os.path.join(ROOT_DIR, "datasets.mine.json"),"r"))
dataset_json = json.load(open(os.path.join(ROOT_DIR, "datasets.json"), "r"))
if os.path.exists(os.path.join(ROOT_DIR, "datasets.mine.json")):
dataset_mine_json = json.load(open(os.path.join(ROOT_DIR, "datasets.mine.json"), "r"))
return (dataset_key in dataset_json) or (dataset_key in dataset_mine_json)
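One subtlety in `is_valid_dataset()`: if `datasets.mine.json` does not exist, `dataset_mine_json` is never assigned before the final membership check. The sketch below is a hedged, illustrative defensive variant (not the committed code); the `root_dir` parameter stands in for the module's `ROOT_DIR`.
```python
# Sketch of a defensive variant: default the user overrides to an empty dict so the
# membership check never touches an unbound name when datasets.mine.json is absent.
import json
import os

def is_valid_dataset(dataset_key, root_dir="."):
    dataset_json = json.load(open(os.path.join(root_dir, "datasets.json"), "r"))
    dataset_mine_json = {}
    mine_fn = os.path.join(root_dir, "datasets.mine.json")
    if os.path.exists(mine_fn):
        dataset_mine_json = json.load(open(mine_fn, "r"))
    return (dataset_key in dataset_json) or (dataset_key in dataset_mine_json)
```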

View File

@@ -5,93 +5,124 @@ class ModelSession(abc.ABC):
@property
@abc.abstractmethod
def last_tile(self):
'''This property should be updated by `run()` with the value of the last `tile` tensor that was passed when `inference_mode == False`.
The purpose of keeping track of this data is to provide context for the `row` and `col` indices used in `add_sample_point()`. This property does
not need to be serialized to/from disk during `save_state_to()` and `load_state_from()`.
'''
"""This property should be updated by `run()` with the value of the last `tile` tensor that was passed
when `inference_mode == False`.
The purpose of keeping track of this data is to provide context for the `row` and `col` indices used
in `add_sample_point()`. This property does not need to be serialized to/from disk during
`save_state_to()` and `load_state_from()`.
"""
pass
@abc.abstractmethod
def __init__(self, gpu_id, **kwargs):
'''Responsible for initializaing the model and other necessary components from the pararmeters in the models.json file.
"""Responsible for initializing the model and other necessary components from the parameters in the
models.json files.
Args:
gpu_id: An int specifying which GPU to bind to, or None, to specify CPU.
**kwargs: Key, value pairs created from the contents of this implementation's "model" key in models.json. (the model filename should be passed this way)
'''
**kwargs: Key, value pairs created from the contents of this implementation's "model" key in models.json.
(the model filename should be passed this way)
"""
raise NotImplementedError()
@abc.abstractmethod
def run(self, tile, inference_mode=False):
'''Responsible for running the model on arbitrarily sized inputs.
"""Responsible for running the model on arbitrarily sized inputs.
Args:
tile: A tensor of data of size `(height, width, channels)` that has been cropped from the data source currently in use on the front-end. Here, `height` and `width` should be expected to vary between calls to `run()`.
inference_mode: A boolean indicating whether or not to store the `tile` argument in `self.last_tile`. This should be `True` when the purpose of calling run is just for executing the model (vs. for executing and fine-tuning the model).
tile: A tensor of data of size `(height, width, channels)` that has been cropped from the data source
currently in use on the front-end. Here, `height` and `width` should be expected to
vary between calls to `run()`.
inference_mode: A boolean indicating whether or not to store the `tile` argument in `self.last_tile`.
This should be `True` when the purpose of calling run is just for executing the model
(vs. for executing and fine-tuning the model).
Returns:
A tensor of size `(height, width, num_classes)` where the last dimension sums to 1 (e.g. as a result of applying the softmax function to the vector at every spatial location).
'''
A tensor of size `(height, width, num_classes)` where the last dimension sums to 1
(e.g. as a result of applying the softmax function to the vector at every spatial location).
"""
raise NotImplementedError()
@abc.abstractmethod
def retrain(self):
'''Responsible for updating the parameters of the internal model given the fine-tuning samples that have been passed through `add_sample_point()`.
The mechanism by which this happen is entirely up to the implementation of the class. Some implementations may use _all_ previously submitted fine-tuning samples,
while other implementations may use only the samples submitted since the last call to `retrain()`.
def retrain(self, **kwargs):
"""Responsible for updating the parameters of the internal model given the fine-tuning samples
that have been passed through `add_sample_point()`.
The mechanism by which this happens is entirely up to the implementation of the class. Some
implementations may use _all_ previously submitted fine-tuning samples, while other implementations
may use only the samples submitted since the last call to `retrain()`.
Returns:
Dictionary in the format `{"message": str, "success": bool}` describing the results of the retrain. The "message" will be displayed as HTML on the front-end, and styled according to "success".
'''
Dictionary in the format `{"message": str, "success": bool}` describing the results of the retrain.
The "message" will be displayed as HTML on the front-end, and styled according to "success".
"""
raise NotImplementedError()
@abc.abstractmethod
def add_sample_point(self, row, col, class_idx):
'''Responsible for recording fine-tuning samples internally so that they can be used in the next call to `retrain()`. Called once for every fine-tuning sample submitted in the front-end interface.
Args:
row: The row index into the last `tile` tensor that was passed to `run()`. This tensor should be stored in `self.last_tile`.
col: The column index into the last `tile` tensor that was passed to `run()`. This tensor should be stored in `self.last_tile`.
class_idx: The new class label (0 indexed) that is associated with the given `row` and `column` of `self.last_tile`.
"""Responsible for recording fine-tuning samples internally so that they can be used in the next
call to `retrain()`. Called once for every fine-tuning sample submitted in the front-end interface.
Args:
row: The row index into the last `tile` tensor that was passed to `run()`.
This tensor should be stored in `self.last_tile`.
col: The column index into the last `tile` tensor that was passed to `run()`.
This tensor should be stored in `self.last_tile`.
class_idx: The new class label (0 indexed) that is associated with the given
`row` and `column` of `self.last_tile`.
Returns:
Dictionary in the format `{"message": str, "success": bool}` describing the results of the trying to add a training sample. The "message" will be displayed as HTML on the front-end, and styled according to "success".
'''
Dictionary in the format `{"message": str, "success": bool}` describing the results of trying to
add a training sample. The "message" will be displayed as HTML on the front-end, and styled
according to "success".
"""
raise NotImplementedError()
@abc.abstractmethod
def reset(self):
'''Responsible for resetting the state of the internal model back to the intial configuration that it was read "from disk".
Note: This is not necessarily the original state of the model. If the class was serialized from disk it should be reset to that state.
'''Responsible for resetting the state of the internal model back to the initial configuration
in which it was read "from disk".
Note: This is not necessarily the original state of the model. If the (ModelSession) class was
serialized from disk it should be reset to that state.
Returns:
Dictionary in the format `{"message": str, "success": bool}` describing the results of the reset operation. The "message" will be displayed as HTML on the front-end, and styled according to "success".
Dictionary in the format `{"message": str, "success": bool}` describing the result of
the reset operation. The "message" will be displayed as HTML on the front-end, and styled
according to "success".
'''
raise NotImplementedError()
@abc.abstractmethod
def undo(self):
'''Responsible for removing the previously added fine-tuning sample (from `add_sample_point()`) or rolling back a model training step - up to the implementation.
"""Responsible for removing the previously added fine-tuning sample (from `add_sample_point()`)
or rolling back a model training step - up to the implementation.
Returns:
Dictionary in the format `{"message": str, "success": bool}` describing the results of the undo operation. The "message" will be displayed as HTML on the front-end, and styled according to "success".
'''
Dictionary in the format `{"message": str, "success": bool}` describing the results of
the undo operation. The "message" will be displayed as HTML on the front-end, and styled
according to "success".
"""
raise NotImplementedError()
@abc.abstractmethod
def save_state_to(self, directory):
'''Resonsible for serializing the _current_ state of the class to a directory with the purpose of re-hydrating later.
"""Responsible for serializing the _current_ state of the class to a directory with the purpose
of re-hydrating later.
Args:
directory: The directory to serialize to. This is guaranteed to exist and only contain: "classes.json", "request_replay.p" and "samples.geojson".
'''
directory: The directory to serialize to. This is guaranteed to exist and
only contain: "classes.json", "request_replay.p" and "samples.geojson".
"""
raise NotImplementedError()
@abc.abstractmethod
def load_state_from(self, directory):
'''Responsible for re-hydrating a previously serialized model. After this method is run then the state of this object should be such that `run()` can be called immediately after.
"""Responsible for re-hydrating a previously serialized model. After this method is run then the state of
this object should be such that `run()` can be called immediately after.
Args:
directory: The directory to re-hydrate from. This directory should have the output from `save_state_to()` in it.
'''
directory: The directory to re-hydrate from. This directory should have the output
from `save_state_to()` in it.
"""
raise NotImplementedError()
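To make the interface above concrete, here is a hedged sketch of a do-nothing `ModelSession` subclass. The class name, the `num_classes` keyword argument, the uniform-probability "model", and the `samples.p` filename are all illustrative assumptions; the Keras and PyTorch implementations shipped with the repo are the real references. A constructor case for a class like this would then be added in `worker.py`, as described in the README above.
```python
# Hedged sketch of a custom ModelSession; the trivial uniform-probability model and all
# names below are illustrative assumptions, not repo code.
import os
import pickle

import numpy as np

from web_tool.ModelSessionAbstract import ModelSession


class UniformGuessSession(ModelSession):
    """A do-nothing session that predicts equal probability for every class."""

    def __init__(self, gpu_id, **kwargs):
        # gpu_id is ignored here; kwargs would normally carry the checkpoint path, etc.
        self.num_classes = kwargs.get("num_classes", 4)
        self._last_tile = None
        self.samples = []  # list of (feature_vector, class_idx) fine-tuning samples

    @property
    def last_tile(self):
        return self._last_tile

    def run(self, tile, inference_mode=False):
        if not inference_mode:
            self._last_tile = tile
        height, width, _ = tile.shape
        # Uniform class probabilities; each (row, col) vector sums to 1 as required.
        return np.full((height, width, self.num_classes), 1.0 / self.num_classes)

    def retrain(self, **kwargs):
        # A real implementation would fine-tune the underlying model on self.samples here.
        return {"message": "Retrained on {} samples".format(len(self.samples)), "success": True}

    def add_sample_point(self, row, col, class_idx):
        if self._last_tile is None:
            return {"message": "No tile has been run yet", "success": False}
        self.samples.append((self._last_tile[row, col, :], class_idx))
        return {"message": "Added sample", "success": True}

    def reset(self):
        self.samples = []
        return {"message": "Model reset", "success": True}

    def undo(self):
        if not self.samples:
            return {"message": "Nothing to undo", "success": False}
        self.samples.pop()
        return {"message": "Removed last sample", "success": True}

    def save_state_to(self, directory):
        # "samples.p" is a filename chosen for this sketch, not one the tool requires.
        with open(os.path.join(directory, "samples.p"), "wb") as f:
            pickle.dump(self.samples, f)

    def load_state_from(self, directory):
        with open(os.path.join(directory, "samples.p"), "rb") as f:
            self.samples = pickle.load(f)
```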

View File

@@ -1,5 +1,6 @@
import sys
sys.path.append("..")
import os
import time
import copy

View File

@@ -1,17 +1,15 @@
import time
import threading
import subprocess
import json
import socket
from queue import Queue
import subprocess
import threading
import time
import logging
LOGGER = logging.getLogger("server")
from queue import Queue
from .Session import Session
from .ModelSessionRPC import ModelSessionRPC
from .Models import load_models
from .Datasets import is_valid_dataset
from .Checkpoints import Checkpoints
@@ -56,8 +54,8 @@ class SessionHandler():
def __init__(self, args):
self._WORKERS = [ # TODO: I hardcode that there are 4 GPUs available on the local machine
{"type": "local", "gpu_id": 0},
{"type": "local", "gpu_id": 1},
{"type": "local", "gpu_id": 2}
# {"type": "local", "gpu_id": 1},
# {"type": "local", "gpu_id": 2}
]
self._WORKER_POOL = Queue()

View File

@@ -61,7 +61,7 @@
"layerName": "NAIP 2017 Imagery",
"initialZoom": 11,
"url": "data/basemaps/m_3807537_ne_18_1_20170611_tiles/{z}/{x}/{y}.png",
"initialLocation": [ 38.477018, -75.402312],
"initialLocation": [38.477018, -75.402312],
"args": {
"attribution": "Georeferenced Image",
"tms": true,