From 64b0bf802534cf5b46021005e927c5f0e46136aa Mon Sep 17 00:00:00 2001
From: Siyu Yang <yasiyu@microsoft.com>
Date: Tue, 30 Jun 2020 14:38:30 -0700
Subject: [PATCH] Add directory structure to /geospatial. (#9)

---
 README.md                                     |  23 +-
 geospatial/README.md                          |  25 +
 geospatial/data/.gitkeep                      |   0
 .../geospatial_recipes.md                     |   0
 geospatial/loss_functions/detection/.gitkeep  |   0
 .../loss_functions/segmentation/.gitkeep      |   0
 geospatial/model_evaluation/.gitkeep          |   0
 geospatial/model_scoring/.gitkeep             |   0
 geospatial/model_training/.gitkeep            |   0
 geospatial/models/detection/.gitkeep          |   0
 .../models/segmentation/segmentation_model.py |  13 +
 geospatial/visualization/.gitkeep             |   0
 pylintrc                                      | 426 ++++++++++++++++++
 13 files changed, 477 insertions(+), 10 deletions(-)
 create mode 100644 geospatial/README.md
 create mode 100644 geospatial/data/.gitkeep
 rename geospatial_recipes.md => geospatial/geospatial_recipes.md (100%)
 create mode 100644 geospatial/loss_functions/detection/.gitkeep
 create mode 100644 geospatial/loss_functions/segmentation/.gitkeep
 create mode 100644 geospatial/model_evaluation/.gitkeep
 create mode 100644 geospatial/model_scoring/.gitkeep
 create mode 100644 geospatial/model_training/.gitkeep
 create mode 100644 geospatial/models/detection/.gitkeep
 create mode 100644 geospatial/models/segmentation/segmentation_model.py
 create mode 100644 geospatial/visualization/.gitkeep
 create mode 100644 pylintrc

diff --git a/README.md b/README.md
index 3a97cd8..9d294e1 100644
--- a/README.md
+++ b/README.md
@@ -6,26 +6,29 @@ The general convention in this repo is that users who want to consume these util
 
 # Contents
 
-- `path_utils.py`: Miscellaneous useful utils for path manipulation, things that could *almost* be in os.path, but aren't.
+- [path_utils.py](path_utils.py): Miscellaneous useful utils for path manipulation, things that could *almost* be in os.path, but aren't.
 
-- `matlab_porting_tools.py`: A few ported Matlab functions that makes it easier to port other, larger Matlab functions to Python.
+- [matlab_porting_tools.py](matlab_porting_tools.py): A few ported Matlab functions that make it easier to port other, larger Matlab functions to Python.
 
-- `write_html_image_list.py`: Given a list of image file names, writes an HTML file that shows all those images, with optional one-line headers above each.
+- [write_html_image_list.py](write_html_image_list.py): Given a list of image file names, writes an HTML file that shows all those images, with optional one-line headers above each.
 
-- `sas_blob_utils.py`: Helper functions for dealing with Shared Access Signatures (SAS) tokens
+- [sas_blob_utils.py](sas_blob_utils.py): Helper functions for dealing with Shared Access Signatures (SAS) tokens
 for Azure Blob Storage.
 
-- `TF_OD_API`: A Dockerfile and a script to prepare a Docker image for use with the [TensorFlow Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection).
+- [TF_OD_API](TF_OD_API): A Dockerfile and a script to prepare a Docker image for use with the [TensorFlow Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection).
 
-- `gDrive_download.py`: Semi-automatic script for bulk download from shared Google Drives using the gDrive Python SDK.
+- [gDrive_download.py](gDrive_download.py): Semi-automatic script for bulk download from shared Google Drives using the gDrive Python SDK.
 
-- `azure-sdk-calc-storage-size`: Script for recursively computing the size of all blobs and files in an Azure subscription.
+- [azure-sdk-calc-storage-size](azure-sdk-calc-storage-size): Script for recursively computing the size of all blobs and files in an Azure subscription.
 
-- `azure-metrics-calc-storage-size`: Script for computing the total size of all storage accounts in an Azure subscription (using Azure Metrics).
+- [azure-metrics-calc-storage-size](azure-metrics-calc-storage-size): Script for computing the total size of all storage accounts in an Azure subscription (using Azure Metrics).
 
-- `ai4e_azure_utils.py`: Functions for interacting with the Azure Storage SDK
+- [ai4e_azure_utils.py](ai4e_azure_utils.py): Functions for interacting with the Azure Storage SDK
+
+- [ai4e_web_utils.py](ai4e_web_utils.py): Functions for interacting with http requests
+
+- [geospatial](geospatial): Classes and utility functions for processing geospatial data for machine learning applications
 
-- `ai4e_web_utils.py`: Functions for interacting with http requests
 
 # Contributing
 
diff --git a/geospatial/README.md b/geospatial/README.md
new file mode 100644
index 0000000..9bdc9bb
--- /dev/null
+++ b/geospatial/README.md
@@ -0,0 +1,25 @@
+# `/geospatial`
+
+
+## Capabilities we hope to develop
+
+1. Visualization
+2. Data loading (patches of labels and imagery to be used in a Random forest, PyTorch or TF model)
+3. Data downloading (documentation walk-throughs mostly)
+4. Interactive map for displaying and discussing input and model output
+5. Interactive re-labeling tool (see the landcover [repo](https://github.com/microsoft/landcover))
+6. Bring your own data (BYOD)
+7. "Default" models for segmentation and detection
+8. Model evaluation metrics computation over an area / split
+
+
+## Content
+
+- [geospatial_recipes.md](geospatial_recipes.md): a list of recipes for working with geospatial data using the GDAL command line tools
+
+
+## TODO
+
+- [ ] Add a meta class for segmentation and detection models
+- [ ] Add a visualization class for viewing raster labels
+
diff --git a/geospatial/data/.gitkeep b/geospatial/data/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/geospatial_recipes.md b/geospatial/geospatial_recipes.md
similarity index 100%
rename from geospatial_recipes.md
rename to geospatial/geospatial_recipes.md
diff --git a/geospatial/loss_functions/detection/.gitkeep b/geospatial/loss_functions/detection/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/geospatial/loss_functions/segmentation/.gitkeep b/geospatial/loss_functions/segmentation/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/geospatial/model_evaluation/.gitkeep b/geospatial/model_evaluation/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/geospatial/model_scoring/.gitkeep b/geospatial/model_scoring/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/geospatial/model_training/.gitkeep b/geospatial/model_training/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/geospatial/models/detection/.gitkeep b/geospatial/models/detection/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/geospatial/models/segmentation/segmentation_model.py b/geospatial/models/segmentation/segmentation_model.py
new file mode 100644
index 0000000..2982e63
--- /dev/null
+++ b/geospatial/models/segmentation/segmentation_model.py
@@ -0,0 +1,13 @@
+import abc
+
+import torch
+import torch.nn as nn
+
+
+class BaseNetworkStructure(nn.Module, metaclass=abc.ABCMeta):
+    """
+    nn.Module base class using the abc.ABCMeta metaclass; no members yet.
+    """
+    pass
+
+
diff --git a/geospatial/visualization/.gitkeep b/geospatial/visualization/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/pylintrc b/pylintrc
new file mode 100644
index 0000000..b841822
--- /dev/null
+++ b/pylintrc
@@ -0,0 +1,426 @@
+# This Pylint rcfile contains a best-effort configuration to uphold the
+# best-practices and style described in the Google Python style guide:
+#   https://google.github.io/styleguide/pyguide.html
+#
+# Its canonical open-source location is:
+#   https://google.github.io/styleguide/pylintrc
+
+[MASTER]
+
+# Add files or directories to the blocklist. They should be base names, not
+# paths.
+ignore=third_party
+
+# Add files or directories matching the regex patterns to the blocklist. The
+# regex matches against base names, not paths.
+ignore-patterns=
+
+# Pickle collected data for later comparisons.
+persistent=no
+
+# List of plugins (as comma separated values of python modules names) to load,
+# usually to register additional checkers.
+load-plugins=
+
+# Use multiple processes to speed up Pylint.
+jobs=4
+
+# Allow loading of arbitrary C extensions. Extensions are imported into the
+# active Python interpreter and may run arbitrary code.
+unsafe-load-any-extension=no
+
+# A comma-separated list of package or module names from where C extensions may
+# be loaded. Extensions are loaded into the active Python interpreter and may
+# run arbitrary code
+extension-pkg-whitelist=
+
+
+[MESSAGES CONTROL]
+
+# Only show warnings with the listed confidence levels. Leave empty to show
+# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED
+confidence=
+
+# Enable the message, report, category or checker with the given id(s). You can
+# either give multiple identifiers separated by comma (,) or put this option
+# multiple times (only on the command line, not in the configuration file where
+# it should appear only once). See also the "--disable" option for examples.
+#enable=
+
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then reenable specific checks. For example, if
+# you want to run only the similarities checker, you can use "--disable=all
+# --enable=similarities". If you want to run only the classes checker, but have
+# no Warning level messages displayed, use "--disable=all --enable=classes
+# --disable=W"
+disable=apply-builtin,
+        backtick,
+        bad-option-value,
+        basestring-builtin,
+        buffer-builtin,
+        c-extension-no-member,
+        cmp-builtin,
+        cmp-method,
+        coerce-builtin,
+        coerce-method,
+        delslice-method,
+        div-method,
+        duplicate-code,
+        eq-without-hash,
+        execfile-builtin,
+        file-builtin,
+        filter-builtin-not-iterating,
+        fixme,
+        getslice-method,
+        global-statement,
+        hex-method,
+        idiv-method,
+        implicit-str-concat-in-sequence,
+        import-error,
+        import-self,
+        import-star-module-level,
+        input-builtin,
+        intern-builtin,
+        invalid-str-codec,
+        locally-disabled,
+        long-builtin,
+        long-suffix,
+        map-builtin-not-iterating,
+        metaclass-assignment,
+        next-method-called,
+        next-method-defined,
+        no-absolute-import,
+        no-else-break,
+        no-else-continue,
+        no-else-raise,
+        no-else-return,
+        no-member,
+        no-self-use,
+        nonzero-method,
+        oct-method,
+        old-division,
+        old-ne-operator,
+        old-octal-literal,
+        old-raise-syntax,
+        parameter-unpacking,
+        print-statement,
+        raising-string,
+        range-builtin-not-iterating,
+        raw_input-builtin,
+        rdiv-method,
+        reduce-builtin,
+        relative-import,
+        reload-builtin,
+        round-builtin,
+        setslice-method,
+        signature-differs,
+        standarderror-builtin,
+        suppressed-message,
+        sys-max-int,
+        too-few-public-methods,
+        too-many-ancestors,
+        too-many-arguments,
+        too-many-boolean-expressions,
+        too-many-branches,
+        too-many-instance-attributes,
+        too-many-locals,
+        too-many-public-methods,
+        too-many-return-statements,
+        too-many-statements,
+        trailing-newlines,
+        unichr-builtin,
+        unicode-builtin,
+        unpacking-in-except,
+        useless-else-on-loop,
+        useless-suppression,
+        using-cmp-argument,
+        xrange-builtin,
+        zip-builtin-not-iterating,
+
+
+[REPORTS]
+
+# Set the output format. Available formats are text, parseable, colorized, msvs
+# (visual studio) and html. You can also give a reporter class, eg
+# mypackage.mymodule.MyReporterClass.
+output-format=text
+
+# Put messages in a separate file for each module / package specified on the
+# command line instead of printing them on stdout. Reports (if any) will be
+# written in a file name "pylint_global.[txt|html]". This option is deprecated
+# and it will be removed in Pylint 2.0.
+files-output=no
+
+# Tells whether to display a full report or only the messages
+reports=no
+
+# Python expression which should return a score less than or equal to 10 (10
+# is the highest score). You have access to the variables error, warning,
+# refactor, convention and statement, which respectively contain the number of
+# messages in each of those categories and the total number of statements
+# analyzed. This is used by the global evaluation report (RP0004).
+evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+
+# Template used to display messages. This is a python new-style format string
+# used to format the message information. See doc for all details
+#msg-template=
+
+
+[BASIC]
+
+# Good variable names which should always be accepted, separated by a comma
+good-names=main,_
+
+# Bad variable names which should always be refused, separated by a comma
+bad-names=
+
+# Colon-delimited sets of names that determine each other's naming style when
+# the name regexes allow several styles.
+name-group=
+
+# Include a hint for the correct naming format with invalid-name
+include-naming-hint=no
+
+# List of decorators that produce properties, such as abc.abstractproperty. Add
+# to this list to register other decorators that produce valid properties.
+property-classes=abc.abstractproperty,cached_property.cached_property,cached_property.threaded_cached_property,cached_property.cached_property_with_ttl,cached_property.threaded_cached_property_with_ttl
+
+# Regular expression matching correct function names
+function-rgx=^(?:(?P<exempt>setUp|tearDown|setUpModule|tearDownModule)|(?P<camel_case>_?[A-Z][a-zA-Z0-9]*)|(?P<snake_case>_?[a-z][a-z0-9_]*))$
+
+# Regular expression matching correct variable names
+variable-rgx=^[a-z][a-z0-9_]*$
+
+# Regular expression matching correct constant names
+const-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$
+
+# Regular expression matching correct attribute names
+attr-rgx=^_{0,2}[a-z][a-z0-9_]*$
+
+# Regular expression matching correct argument names
+argument-rgx=^[a-z][a-z0-9_]*$
+
+# Regular expression matching correct class attribute names
+class-attribute-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$
+
+# Regular expression matching correct inline iteration names
+inlinevar-rgx=^[a-z][a-z0-9_]*$
+
+# Regular expression matching correct class names
+class-rgx=^_?[A-Z][a-zA-Z0-9]*$
+
+# Regular expression matching correct module names
+module-rgx=^(_?[a-z][a-z0-9_]*|__init__)$
+
+# Regular expression matching correct method names
+method-rgx=(?x)^(?:(?P<exempt>_[a-z0-9_]+__|runTest|setUp|tearDown|setUpTestCase|tearDownTestCase|setupSelf|tearDownClass|setUpClass|(test|assert)_*[A-Z0-9][a-zA-Z0-9_]*|next)|(?P<camel_case>_{0,2}[A-Z][a-zA-Z0-9_]*)|(?P<snake_case>_{0,2}[a-z][a-z0-9_]*))$
+
+# Regular expression which should only match function or class names that do
+# not require a docstring.
+no-docstring-rgx=(__.*__|main|test.*|.*test|.*Test)$
+
+# Minimum line length for functions/classes that require docstrings, shorter
+# ones are exempt.
+docstring-min-length=10
+
+
+[TYPECHECK]
+
+# List of decorators that produce context managers, such as
+# contextlib.contextmanager. Add to this list to register other decorators that
+# produce valid context managers.
+contextmanager-decorators=contextlib.contextmanager,contextlib2.contextmanager
+
+# Tells whether missing members accessed in mixin class should be ignored. A
+# mixin class is detected if its name ends with "mixin" (case insensitive).
+ignore-mixin-members=yes
+
+# List of module names for which member attributes should not be checked
+# (useful for modules/projects where namespaces are manipulated during runtime
+# and thus existing member attributes cannot be deduced by static analysis). It
+# supports qualified module names, as well as Unix pattern matching.
+ignored-modules=
+
+# List of class names for which member attributes should not be checked (useful
+# for classes with dynamically set attributes). This supports the use of
+# qualified names.
+ignored-classes=optparse.Values,thread._local,_thread._local
+
+# List of members which are set dynamically and missed by pylint inference
+# system, and so shouldn't trigger E1101 when accessed. Python regular
+# expressions are accepted.
+generated-members=
+
+
+[FORMAT]
+
+# Maximum number of characters on a single line.
+max-line-length=80
+
+# TODO(https://github.com/PyCQA/pylint/issues/3352): Direct pylint to exempt
+# lines made too long by directives to pytype.
+
+# Regexp for a line that is allowed to be longer than the limit.
+ignore-long-lines=(?x)(
+  ^\s*(\#\ )?<?https?://\S+>?$|
+  ^\s*(from\s+\S+\s+)?import\s+.+$)
+
+# Allow the body of an if to be on the same line as the test if there is no
+# else.
+single-line-if-stmt=yes
+
+# List of optional constructs for which whitespace checking is disabled. `dict-
+# separator` is used to allow tabulation in dicts, etc.: {1  : 1,\n222: 2}.
+# `trailing-comma` allows a space between comma and closing bracket: (a, ).
+# `empty-line` allows space-only lines.
+no-space-check=
+
+# Maximum number of lines in a module
+max-module-lines=99999
+
+# String used as indentation unit.  The internal Google style guide mandates 2
+# spaces.  Google's externally-published style guide says 4, consistent with
+# PEP 8.  Here, we use 4 spaces.
+indent-string='    '
+
+# Number of spaces of indent required inside a hanging or continued line.
+indent-after-paren=4
+
+# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
+expected-line-ending-format=
+
+
+[MISCELLANEOUS]
+
+# List of note tags to take in consideration, separated by a comma.
+notes=TODO
+
+
+[VARIABLES]
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+
+# A regular expression matching the name of dummy variables (i.e. expectedly
+# not used).
+dummy-variables-rgx=^\*{0,2}(_$|unused_|dummy_)
+
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid to define new builtins when possible.
+additional-builtins=
+
+# List of strings which can identify a callback function by name. A callback
+# name must start or end with one of those strings.
+callbacks=cb_,_cb
+
+# List of qualified module names which can have objects that can redefine
+# builtins.
+redefining-builtins-modules=six,six.moves,past.builtins,future.builtins,functools
+
+
+[LOGGING]
+
+# Logging modules to check that the string format arguments are in logging
+# function parameter format
+logging-modules=logging,absl.logging,tensorflow.google.logging
+
+
+[SIMILARITIES]
+
+# Minimum lines number of a similarity.
+min-similarity-lines=4
+
+# Ignore comments when computing similarities.
+ignore-comments=yes
+
+# Ignore docstrings when computing similarities.
+ignore-docstrings=yes
+
+# Ignore imports when computing similarities.
+ignore-imports=no
+
+
+[SPELLING]
+
+# Spelling dictionary name. Available dictionaries: none. To make it work,
+# install the python-enchant package.
+spelling-dict=
+
+# List of comma separated words that should not be checked.
+spelling-ignore-words=
+
+# A path to a file that contains private dictionary; one word per line.
+spelling-private-dict-file=
+
+# Tells whether to store unknown words to indicated private dictionary in
+# --spelling-private-dict-file option instead of raising a message.
+spelling-store-unknown-words=no
+
+
+[IMPORTS]
+
+# Deprecated modules which should not be used, separated by a comma
+deprecated-modules=regsub,
+                   TERMIOS,
+                   Bastion,
+                   rexec,
+                   sets
+
+# Create a graph of every (i.e. internal and external) dependencies in the
+# given file (report RP0402 must not be disabled)
+import-graph=
+
+# Create a graph of external dependencies in the given file (report RP0402 must
+# not be disabled)
+ext-import-graph=
+
+# Create a graph of internal dependencies in the given file (report RP0402 must
+# not be disabled)
+int-import-graph=
+
+# Force import order to recognize a module as part of the standard
+# compatibility libraries.
+known-standard-library=
+
+# Force import order to recognize a module as part of a third party library.
+known-third-party=enchant, absl
+
+# Analyse import fallback blocks. This can be used to support both Python 2 and
+# 3 compatible code, which means that the block might have code that exists
+# only in one or another interpreter, leading to false positives when analysed.
+analyse-fallback-blocks=no
+
+
+[CLASSES]
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,
+                      __new__,
+                      setUp
+
+# List of member names, which should be excluded from the protected access
+# warning.
+exclude-protected=_asdict,
+                  _fields,
+                  _replace,
+                  _source,
+                  _make
+
+# List of valid names for the first argument in a class method.
+valid-classmethod-first-arg=cls,
+                            class_
+
+# List of valid names for the first argument in a metaclass class method.
+valid-metaclass-classmethod-first-arg=mcs
+
+
+[EXCEPTIONS]
+
+# Exceptions that will emit a warning when being caught. Defaults to
+# "Exception"
+overgeneral-exceptions=StandardError,
+                       Exception,
+                       BaseException
\ No newline at end of file