Merge branch 'task_segmentation' of github.com:microsoft/archai into task_segmentation

Chris Lovett 2023-04-04 18:18:35 -07:00
Parent d220dac9b2 b76d7855a8
Commit 61a36ecf99
9 changed files with 52 additions and 54 deletions

View file

@@ -23,6 +23,7 @@ dependencies = [
"hyperopt",
"ipykernel",
"jupyter",
"lightning>=2.0.0",
"matplotlib",
"mldesigner",
"mlflow",
@@ -38,7 +39,6 @@ dependencies = [
"psutil",
"pydata-sphinx-theme==0.13.1",
"pytest",
"pytorch-lightning>=2.0.0",
"pyunpack",
"pyyaml",
"ray>=1.0.0",
@@ -76,7 +76,7 @@ extras_require = {}
extras_require["cv"] = filter_dependencies(
"gorilla",
"opencv-python",
"pytorch-lightning",
"lightning",
"scikit-learn",
"torchvision",
)
@@ -121,7 +121,7 @@ extras_require["aml"] = filter_dependencies(
"matplotlib",
"mldesigner",
"mlflow",
"pytorch-lightning",
"lightning",
"torchvision",
)
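For context (not part of this diff): the swap from `pytorch-lightning` to `lightning` tracks Lightning 2.x, where the unified `lightning` package provides the former `pytorch_lightning` API under the `lightning.pytorch` namespace. A quick sanity check might look like:

```python
# Lightning >= 2.0: the unified "lightning" package exposes the familiar
# PyTorch Lightning API under the lightning.pytorch namespace.
import lightning
import lightning.pytorch as pl  # previously: import pytorch_lightning as pl

print(lightning.__version__)  # expect 2.x after this dependency change
print(pl.Trainer)             # Trainer / LightningModule live here now
```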

tasks/face_segmentation/.vscode/launch.json vendored
View file

@@ -16,7 +16,6 @@
"f85154f6",
"--cleanup_stale_pods",
"60",
"--no_quantization",
"--working",
"~/snpe/experiment1",
]

View file

@@ -41,13 +41,12 @@ Then to get the ball rolling create a temp folder and run this:
```
mkdir -p ~/experiment
cd ~/experiment
python ~/git/snpe_runner/azure/runner.py
python ~/git/snpe_runner/azure/runner.py --working ~/experiment
```
This will monitor the Azure blob store for new work to do, and run those jobs in
priority order. If you also provide a `--device` option pointing to the `adb device` for a Qualcomm 888 Dev Board then it will also run the quantized models
on that device and report the performance and F1 score results.
To setup a quantization only runner in the cloud you can pass `--no_quantization` argument, this is what
we do in the quantization docker image.
If you set up a quantization-only runner in the cloud using the `docker/quantizer` image, you can pass the `--no_quantization` argument when you have a `--device` so that the local runs do not do quantization.
This way the Linux machine driving the local Qualcomm devices is not overloaded with quantization work.

View file

@@ -1,14 +1,7 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
# This script monitors an Azure blob store using connection string defined
# in the environmentvariable "FaceSyntheticsModelStorageAccount".
# When a file named "go" shows up in a folder containing a new model to test
# it downloads the model, runs the F1 score check with 10,000 images
# and the benchmark and posts the 2 results .csv files
# back to the same folder. The .csv files are named:
# - test_results.csv - for the F1 score.
# - benchmark_stats_model.csv - for benchmark perf numbers.
# See Readme.md
import argparse
import json
import os
@@ -103,6 +96,8 @@ def check_device(device, snpe_root):
def check_dataset(shape, name, test_size):
_, w, h, c = shape
img_size = (w, h, 3)
test = os.path.join('data', name)
if os.path.isdir(test):
s = read_shape(test)
@@ -116,7 +111,7 @@ def check_dataset(shape, name, test_size):
rmtree(test)
if not os.path.isdir(test):
create_dataset(dataset, name, shape, test_size)
create_dataset(dataset, name, img_size, test_size)
save_shape(test, shape)
@@ -497,8 +492,6 @@ def run_model(name, snpe_root, dataset, conn_string, use_device, benchmark_only,
log(f"==> running {prop} test using model {model}")
input_size = tuple(input_shape)[0:2] # e.g. (256,256)
# copy model to the device.
if prop != 'f1_onnx':
# now that we have the shape, we can create the appropriate quant and test
@@ -520,7 +513,7 @@ def run_model(name, snpe_root, dataset, conn_string, use_device, benchmark_only,
try:
use_pillow = 'use_pillow' in entity and entity['use_pillow']
test_results, chart, f1score = get_metrics(input_size, False, dataset, snpe_output_dir, use_pillow)
test_results, chart, f1score = get_metrics(input_shape, False, dataset, snpe_output_dir, use_pillow)
except Exception as ex:
entity['status'] = 'error'
entity['error'] = str(ex)

View file

@@ -107,5 +107,5 @@ if __name__ == '__main__':
if os.path.isdir(dst_root):
rmtree(dst_root)
create_dataset(dataset, dst_root, 'quant', [dim, dim], count, transpose)
create_dataset(dataset, dst_root, 'test', [dim, dim], count, transpose)
create_dataset(dataset, dst_root, 'quant', (dim, dim), count, transpose)
create_dataset(dataset, dst_root, 'test', (dim, dim), count, transpose)

View file

@@ -38,7 +38,7 @@ def main():
zip_ref.extractall(data_dir)
print('Converting the .png images to SNPE quantization .bin files...')
create_dataset(data_dir, output_dir, 'quant', [256, 256, 3], 1000)
create_dataset(data_dir, output_dir, 'quant', (256, 256), 1000)
for name in os.listdir(output_dir):
print(name)
@@ -48,4 +48,4 @@ def main():
if __name__ == "__main__":
main()
main()

View file

@@ -1,24 +1,25 @@
# Readme
This folder contains code that automates the testing of ONNX models across one or more machines that are connected via
USB to Qualcomm 888 boards. Many thanks to Yatao Zhong for the original device code included in this test suite.
USB to Qualcomm 888 boards.
The code is organized into:
1. [SNPE Device Code](snpe/readme.md) that knows how to use the Qualcomm SNPE SDK to talk to the device, convert ONNX
models to .dlc, quantize them, and test them on the board using the Android `adb` tool.
1. [SNPE Device Code](snpe/readme.md) that knows how to use the Qualcomm Neural Processing SDK to talk to the device,
convert ONNX models to .dlc, quantize them, and test them on the board using the Android `adb` tool.
1. [Azure Code](azure/readme.md) that talks to a configured Azure storage account for uploading models to test,
downloading them, uploading test results, and keeping an Azure table "status" that summarizes results so far.
downloading them, uploading test results, and keeping an Azure table "status" that summarizes results of all your
models.
1. [Docker](docker/readme.md) scripts for setting up your Azure account and optionally creating a docker image for
running in an Azure Kubernetes cluster.
1. [Docker](docker/quantizer/readme.md) scripts for setting up your Azure account and optionally creating a docker image
for running in an Azure Kubernetes cluster to do model quantization using the Qualcomm Neural Processing SDK.
Quantization is time-consuming, so having an elastic scale speeds things up a lot.
1. [Notebooks](notebook/gallery_performance.md) contains a Jupyter Notebook that can visualize the results from the
Azure table.
Azure "status" table.
Both are based on Python, so it is best if you setup a new Conda Python environment for Python 3.6 with the
`requirements.txt` included here using:
It is best if you set up a new Conda Python environment for Python 3.10 with the `requirements.txt` included here using:
```shell
pip install -r requirements.txt
@@ -35,8 +36,9 @@ including a Kubernetes cluster setup for quantization (see [docker/quantizer](do
![system](images/system.png)
Each instance of `runner.py` looks for work, and executes it in priority order where the prioritization is defined by
the `find_work_prioritized` function in the runner. This prioritization maps to the columns of the status table as
follows:
the `find_work_prioritized` function in the runner. This script is completely restartable, and can distribute the work
across multiple instances of the runner script. Each instance will pick up where a previous one left off based on what
it finds in your "status" Azure table. The prioritization maps to the columns of the status table as follows:
1. **macs:** convert to .dlc and post Macs score and `snpe-dlc-viewer` output and do model quantization (runs on Linux) - priority 20
1. **total_inference_avg** run `snpe_bench.py` with quantized model on Qualcomm device DSP - priority 30
@@ -57,10 +59,12 @@ that you want to bump to the top of the list.
Notice some of the above jobs can run on Linux and do not require a Qualcomm device. So in order to maximize throughput on
machines that do have a Qualcomm device you can allocate other Linux machines with no Qualcomm devices to do the other
work, namely, converting models, quantizing them, and running the `onnxruntime` test set.
work, namely, converting models, quantizing them, and running the `f1_onnx` test.
Folks across your team can use the `azure/upload.py` to submit jobs and let them run. You can use `status.py` to
monitor progress or look at the Azure `status` table. Various status messages are posted there so you can see which
machine is doing what and is in what stage of the job.
Folks across your team can use the `azure/upload.py` script to submit jobs and let them run, or they can automate that as
shown in the `RemoteAzureBenchmarkEvaluator` in the `search.py` script.
You can use `status.py` to monitor progress or look at the Azure `status` table. Various status messages are posted
there so you can see which machine is doing what and is in what stage of the job.
Next you can go to the `notebook` page and get some pretty pictures of your Pareto Curves.
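For illustration only, here is a rough sketch, not code from this repo, of peeking at that status table with the `azure-data-tables` SDK. It assumes the connection string is in the `FaceSyntheticsModelStorageAccount` environment variable and that the table is literally named `status`, and it only mimics the two priority values mentioned above (the real `find_work_prioritized` in `runner.py` does considerably more):

```python
import os
from azure.data.tables import TableClient

# hypothetical snippet: list entries roughly the way the runner prioritizes
# work (lower number = higher priority)
conn_str = os.environ["FaceSyntheticsModelStorageAccount"]
table = TableClient.from_connection_string(conn_str, table_name="status")

def priority(entity):
    if not entity.get("macs"):
        return 20    # still needs .dlc conversion and quantization
    if not entity.get("total_inference_avg"):
        return 30    # still needs snpe_bench.py on the Qualcomm DSP
    return 1000      # nothing left to do for this model

for e in sorted(table.list_entities(), key=priority):
    print(priority(e), e["RowKey"], e.get("status"))
```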

View file

@@ -106,5 +106,5 @@ if __name__ == '__main__':
else:
transpose = None
create_dataset(dataset, 'quant', [dim, dim], count, transpose)
create_dataset(dataset, 'test', [dim, dim], count, transpose)
create_dataset(dataset, 'quant', (dim, dim), count, transpose)
create_dataset(dataset, 'test', (dim, dim), count, transpose)

View file

@@ -46,7 +46,9 @@ def _get_dataset_gt(img_name, dataset, img_shape, use_pillow=False):
return gt_seg
def show_output(img_shape, transpose, dataset, outputs):
def show_output(input_shape, transpose, dataset, outputs):
_, w, h, c = input_shape
img_shape = (w, h)
output_list = [x for x in os.listdir(outputs) if x.endswith('.raw')]
output_list.sort()
for out_f in output_list:
@@ -93,7 +95,7 @@ def get_confusion_matrix(gt_label, pred_label, valid_mask, num_classes):
return confusion_matrix
def get_metrics(img_shape, transpose, dataset, outputs, use_pillow=False):
def get_metrics(input_shape, transpose, dataset, outputs, use_pillow=False):
output_list = [x for x in os.listdir(outputs) if x.endswith('.raw')]
output_list.sort()
@@ -103,7 +105,8 @@ def get_metrics(img_shape, transpose, dataset, outputs, use_pillow=False):
print(f"Collecting metrics on {len(output_list)} output .raw files...")
num_classes = 19
_, width, height, num_classes = input_shape
img_shape = (width, height)
confusion_matx = None
bins = int(1e6)
@@ -121,9 +124,9 @@ def get_metrics(img_shape, transpose, dataset, outputs, use_pillow=False):
full_path = os.path.join(outputs, out_f)
logits = np.fromfile(full_path, dtype=np.float32)
size = np.product(logits.shape)
num_classes = int(size / (img_shape[0] * img_shape[1]))
if num_classes < 18:
raise Exception(f"Result {out_f} has unexpected number of predictions {num_classes}")
found_classes = int(size / (img_shape[0] * img_shape[1]))
if found_classes != num_classes:
raise Exception(f"Result {out_f} has unexpected number of predictions {found_classes}, expecting {num_classes}")
if transpose:
logits = logits.reshape((num_classes, img_shape[0], img_shape[1])).transpose(transpose)
@@ -238,8 +241,8 @@ if __name__ == '__main__':
default='snpe_output')
parser.add_argument('--transpose', '-t', help='Transpose channels by (1,2,0)', action="store_true")
parser.add_argument('--pillow', help="Resize images using Pillow instead of numpy", action="store_true")
parser.add_argument('--image_shape', help="Resize images this size, must match the size of the output images " +
"(default '256,256')")
parser.add_argument('--input_shape', help="Resize images this size, must match the shape of the model output " +
"(default '1,256,256,19')")
args = parser.parse_args()
use_pillow = args.pillow
@@ -263,11 +266,11 @@ if __name__ == '__main__':
print("Experiment 'output' dir not found: " + output_dir)
sys.exit(1)
image_shape = (256, 256)
if args.image_shape:
image_shape = tuple(eval(args.image_shape))
input_shape = (1, 256, 256, 19)
if args.input_shape:
input_shape = tuple(eval(args.input_shape))
if args.show:
show_output(image_shape, transpose, dataset, output_dir)
show_output(input_shape, transpose, dataset, output_dir)
else:
get_metrics(image_shape, transpose, dataset, output_dir, use_pillow)
get_metrics(input_shape, transpose, dataset, output_dir, use_pillow)
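To make the new `input_shape` convention concrete, here is a small self-contained sketch (hypothetical values, not code from this commit) of how a `(1, H, W, C)` shape is unpacked into the image size and the expected class count, mirroring the `get_metrics` changes above:

```python
import numpy as np

# NHWC shape of the model output, e.g. 256x256 images with 19 classes
input_shape = (1, 256, 256, 19)
_, width, height, num_classes = input_shape
img_shape = (width, height)

# stand-in for the logits read from one .raw output file
logits = np.zeros(width * height * num_classes, dtype=np.float32)

# the class count implied by the file size must match the model output
found_classes = int(logits.size / (img_shape[0] * img_shape[1]))
if found_classes != num_classes:
    raise Exception(f"unexpected number of predictions {found_classes}, "
                    f"expecting {num_classes}")

# reshape to (H, W, C); pass a transpose such as (1, 2, 0) when the raw
# buffer is channel-first, as collect_metrics.py does with --transpose
predictions = logits.reshape((img_shape[0], img_shape[1], num_classes))
print(predictions.shape)
```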