Merge branch 'task_segmentation' of github.com:microsoft/archai into task_segmentation

Chris Lovett 2023-04-04 18:18:35 -07:00
Parent d220dac9b2 b76d7855a8
Commit 61a36ecf99
9 changed files with 52 additions and 54 deletions

View file

@@ -23,6 +23,7 @@ dependencies = [
"hyperopt",
"ipykernel",
"jupyter",
"lightning>=2.0.0",
"matplotlib",
"mldesigner",
"mlflow",
@@ -38,7 +39,6 @@ dependencies = [
"psutil",
"pydata-sphinx-theme==0.13.1",
"pytest",
"pytorch-lightning>=2.0.0",
"pyunpack",
"pyyaml",
"ray>=1.0.0",
@@ -76,7 +76,7 @@ extras_require = {}
extras_require["cv"] = filter_dependencies(
"gorilla",
"opencv-python",
"pytorch-lightning",
"lightning",
"scikit-learn",
"torchvision",
)
@@ -121,7 +121,7 @@ extras_require["aml"] = filter_dependencies(
"matplotlib",
"mldesigner",
"mlflow",
"pytorch-lightning",
"lightning",
"torchvision",
)
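For context (not part of this diff): the swap from `pytorch-lightning` to `lightning` tracks Lightning 2.x, where the unified `lightning` package provides the former `pytorch_lightning` API under the `lightning.pytorch` namespace. A quick sanity check might look like:

```python
# Lightning >= 2.0: the unified "lightning" package exposes the familiar
# PyTorch Lightning API under the lightning.pytorch namespace.
import lightning
import lightning.pytorch as pl  # previously: import pytorch_lightning as pl

print(lightning.__version__)  # expect 2.x after this dependency change
print(pl.Trainer)             # Trainer / LightningModule live here now
```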

tasks/face_segmentation/.vscode/launch.json vendored
View file

@@ -16,7 +16,6 @@
"f85154f6",
"--cleanup_stale_pods",
"60",
"--no_quantization",
"--working",
"~/snpe/experiment1",
]

View file

@@ -41,13 +41,12 @@ Then to get the ball rolling create a temp folder and run this:
```
mkdir -p ~/experiment
cd ~/experiment
python ~/git/snpe_runner/azure/runner.py
python ~/git/snpe_runner/azure/runner.py --working ~/experiment
```
This will monitor the Azure blob store for new work to do, and run those jobs in
priority order. If you also provide a `--device` option pointing to the `adb device` for a Qualcomm 888 Dev Board then it will also run the quantized models
on that device and report the performance and F1 score results.
To setup a quantization only runner in the cloud you can pass `--no_quantization` argument, this is what
we do in the quantization docker image.
If you set up a quantization-only runner in the cloud using the `docker/quantizer` image, you can pass the `--no_quantization` argument when you have a `--device` so that the local runs do not do quantization.
This way the Linux machine driving the local Qualcomm devices is not overloaded with quantization work.

View file

@@ -1,14 +1,7 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
# This script monitors an Azure blob store using connection string defined
# in the environmentvariable "FaceSyntheticsModelStorageAccount".
# When a file named "go" shows up in a folder containing a new model to test
# it downloads the model, runs the F1 score check with 10,000 images
# and the benchmark and posts the 2 results .csv files
# back to the same folder. The .csv files are named:
# - test_results.csv - for the F1 score.
# - benchmark_stats_model.csv - for benchmark perf numbers.
# See Readme.md
import argparse
import json
import os
@@ -103,6 +96,8 @@ def check_device(device, snpe_root):
def check_dataset(shape, name, test_size):
_, w, h, c = shape
img_size = (w, h, 3)
test = os.path.join('data', name)
if os.path.isdir(test):
s = read_shape(test)
@@ -116,7 +111,7 @@ def check_dataset(shape, name, test_size):
rmtree(test)
if not os.path.isdir(test):
create_dataset(dataset, name, shape, test_size)
create_dataset(dataset, name, img_size, test_size)
save_shape(test, shape)
@@ -497,8 +492,6 @@ def run_model(name, snpe_root, dataset, conn_string, use_device, benchmark_only,
log(f"==> running {prop} test using model {model}")
input_size = tuple(input_shape)[0:2] # e.g. (256,256)
# copy model to the device.
if prop != 'f1_onnx':
# now that we have the shape, we can create the appropriate quant and test
@@ -520,7 +513,7 @@ def run_model(name, snpe_root, dataset, conn_string, use_device, benchmark_only,
try:
use_pillow = 'use_pillow' in entity and entity['use_pillow']
test_results, chart, f1score = get_metrics(input_size, False, dataset, snpe_output_dir, use_pillow)
test_results, chart, f1score = get_metrics(input_shape, False, dataset, snpe_output_dir, use_pillow)
except Exception as ex:
entity['status'] = 'error'
entity['error'] = str(ex)

View file

@@ -107,5 +107,5 @@ if __name__ == '__main__':
if os.path.isdir(dst_root):
rmtree(dst_root)
create_dataset(dataset, dst_root, 'quant', [dim, dim], count, transpose)
create_dataset(dataset, dst_root, 'test', [dim, dim], count, transpose)
create_dataset(dataset, dst_root, 'quant', (dim, dim), count, transpose)
create_dataset(dataset, dst_root, 'test', (dim, dim), count, transpose)

View file

@@ -38,7 +38,7 @@ def main():
zip_ref.extractall(data_dir)
print('Converting the .png images to SNPE quantization .bin files...')
create_dataset(data_dir, output_dir, 'quant', [256, 256, 3], 1000)
create_dataset(data_dir, output_dir, 'quant', (256, 256), 1000)
for name in os.listdir(output_dir):
print(name)
@@ -48,4 +48,4 @@ def main():
if __name__ == "__main__":
main()
main()

View file

@@ -1,24 +1,25 @@
# Readme
This folder contains code that automates the testing of ONNX models across one or more machines that are connected via
USB to Qualcomm 888 boards. Many thanks to Yatao Zhong for the original device code included in this test suite.
USB to Qualcomm 888 boards.
The code is organized into:
1. [SNPE Device Code](snpe/readme.md) that knows how to use the Qualcomm SNPE SDK to talk to the device, convert ONNX
models to .dlc, quantize them, and test them on the board using the Android `adb` tool.
1. [SNPE Device Code](snpe/readme.md) that knows how to use the Qualcomm Neural Processing SDK to talk to the device,
convert ONNX models to .dlc, quantize them, and test them on the board using the Android `adb` tool.
1. [Azure Code](azure/readme.md) that talks to a configured Azure storage account for uploading models to test,
downloading them, uploading test results, and keeping an Azure table "status" that summarizes results so far.
downloading them, uploading test results, and keeping an Azure table "status" that summarizes results of all your
models.
1. [Docker](docker/readme.md) scripts for setting up your Azure account and optionally creating a docker image for
running in an Azure Kubernetes cluster.
1. [Docker](docker/quantizer/readme.md) scripts for setting up your Azure account and optionally creating a docker image
for running in an Azure Kubernetes cluster to do model quantization using the Qualcomm Neural Processing SDK.
Quantization is time-consuming, so having an elastic scale speeds things up a lot.
1. [Notebooks](notebook/gallery_performance.md) contains a Jupyter Notebook that can visualize the results from the
Azure table.
Azure "status" table.
Both are based on Python, so it is best if you setup a new Conda Python environment for Python 3.6 with the
`requirements.txt` included here using:
It is best if you set up a new Conda Python environment for Python 3.10 with the `requirements.txt` included here using:
```shell
pip install -r requirements.txt
@@ -35,8 +36,9 @@ including a Kubernetes cluster setup for quantization (see [docker/quantizer](do
![system](images/system.png)
Each instance of `runner.py` looks for work, and executes it in priority order where the prioritization is defined by
the `find_work_prioritized` function in the runner. This prioritization maps to the columns of the status table as
follows:
the `find_work_prioritized` function in the runner. This script is completely restartable, and can distribute the work
across multiple instances of the runner script. Each instance will pick up where a previous one left off based on what
it finds in your "status" Azure table. The prioritization maps to the columns of the status table as follows:
1. **macs:** convert to .dlc and post Macs score and `snpe-dlc-viewer` output and do model quantization (runs on Linux) - priority 20
1. **total_inference_avg** run `snpe_bench.py` with quantized model on Qualcomm device DSP - priority 30
@@ -57,10 +59,12 @@ that you want to bump to the top of the list.
Notice some of the above jobs can run on Linux and do not require a Qualcomm device. So in order to maximize throughput on
machines that do have a Qualcomm device you can allocate other Linux machines with no Qualcomm devices to do the other
work, namely, converting models, quantizing them, and running the `onnxruntime` test set.
work, namely, converting models, quantizing them, and running the `f1_onnx` test.
Folks across your team can use the `azure/upload.py` to submit jobs and let them run. You can use `status.py` to
monitor progress or look at the Azure `status` table. Various status messages are posted there so you can see which
machine is doing what and is in what stage of the job.
Folks across your team can use the `azure/upload.py` script to submit jobs and let them run, or they can automate that as
shown in the `RemoteAzureBenchmarkEvaluator` in the `search.py` script.
You can use `status.py` to monitor progress or look at the Azure `status` table. Various status messages are posted
there so you can see which machine is doing what and is in what stage of the job.
Next you can go to the `notebook` page and get some pretty pictures of your Pareto Curves.
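For illustration only, here is a rough sketch, not code from this repo, of peeking at that status table with the `azure-data-tables` SDK. It assumes the connection string is in the `FaceSyntheticsModelStorageAccount` environment variable and that the table is literally named `status`, and it only mimics the two priority values mentioned above (the real `find_work_prioritized` in `runner.py` does considerably more):

```python
import os
from azure.data.tables import TableClient

# hypothetical snippet: list entries roughly the way the runner prioritizes
# work (lower number = higher priority)
conn_str = os.environ["FaceSyntheticsModelStorageAccount"]
table = TableClient.from_connection_string(conn_str, table_name="status")

def priority(entity):
    if not entity.get("macs"):
        return 20    # still needs .dlc conversion and quantization
    if not entity.get("total_inference_avg"):
        return 30    # still needs snpe_bench.py on the Qualcomm DSP
    return 1000      # nothing left to do for this model

for e in sorted(table.list_entities(), key=priority):
    print(priority(e), e["RowKey"], e.get("status"))
```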

View file

@@ -106,5 +106,5 @@ if __name__ == '__main__':
else:
transpose = None
create_dataset(dataset, 'quant', [dim, dim], count, transpose)
create_dataset(dataset, 'test', [dim, dim], count, transpose)
create_dataset(dataset, 'quant', (dim, dim), count, transpose)
create_dataset(dataset, 'test', (dim, dim), count, transpose)

View file

@@ -46,7 +46,9 @@ def _get_dataset_gt(img_name, dataset, img_shape, use_pillow=False):
return gt_seg
def show_output(img_shape, transpose, dataset, outputs):
def show_output(input_shape, transpose, dataset, outputs):
_, w, h, c = input_shape
img_shape = (w, h)
output_list = [x for x in os.listdir(outputs) if x.endswith('.raw')]
output_list.sort()
for out_f in output_list:
@@ -93,7 +95,7 @@ def get_confusion_matrix(gt_label, pred_label, valid_mask, num_classes):
return confusion_matrix
def get_metrics(img_shape, transpose, dataset, outputs, use_pillow=False):
def get_metrics(input_shape, transpose, dataset, outputs, use_pillow=False):
output_list = [x for x in os.listdir(outputs) if x.endswith('.raw')]
output_list.sort()
@@ -103,7 +105,8 @@ def get_metrics(img_shape, transpose, dataset, outputs, use_pillow=False):
print(f"Collecting metrics on {len(output_list)} output .raw files...")
num_classes = 19
_, width, height, num_classes = input_shape
img_shape = (width, height)
confusion_matx = None
bins = int(1e6)
@@ -121,9 +124,9 @@ def get_metrics(img_shape, transpose, dataset, outputs, use_pillow=False):
full_path = os.path.join(outputs, out_f)
logits = np.fromfile(full_path, dtype=np.float32)
size = np.product(logits.shape)
num_classes = int(size / (img_shape[0] * img_shape[1]))
if num_classes < 18:
raise Exception(f"Result {out_f} has unexpected number of predictions {num_classes}")
found_classes = int(size / (img_shape[0] * img_shape[1]))
if found_classes != num_classes:
raise Exception(f"Result {out_f} has unexpected number of predictions {found_classes}, expecting {num_classes}")
if transpose:
logits = logits.reshape((num_classes, img_shape[0], img_shape[1])).transpose(transpose)
@@ -238,8 +241,8 @@ if __name__ == '__main__':
default='snpe_output')
parser.add_argument('--transpose', '-t', help='Transpose channels by (1,2,0)', action="store_true")
parser.add_argument('--pillow', help="Resize images using Pillow instead of numpy", action="store_true")
parser.add_argument('--image_shape', help="Resize images this size, must match the size of the output images " +
"(default '256,256')")
parser.add_argument('--input_shape', help="Resize images this size, must match the shape of the model output " +
"(default '1,256,256,19')")
args = parser.parse_args()
use_pillow = args.pillow
@@ -263,11 +266,11 @@ if __name__ == '__main__':
print("Experiment 'output' dir not found: " + output_dir)
sys.exit(1)
image_shape = (256, 256)
if args.image_shape:
image_shape = tuple(eval(args.image_shape))
input_shape = (1, 256, 256, 19)
if args.input_shape:
input_shape = tuple(eval(args.input_shape))
if args.show:
show_output(image_shape, transpose, dataset, output_dir)
show_output(input_shape, transpose, dataset, output_dir)
else:
get_metrics(image_shape, transpose, dataset, output_dir, use_pillow)
get_metrics(input_shape, transpose, dataset, output_dir, use_pillow)
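To make the new `input_shape` convention concrete, here is a small self-contained sketch (hypothetical values, not code from this commit) of how a `(1, H, W, C)` shape is unpacked into the image size and the expected class count, mirroring the `get_metrics` changes above:

```python
import numpy as np

# NHWC shape of the model output, e.g. 256x256 images with 19 classes
input_shape = (1, 256, 256, 19)
_, width, height, num_classes = input_shape
img_shape = (width, height)

# stand-in for the logits read from one .raw output file
logits = np.zeros(width * height * num_classes, dtype=np.float32)

# the class count implied by the file size must match the model output
found_classes = int(logits.size / (img_shape[0] * img_shape[1]))
if found_classes != num_classes:
    raise Exception(f"unexpected number of predictions {found_classes}, "
                    f"expecting {num_classes}")

# reshape to (H, W, C); pass a transpose such as (1, 2, 0) when the raw
# buffer is channel-first, as collect_metrics.py does with --transpose
predictions = logits.reshape((img_shape[0], img_shape[1], num_classes))
print(predictions.shape)
```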