This commit is contained in:
Chris Lovett 2023-04-04 18:10:48 -07:00
Родитель 4edf5a73df
Коммит b76d7855a8
9 изменённых файлов: 52 добавлений и 54 удалений

Просмотреть файл

@ -23,6 +23,7 @@ dependencies = [
"hyperopt", "hyperopt",
"ipykernel", "ipykernel",
"jupyter", "jupyter",
"lightning>=2.0.0",
"matplotlib", "matplotlib",
"mldesigner", "mldesigner",
"mlflow", "mlflow",
@ -38,7 +39,6 @@ dependencies = [
"psutil", "psutil",
"pydata-sphinx-theme==0.13.1", "pydata-sphinx-theme==0.13.1",
"pytest", "pytest",
"pytorch-lightning>=2.0.0",
"pyunpack", "pyunpack",
"pyyaml", "pyyaml",
"ray>=1.0.0", "ray>=1.0.0",
@ -76,7 +76,7 @@ extras_require = {}
extras_require["cv"] = filter_dependencies( extras_require["cv"] = filter_dependencies(
"gorilla", "gorilla",
"opencv-python", "opencv-python",
"pytorch-lightning", "lightning",
"scikit-learn", "scikit-learn",
"torchvision", "torchvision",
) )
@ -121,7 +121,7 @@ extras_require["aml"] = filter_dependencies(
"matplotlib", "matplotlib",
"mldesigner", "mldesigner",
"mlflow", "mlflow",
"pytorch-lightning", "lightning",
"torchvision", "torchvision",
) )

1
tasks/face_segmentation/.vscode/launch.json поставляемый
Просмотреть файл

@ -16,7 +16,6 @@
"f85154f6", "f85154f6",
"--cleanup_stale_pods", "--cleanup_stale_pods",
"60", "60",
"--no_quantization",
"--working", "--working",
"~/snpe/experiment1", "~/snpe/experiment1",
] ]

Просмотреть файл

@ -41,13 +41,12 @@ Then to get the ball rolling create a temp folder and run this:
``` ```
mkdir -p ~/experiment mkdir -p ~/experiment
cd ~/experiment python ~/git/snpe_runner/azure/runner.py --working ~/experiment
python ~/git/snpe_runner/azure/runner.py
``` ```
This will monitor the Azure blob store for new work to do, and run those jobs in This will monitor the Azure blob store for new work to do, and run those jobs in
priority order. If you also provide a `--device` option pointing to the `adb device` for a Qualcomm 888 Dev Board then it will also run the quantized models priority order. If you also provide a `--device` option pointing to the `adb device` for a Qualcomm 888 Dev Board then it will also run the quantized models
on that device and report the performance and F1 score results. on that device and report the performance and F1 score results.
To set up a quantization-only runner in the cloud you can pass the `--no_quantization` argument; this is what If you set up a quantization-only runner in the cloud using the `docker/quantizer` image, you can pass the `--no_quantization` argument when you have a `--device` so that the local runs do not do quantization.
we do in the quantization docker image. This way the Linux machine driving the local Qualcomm devices is not overloaded with quantization work.

Просмотреть файл

@ -1,14 +1,7 @@
# Copyright (c) Microsoft Corporation. # Copyright (c) Microsoft Corporation.
# Licensed under the MIT license. # Licensed under the MIT license.
# This script monitors an Azure blob store using connection string defined # See Readme.md
# in the environment variable "FaceSyntheticsModelStorageAccount".
# When a file named "go" shows up in a folder containing a new model to test
# it downloads the model, runs the F1 score check with 10,000 images
# and the benchmark and posts the 2 results .csv files
# back to the same folder. The .csv files are named:
# - test_results.csv - for the F1 score.
# - benchmark_stats_model.csv - for benchmark perf numbers.
import argparse import argparse
import json import json
import os import os
@ -103,6 +96,8 @@ def check_device(device, snpe_root):
def check_dataset(shape, name, test_size): def check_dataset(shape, name, test_size):
_, w, h, c = shape
img_size = (w, h, 3)
test = os.path.join('data', name) test = os.path.join('data', name)
if os.path.isdir(test): if os.path.isdir(test):
s = read_shape(test) s = read_shape(test)
@ -116,7 +111,7 @@ def check_dataset(shape, name, test_size):
rmtree(test) rmtree(test)
if not os.path.isdir(test): if not os.path.isdir(test):
create_dataset(dataset, name, shape, test_size) create_dataset(dataset, name, img_size, test_size)
save_shape(test, shape) save_shape(test, shape)
@ -497,8 +492,6 @@ def run_model(name, snpe_root, dataset, conn_string, use_device, benchmark_only,
log(f"==> running {prop} test using model {model}") log(f"==> running {prop} test using model {model}")
input_size = tuple(input_shape)[0:2] # e.g. (256,256)
# copy model to the device. # copy model to the device.
if prop != 'f1_onnx': if prop != 'f1_onnx':
# now that we have the shape, we can create the appropriate quant and test # now that we have the shape, we can create the appropriate quant and test
@ -520,7 +513,7 @@ def run_model(name, snpe_root, dataset, conn_string, use_device, benchmark_only,
try: try:
use_pillow = 'use_pillow' in entity and entity['use_pillow'] use_pillow = 'use_pillow' in entity and entity['use_pillow']
test_results, chart, f1score = get_metrics(input_size, False, dataset, snpe_output_dir, use_pillow) test_results, chart, f1score = get_metrics(input_shape, False, dataset, snpe_output_dir, use_pillow)
except Exception as ex: except Exception as ex:
entity['status'] = 'error' entity['status'] = 'error'
entity['error'] = str(ex) entity['error'] = str(ex)

Просмотреть файл

@ -107,5 +107,5 @@ if __name__ == '__main__':
if os.path.isdir(dst_root): if os.path.isdir(dst_root):
rmtree(dst_root) rmtree(dst_root)
create_dataset(dataset, dst_root, 'quant', [dim, dim], count, transpose) create_dataset(dataset, dst_root, 'quant', (dim, dim), count, transpose)
create_dataset(dataset, dst_root, 'test', [dim, dim], count, transpose) create_dataset(dataset, dst_root, 'test', (dim, dim), count, transpose)

Просмотреть файл

@ -38,7 +38,7 @@ def main():
zip_ref.extractall(data_dir) zip_ref.extractall(data_dir)
print('Converting the .png images to SNPE quantization .bin files...') print('Converting the .png images to SNPE quantization .bin files...')
create_dataset(data_dir, output_dir, 'quant', [256, 256, 3], 1000) create_dataset(data_dir, output_dir, 'quant', (256, 256), 1000)
for name in os.listdir(output_dir): for name in os.listdir(output_dir):
print(name) print(name)

Просмотреть файл

@ -1,24 +1,25 @@
# Readme # Readme
This folder contains code that automates the testing of ONNX models across one or more machines that are connected via This folder contains code that automates the testing of ONNX models across one or more machines that are connected via
USB to Qualcomm 888 boards. Many thanks to Yatao Zhong for the original device code included in this test suite. USB to Qualcomm 888 boards.
The code is organized into: The code is organized into:
1. [SNPE Device Code](snpe/readme.md) that knows how to use the Qualcomm SNPE SDK to talk to the device, convert ONNX 1. [SNPE Device Code](snpe/readme.md) that knows how to use the Qualcomm Neural Processing SDK to talk to the device,
models to .dlc, quantize them, and test them on the board using the Android `adb` tool. convert ONNX models to .dlc, quantize them, and test them on the board using the Android `adb` tool.
1. [Azure Code](azure/readme.md) that talks to a configured Azure storage account for uploading models to test, 1. [Azure Code](azure/readme.md) that talks to a configured Azure storage account for uploading models to test,
downloading them, uploading test results, and keeping an Azure table "status" that summarizes results so far. downloading them, uploading test results, and keeping an Azure table "status" that summarizes results of all your
models.
1. [Docker](docker/readme.md) scripts for setting up your Azure account and optionally creating a docker image for 1. [Docker](docker/quantizer/readme.md) scripts for setting up your Azure account and optionally creating a docker image
running in an Azure Kubernetes cluster. for running in an Azure Kubernetes cluster to do model quantization using the Qualcomm Neural Processing SDK.
Quantization is time consuming so having an elastic scale speeds things up a lot.
1. [Notebooks](notebook/gallery_performance.md) contains a Jupyter Notebook that can visualize the results from the 1. [Notebooks](notebook/gallery_performance.md) contains a Jupyter Notebook that can visualize the results from the
Azure table. Azure "status" table.
Both are based on Python, so it is best if you set up a new Conda Python environment for Python 3.6 with the It is best if you set up a new Conda Python environment for Python 3.10 with the `requirements.txt` included here using:
`requirements.txt` included here using:
```shell ```shell
pip install -r requirements.txt pip install -r requirements.txt
@ -35,8 +36,9 @@ including a Kubernetes cluster setup for quantization (see [docker/quantizer](do
![system](images/system.png) ![system](images/system.png)
Each instance of `runner.py` looks for work, and executes it in priority order where the prioritization is defined by Each instance of `runner.py` looks for work, and executes it in priority order where the prioritization is defined by
the `find_work_prioritized` function in the runner. This prioritization maps to the columns of the status table as the `find_work_prioritized` function in the runner. This script is completely restartable, and can distribute the work
follows: across multiple instances of the runner script. Each instance will pick up where a previous one left off based on what
it finds in your "status" Azure table. The prioritization maps to the columns of the status table as follows:
1. **macs:** convert to .dlc and post Macs score and `snpe-dlc-viewer` output and do model quantization (runs on Linux) - priority 20 1. **macs:** convert to .dlc and post Macs score and `snpe-dlc-viewer` output and do model quantization (runs on Linux) - priority 20
1. **total_inference_avg** run `snpe_bench.py` with quantized model on Qualcomm device DSP - priority 30 1. **total_inference_avg** run `snpe_bench.py` with quantized model on Qualcomm device DSP - priority 30
@ -57,10 +59,12 @@ that you want to bump to the top of the list.
Notice some of the above jobs can run on Linux and do not require a Qualcomm device. So in order to maximize throughput on Notice some of the above jobs can run on Linux and do not require a Qualcomm device. So in order to maximize throughput on
machines that do have Qualcomm devices you can allocate other Linux machines with no Qualcomm devices to do the other machines that do have Qualcomm devices you can allocate other Linux machines with no Qualcomm devices to do the other
work, namely, converting models, quantizing them, and running the `onnxruntime` test set. work, namely, converting models, quantizing them, and running the `f1_onnx` test.
Folks across your team can use the `azure/upload.py` to submit jobs and let them run. You can use `status.py` to Folks across your team can use the `azure/upload.py` to submit jobs and let them run, or they can automate that as
monitor progress or look at the Azure `status` table. Various status messages are posted there so you can see which shown in the `RemoteAzureBenchmarkEvaluator` in the `search.py` script.
machine is doing what and is in what stage of the job.
You can use `status.py` to monitor progress or look at the Azure `status` table. Various status messages are posted
there so you can see which machine is doing what and is in what stage of the job.
Next you can go to the `notebook` page and get some pretty pictures of your Pareto Curves. Next you can go to the `notebook` page and get some pretty pictures of your Pareto Curves.

Просмотреть файл

@ -106,5 +106,5 @@ if __name__ == '__main__':
else: else:
transpose = None transpose = None
create_dataset(dataset, 'quant', [dim, dim], count, transpose) create_dataset(dataset, 'quant', (dim, dim), count, transpose)
create_dataset(dataset, 'test', [dim, dim], count, transpose) create_dataset(dataset, 'test', (dim, dim), count, transpose)

Просмотреть файл

@ -46,7 +46,9 @@ def _get_dataset_gt(img_name, dataset, img_shape, use_pillow=False):
return gt_seg return gt_seg
def show_output(img_shape, transpose, dataset, outputs): def show_output(input_shape, transpose, dataset, outputs):
_, w, h, c = input_shape
img_shape = (w, h)
output_list = [x for x in os.listdir(outputs) if x.endswith('.raw')] output_list = [x for x in os.listdir(outputs) if x.endswith('.raw')]
output_list.sort() output_list.sort()
for out_f in output_list: for out_f in output_list:
@ -94,7 +96,7 @@ def get_confusion_matrix(gt_label, pred_label, valid_mask, num_classes):
return confusion_matrix return confusion_matrix
def get_metrics(img_shape, transpose, dataset, outputs, use_pillow=False): def get_metrics(input_shape, transpose, dataset, outputs, use_pillow=False):
output_list = [x for x in os.listdir(outputs) if x.endswith('.raw')] output_list = [x for x in os.listdir(outputs) if x.endswith('.raw')]
output_list.sort() output_list.sort()
@ -104,7 +106,8 @@ def get_metrics(img_shape, transpose, dataset, outputs, use_pillow=False):
print(f"Collecting metrics on {len(output_list)} output .raw files...") print(f"Collecting metrics on {len(output_list)} output .raw files...")
num_classes = 19 _, width, height, num_classes = input_shape
img_shape = (width, height)
confusion_matx = None confusion_matx = None
bins = int(1e6) bins = int(1e6)
@ -122,9 +125,9 @@ def get_metrics(img_shape, transpose, dataset, outputs, use_pillow=False):
full_path = os.path.join(outputs, out_f) full_path = os.path.join(outputs, out_f)
logits = np.fromfile(full_path, dtype=np.float32) logits = np.fromfile(full_path, dtype=np.float32)
size = np.product(logits.shape) size = np.product(logits.shape)
num_classes = int(size / (img_shape[0] * img_shape[1])) found_classes = int(size / (img_shape[0] * img_shape[1]))
if num_classes < 18: if found_classes != num_classes:
raise Exception(f"Result {out_f} has unexpected number of predictions {num_classes}") raise Exception(f"Result {out_f} has unexpected number of predictions {found_classes}, expecting {num_classes}")
if transpose: if transpose:
logits = logits.reshape((num_classes, img_shape[0], img_shape[1])).transpose(transpose) logits = logits.reshape((num_classes, img_shape[0], img_shape[1])).transpose(transpose)
@ -239,8 +242,8 @@ if __name__ == '__main__':
default='snpe_output') default='snpe_output')
parser.add_argument('--transpose', '-t', help='Transpose channels by (1,2,0)', action="store_true") parser.add_argument('--transpose', '-t', help='Transpose channels by (1,2,0)', action="store_true")
parser.add_argument('--pillow', help="Resize images using Pillow instead of numpy", action="store_true") parser.add_argument('--pillow', help="Resize images using Pillow instead of numpy", action="store_true")
parser.add_argument('--image_shape', help="Resize images this size, must match the size of the output images " + parser.add_argument('--input_shape', help="Resize images this size, must match the shape of the model output " +
"(default '256,256')") "(default '1,256,256,19')")
args = parser.parse_args() args = parser.parse_args()
use_pillow = args.pillow use_pillow = args.pillow
@ -264,11 +267,11 @@ if __name__ == '__main__':
print("Experiment 'output' dir not found: " + output_dir) print("Experiment 'output' dir not found: " + output_dir)
sys.exit(1) sys.exit(1)
image_shape = (256, 256) input_shape = (1, 256, 256, 19)
if args.image_shape: if args.input_shape:
image_shape = tuple(eval(args.image_shape)) input_shape = tuple(eval(args.image_shape))
if args.show: if args.show:
show_output(image_shape, transpose, dataset, output_dir) show_output(input_shape, transpose, dataset, output_dir)
else: else:
get_metrics(image_shape, transpose, dataset, output_dir, use_pillow) get_metrics(input_shape, transpose, dataset, output_dir, use_pillow)