Mirror of https://github.com/microsoft/archai.git
remove old notebook (#231)

* remove old notebook
* store onnx latency; allow aml partial training with no snapdragon mode
* fix docker file now that aml branch is merged
* fix bug in reset; add notebook
* add link to notebook
This commit is contained in:
Parent: aae38db1a5
Commit: fc031eb447
@@ -390,7 +390,8 @@ class ArchaiStore:
         self._reset(e, except_list)
 
     def _reset(self, e, except_list=[]):
-        if self.is_locked_by_other(e):
+        name = e['name']
+        if self.is_locked_by_other(name):
             node = self.get_lock(e)
             print(f"Skipping {e['RowKey']} as it is locked by {node}")
         elif self._reset_metrics(e, except_list):
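The "fix bug in reset" item in the commit message refers to `is_locked_by_other` being handed the whole entity instead of its name. For orientation, a minimal usage sketch of this reset path (the `ArchaiStore` constructor arguments and entity name below are hypothetical; only `reset` and its except list appear in this diff):

    # minimal sketch, assuming an ArchaiStore bound to an Azure storage
    # account; the credentials and entity name are hypothetical
    from archai.common.store import ArchaiStore

    store = ArchaiStore('mystorageaccount', 'mystoragekey')
    # clear a model's metrics but preserve the listed keys; entities
    # locked by another node are skipped (now looked up by name)
    store.reset('id_abc123', ['benchmark_only', 'model_date'])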
@@ -72,6 +72,15 @@ class RemoteAzureBenchmarkEvaluator(AsyncModelEvaluator):
         _ = self.store.get_existing_status(unknown_id)
         _ = self.store.list_blobs(unknown_id)
 
+    def _reset(self, entity):
+        changed = False
+        for k in ['mean', 'macs', 'params', 'stdev', 'total_inference_avg', 'error']:
+            if k in entity:
+                del entity[k]
+                changed = True
+        if changed:
+            self.store.update_status_entity(entity)
+
     @overrides
     def send(self, arch: ArchaiModel, budget: Optional[float] = None) -> None:
         # bug in azure ml sdk requires blob store folder names not begin with digits, so we prefix with 'id_'
@@ -88,8 +97,8 @@ class RemoteAzureBenchmarkEvaluator(AsyncModelEvaluator):
                 print(f"Entry for {archid} already exists with {self.metric_key} = {value}")
                 return
             else:
-                # complete but missing the mean, so reset everything so we can try again below.
-                self.store.reset(archid, ['benchmark_only', 'model_date'])
+                # complete but missing the mean, so reset the benchmark metrics so we can try again.
+                self._reset(entity)
         else:
             # job is still running, let it continue
             if self.verbose:
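Together, the two hunks above change what a retry wipes out: `store.reset` deletes every metric key except those listed, while the new `_reset` removes only the six benchmark keys, so other recorded values survive a re-queue. A condensed sketch of the resulting branch in `send` (names follow the diff; the enclosing conditionals are assumed from context):

    # condensed sketch: 'entity' is this architecture's status row and
    # value = entity.get(self.metric_key); both assumed in scope
    if value is not None:
        print(f"Entry for {archid} already exists with {self.metric_key} = {value}")
        return
    else:
        # complete but missing the metric: clear only the benchmark keys
        # so the benchmark can run again without losing other bookkeeping
        self._reset(entity)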
@@ -129,7 +129,7 @@ def main(output_dir: Path, experiment_name: str, seed: int):
         ml_client,
         image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest",
         conda_file="conda.yaml",
-        version='1.0.20')
+        version='1.0.21')
     environment_name = f"{archai_job_env.name}:{archai_job_env.version}"
 
     # Register the datastore with AML
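The bump from 1.0.20 to 1.0.21 registers a new environment revision, which is how AML is told to rebuild the conda environment after its dependencies change. A hedged sketch of equivalent registration with the plain azure-ai-ml v2 SDK (the environment name and `MLClient` setup are assumptions; image, conda_file, and version come from this hunk):

    # hedged sketch using the azure-ai-ml v2 SDK; ml_client is assumed to
    # be an MLClient built elsewhere, and the name is hypothetical
    from azure.ai.ml.entities import Environment

    archai_job_env = Environment(
        name='aml-training-env',
        image='mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest',
        conda_file='conda.yaml',
        version='1.0.21')
    archai_job_env = ml_client.environments.create_or_update(archai_job_env)
    environment_name = f"{archai_job_env.name}:{archai_job_env.version}"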
@@ -63,7 +63,7 @@ RUN wget -O azcopy_v10.tar.gz https://aka.ms/downloadazcopy-v10-linux && tar -xf
 # this echo is a trick to bypass docker build cache.
 # simply change the echo string every time you want docker build to pull down new bits.
 RUN echo '04/18/2023 12:22 PM' >/dev/null && git clone https://github.com/microsoft/archai.git
-RUN cd archai && git checkout clovett/aml && pip install -e .[dev]
+RUN cd archai && pip install -e .[dev]
 
 RUN echo "using this pip version: " && which pip
 RUN echo "using this python version: " && which python
File diff suppressed because one or more lines are too long
@@ -1,103 +0,0 @@
-import numpy as np
-from scipy import interpolate
-from scipy import spatial
-from functools import reduce
-
-
-def spline_fit(xs, ys):
-    indexes = np.argsort(np.array(xs), axis=0)
-    ixs = []
-    iys = []
-    for i in indexes:
-        ixs += [xs[i]]
-        iys += [ys[i]]
-
-    f = interpolate.UnivariateSpline(ixs, iys)
-    f.set_smoothing_factor(0.1)
-    xn = np.linspace(ixs[0], ixs[-1], 100)
-    return [xn, f(xn)]
-
-
-def filter_(pts, pt):
-    """
-    Get all points in pts that are not Pareto dominated by the point pt
-    """
-    weakly_worse = (pts <= pt).all(axis=-1)
-    strictly_worse = (pts < pt).any(axis=-1)
-    return pts[~(weakly_worse & strictly_worse)]
-
-
-def get_pareto_undominated_by(pts1, pts2=None):
-    """
-    Return all points in pts1 that are not Pareto dominated
-    by any points in pts2
-    """
-    if pts2 is None:
-        pts2 = pts1
-    return reduce(filter_, pts2, pts1)
-
-
-def get_pareto_frontier(pts):
-    """
-    Iteratively filter points based on the convex hull heuristic
-    """
-    pareto_groups = []
-
-    # loop while there are points remaining
-    while pts.shape[0]:
-        # brute force if there are few points:
-        if pts.shape[0] < 10:
-            pareto_groups.append(get_pareto_undominated_by(pts))
-            break
-
-        # compute vertices of the convex hull
-        hull_vertices = spatial.ConvexHull(pts).vertices
-
-        # get corresponding points
-        hull_pts = pts[hull_vertices]
-
-        # get points in pts that are not convex hull vertices
-        nonhull_mask = np.ones(pts.shape[0], dtype=bool)
-        nonhull_mask[hull_vertices] = False
-        pts = pts[nonhull_mask]
-
-        # get points in the convex hull that are on the Pareto frontier
-        pareto = get_pareto_undominated_by(hull_pts)
-        pareto_groups.append(pareto)
-
-        # filter remaining points to keep those not dominated by
-        # Pareto points of the convex hull
-        pts = get_pareto_undominated_by(pts, pareto)
-
-    return np.vstack(pareto_groups)
-
-
-def pareto_curve(Xs, Ys):
-    # in our case faster is better so invert the X axis
-    pts = np.array(list(zip(Xs, Ys))) * np.array((-1, 1))
-    result = get_pareto_frontier(pts)
-    indices = []
-    for pt in result:
-        i = np.where(pts == pt)[0][0]
-        indices += [i]
-    return indices
-
-
-def get_pareto_edges(xs, ys):
-    left = None
-    top = None
-    bottom = None
-    right = None
-    for i in range(len(xs)):
-        x = xs[i]
-        y = ys[i]
-        if left is None or x < left:
-            left = x
-        if bottom is None or y < bottom:
-            bottom = y
-        if top is None or y > top:
-            top = y
-        if right is None or x > right:
-            right = x
-
-    return (left, bottom, right, top)
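The deleted module above backed the old notebook's Pareto plots. For reference, an illustrative call of its entry point, with the functions above in scope (the sample data is invented):

    # xs: lower-is-better metric (e.g. latency); ys: higher-is-better
    # metric (e.g. validation IOU); pareto_curve negates x internally
    xs = [5.0, 9.0, 12.0, 20.0]
    ys = [0.61, 0.72, 0.70, 0.75]
    indices = pareto_curve(xs, ys)                # -> [0, 1, 3] for this data
    frontier = [(xs[i], ys[i]) for i in indices]  # (12.0, 0.70) is dominated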
File diff suppressed because one or more lines are too long
@@ -24,9 +24,8 @@ creating a docker image for running in an Azure Kubernetes cluster to do model q
 the Qualcomm Neural Processing SDK. Quantization is time consuming so having an elastic scale speeds
 things up a lot.
 
-1. [Notebooks](notebook/gallery_performance.md) contains a Jupyter Notebook that can visualize the
-results from your Azure "status" table.
-
+1. [Notebook](notebooks/results.ipynb) a simple Jupyter notebook for visualizing the
+results in your Azure table.
 
 ## Workflow
@@ -0,0 +1,49 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+from typing import Any, Dict, List, Optional, Tuple, Union
+from overrides import overrides
+from archai.discrete_search.evaluators import AvgOnnxLatency
+from archai.discrete_search.api.archai_model import ArchaiModel
+from archai.common.store import ArchaiStore
+
+
+class AvgOnnxLatencyEvaluator(AvgOnnxLatency):
+    """Evaluate the average ONNX Latency (in seconds) of an architecture and store the result.
+    The latency is measured by running the model on random inputs and averaging the latency over
+    `num_trials` trials.
+
+    """
+
+    def __init__(
+        self,
+        input_shape: Union[Tuple[int, ...], List[Tuple[int, ...]]],
+        num_trials: Optional[int] = 1,
+        input_dtype: Optional[str] = "torch.FloatTensor",
+        rand_range: Optional[Tuple[float, float]] = (0.0, 1.0),
+        export_kwargs: Optional[Dict[str, Any]] = None,
+        device: Optional[str] = 'cpu',
+        inf_session_kwargs: Optional[Dict[str, Any]] = None,
+        store: ArchaiStore = None,
+        metric_key: str = 'onnx_latency'
+    ) -> None:
+        super(AvgOnnxLatencyEvaluator, self).__init__(
+            input_shape,
+            num_trials,
+            input_dtype,
+            rand_range,
+            export_kwargs,
+            device,
+            inf_session_kwargs)
+        self.store = store
+        self.metric_key = metric_key
+
+    @overrides
+    def evaluate(self, model: ArchaiModel, budget: Optional[float] = None) -> float:
+        result = super(AvgOnnxLatencyEvaluator, self).evaluate(model, budget)
+        if self.store is not None:
+            archid = f'id_{model.archid}'
+            e = self.store.get_status(archid)
+            e['status'] = 'complete'
+            e[self.metric_key] = result
+            self.store.merge_status_entity(e)
+        return result
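A hedged usage sketch of the new evaluator (only the constructor and `evaluate` signatures come from this file; the store construction, input shape, and model are assumptions):

    # sketch: measure average ONNX latency for one architecture and record
    # it in the Azure status table; credentials and model are hypothetical
    from archai.common.store import ArchaiStore
    from aml.training.onnx_latency import AvgOnnxLatencyEvaluator

    store = ArchaiStore('mystorageaccount', 'mystoragekey')
    evaluator = AvgOnnxLatencyEvaluator(
        input_shape=(1, 3, 256, 256),
        export_kwargs={'opset_version': 11},
        store=store)
    latency = evaluator.evaluate(model)  # model: an ArchaiModel from the search space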
@@ -92,9 +92,6 @@ aml:
       - matplotlib
       - mldesigner
       - mlflow
-      - onnx>=1.10.2
-      - onnxruntime>=1.10.0
-      - psutil
       - torch
       - torchvision
       - torchaudio
@@ -16,13 +16,14 @@ from archai.discrete_search.algos import (
     MoBananasSearch, EvolutionParetoSearch, LocalSearch,
     RandomSearch, RegularizedEvolutionSearch
 )
-from archai.discrete_search.evaluators import TorchNumParameters, AvgOnnxLatency, RayParallelEvaluator
+from archai.discrete_search.evaluators import TorchNumParameters, RayParallelEvaluator
 from archai.discrete_search.evaluators.remote_azure_benchmark import RemoteAzureBenchmarkEvaluator
 
 from search_space.hgnet import HgnetSegmentationSearchSpace
 from training.partial_training_evaluator import PartialTrainingValIOU
 from aml.training.aml_training_evaluator import AmlPartialTrainingEvaluator
 from aml.util.setup import configure_store
+from aml.training.onnx_latency import AvgOnnxLatencyEvaluator
 
 AVAILABLE_ALGOS = {
     'mo_bananas': MoBananasSearch,
@@ -100,24 +101,27 @@ def main():
         constraint=(1e6, max_parameters)
     )
 
+    aml_training = False
+    store = None
+    if 'aml' in config:
+        aml_config = config['aml']
+        experiment_name = aml_config.get('experiment_name', 'facesynthetics')
+        store: ArchaiStore = configure_store(aml_config)
+        aml_training = 'training_cluster' in aml_config
+
     # Adds a constrained objective on model latency so we don't pick models that are too slow.
     so.add_objective(
         'CPU ONNX Latency (s)',
-        AvgOnnxLatency(
-            input_shape=input_shape, export_kwargs={'opset_version': 11}
+        AvgOnnxLatencyEvaluator(
+            input_shape=input_shape, export_kwargs={'opset_version': 11}, store=store
         ),
         higher_is_better=False,
         compute_intensive=False,
         constraint=[0, max_latency]
     )
 
-    aml_training = False
-
     if target_name == 'snp':
         # Gets connection string from env variable
-        aml_config = config['aml']
-        experiment_name = aml_config.get('experiment_name', 'facesynthetics')
-        store: ArchaiStore = configure_store(aml_config)
         evaluator = RemoteAzureBenchmarkEvaluator(
             input_shape=input_shape,
             store=store,
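Note that `AvgOnnxLatencyEvaluator` tolerates `store=None` (the `if self.store is not None` guard in the new file above), so the latency objective still works when the config has no `aml` section. The wiring, condensed from the hunk above:

    # condensed from the diff: store stays None without an 'aml' config
    # section, and the evaluator then skips recording results
    store = configure_store(config['aml']) if 'aml' in config else None
    so.add_objective(
        'CPU ONNX Latency (s)',
        AvgOnnxLatencyEvaluator(
            input_shape=input_shape, export_kwargs={'opset_version': 11}, store=store),
        higher_is_better=False,
        compute_intensive=False,
        constraint=[0, max_latency])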
@@ -133,8 +137,6 @@ def main():
             compute_intensive=True
         )
 
-        aml_training = 'training_cluster' in aml_config
-
     if aml_training:
         # do the partial training on an AML gpu cluster
         partial_tr_obj = AmlPartialTrainingEvaluator(
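With `aml_training` now computed up front in `main()`, partial training on the AML GPU cluster no longer requires the Snapdragon (`snp`) target, which is what the commit message means by allowing "aml partial training with no snapdragon mode". The resulting control flow, condensed (the local fallback is an assumption based on the imports above):

    # condensed control flow after this commit, per the main() hunks above
    aml_training = 'aml' in config and 'training_cluster' in config['aml']
    if aml_training:
        # partial training runs on the AML GPU cluster even when target != 'snp'
        partial_tr_cls = AmlPartialTrainingEvaluator
    else:
        partial_tr_cls = PartialTrainingValIOU  # assumed local fallback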