Add perf check pipeline (#523)
## Describe your changes Add perf check pipeline. - 4 models supported: - Intel/bert-base-uncased-mrpc - microsoft/deberta-base-mnli - distilbert-base-uncased-finetuned-sst-2-english - roberta-large-mnli - The pipeline will be run at 6:00 am every Friday. - Each metric will be run 10 times and print the average. - You can manually run it by `python run_performance_check.py --model_name bert` - Pipeline link: https://aiinfra.visualstudio.com/Model%20optimization%20Toolkit/_build?definitionId=1265&_a=summary ## Checklist before requesting a review - [ ] Add unit tests for this change. - [ ] Make sure all tests can pass. - [ ] Update documents if necessary. - [ ] Format your code by running `pre-commit run --all-files` - [ ] Is this a user-facing change? If yes, give a description of this change to be included in the release notes. ## (Optional) Issue link
This commit is contained in:
Parent
b178e2f2c8
Commit
5cef4ddcc0
|
@ -0,0 +1,46 @@
|
|||
# Reusable job template for the scheduled model-performance pipeline.
#
# Parameters:
#   name       - caller-supplied job-group label (declared so callers may pass it).
#   model_name - default model alias; normally overridden per matrix entry.
#   pool       - agent pool the job runs on.
#   device     - 'cpu' or 'gpu'; selects the olive extras and the perf target device.
#   windows    - forwarded to `make clean` via the WINDOWS variable.
#   examples   - mapping of matrix entries (each sets model_name) inserted into strategy.matrix.
#
# NOTE: `name`, `windows` and `examples` were referenced/passed but previously
# undeclared; Azure DevOps rejects undeclared template parameters, so all of
# them are declared here.
parameters:
  name: ''
  model_name: ''
  pool: ''
  device: 'cpu'
  windows: False
  examples: {}

jobs:
- job: ${{ parameters.device }}_Model_Performance
  timeoutInMinutes: 300
  pool:
    name: ${{ parameters.pool }}
  strategy:
    matrix:
      # Each entry in `examples` becomes one matrix leg defining $(model_name).
      ${{ insert }}: ${{ parameters.examples }}
  variables:
    WINDOWS: ${{ parameters.windows }}
    runCodesignValidationInjection: false
    device: ${{ parameters.device }}

  steps:
  - task: UsePythonVersion@0
    inputs:
      versionSpec: 3.8
    displayName: Use Python 3.8

  # INSTALL_EXTRAS picks the cpu/gpu extras set matching the target device.
  - script: make install-olive PIPELINE=True INSTALL_EXTRAS=[$(device)]
    displayName: Install Olive

  # $(MODEL_NAME) resolves (case-insensitively) to the matrix leg's model_name.
  - script: make performance PIPELINE=True MODEL_NAME=$(MODEL_NAME) DEVICE=${{ parameters.device }}
    displayName: Run performance comparison

  - task: CredScan@3
    displayName: 'Run CredScan'
    inputs:
      debugMode: false
    continueOnError: true

  - task: ComponentGovernanceComponentDetection@0
    inputs:
      scanType: 'Register'
      verbosity: 'Verbose'
      alertWarningLevel: 'High'
    displayName: Component Detection

  # Always clean the agent workspace, even when earlier steps failed.
  - script: make clean WINDOWS=$(WINDOWS)
    condition: always()
    displayName: Clean remaining artifacts
|
@ -0,0 +1,44 @@
|
|||
# Performance-check pipeline: never triggered by pushes or PRs,
# only by the weekly schedule below.
trigger: none
pr: none

# Run every Friday at 06:00 UTC on main, even when nothing changed.
schedules:
- cron: 0 6 * * 5
  displayName: Scheduled Build
  branches:
    include:
    - main
  always: true

jobs:
# CPU legs: one matrix entry per supported model alias.
- template: job_templates/olive-performance-template.yaml
  parameters:
    name: Linux_CPU_CI
    pool: $(OLIVE_POOL_UBUNTU2004)
    windows: False
    device: cpu
    examples:
      bert:
        model_name: bert
      distilbert:
        model_name: distilbert
      deberta:
        model_name: deberta
      roberta_large:
        model_name: roberta_large

# GPU legs over the same set of models.
- template: job_templates/olive-performance-template.yaml
  parameters:
    name: Linux_GPU_CI
    pool: $(OLIVE_GPU_POOL_UBUNTU2004)
    windows: False
    device: gpu
    examples:
      bert:
        model_name: bert
      distilbert:
        model_name: distilbert
      deberta:
        model_name: deberta
      roberta_large:
        model_name: roberta_large
|
@ -0,0 +1,86 @@
|
|||
{
|
||||
"input_model":{
|
||||
"type": "PyTorchModel",
|
||||
"config": {
|
||||
"hf_config": {
|
||||
"model_name": "Intel/bert-base-uncased-mrpc",
|
||||
"task": "text-classification",
|
||||
"dataset": {
|
||||
"data_name":"glue",
|
||||
"subset": "mrpc",
|
||||
"split": "validation",
|
||||
"input_cols": ["sentence1", "sentence2"],
|
||||
"label_cols": ["label"],
|
||||
"batch_size": 1,
|
||||
"max_samples": 100
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"evaluators": {
|
||||
"common_evaluator": {
|
||||
"metrics":[
|
||||
{
|
||||
"name": "accuracy",
|
||||
"type": "accuracy",
|
||||
"backend": "huggingface_metrics",
|
||||
"sub_types": [
|
||||
{"name": "accuracy", "priority": 1, "goal": {"type": "max-degradation", "value": 0.01}}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "latency",
|
||||
"type": "latency",
|
||||
"sub_types": [
|
||||
{"name": "avg", "priority": 2, "goal": {"type": "percent-min-improvement", "value": 20}}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"passes": {
|
||||
"conversion": {
|
||||
"type": "OnnxConversion",
|
||||
"config": {
|
||||
"target_opset": 13
|
||||
}
|
||||
},
|
||||
"transformers_optimization": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "bert",
|
||||
"num_heads": 12,
|
||||
"hidden_size": 768,
|
||||
"float16": false
|
||||
}
|
||||
},
|
||||
"quantization": {
|
||||
"type": "OnnxQuantization",
|
||||
"config": {
|
||||
"data_config": "__input_model_data_config__"
|
||||
}
|
||||
},
|
||||
"perf_tuning": {
|
||||
"type": "OrtPerfTuning",
|
||||
"config": {
|
||||
"data_config": "__input_model_data_config__"
|
||||
}
|
||||
}
|
||||
},
|
||||
"engine": {
|
||||
"search_strategy": {
|
||||
"execution_order": "joint",
|
||||
"search_algorithm": "tpe",
|
||||
"search_algorithm_config": {
|
||||
"num_samples": 3,
|
||||
"seed": 0
|
||||
}
|
||||
},
|
||||
"clean_cache": true,
|
||||
"evaluator": "common_evaluator",
|
||||
"execution_providers": ["CPUExecutionProvider"],
|
||||
"cache_dir": "cache",
|
||||
"output_dir" : "models/bert_ptq"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,85 @@
|
|||
{
|
||||
"input_model":{
|
||||
"type": "PyTorchModel",
|
||||
"config": {
|
||||
"hf_config": {
|
||||
"model_name": "Intel/bert-base-uncased-mrpc",
|
||||
"task": "text-classification",
|
||||
"dataset": {
|
||||
"data_name":"glue",
|
||||
"subset": "mrpc",
|
||||
"split": "validation",
|
||||
"input_cols": ["sentence1", "sentence2"],
|
||||
"label_cols": ["label"],
|
||||
"batch_size": 1,
|
||||
"max_samples": 100
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"evaluators": {
|
||||
"common_evaluator": {
|
||||
"metrics":[
|
||||
{
|
||||
"name": "accuracy",
|
||||
"type": "accuracy",
|
||||
"backend": "huggingface_metrics",
|
||||
"sub_types": [
|
||||
{"name": "accuracy", "priority": 1, "goal": {"type": "max-degradation", "value": 0.01}}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "latency",
|
||||
"type": "latency",
|
||||
"sub_types": [
|
||||
{"name": "avg", "priority": 2, "goal": {"type": "percent-min-improvement", "value": 20}}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"passes": {
|
||||
"conversion": {
|
||||
"type": "OnnxConversion",
|
||||
"config": {
|
||||
"target_opset": 13
|
||||
}
|
||||
},
|
||||
"transformers_optimization": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "bert",
|
||||
"num_heads": 12,
|
||||
"hidden_size": 768,
|
||||
"float16": true
|
||||
}
|
||||
},
|
||||
"perf_tuning": {
|
||||
"type": "OrtPerfTuning",
|
||||
"config": {
|
||||
"enable_cuda_graph": true,
|
||||
"data_config": "__input_model_data_config__"
|
||||
}
|
||||
}
|
||||
},
|
||||
"pass_flows": [
|
||||
["conversion", "transformers_optimization", "perf_tuning"],
|
||||
["conversion", "perf_tuning"]
|
||||
],
|
||||
"engine": {
|
||||
"search_strategy": {
|
||||
"execution_order": "joint",
|
||||
"search_algorithm": "tpe",
|
||||
"search_algorithm_config": {
|
||||
"num_samples": 3,
|
||||
"seed": 0
|
||||
}
|
||||
},
|
||||
"evaluator": "common_evaluator",
|
||||
"execution_providers": ["CUDAExecutionProvider"],
|
||||
"clean_cache": true,
|
||||
"cache_dir": "cache",
|
||||
"output_dir" : "models/bert_gpu"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,92 @@
|
|||
{
|
||||
"input_model":{
|
||||
"type": "PyTorchModel",
|
||||
"config": {
|
||||
"hf_config": {
|
||||
"model_name": "microsoft/deberta-base-mnli",
|
||||
"task": "text-classification",
|
||||
"dataset": {
|
||||
"data_name":"glue",
|
||||
"subset": "mnli_matched",
|
||||
"split": "validation",
|
||||
"input_cols": ["premise", "hypothesis"],
|
||||
"label_cols": ["label"],
|
||||
"batch_size": 1,
|
||||
"max_samples": 100,
|
||||
"component_kwargs": {
|
||||
"pre_process_data": {
|
||||
"align_labels": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"evaluators": {
|
||||
"common_evaluator": {
|
||||
"metrics":[
|
||||
{
|
||||
"name": "accuracy",
|
||||
"type": "accuracy",
|
||||
"backend": "huggingface_metrics",
|
||||
"sub_types": [
|
||||
{"name": "accuracy", "priority": 1}
|
||||
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "latency",
|
||||
"type": "latency",
|
||||
"sub_types": [
|
||||
{"name": "avg", "priority": 2}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"passes": {
|
||||
"conversion": {
|
||||
"type": "OnnxConversion",
|
||||
"config": {
|
||||
"target_opset": 13
|
||||
}
|
||||
},
|
||||
"transformers_optimization": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "bert",
|
||||
"num_heads": 12,
|
||||
"hidden_size": 768,
|
||||
"float16": false
|
||||
}
|
||||
},
|
||||
"quantization": {
|
||||
"type": "OnnxQuantization",
|
||||
"config": {
|
||||
"data_config": "__input_model_data_config__"
|
||||
}
|
||||
},
|
||||
"perf_tuning": {
|
||||
"type": "OrtPerfTuning",
|
||||
"config": {
|
||||
"data_config": "__input_model_data_config__"
|
||||
}
|
||||
}
|
||||
},
|
||||
"engine": {
|
||||
"search_strategy": {
|
||||
"execution_order": "joint",
|
||||
"search_algorithm": "tpe",
|
||||
"search_algorithm_config": {
|
||||
"num_samples": 3,
|
||||
"seed": 0
|
||||
}
|
||||
},
|
||||
"clean_cache": true,
|
||||
"evaluator": "common_evaluator",
|
||||
"execution_providers": ["CPUExecutionProvider"],
|
||||
"cache_dir": "cache",
|
||||
"output_dir" : "models/microsoft-deberta"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,91 @@
|
|||
{
|
||||
"input_model":{
|
||||
"type": "PyTorchModel",
|
||||
"config": {
|
||||
"hf_config": {
|
||||
"model_name": "microsoft/deberta-base-mnli",
|
||||
"task": "text-classification",
|
||||
"dataset": {
|
||||
"data_name":"glue",
|
||||
"subset": "mnli_matched",
|
||||
"split": "validation",
|
||||
"input_cols": ["premise", "hypothesis"],
|
||||
"label_cols": ["label"],
|
||||
"batch_size": 1,
|
||||
"max_samples": 100,
|
||||
"component_kwargs": {
|
||||
"pre_process_data": {
|
||||
"align_labels": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"evaluators": {
|
||||
"common_evaluator": {
|
||||
"metrics":[
|
||||
{
|
||||
"name": "accuracy",
|
||||
"type": "accuracy",
|
||||
"backend": "huggingface_metrics",
|
||||
"sub_types": [
|
||||
{"name": "accuracy", "priority": 1}
|
||||
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "latency",
|
||||
"type": "latency",
|
||||
"sub_types": [
|
||||
{"name": "avg", "priority": 2}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"passes": {
|
||||
"conversion": {
|
||||
"type": "OnnxConversion",
|
||||
"config": {
|
||||
"target_opset": 13
|
||||
}
|
||||
},
|
||||
"transformers_optimization": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "bert",
|
||||
"num_heads": 12,
|
||||
"hidden_size": 768,
|
||||
"float16": true
|
||||
}
|
||||
},
|
||||
"perf_tuning": {
|
||||
"type": "OrtPerfTuning",
|
||||
"config": {
|
||||
"enable_cuda_graph": true,
|
||||
"data_config": "__input_model_data_config__"
|
||||
}
|
||||
}
|
||||
},
|
||||
"pass_flows": [
|
||||
["conversion", "transformers_optimization", "perf_tuning"],
|
||||
["conversion", "perf_tuning"]
|
||||
],
|
||||
"engine": {
|
||||
"search_strategy": {
|
||||
"execution_order": "joint",
|
||||
"search_algorithm": "tpe",
|
||||
"search_algorithm_config": {
|
||||
"num_samples": 3,
|
||||
"seed": 0
|
||||
}
|
||||
},
|
||||
"clean_cache": true,
|
||||
"evaluator": "common_evaluator",
|
||||
"execution_providers": ["CUDAExecutionProvider"],
|
||||
"cache_dir": "cache",
|
||||
"output_dir" : "models/microsoft-deberta_cuda"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
{
|
||||
"input_model":{
|
||||
"type": "PyTorchModel",
|
||||
"config": {
|
||||
"hf_config": {
|
||||
"model_name": "distilbert-base-uncased-finetuned-sst-2-english",
|
||||
"task": "text-classification",
|
||||
"dataset": {
|
||||
"data_name":"glue",
|
||||
"subset": "sst2",
|
||||
"split": "validation",
|
||||
"input_cols": ["sentence"],
|
||||
"label_cols": ["label"],
|
||||
"batch_size": 1,
|
||||
"max_samples": 100
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
"evaluators": {
|
||||
"common_evaluator": {
|
||||
"metrics":[
|
||||
{
|
||||
"name": "accuracy",
|
||||
"type": "accuracy",
|
||||
"backend": "huggingface_metrics",
|
||||
"sub_types": [
|
||||
{"name": "accuracy", "priority": 1}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "latency",
|
||||
"type": "latency",
|
||||
"sub_types": [
|
||||
{"name": "avg", "priority": 2}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"passes": {
|
||||
"conversion": {
|
||||
"type": "OnnxConversion",
|
||||
"config": {
|
||||
"target_opset": 13
|
||||
}
|
||||
},
|
||||
"transformers_optimization": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "bert",
|
||||
"num_heads": 12,
|
||||
"hidden_size": 768,
|
||||
"float16": false
|
||||
}
|
||||
},
|
||||
"quantization": {
|
||||
"type": "OnnxQuantization",
|
||||
"config": {
|
||||
"data_config": "__input_model_data_config__"
|
||||
}
|
||||
},
|
||||
"perf_tuning": {
|
||||
"type": "OrtPerfTuning",
|
||||
"config": {
|
||||
"data_config": "__input_model_data_config__"
|
||||
}
|
||||
}
|
||||
},
|
||||
"engine": {
|
||||
"search_strategy": {
|
||||
"execution_order": "joint",
|
||||
"search_algorithm": "tpe",
|
||||
"search_algorithm_config": {
|
||||
"num_samples": 3,
|
||||
"seed": 0
|
||||
}
|
||||
},
|
||||
"clean_cache": true,
|
||||
"evaluator": "common_evaluator",
|
||||
"execution_providers": ["CPUExecutionProvider"],
|
||||
"cache_dir": "cache",
|
||||
"output_dir" : "models/distilbert"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,85 @@
|
|||
{
|
||||
"input_model":{
|
||||
"type": "PyTorchModel",
|
||||
"config": {
|
||||
"hf_config": {
|
||||
"model_name": "distilbert-base-uncased-finetuned-sst-2-english",
|
||||
"task": "text-classification",
|
||||
"dataset": {
|
||||
"data_name":"glue",
|
||||
"subset": "sst2",
|
||||
"split": "validation",
|
||||
"input_cols": ["sentence"],
|
||||
"label_cols": ["label"],
|
||||
"batch_size": 1,
|
||||
"max_samples": 100
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"evaluators": {
|
||||
"common_evaluator": {
|
||||
"metrics":[
|
||||
{
|
||||
"name": "accuracy",
|
||||
"type": "accuracy",
|
||||
"backend": "huggingface_metrics",
|
||||
"sub_types": [
|
||||
{"name": "accuracy", "priority": 1, "goal": {"type": "max-degradation", "value": 0.01}}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "latency",
|
||||
"type": "latency",
|
||||
"sub_types": [
|
||||
{"name": "avg", "priority": 2, "goal": {"type": "percent-min-improvement", "value": 20}}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"passes": {
|
||||
"conversion": {
|
||||
"type": "OnnxConversion",
|
||||
"config": {
|
||||
"target_opset": 13
|
||||
}
|
||||
},
|
||||
"transformers_optimization": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "bert",
|
||||
"num_heads": 12,
|
||||
"hidden_size": 768,
|
||||
"float16": true
|
||||
}
|
||||
},
|
||||
"perf_tuning": {
|
||||
"type": "OrtPerfTuning",
|
||||
"config": {
|
||||
"enable_cuda_graph": true,
|
||||
"data_config": "__input_model_data_config__"
|
||||
}
|
||||
}
|
||||
},
|
||||
"pass_flows": [
|
||||
["conversion", "transformers_optimization", "perf_tuning"],
|
||||
["conversion", "perf_tuning"]
|
||||
],
|
||||
"engine": {
|
||||
"search_strategy": {
|
||||
"execution_order": "joint",
|
||||
"search_algorithm": "tpe",
|
||||
"search_algorithm_config": {
|
||||
"num_samples": 3,
|
||||
"seed": 0
|
||||
}
|
||||
},
|
||||
"evaluator": "common_evaluator",
|
||||
"execution_providers": ["CUDAExecutionProvider"],
|
||||
"clean_cache": true,
|
||||
"cache_dir": "cache",
|
||||
"output_dir" : "models/distilbert_cuda"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,26 @@
|
|||
{
|
||||
"input_model":{
|
||||
"type": "ONNXModel",
|
||||
"config": {
|
||||
}
|
||||
},
|
||||
"systems": {
|
||||
"local_system": {
|
||||
"type": "LocalSystem",
|
||||
"config": {
|
||||
"accelerators": ["cpu"]
|
||||
}
|
||||
}
|
||||
},
|
||||
"evaluators": {
|
||||
"common_evaluator": {
|
||||
"metrics":[]
|
||||
}
|
||||
},
|
||||
"engine": {
|
||||
"evaluator": "common_evaluator",
|
||||
"host": "local_system",
|
||||
"target": "local_system",
|
||||
"clean_cache": true
|
||||
}
|
||||
}
|
|
@ -0,0 +1,91 @@
|
|||
{
|
||||
"input_model":{
|
||||
"type": "PyTorchModel",
|
||||
"config": {
|
||||
"hf_config": {
|
||||
"model_name": "roberta-large-mnli",
|
||||
"task": "text-classification",
|
||||
"dataset": {
|
||||
"data_name":"glue",
|
||||
"subset": "mnli_matched",
|
||||
"split": "validation",
|
||||
"input_cols": ["premise", "hypothesis"],
|
||||
"label_cols": ["label"],
|
||||
"batch_size": 1,
|
||||
"max_samples": 100,
|
||||
"component_kwargs": {
|
||||
"pre_process_data": {
|
||||
"align_labels": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"evaluators": {
|
||||
"common_evaluator": {
|
||||
"metrics":[
|
||||
{
|
||||
"name": "accuracy",
|
||||
"type": "accuracy",
|
||||
"backend": "huggingface_metrics",
|
||||
"sub_types": [
|
||||
{"name": "accuracy", "priority": 1, "goal": {"type": "max-degradation", "value": 0.01}}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "latency",
|
||||
"type": "latency",
|
||||
"sub_types": [
|
||||
{"name": "avg", "priority": 2, "goal": {"type": "percent-min-improvement", "value": 20}}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"passes": {
|
||||
"conversion": {
|
||||
"type": "OnnxConversion",
|
||||
"config": {
|
||||
"target_opset": 13
|
||||
}
|
||||
},
|
||||
"transformers_optimization": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "bert",
|
||||
"num_heads": 12,
|
||||
"hidden_size": 768,
|
||||
"float16": false
|
||||
}
|
||||
},
|
||||
"quantization": {
|
||||
"type": "OnnxQuantization",
|
||||
"config": {
|
||||
"data_config": "__input_model_data_config__"
|
||||
}
|
||||
},
|
||||
"perf_tuning": {
|
||||
"type": "OrtPerfTuning",
|
||||
"config": {
|
||||
"data_config": "__input_model_data_config__"
|
||||
}
|
||||
}
|
||||
},
|
||||
"engine": {
|
||||
"search_strategy": {
|
||||
"execution_order": "joint",
|
||||
"search_algorithm": "tpe",
|
||||
"search_algorithm_config": {
|
||||
"num_samples": 3,
|
||||
"seed": 0
|
||||
}
|
||||
},
|
||||
"clean_cache": true,
|
||||
"evaluator": "common_evaluator",
|
||||
"execution_providers": ["CPUExecutionProvider"],
|
||||
"cache_dir": "cache",
|
||||
"output_dir" : "models/roberta_large"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,90 @@
|
|||
{
|
||||
"input_model":{
|
||||
"type": "PyTorchModel",
|
||||
"config": {
|
||||
"hf_config": {
|
||||
"model_name": "roberta-large-mnli",
|
||||
"task": "text-classification",
|
||||
"dataset": {
|
||||
"data_name":"glue",
|
||||
"subset": "mnli_matched",
|
||||
"split": "validation",
|
||||
"input_cols": ["premise", "hypothesis"],
|
||||
"label_cols": ["label"],
|
||||
"batch_size": 1,
|
||||
"max_samples": 100,
|
||||
"component_kwargs": {
|
||||
"pre_process_data": {
|
||||
"align_labels": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"evaluators": {
|
||||
"common_evaluator": {
|
||||
"metrics":[
|
||||
{
|
||||
"name": "accuracy",
|
||||
"type": "accuracy",
|
||||
"backend": "huggingface_metrics",
|
||||
"sub_types": [
|
||||
{"name": "accuracy", "priority": 1, "goal": {"type": "max-degradation", "value": 0.01}}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "latency",
|
||||
"type": "latency",
|
||||
"sub_types": [
|
||||
{"name": "avg", "priority": 2, "goal": {"type": "percent-min-improvement", "value": 20}}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"passes": {
|
||||
"conversion": {
|
||||
"type": "OnnxConversion",
|
||||
"config": {
|
||||
"target_opset": 13
|
||||
}
|
||||
},
|
||||
"transformers_optimization": {
|
||||
"type": "OrtTransformersOptimization",
|
||||
"disable_search": true,
|
||||
"config": {
|
||||
"model_type": "bert",
|
||||
"num_heads": 12,
|
||||
"hidden_size": 768,
|
||||
"float16": true
|
||||
}
|
||||
},
|
||||
"perf_tuning": {
|
||||
"type": "OrtPerfTuning",
|
||||
"config": {
|
||||
"enable_cuda_graph": true,
|
||||
"data_config": "__input_model_data_config__"
|
||||
}
|
||||
}
|
||||
},
|
||||
"pass_flows": [
|
||||
["conversion", "transformers_optimization", "perf_tuning"],
|
||||
["conversion", "perf_tuning"]
|
||||
],
|
||||
"engine": {
|
||||
"search_strategy": {
|
||||
"execution_order": "joint",
|
||||
"search_algorithm": "tpe",
|
||||
"search_algorithm_config": {
|
||||
"num_samples": 3,
|
||||
"seed": 0
|
||||
}
|
||||
},
|
||||
"evaluator": "common_evaluator",
|
||||
"execution_providers": ["CUDAExecutionProvider"],
|
||||
"clean_cache": true,
|
||||
"cache_dir": "cache",
|
||||
"output_dir" : "models/roberta_large_cuda"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
apache_beam
|
||||
datasets
|
||||
evaluate
|
||||
neural-compressor
|
||||
onnxruntime
|
||||
optimum
|
||||
scikit-learn
|
||||
tabulate
|
||||
transformers
|
|
@ -0,0 +1,9 @@
|
|||
apache_beam
|
||||
datasets
|
||||
evaluate
|
||||
neural-compressor
|
||||
onnxruntime-gpu
|
||||
optimum
|
||||
scikit-learn
|
||||
tabulate
|
||||
transformers
|
|
@ -0,0 +1,297 @@
|
|||
# -------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
# The Optimum optimization levels are:
|
||||
# O1: basic general optimizations.
|
||||
# O2: basic and extended general optimizations, transformers-specific fusions.
|
||||
# O3: same as O2 with GELU approximation.
|
||||
# O4: same as O3 with mixed precision (fp16, GPU-only, requires --device cuda).
|
||||
|
||||
import argparse
|
||||
import ast
|
||||
import copy
|
||||
import json
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from optimum.exporters.onnx import main_export
|
||||
from optimum.onnxruntime import ORTOptimizer, ORTQuantizer
|
||||
from optimum.onnxruntime.configuration import AutoOptimizationConfig, AutoQuantizationConfig
|
||||
from tabulate import tabulate
|
||||
|
||||
from olive.data.template import huggingface_data_config_template
|
||||
from olive.workflows import run as olive_run
|
||||
|
||||
# Short model alias (CLI --model_name value) -> Hugging Face Hub model id.
MODEL_NAME_MAP = {
    "bert": "Intel/bert-base-uncased-mrpc",
    "deberta": "microsoft/deberta-base-mnli",
    "distilbert": "distilbert-base-uncased-finetuned-sst-2-english",
    "roberta_large": "roberta-large-mnli",
}

# Alias -> hf_config block (model id, task, evaluation dataset spec).
# Used both to build Olive data configs for the metrics and as the
# input_model hf_config for the hf_pytorch baseline.
MODEL_NAME_TO_CONFIG_MAP = {
    "bert": {
        "model_name": "Intel/bert-base-uncased-mrpc",
        "task": "text-classification",
        "dataset": {
            "data_name": "glue",
            "subset": "mrpc",
            "split": "validation",
            "input_cols": ["sentence1", "sentence2"],
            "label_cols": ["label"],
            "batch_size": 1,
            "max_samples": 100,
        },
    },
    "deberta": {
        "model_name": "microsoft/deberta-base-mnli",
        "task": "text-classification",
        "dataset": {
            "data_name": "glue",
            "subset": "mnli_matched",
            "split": "validation",
            "input_cols": ["premise", "hypothesis"],
            "label_cols": ["label"],
            "batch_size": 1,
            "max_samples": 100,
            # MNLI label ids differ from the model's; align them during preprocessing.
            "component_kwargs": {"pre_process_data": {"align_labels": True}},
        },
    },
    "distilbert": {
        "model_name": "distilbert-base-uncased-finetuned-sst-2-english",
        "task": "text-classification",
        "dataset": {
            "data_name": "glue",
            "subset": "sst2",
            "split": "validation",
            "input_cols": ["sentence"],
            "label_cols": ["label"],
            "batch_size": 1,
            "max_samples": 100,
        },
    },
    "roberta_large": {
        "model_name": "roberta-large-mnli",
        "task": "text-classification",
        "dataset": {
            "data_name": "glue",
            "subset": "mnli_matched",
            "split": "validation",
            "input_cols": ["premise", "hypothesis"],
            "label_cols": ["label"],
            "batch_size": 1,
            "max_samples": 100,
            "component_kwargs": {"pre_process_data": {"align_labels": True}},
        },
    },
}

# Accuracy metric template (huggingface_metrics backend), priority 1:
# allow at most 0.01 absolute accuracy degradation vs. the baseline.
ACC_METRIC = {
    "name": "accuracy",
    "type": "accuracy",
    "backend": "huggingface_metrics",
    "sub_types": [{"name": "accuracy", "priority": 1, "goal": {"type": "max-degradation", "value": 0.01}}],
}

# Average-latency metric template, priority 2:
# target at least a 20% latency improvement vs. the baseline.
LAT_METRIC = {
    "name": "latency",
    "type": "latency",
    "sub_types": [{"name": "avg", "priority": 2, "goal": {"type": "percent-min-improvement", "value": 20}}],
}
||||
|
||||
|
||||
def get_args():
    """Parse command-line arguments for the performance comparison.

    Returns an argparse.Namespace with `model_name`, `device` and `test_num`.
    `--model_name` is required: downstream code indexes MODEL_NAME_MAP with it,
    so leaving it unset would otherwise fail later with a KeyError.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--model_name", type=str, required=True, help="The name of the model to run the perf comparison on"
    )
    parser.add_argument("--device", type=str, default="cpu", help="The device to run the perf comparison on")
    parser.add_argument("--test_num", type=int, default=10, help="The number of times to run the perf comparison")

    args = parser.parse_args()
    return args
|
||||
|
||||
|
||||
def export_onnx(model_name, model_root_path, device="cpu"):
    """Export a Hugging Face model to ONNX under <model_root_path>/onnx.

    Uses optimum's main_export; for GPU runs the export is done with
    device="cuda" so the exported graph matches the evaluation device.
    Returns the ONNX output directory path.
    """
    onnx_model_path = model_root_path / "onnx"
    # Plain if/else instead of a conditional expression used for side effects.
    if device == "cpu":
        main_export(model_name, onnx_model_path)
    else:
        main_export(model_name, onnx_model_path, device="cuda")
    return onnx_model_path
|
||||
|
||||
|
||||
def export_optimum_o1(optimizer, model_root_path):
    """Apply Optimum O1 (basic general optimizations) and save under optimum_o1/."""
    save_dir = model_root_path / "optimum_o1"
    optimizer.optimize(save_dir=save_dir, optimization_config=AutoOptimizationConfig.O1())
|
||||
|
||||
|
||||
def export_optimum_o2(optimizer, model_root_path):
    """Apply Optimum O2 (extended general + transformers fusions) and save under optimum_o2/."""
    save_dir = model_root_path / "optimum_o2"
    optimizer.optimize(save_dir=save_dir, optimization_config=AutoOptimizationConfig.O2())
|
||||
|
||||
|
||||
def export_optimum_o3(optimizer, model_root_path):
    """Apply Optimum O3 (O2 plus GELU approximation) and save under optimum_o3/."""
    save_dir = model_root_path / "optimum_o3"
    optimizer.optimize(save_dir=save_dir, optimization_config=AutoOptimizationConfig.O3())
|
||||
|
||||
|
||||
def export_optimum_o4(optimizer, model_root_path):
    """Apply Optimum O4 (O3 plus fp16 mixed precision, GPU-only) and save under optimum_o4/."""
    save_dir = model_root_path / "optimum_o4"
    optimizer.optimize(save_dir=save_dir, optimization_config=AutoOptimizationConfig.O4())
|
||||
|
||||
|
||||
def export_optimum_dynamic_quantization(onnx_model_path, model_root_path):
    """Dynamically quantize the exported ONNX model and save under optimum_dynamic_quantization/.

    Uses the AVX512-VNNI dynamic (is_static=False), per-tensor
    (per_channel=False) quantization preset.
    """
    save_dir = model_root_path / "optimum_dynamic_quantization"
    dynamic_config = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=False)
    quantizer = ORTQuantizer.from_pretrained(onnx_model_path)
    quantizer.quantize(
        save_dir=save_dir,
        quantization_config=dynamic_config,
    )
|
||||
|
||||
|
||||
def run_with_config(tool, olive_config, metric_res):
    """Run an Olive workflow and append its metrics to metric_res[tool].

    tool: accumulator key ("olive" or one of the baseline model variants).
    olive_config: dict config (baselines) or a config-file path ("olive" run).
    metric_res: {tool: {metric_name: [values, ...]}}, mutated in place.
    """
    outputs = olive_run(olive_config)
    # The "olive" run returns footprints whose nodes carry the metrics; the
    # baseline runs return the evaluation result directly. Either way,
    # round-trip through str()/literal_eval to obtain a plain dict.
    if tool == "olive":
        first_footprint = list(outputs.values())[0]
        first_node = list(first_footprint.nodes.values())[0]
        raw_metrics = str(first_node.metrics.value)
    else:
        raw_metrics = str(list(outputs.values())[0])
    parsed_metrics = ast.literal_eval(raw_metrics)

    for name, value in parsed_metrics.items():
        metric_res[tool].setdefault(name, []).append(value)
|
||||
|
||||
|
||||
def run_perf_comparison(cur_dir, model_name, device, model_root_path, test_num):
    """Evaluate each baseline export plus the Olive workflow `test_num` times.

    Returns {tool: {metric_name: average_value}} where the tools are the
    baseline model variants plus "olive".
    """
    print(f"Start running perf comparison on {model_name} model {test_num} times...")
    # Baselines common to both devices; device-specific ones appended below.
    model_list = ["hf_pytorch", "pytorch_compile", "onnx", "optimum_o1", "optimum_o2", "optimum_o3"]
    if device == "gpu":
        model_list.append("optimum_o4")  # fp16 preset is GPU-only
    if device == "cpu":
        model_list.append("optimum_dynamic_quantization")  # dynamic quantization targets CPU
    metric_res = {}
    # Shared evaluation-only config; specialized per baseline on each iteration.
    config_json_path = cur_dir / "configs" / "perf.json"
    for optimized_model in model_list:
        metric_res[f"{optimized_model}"] = {}
    metric_res["olive"] = {}
    for i in range(test_num):
        print(f"Start running {i} time...")
        for optimized_model in model_list:
            # Fresh copies each run: data_config is attached to them below.
            accuracy_metric = copy.deepcopy(ACC_METRIC)
            latency_metric = copy.deepcopy(LAT_METRIC)
            print(f"Start evaluating {optimized_model} model")
            # Reload the base config so per-model mutations don't accumulate.
            with open(config_json_path, "r") as fin:
                olive_config = json.load(fin)
            user_script_path = str(cur_dir / "user_scripts" / f"{model_name}.py")
            hf_model_config = MODEL_NAME_TO_CONFIG_MAP[model_name]
            # Point the input model at the artifact produced by this baseline.
            if optimized_model == "onnx":
                olive_config["input_model"]["config"]["model_path"] = str(
                    Path(model_root_path / optimized_model / "model.onnx")
                )
            elif optimized_model == "optimum_dynamic_quantization":
                olive_config["input_model"]["config"]["model_path"] = str(
                    Path(model_root_path / optimized_model / "model_quantized.onnx")
                )
            elif optimized_model in ["optimum_o1", "optimum_o2", "optimum_o3", "optimum_o4"]:
                olive_config["input_model"]["config"]["model_path"] = str(
                    Path(model_root_path / optimized_model / "model_optimized.onnx")
                )
            elif optimized_model == "hf_pytorch":
                # Evaluate the raw HF PyTorch model directly.
                olive_config["input_model"]["type"] = "PyTorchModel"
                hf_config = {"hf_config": hf_model_config}
                olive_config["input_model"]["config"] = hf_config
            elif optimized_model == "pytorch_compile":
                # Load a torch.compile'd model via the per-model user script.
                olive_config["input_model"]["type"] = "PyTorchModel"
                olive_config["input_model"]["config"]["model_script"] = user_script_path
                olive_config["input_model"]["config"]["model_loader"] = "torch_complied_model"

            # Device-dependent settings shared by every baseline.
            olive_config["systems"]["local_system"]["config"]["accelerators"] = (
                ["cpu"] if device == "cpu" else ["gpu"]
            )
            olive_config["engine"]["cache_dir"] = str(Path(model_root_path / optimized_model / "cache"))
            olive_config["engine"]["output_dir"] = str(Path(model_root_path / optimized_model / "output"))
            olive_config["engine"]["execution_providers"] = (
                ["CPUExecutionProvider"] if device == "cpu" else ["CUDAExecutionProvider"]
            )
            # Attach the two metrics and give each a dataset-backed data_config.
            olive_config["evaluators"]["common_evaluator"]["metrics"].append(accuracy_metric)
            olive_config["evaluators"]["common_evaluator"]["metrics"].append(latency_metric)
            olive_config["evaluators"]["common_evaluator"]["metrics"][0][
                "data_config"
            ] = huggingface_data_config_template(
                hf_model_config["model_name"], hf_model_config["task"], **hf_model_config["dataset"]
            )
            olive_config["evaluators"]["common_evaluator"]["metrics"][1][
                "data_config"
            ] = huggingface_data_config_template(
                hf_model_config["model_name"], hf_model_config["task"], **hf_model_config["dataset"]
            )

            run_with_config(optimized_model, olive_config, metric_res)

        # Full Olive workflow: uses the per-model (and per-device) config file.
        olive_config = f"{model_name}.json" if device == "cpu" else f"{model_name}_gpu.json"
        olive_config_path = cur_dir / "configs" / olive_config
        run_with_config("olive", olive_config_path, metric_res)
    print(metric_res)
    # Collapse each sample list to its mean.
    for model, v in metric_res.items():
        for metric_name, metric_value_list in v.items():
            vsum = sum(float(v) for v in metric_value_list)
            metric_res[model][metric_name] = vsum / len(metric_value_list)
    return metric_res
|
||||
|
||||
|
||||
def print_perf_table(metric_res, device):
    """Print a markdown (pipe-format) table comparing per-tool metric results.

    Args:
        metric_res: mapping of tool name -> {metric_name: averaged value}, as
            produced by run_perf_comparison (values are plain floats).
        device: "cpu" or "gpu"; only used to label the first column header.
    """
    # Guard the empty case: next(iter(...)) below would raise StopIteration.
    if not metric_res:
        print(f"No performance results to display for device {device}.")
        return

    # NOTE: the previous str()/replace("'", '"')/ast.literal_eval round-trip was
    # a no-op for the dict-of-float values produced upstream and would break on
    # any value containing a quote character, so the data is used directly.
    first_tool = next(iter(metric_res))
    columns = [f"tool({device})"] + list(metric_res[first_tool].keys())
    rows = [[tool] + list(metrics.values()) for tool, metrics in metric_res.items()]
    print(tabulate(rows, headers=columns, tablefmt="pipe"))
|
||||
|
||||
|
||||
def main():
    """Entry point: export one model through several toolchains, benchmark each
    variant, print hardware info for the run, and show a summary table."""
    cli_args = get_args()
    name = cli_args.model_name
    hf_model_id = MODEL_NAME_MAP[name]
    target_device = cli_args.device
    repeat_count = cli_args.test_num

    script_dir = Path(__file__).absolute().parent
    workdir = script_dir / "run_cache" / name
    workdir.mkdir(parents=True, exist_ok=True)

    # The ONNX export is the common starting point for every optimizer below.
    exported_onnx = export_onnx(hf_model_id, workdir, target_device)

    ort_optimizer = ORTOptimizer.from_pretrained(exported_onnx)

    # Optimum optimization: O1-O3 always run; O4 is GPU-only, while dynamic
    # quantization only applies on CPU.
    for optimum_export in (export_optimum_o1, export_optimum_o2, export_optimum_o3):
        optimum_export(ort_optimizer, workdir)
    if target_device == "gpu":
        export_optimum_o4(ort_optimizer, workdir)
    if target_device == "cpu":
        export_optimum_dynamic_quantization(exported_onnx, workdir)

    results = run_perf_comparison(script_dir, name, target_device, workdir, repeat_count)

    # Dump hardware details so pipeline logs record what the numbers were
    # measured on.
    if target_device == "cpu":
        print(subprocess.check_output(["lscpu"]).decode("utf-8"))
    elif target_device == "gpu":
        print(subprocess.check_output(["nvidia-smi"]).decode("utf-8"))
    print_perf_table(results, target_device)


if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,11 @@
|
|||
# -------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
# --------------------------------------------------------------------------
|
||||
import torch
|
||||
from transformers import BertForSequenceClassification
|
||||
|
||||
|
||||
def torch_complied_model(model_path):
    """Olive model loader: return a torch.compile-wrapped BERT MRPC classifier.

    model_path is part of the loader signature but unused here; the weights are
    always fetched from the "Intel/bert-base-uncased-mrpc" Hugging Face repo.
    """
    hf_model = BertForSequenceClassification.from_pretrained("Intel/bert-base-uncased-mrpc")
    return torch.compile(hf_model)
|
|
@ -0,0 +1,11 @@
|
|||
# -------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
# --------------------------------------------------------------------------
|
||||
import torch
|
||||
from transformers import DebertaForSequenceClassification
|
||||
|
||||
|
||||
def torch_complied_model(model_path):
    """Olive model loader: return a torch.compile-wrapped DeBERTa MNLI classifier.

    model_path is part of the loader signature but unused here; the weights are
    always fetched from the "microsoft/deberta-base-mnli" Hugging Face repo.
    """
    hf_model = DebertaForSequenceClassification.from_pretrained("microsoft/deberta-base-mnli")
    return torch.compile(hf_model)
|
|
@ -0,0 +1,11 @@
|
|||
# -------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
# --------------------------------------------------------------------------
|
||||
import torch
|
||||
from transformers import DistilBertForSequenceClassification
|
||||
|
||||
|
||||
def torch_complied_model(model_path):
    """Olive model loader: return a torch.compile-wrapped DistilBERT SST-2 classifier.

    model_path is part of the loader signature but unused here; the weights are
    always fetched from "distilbert-base-uncased-finetuned-sst-2-english".
    """
    hf_model = DistilBertForSequenceClassification.from_pretrained(
        "distilbert-base-uncased-finetuned-sst-2-english"
    )
    return torch.compile(hf_model)
|
|
@ -0,0 +1,11 @@
|
|||
# -------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
# --------------------------------------------------------------------------
|
||||
import torch
|
||||
from transformers import RobertaForSequenceClassification
|
||||
|
||||
|
||||
def torch_complied_model(model_path):
    """Olive model loader: return a torch.compile-wrapped RoBERTa-large MNLI classifier.

    model_path is part of the loader signature but unused here; the weights are
    always fetched from the "roberta-large-mnli" Hugging Face repo.
    """
    hf_model = RobertaForSequenceClassification.from_pretrained("roberta-large-mnli")
    return torch.compile(hf_model)
|
|
@ -11,6 +11,7 @@ examples/**/data
|
|||
examples/**/model
|
||||
test/**/data
|
||||
nc_workspace/
|
||||
run_cache/
|
||||
|
||||
# Onnx dynamo export artifacts
|
||||
*.sarif
|
||||
|
|
8
Makefile
8
Makefile
|
@ -5,6 +5,8 @@ EXAMPLE_FOLDER ?=
|
|||
EXAMPLE_NAME ?=
|
||||
INSTALL_EXTRAS ?=
|
||||
VERSION ?=
|
||||
MODEL_NAME ?=
|
||||
DEVICE ?=
|
||||
ifeq ($(WINDOWS), True)
|
||||
CURRENT_DIR = "$(subst /,\\,${CURDIR})"
|
||||
MKDIR_LOG_CMD = mkdir logs | exit 0
|
||||
|
@ -12,6 +14,7 @@ ifeq ($(WINDOWS), True)
|
|||
TEST_CMD = "scripts\\test.bat"
|
||||
TEST_EXAMPLES_CMD = "scripts\\test_examples.bat"
|
||||
OVERWRITE_VERSION = "python scripts\\overwrite_version.py --version $(VERSION)"
|
||||
PERF_CHECK_CMD = "scripts\\run_performance_check.bat"
|
||||
else
|
||||
CURRENT_DIR = ${CURDIR}
|
||||
MKDIR_LOG_CMD = mkdir -p logs
|
||||
|
@ -19,6 +22,7 @@ else
|
|||
TEST_CMD = bash scripts/test.sh
|
||||
TEST_EXAMPLES_CMD = bash scripts/test_examples.sh
|
||||
OVERWRITE_VERSION = python scripts/overwrite_version.py --version $(VERSION)
|
||||
PERF_CHECK_CMD = bash scripts/run_performance_check.sh
|
||||
endif
|
||||
|
||||
.PHONY: all
|
||||
|
@ -49,6 +53,10 @@ test-examples: logs/
|
|||
test-examples:
|
||||
$(TEST_EXAMPLES_CMD) $(PIPELINE) $(CURRENT_DIR) $(EXAMPLE_FOLDER) $(EXAMPLE_NAME)
|
||||
|
||||
# Run the performance-check pipeline for one model.
# Delegates to the platform-specific wrapper selected earlier in this file
# (PERF_CHECK_CMD: scripts/run_performance_check.sh, or the .bat on Windows),
# passing: <PIPELINE> <repo root> <MODEL_NAME> <DEVICE>.
.PHONY: performance
performance:
	$(PERF_CHECK_CMD) $(PIPELINE) $(CURRENT_DIR) $(MODEL_NAME) $(DEVICE)
|
||||
|
||||
# Remove build products: deletes every git-ignored file (-X), including
# untracked directories (-d), without prompting (-f).
# NOTE: destructive — anything matched by .gitignore (caches, venvs, run_cache/)
# is removed.
.PHONY: clean
clean:
	git clean -dfX
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
REM -------------------------------------------------------------------------
REM Copyright (c) Microsoft Corporation. All rights reserved.
REM Licensed under the MIT License.
REM --------------------------------------------------------------------------
@echo off

REM Usage: run_performance_check.bat <PIPELINE> <ROOT_DIR> <MODEL_NAME> <DEVICE>
REM Invoked from the Makefile as:
REM   $(PERF_CHECK_CMD) $(PIPELINE) $(CURRENT_DIR) $(MODEL_NAME) $(DEVICE)
set PIPELINE=%1
REM Bug fix: the second argument is the repository root ($(CURRENT_DIR) from the
REM Makefile), but it was previously stored as INSTALL_DEV_MODE, leaving
REM %ROOT_DIR% undefined in every path below (the .sh counterpart sets ROOT_DIR=$2).
set ROOT_DIR=%2
set MODEL_NAME=%3
set DEVICE=%4

REM In the CI pipeline the dedicated virtual environment must be activated first.
if "%PIPELINE%"=="True" (
    call olive-venv\\Scripts\\activate.bat || goto :error
)

call python -m pip install -r %ROOT_DIR%\\.azure_pipelines\\performance_check\\requirements-%DEVICE%.txt
call python %ROOT_DIR%\\.azure_pipelines\\performance_check\\run_performance_check.py --model_name %MODEL_NAME% --device %DEVICE%

REM clean up
call rmdir /s /q %ROOT_DIR%\\.azure_pipelines\\performance_check\\run_cache
exit /b 0

REM Bug fix: ":error" was referenced by "goto :error" above but never defined,
REM which would abort the script with a "label not found" error instead of
REM exiting with a controlled failure status.
:error
exit /b 1
|
|
@ -0,0 +1,24 @@
|
|||
#!/usr/bin/env bash
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
# Usage: run_performance_check.sh <PIPELINE> <ROOT_DIR> <MODEL_NAME> <DEVICE>
# Invoked from the Makefile as:
#   $(PERF_CHECK_CMD) $(PIPELINE) $(CURRENT_DIR) $(MODEL_NAME) $(DEVICE)

# Bug fix: was "set -eoux pipefail". In a clustered option list "-o" takes its
# argument from the remaining characters of the cluster, so "ux" was consumed
# as the -o option name ("invalid option name") and strict mode was not enabled
# as intended. "-o" must come last so "pipefail" is its argument.
set -euxo pipefail

PIPELINE=$1
ROOT_DIR=$2
MODEL_NAME=$3
DEVICE=$4

echo "$PIPELINE"
# In the CI pipeline the dedicated virtual environment must be activated first.
if [[ "$PIPELINE" == "True" ]]; then
    # Temporarily disable xtrace so the activation internals stay out of the logs.
    set +x
    source olive-venv/bin/activate
    set -x
fi

python -m pip install -r "$ROOT_DIR/.azure_pipelines/performance_check/requirements-$DEVICE.txt"
python "$ROOT_DIR/.azure_pipelines/performance_check/run_performance_check.py" --model_name "$MODEL_NAME" --device "$DEVICE"

# clean up
rm -rf "$ROOT_DIR/.azure_pipelines/performance_check/run_cache"
|
Загрузка…
Ссылка в новой задаче