Config - Add inference config for NC A100 and NV A10 series (#329)

Add inference config for preview SKUs, including:
* [NC96ads_A100_v4](https://docs.microsoft.com/en-us/azure/virtual-machines/nc-a100-v4-series)
* [NV18ads_A10_v5](https://docs.microsoft.com/en-us/azure/virtual-machines/nva10v5-series)
This commit is contained in:
Yifan Xiong 2022-03-21 14:24:37 +08:00 коммит произвёл GitHub
Родитель 6e74918044
Коммит a9634ef5a8
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 612 добавлений и 0 удалений

Просмотреть файл

@@ -1,4 +1,5 @@
[mypy]
# Skip type-checking files under the generated build/ directory.
exclude = build/
# Silence errors for imported packages that ship no type stubs.
ignore_missing_imports = True
# Check standalone scripts with module semantics (proper module names).
scripts_are_modules = True
# Warn when a function typed to return a non-Any value returns Any.
warn_return_any = True

Просмотреть файл

@@ -0,0 +1,315 @@
# SuperBench inference benchmark configuration for a 4-GPU node
# (NOTE(review): presumably NC96ads_A100_v4 given proc_num: 4 / ngpus: 4 — confirm against filename).
version: v0.4
superbench:
  enable: null
  monitor:
    enable: true
    sample_duration: 1
    sample_interval: 10
  var:
    # One process per GPU, all running in parallel on the local node.
    default_local_mode: &default_local_mode
      enable: true
      modes:
        - name: local
          proc_num: 4
          prefix: CUDA_VISIBLE_DEVICES={proc_rank}
          parallel: true
    # Single-node torch.distributed launch across 4 ranks.
    default_pytorch_mode: &default_pytorch_mode
      enable: true
      modes:
        - name: torch.distributed
          proc_num: 4
          node_num: 1
      frameworks:
        - pytorch
    # Offline (throughput-oriented) inference defaults: large batch.
    offline_inference_config: &offline_inference_config
      duration: 0
      num_warmup: 64
      num_steps: 2048
      sample_count: 8192
      batch_size: 32
      precision:
        - float32
        - float16
      model_action:
        - inference
      pin_memory: true
    # Online (latency-oriented) inference defaults: batch size 1.
    online_inference_config: &online_inference_config
      duration: 0
      num_warmup: 64
      num_steps: 2048
      sample_count: 8192
      batch_size: 1
      precision:
        - float32
        - float16
      model_action:
        - inference
      pin_memory: true
  benchmarks:
    kernel-launch:
      <<: *default_local_mode
    gemm-flops:
      <<: *default_local_mode
    nccl-bw:
      enable: true
      modes:
        - name: local
          proc_num: 1
          parallel: false
      parameters:
        ngpus: 4
    cpu-memory-bw-latency:
      enable: true
      modes:
        - name: local
          proc_num: 1
          parallel: false
      parameters:
        tests:
          - bandwidth_matrix
          - latency_matrix
          - max_bandwidth
    mem-bw:
      enable: true
      modes:
        - name: local
          proc_num: 4
          prefix: CUDA_VISIBLE_DEVICES={proc_rank} numactl -N {proc_rank}
          parallel: false
    gpu-copy-bw:
      enable: true
      modes:
        - name: local
          parallel: false
      parameters:
        mem_type:
          - htod
          - dtoh
        copy_type:
          - sm
          - dma
    cudnn-function:
      <<: *default_local_mode
    cublas-function:
      <<: *default_local_mode
    matmul:
      <<: *default_local_mode
      frameworks:
        - pytorch
    sharding-matmul:
      <<: *default_pytorch_mode
    computation-communication-overlap:
      <<: *default_pytorch_mode
      enable: false
    ort-inference:fp16-offline:
      <<: *default_local_mode
      parameters:
        pytorch_models:
          - resnet50
          - resnet101
          - resnet152
          - densenet169
          - densenet201
        batch_size: 32
        precision: float16
    ort-inference:fp16-online:
      <<: *default_local_mode
      parameters:
        pytorch_models:
          - resnet50
          - resnet101
          - resnet152
          - densenet169
          - densenet201
        batch_size: 1
        precision: float16
    tensorrt-inference:fp16-offline:
      <<: *default_local_mode
      parameters:
        pytorch_models:
          - resnet50
          - resnet101
          - resnet152
          - densenet169
          - densenet201
          - bert-base
          - bert-large
        seq_length: 224
        batch_size: 32
        precision: fp16
    tensorrt-inference:fp16-online:
      <<: *default_local_mode
      parameters:
        pytorch_models:
          - resnet50
          - resnet101
          - resnet152
          - densenet169
          - densenet201
          - bert-base
          - bert-large
        seq_length: 224
        batch_size: 1
        precision: fp16
    tensorrt-inference:int8-offline:
      <<: *default_local_mode
      parameters:
        pytorch_models:
          - resnet50
          - resnet101
          - resnet152
          - densenet169
          - densenet201
          - bert-base
          - bert-large
        seq_length: 224
        batch_size: 32
        precision: int8
    tensorrt-inference:int8-online:
      <<: *default_local_mode
      parameters:
        pytorch_models:
          - resnet50
          - resnet101
          - resnet152
          - densenet169
          - densenet201
          - bert-base
          - bert-large
        seq_length: 224
        batch_size: 1
        precision: int8
    # PyTorch offline inference
    model-benchmarks:gpt-offline:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - gpt2-small
        - gpt2-large
      parameters:
        <<: *offline_inference_config
        batch_size: 8
        seq_len: 224
    model-benchmarks:bert-offline:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - bert-base
        - bert-large
      parameters:
        <<: *offline_inference_config
        seq_len: 224
    model-benchmarks:lstm-offline:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - lstm
      parameters:
        <<: *offline_inference_config
        batch_size: 224
        input_size: 224
        hidden_size: 1000
        seq_len: 32
        pin_memory: false
    model-benchmarks:resnet-offline:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - resnet50
        - resnet101
        - resnet152
      parameters:
        <<: *offline_inference_config
        batch_size: 192
        num_steps: 512
    model-benchmarks:densenet-offline:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - densenet169
        - densenet201
      parameters:
        <<: *offline_inference_config
        pin_memory: false
    model-benchmarks:vgg-offline:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - vgg11
        - vgg13
        - vgg16
        - vgg19
      parameters:
        <<: *offline_inference_config
        pin_memory: false
    # PyTorch online inference
    model-benchmarks:gpt-online:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - gpt2-small
        - gpt2-large
      parameters:
        <<: *online_inference_config
        seq_len: 224
    model-benchmarks:bert-online:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - bert-base
        - bert-large
      parameters:
        <<: *online_inference_config
        seq_len: 224
    model-benchmarks:lstm-online:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - lstm
      parameters:
        <<: *online_inference_config
        input_size: 224
        hidden_size: 1000
        seq_len: 32
        pin_memory: false
    model-benchmarks:resnet-online:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - resnet50
        - resnet101
        - resnet152
      parameters:
        <<: *online_inference_config
    model-benchmarks:densenet-online:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - densenet169
        - densenet201
      parameters:
        <<: *online_inference_config
        pin_memory: false
    model-benchmarks:vgg-online:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - vgg11
        - vgg13
        - vgg16
        - vgg19
      parameters:
        <<: *online_inference_config
        pin_memory: false

Просмотреть файл

@@ -0,0 +1,296 @@
# SuperBench inference benchmark configuration for a single-GPU node
# (NOTE(review): presumably NV18ads_A10_v5 given proc_num: 1 and int4_tc support — confirm against filename).
version: v0.4
superbench:
  enable: null
  monitor:
    enable: false
    sample_duration: 1
    sample_interval: 10
  var:
    # Single process on the one visible GPU.
    default_local_mode: &default_local_mode
      enable: true
      modes:
        - name: local
          proc_num: 1
          prefix: CUDA_VISIBLE_DEVICES={proc_rank}
          parallel: true
    # Offline (throughput-oriented) inference defaults: large batch.
    offline_inference_config: &offline_inference_config
      duration: 0
      num_warmup: 64
      num_steps: 2048
      sample_count: 8192
      batch_size: 32
      precision:
        - float32
        - float16
      model_action:
        - inference
      pin_memory: true
    # Online (latency-oriented) inference defaults: batch size 1.
    online_inference_config: &online_inference_config
      duration: 0
      num_warmup: 64
      num_steps: 2048
      sample_count: 8192
      batch_size: 1
      precision:
        - float32
        - float16
      model_action:
        - inference
      pin_memory: true
  benchmarks:
    kernel-launch:
      <<: *default_local_mode
    gemm-flops:
      <<: *default_local_mode
      parameters:
        precision: [fp32, fp16, tf32_tc, bf16_tc, fp16_tc, int8_tc, int4_tc]
    cpu-memory-bw-latency:
      enable: true
      modes:
        - name: local
          proc_num: 1
          parallel: false
      parameters:
        tests:
          - bandwidth_matrix
          - latency_matrix
          - max_bandwidth
    mem-bw:
      enable: true
      modes:
        - name: local
          proc_num: 1
          prefix: CUDA_VISIBLE_DEVICES={proc_rank} numactl -N {proc_rank}
          parallel: false
    gpu-copy-bw:
      enable: true
      modes:
        - name: local
          parallel: false
      parameters:
        mem_type:
          - htod
          - dtoh
        copy_type:
          - sm
          - dma
    cudnn-function:
      <<: *default_local_mode
    cublas-function:
      <<: *default_local_mode
    matmul:
      <<: *default_local_mode
      frameworks:
        - pytorch
    ort-inference:fp16-offline:
      <<: *default_local_mode
      parameters:
        pytorch_models:
          - resnet50
          - resnet101
          - resnet152
          - densenet169
          - densenet201
        batch_size: 8
        precision: float16
    ort-inference:fp16-online:
      <<: *default_local_mode
      parameters:
        pytorch_models:
          - resnet50
          - resnet101
          - resnet152
          - densenet169
          - densenet201
        batch_size: 1
        precision: float16
    tensorrt-inference:fp16-offline:
      <<: *default_local_mode
      parameters:
        pytorch_models:
          - resnet50
          - resnet101
          - resnet152
          - densenet169
          - densenet201
          - bert-base
          - bert-large
        seq_length: 224
        batch_size: 8
        precision: fp16
    tensorrt-inference:fp16-online:
      <<: *default_local_mode
      parameters:
        pytorch_models:
          - resnet50
          - resnet101
          - resnet152
          - densenet169
          - densenet201
          - bert-base
          - bert-large
        seq_length: 224
        batch_size: 1
        precision: fp16
    tensorrt-inference:int8-offline:
      <<: *default_local_mode
      parameters:
        pytorch_models:
          - resnet50
          - resnet101
          - resnet152
          - densenet169
          - densenet201
          - bert-base
          - bert-large
        seq_length: 224
        batch_size: 32
        precision: int8
    tensorrt-inference:int8-online:
      <<: *default_local_mode
      parameters:
        pytorch_models:
          - resnet50
          - resnet101
          - resnet152
          - densenet169
          - densenet201
          - bert-base
          - bert-large
        seq_length: 224
        batch_size: 1
        precision: int8
    # PyTorch offline inference
    model-benchmarks:gpt-offline:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - gpt2-small
        - gpt2-large
      parameters:
        <<: *offline_inference_config
        batch_size: 8
        seq_len: 224
    model-benchmarks:bert-offline:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - bert-base
        - bert-large
      parameters:
        <<: *offline_inference_config
        seq_len: 224
    model-benchmarks:lstm-offline:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - lstm
      parameters:
        <<: *offline_inference_config
        batch_size: 224
        input_size: 224
        hidden_size: 1000
        seq_len: 32
        pin_memory: false
    model-benchmarks:resnet-offline:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - resnet50
        - resnet101
        - resnet152
      parameters:
        <<: *offline_inference_config
        batch_size: 192
        num_steps: 512
    model-benchmarks:densenet-offline:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - densenet169
        - densenet201
      parameters:
        <<: *offline_inference_config
        pin_memory: false
    model-benchmarks:vgg-offline:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - vgg11
        - vgg13
        - vgg16
        - vgg19
      parameters:
        <<: *offline_inference_config
        pin_memory: false
    # PyTorch online inference
    model-benchmarks:gpt-online:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - gpt2-small
        - gpt2-large
      parameters:
        <<: *online_inference_config
        seq_len: 224
    model-benchmarks:bert-online:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - bert-base
        - bert-large
      parameters:
        <<: *online_inference_config
        seq_len: 224
    model-benchmarks:lstm-online:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - lstm
      parameters:
        <<: *online_inference_config
        input_size: 224
        hidden_size: 1000
        seq_len: 32
        pin_memory: false
    model-benchmarks:resnet-online:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - resnet50
        - resnet101
        - resnet152
      parameters:
        <<: *online_inference_config
    model-benchmarks:densenet-online:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - densenet169
        - densenet201
      parameters:
        <<: *online_inference_config
        pin_memory: false
    model-benchmarks:vgg-online:
      <<: *default_local_mode
      frameworks:
        - pytorch
      models:
        - vgg11
        - vgg13
        - vgg16
        - vgg19
      parameters:
        <<: *online_inference_config
        pin_memory: false