# File: superbenchmark/tests/data/test.yaml

# 298 lines
# 6.5 KiB
# YAML

# SuperBench Config
#
# NOTE(review): this file had lost all leading indentation. The nesting below
# was reconstructed from key order (enable / modes / parameters per benchmark,
# with env / pattern / mca placed inside the mode item they directly follow).
# Confirm against the consumer's schema before relying on it.
version: v0.6
superbench:
  enable: null
  monitor:
    enable: true
    sample_duration: 1
    sample_interval: 10

  # Reusable fragments. Each anchor defined here is merged into benchmark
  # entries further down with `<<: *name`. The merge is shallow: keys written
  # next to the merge key override or extend the merged-in mapping.
  var:
    default_local_mode: &default_local_mode
      enable: true
      modes:
        - name: local
          proc_num: 8
          prefix: CUDA_VISIBLE_DEVICES={proc_rank}
          # Booleans are spelled true/false (not yes/no) so that YAML 1.2
          # loaders read a boolean as well as YAML 1.1 loaders.
          parallel: true
    default_pytorch_mode: &default_pytorch_mode
      enable: true
      modes:
        - name: torch.distributed
          proc_num: 8
          node_num: 1
      frameworks:
        - pytorch
    common_model_config: &common_model_config
      duration: 0
      num_warmup: 16
      num_steps: 128
      batch_size: 1
      precision:
        - float32
        - float16
      model_action:
        - train

  benchmarks:
    gpu-burn:
      enable: true
      modes:
        - name: local
          proc_num: 1
          parallel: false
      parameters:
        time: 300
        doubles: true
        tensor_core: true

    # The nccl-bw entries share the `nccl-bw:` key prefix and differ in the
    # suffix after the colon; the whole `name:suffix` text is one plain key.
    nccl-bw:default:
      enable: true
      modes:
        - name: local
          proc_num: 1
          parallel: false
      parameters:
        ngpus: 8
    nccl-bw:gdr-only:
      enable: true
      modes:
        - name: local
          proc_num: 1
          parallel: false
          # Values are quoted so they stay strings rather than integers.
          env:
            NCCL_IB_PCI_RELAXED_ORDERING: '1'
            NCCL_NET_GDR_LEVEL: '5'
            NCCL_P2P_DISABLE: '1'
            NCCL_SHM_DISABLE: '1'
            NCCL_MIN_NCHANNELS: '16'
            NCCL_IB_DISABLE: '0'
      parameters:
        ngpus: 8
    nccl-bw:all-nodes:
      enable: true
      modes:
        - name: mpi
          proc_num: 8
          node_num: all
          pattern:
            type: all-nodes
      parameters:
        ngpus: 8
    nccl-bw:pair-wise:
      enable: true
      modes:
        - name: mpi
          proc_num: 8
          node_num: all
          pattern:
            type: pair-wise
      parameters:
        ngpus: 8
    nccl-bw:k-batch:
      enable: true
      modes:
        - name: mpi
          proc_num: 8
          node_num: all
          pattern:
            type: k-batch
      parameters:
        ngpus: 8
    nccl-bw:topo-aware:
      enable: true
      modes:
        - name: mpi
          proc_num: 8
          node_num: all
          pattern:
            type: topo-aware
            ibstat: ibstat_file.txt
            ibnetdiscover: ibnetdiscover_file.txt
            min_dist: 2
            max_dist: 6
      parameters:
        ngpus: 8

    # Two local modes; they differ only in the IB_DEVICES list of the prefix.
    ib-loopback:
      enable: true
      modes:
        - name: local
          proc_num: 4
          prefix: PROC_RANK={proc_rank} IB_DEVICES=0,2,4,6 NUMA_NODES=1,0,3,2
          parallel: true
        - name: local
          proc_num: 4
          prefix: PROC_RANK={proc_rank} IB_DEVICES=1,3,5,7 NUMA_NODES=1,0,3,2
          parallel: true
    disk-benchmark:
      enable: false
      modes:
        - name: local
          proc_num: 1
          parallel: false
      parameters:
        block_devices:
          - /dev/nvme0n1
    cpu-memory-bw-latency:
      enable: false
      modes:
        - name: local
          proc_num: 1
          parallel: false
      parameters:
        tests:
          - bandwidth_matrix
          - latency_matrix
          - max_bandwidth
    mem-bw:
      enable: true
      modes:
        - name: local
          proc_num: 8
          prefix: CUDA_VISIBLE_DEVICES={proc_rank} numactl -N $(({proc_rank}/2))
          parallel: false
    gpu-copy-bw:correctness:
      enable: true
      modes:
        - name: local
          parallel: false
      parameters:
        mem_type:
          - htod
          - dtoh
          - dtod
        copy_type:
          - sm
          - dma
        size: 4096
        num_warm_up: 0
        num_loops: 1
        check_data: true
    gpu-copy-bw:perf:
      enable: true
      modes:
        - name: local
          parallel: false
      parameters:
        mem_type:
          - htod
          - dtoh
          - dtod
        copy_type:
          - sm
          - dma

    # Entries below reuse the anchors declared under `var`.
    kernel-launch:
      <<: *default_local_mode
    gemm-flops:
      <<: *default_local_mode
    cudnn-function:
      <<: *default_local_mode
    cublas-function:
      <<: *default_local_mode
    matmul:
      <<: *default_local_mode
      frameworks:
        - pytorch
    sharding-matmul:
      <<: *default_pytorch_mode
    computation-communication-overlap:
      <<: *default_pytorch_mode

    ib-traffic:
      enable: false
      modes:
        - name: mpi
          proc_num: 8
      parameters:
        msg_size: 8388608
        ib_dev: mlx5_$LOCAL_RANK
        gpu_dev: $LOCAL_RANK
        numa_dev: $((LOCAL_RANK/2))
    gpcnet-network-test:
      enable: false
      modes:
        - name: mpi
          proc_num: 1
          mca:
            pml: ucx
            btl: ^uct
            btl_tcp_if_include: eth0
          env:
            UCX_NET_DEVICES: mlx5_0:1
    gpcnet-network-load-test:
      enable: false
      modes:
        - name: mpi
          proc_num: 1
          mca:
            pml: ucx
            btl: ^uct
            btl_tcp_if_include: eth0
          env:
            UCX_NET_DEVICES: mlx5_0:1
    tcp-connectivity:
      enable: false
      modes:
        - name: local
          parallel: false
      parameters:
        port: 22

    ort-inference:
      <<: *default_local_mode
      parameters:
        batch_size: 1
    tensorrt-inference:
      <<: *default_local_mode
      parameters:
        pytorch_models:
          - resnet50
          - resnet101
          - resnet152
          - densenet169
          - densenet201
          - bert-base
          - bert-large
        seq_length: 224
        batch_size: 1
        precision: int8

    # Model entries: each merges default_pytorch_mode, lists its models, and
    # takes its parameters wholesale from common_model_config.
    gpt_models:
      <<: *default_pytorch_mode
      models:
        - gpt2-small
        - gpt2-large
      parameters:
        <<: *common_model_config
    bert_models:
      <<: *default_pytorch_mode
      models:
        - bert-base
        - bert-large
      parameters:
        <<: *common_model_config
    lstm_models:
      <<: *default_pytorch_mode
      models:
        - lstm
      parameters:
        <<: *common_model_config
    resnet_models:
      <<: *default_pytorch_mode
      models:
        - resnet50
        - resnet101
        - resnet152
      parameters:
        <<: *common_model_config
    densenet_models:
      <<: *default_pytorch_mode
      models:
        - densenet169
        - densenet201
      parameters:
        <<: *common_model_config
    vgg_models:
      <<: *default_pytorch_mode
      models:
        - vgg11
        - vgg13
        - vgg16
        - vgg19
      parameters:
        <<: *common_model_config