From ff563b66af8a4fa2e99a25d994a9a04a97388742 Mon Sep 17 00:00:00 2001 From: Yifan Xiong Date: Thu, 30 Dec 2021 16:24:00 +0800 Subject: [PATCH] Release - SuperBench v0.4.0 (#278) __Description__ Cherry-pick bug fixes from v0.4.0 to main. __Major Revisions__ * Bug - Fix issues for Ansible and benchmarks (#267) * Tests - Refine test cases for microbenchmark (#268) * Bug - Build openmpi with ucx support in rocm dockerfiles (#269) * Benchmarks: Fix Bug - Fix fio build issue (#272) * Docs - Unify metric and add doc for cublas and cudnn functions (#271) * Monitor: Revision - Add 'monitor/' prefix to monitor metrics in result summary (#274) * Bug - Fix bug of detecting if gpu_index is none (#275) * Bug - Fix bugs in data diagnosis (#273) * Bug - Fix issue that the root mpi rank may not be the first in the hostfile (#270) * Benchmarks: Configuration - Update inference and network benchmarks in configs (#276) * Docs - Upgrade version and release note (#277) Co-authored-by: Yuting Jiang --- README.md | 10 +- dockerfile/rocm4.0-pytorch1.7.0.dockerfile | 24 +- dockerfile/rocm4.2-pytorch1.7.0.dockerfile | 2 +- docs/getting-started/installation.mdx | 2 +- docs/getting-started/run-superbench.md | 2 +- docs/superbench-config.mdx | 2 +- .../benchmarks/micro-benchmarks.md | 33 +- docs/user-tutorial/container-images.mdx | 3 + docs/user-tutorial/data-diagnosis.md | 2 +- setup.py | 2 +- superbench/__init__.py | 2 +- superbench/analyzer/data_diagnosis.py | 31 +- .../micro_benchmarks/_export_torch_to_onnx.py | 25 +- .../micro_benchmarks/cublas_function.py | 4 +- .../micro_benchmarks/cudnn_function.py | 5 +- .../ib_validation_performance.py | 2 +- superbench/config/amd_mi100_hpe.yaml | 28 +- superbench/config/amd_mi100_z53.yaml | 2 +- superbench/config/azure_ndmv4.yaml | 52 ++- superbench/config/azure_ndv4.yaml | 49 ++- superbench/config/default.yaml | 53 ++- superbench/runner/ansible.py | 16 +- .../runner/playbooks/fetch_results.yaml | 4 +- superbench/runner/runner.py | 5 +- 
.../test_data_analysis.py | 0 tests/analyzer/test_data_diagnosis.py | 51 ++- tests/analyzer/test_rules.yaml | 2 +- tests/ansible/tests/test_deploy.yaml | 3 +- .../test_cpu_memory_bw_latency_performance.py | 25 +- .../test_cuda_gemm_flops_performance.py | 23 +- .../test_cuda_memory_bw_performance.py | 304 +------------- .../test_cuda_nccl_bw_performance.py | 387 ++---------------- .../micro_benchmarks/test_disk_performance.py | 340 +-------------- .../test_gpcnet_performance.py | 194 +-------- .../test_gpu_copy_bw_performance.py | 23 +- .../test_ib_loopback_performance.py | 147 +------ .../test_ib_traffic_performance.py | 43 +- .../test_rocm_gemm_flops_performance.py | 23 +- .../test_rocm_memory_bw_performance.py | 137 +------ .../test_tensorrt_inference_performance.py | 35 +- tests/data/cuda_memory_d2d_bw.log | 89 ++++ tests/data/cuda_memory_d2h_bw.log | 89 ++++ tests/data/cuda_memory_h2d_bw.log | 89 ++++ tests/data/diagnosis_summary.jsonl | 2 + tests/data/diagnosis_summary.xlsx | Bin 0 -> 19618 bytes tests/data/disk_performance.log | 309 ++++++++++++++ tests/data/gpcnet_network_load.log | 97 +++++ tests/data/gpcnet_network_load_error.log | 12 + tests/data/gpcnet_network_test.log | 30 ++ tests/data/gpcnet_network_test_error.log | 12 + tests/data/ib_loopback_8M_size.log | 43 ++ tests/data/ib_loopback_all_sizes.log | 66 +++ tests/data/nccl_allgather.log | 53 +++ tests/data/nccl_allreduce.log | 53 +++ tests/data/nccl_alltoall.log | 52 +++ tests/data/nccl_broadcast.log | 53 +++ tests/data/nccl_reduce.log | 53 +++ tests/data/nccl_reducescatter.log | 53 +++ tests/data/rocm_memory_d2h_bw.log | 51 +++ tests/data/rocm_memory_h2d_bw.log | 51 +++ tests/helper/testcase.py | 83 ++++ tests/runner/test_ansible.py | 62 ++- tests/runner/test_runner.py | 64 +-- third_party/Makefile | 2 +- website/blog/2021-12-24-release-0-4.md | 58 +++ website/docusaurus.config.js | 2 +- website/package-lock.json | 2 +- website/package.json | 2 +- 68 files changed, 2010 insertions(+), 1619 
deletions(-) rename tests/{analylzer => analyzer}/test_data_analysis.py (100%) create mode 100644 tests/data/cuda_memory_d2d_bw.log create mode 100644 tests/data/cuda_memory_d2h_bw.log create mode 100644 tests/data/cuda_memory_h2d_bw.log create mode 100644 tests/data/diagnosis_summary.jsonl create mode 100644 tests/data/diagnosis_summary.xlsx create mode 100644 tests/data/disk_performance.log create mode 100644 tests/data/gpcnet_network_load.log create mode 100644 tests/data/gpcnet_network_load_error.log create mode 100644 tests/data/gpcnet_network_test.log create mode 100644 tests/data/gpcnet_network_test_error.log create mode 100644 tests/data/ib_loopback_8M_size.log create mode 100644 tests/data/ib_loopback_all_sizes.log create mode 100644 tests/data/nccl_allgather.log create mode 100644 tests/data/nccl_allreduce.log create mode 100644 tests/data/nccl_alltoall.log create mode 100644 tests/data/nccl_broadcast.log create mode 100644 tests/data/nccl_reduce.log create mode 100644 tests/data/nccl_reducescatter.log create mode 100644 tests/data/rocm_memory_d2h_bw.log create mode 100644 tests/data/rocm_memory_h2d_bw.log create mode 100644 tests/helper/testcase.py create mode 100644 website/blog/2021-12-24-release-0-4.md diff --git a/README.md b/README.md index eaefbf30..2cf77eac 100644 --- a/README.md +++ b/README.md @@ -7,15 +7,15 @@ [![Docker Pulls](https://img.shields.io/docker/pulls/superbench/superbench.svg)](https://hub.docker.com/r/superbench/superbench/tags) [![License](https://img.shields.io/github/license/microsoft/superbenchmark.svg)](LICENSE) -| Azure Pipelines | Build Status | -| :---: | :---: | -| cpu-unit-test | [![Build Status](https://dev.azure.com/msrasrg/SuperBenchmark/_apis/build/status/cpu-unit-test?branchName=main)](https://dev.azure.com/msrasrg/SuperBenchmark/_build/latest?definitionId=77&branchName=main) | -| cuda-unit-test | [![Build 
Status](https://dev.azure.com/msrasrg/SuperBenchmark/_apis/build/status/cuda-unit-test?branchName=main)](https://dev.azure.com/msrasrg/SuperBenchmark/_build/latest?definitionId=80&branchName=main) | +| Azure Pipelines | Build Status | +|--------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| cpu-unit-test | [![Build Status](https://dev.azure.com/msrasrg/SuperBenchmark/_apis/build/status/cpu-unit-test?branchName=main)](https://dev.azure.com/msrasrg/SuperBenchmark/_build/latest?definitionId=77&branchName=main) | +| cuda-unit-test | [![Build Status](https://dev.azure.com/msrasrg/SuperBenchmark/_apis/build/status/cuda-unit-test?branchName=main)](https://dev.azure.com/msrasrg/SuperBenchmark/_build/latest?definitionId=80&branchName=main) | | ansible-integration-test | [![Build Status](https://dev.azure.com/msrasrg/SuperBenchmark/_apis/build/status/ansible-integration-test?branchName=main)](https://dev.azure.com/msrasrg/SuperBenchmark/_build/latest?definitionId=82&branchName=main) | __SuperBench__ is a validation and profiling tool for AI infrastructure. -📢 [v0.3.0](https://github.com/microsoft/superbenchmark/releases/tag/v0.3.0) has been released! +📢 [v0.4.0](https://github.com/microsoft/superbenchmark/releases/tag/v0.4.0) has been released! 
## _Check [aka.ms/superbench](https://aka.ms/superbench) for more details._ diff --git a/dockerfile/rocm4.0-pytorch1.7.0.dockerfile b/dockerfile/rocm4.0-pytorch1.7.0.dockerfile index d0565a5d..86bd9f8f 100644 --- a/dockerfile/rocm4.0-pytorch1.7.0.dockerfile +++ b/dockerfile/rocm4.0-pytorch1.7.0.dockerfile @@ -63,18 +63,6 @@ RUN mkdir -p /root/.ssh && \ echo -e "* soft nofile 1048576\n* hard nofile 1048576" >> /etc/security/limits.conf && \ echo -e "root soft nofile 1048576\nroot hard nofile 1048576" >> /etc/security/limits.conf -# Install OpenMPI -ENV OPENMPI_VERSION=4.0.5 -RUN cd /tmp && \ - wget -q https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-${OPENMPI_VERSION}.tar.gz && \ - tar xzf openmpi-${OPENMPI_VERSION}.tar.gz && \ - cd openmpi-${OPENMPI_VERSION} && \ - ./configure --enable-orterun-prefix-by-default && \ - make -j $(nproc) all && \ - make install && \ - ldconfig && \ - rm -rf /tmp/openmpi-${OPENMPI_VERSION}* - # Install OFED ENV OFED_VERSION=5.2-2.2.3.0 RUN cd /tmp && \ @@ -83,6 +71,18 @@ RUN cd /tmp && \ PATH=/usr/bin:${PATH} MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu18.04-x86_64/mlnxofedinstall --user-space-only --without-fw-update --force --all && \ rm -rf MLNX_OFED_LINUX-${OFED_VERSION}* +# Install OpenMPI +ENV OPENMPI_VERSION=4.0.5 +RUN cd /tmp && \ + wget -q https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-${OPENMPI_VERSION}.tar.gz && \ + tar xzf openmpi-${OPENMPI_VERSION}.tar.gz && \ + cd openmpi-${OPENMPI_VERSION} && \ + ./configure --enable-orterun-prefix-by-default --with-ucx=/usr --enable-mca-no-build=btl-uct && \ + make -j $(nproc) all && \ + make install && \ + ldconfig && \ + rm -rf /tmp/openmpi-${OPENMPI_VERSION}* + # Install HPC-X RUN cd /opt && \ wget -q https://azhpcstor.blob.core.windows.net/azhpc-images-store/hpcx-v2.8.3-gcc-MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu18.04-x86_64.tbz && \ diff --git a/dockerfile/rocm4.2-pytorch1.7.0.dockerfile b/dockerfile/rocm4.2-pytorch1.7.0.dockerfile index cf09e360..ee3a588d 
100644 --- a/dockerfile/rocm4.2-pytorch1.7.0.dockerfile +++ b/dockerfile/rocm4.2-pytorch1.7.0.dockerfile @@ -69,7 +69,7 @@ RUN cd /tmp && \ wget -q https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-${OPENMPI_VERSION}.tar.gz && \ tar xzf openmpi-${OPENMPI_VERSION}.tar.gz && \ cd openmpi-${OPENMPI_VERSION} && \ - ./configure --enable-orterun-prefix-by-default && \ + ./configure --enable-orterun-prefix-by-default --with-ucx=/opt/ucx --enable-mca-no-build=btl-uct && \ make -j $(nproc) all && \ make install && \ ldconfig && \ diff --git a/docs/getting-started/installation.mdx b/docs/getting-started/installation.mdx index 4a02e5dc..714c33a5 100644 --- a/docs/getting-started/installation.mdx +++ b/docs/getting-started/installation.mdx @@ -61,7 +61,7 @@ You can clone the source from GitHub and build it. :::note Note You should checkout corresponding tag to use release version, for example, -`git clone -b v0.3.0 https://github.com/microsoft/superbenchmark` +`git clone -b v0.4.0 https://github.com/microsoft/superbenchmark` ::: ```bash diff --git a/docs/getting-started/run-superbench.md b/docs/getting-started/run-superbench.md index 1c1d90a5..f935178f 100644 --- a/docs/getting-started/run-superbench.md +++ b/docs/getting-started/run-superbench.md @@ -27,7 +27,7 @@ sb deploy -f remote.ini --host-password [password] :::note Note You should deploy corresponding Docker image to use release version, for example, -`sb deploy -f local.ini -i superbench/superbench:v0.3.0-cuda11.1.1` +`sb deploy -f local.ini -i superbench/superbench:v0.4.0-cuda11.1.1` You should note that version of git repo only determines version of sb CLI, and not the sb container. You should define the container version even if you specified a release version for the git clone. 
diff --git a/docs/superbench-config.mdx b/docs/superbench-config.mdx index 7013c9a2..213d9871 100644 --- a/docs/superbench-config.mdx +++ b/docs/superbench-config.mdx @@ -70,7 +70,7 @@ superbench: ```yaml -version: v0.3 +version: v0.4 superbench: enable: benchmark_1 monitor: diff --git a/docs/user-tutorial/benchmarks/micro-benchmarks.md b/docs/user-tutorial/benchmarks/micro-benchmarks.md index 9ed59bcf..4b0d35b0 100644 --- a/docs/user-tutorial/benchmarks/micro-benchmarks.md +++ b/docs/user-tutorial/benchmarks/micro-benchmarks.md @@ -60,11 +60,40 @@ Large scale matmul operation using `torch.matmul` with one GPU. ### `cublas-function` -TODO +#### Introduction + +Measure the performance of most common Nvidia cuBLAS functions with parameters in models training including ResNet, VGG, DenseNet, LSTM, BERT, and GPT-2. + +The supported functions for cuBLAS are as follows: + - cublasSgemm + - cublasSgemmStridedBatched + - cublasGemmStridedBatchedEx + - cublasGemmEx + - cublasCgemm3mStridedBatched + - cublasCgemm + +#### Metrics + +| Name | Unit | Description | +|----------------------------------------------------------|-----------|-------------------------------------------------------------------| +| cublas-function/name_${function_name}_${parameters}_time | time (us) | The mean time to execute the cublas function with the parameters. | ### `cudnn-function` -TODO +#### Introduction + +Measure the performance of most common Nvidia cuDNN functions with parameters in models training including ResNet, VGG, DenseNet, LSTM, BERT, and GPT-2. 
+ +The supported functions for cuDNN are as follows: + - cudnnConvolutionBackwardFilter + - cudnnConvolutionBackwardData + - cudnnConvolutionForward + +#### Metrics + +| Name | Unit | Description | +|---------------------------------------------------------|-----------|------------------------------------------------------------------| +| cudnn-function/name_${function_name}_${parameters}_time | time (us) | The mean time to execute the cudnn function with the parameters. | ### `tensorrt-inference` diff --git a/docs/user-tutorial/container-images.mdx b/docs/user-tutorial/container-images.mdx index d3ea643e..b46ec820 100644 --- a/docs/user-tutorial/container-images.mdx +++ b/docs/user-tutorial/container-images.mdx @@ -29,6 +29,7 @@ available tags are listed below for all stable versions. | Tag | Description | | ----------------- | ---------------------------------- | +| v0.4.0-cuda11.1.1 | SuperBench v0.4.0 with CUDA 11.1.1 | | v0.3.0-cuda11.1.1 | SuperBench v0.3.0 with CUDA 11.1.1 | | v0.2.1-cuda11.1.1 | SuperBench v0.2.1 with CUDA 11.1.1 | | v0.2.0-cuda11.1.1 | SuperBench v0.2.0 with CUDA 11.1.1 | @@ -38,6 +39,8 @@ available tags are listed below for all stable versions. 
| Tag | Description | | --------------------------- | ---------------------------------------------- | +| v0.4.0-rocm4.2-pytorch1.7.0 | SuperBench v0.4.0 with ROCm 4.2, PyTorch 1.7.0 | +| v0.4.0-rocm4.0-pytorch1.7.0 | SuperBench v0.4.0 with ROCm 4.0, PyTorch 1.7.0 | | v0.3.0-rocm4.2-pytorch1.7.0 | SuperBench v0.3.0 with ROCm 4.2, PyTorch 1.7.0 | | v0.3.0-rocm4.0-pytorch1.7.0 | SuperBench v0.3.0 with ROCm 4.0, PyTorch 1.7.0 | diff --git a/docs/user-tutorial/data-diagnosis.md b/docs/user-tutorial/data-diagnosis.md index afa969a7..a9768405 100644 --- a/docs/user-tutorial/data-diagnosis.md +++ b/docs/user-tutorial/data-diagnosis.md @@ -64,7 +64,7 @@ superbench: example: ```yaml # SuperBench rules -version: v0.3 +version: v0.4 superbench: rules: failure-rule: diff --git a/setup.py b/setup.py index 1513e620..c636dde9 100644 --- a/setup.py +++ b/setup.py @@ -165,7 +165,7 @@ setup( 'pytest>=6.2.2', 'types-pyyaml', 'vcrpy>=4.1.1', - 'yapf>=0.30.0', + 'yapf==0.31.0', ], 'nvidia': ['py3nvml>=0.2.6'], 'ort': [ diff --git a/superbench/__init__.py b/superbench/__init__.py index 60cedd82..87e8508c 100644 --- a/superbench/__init__.py +++ b/superbench/__init__.py @@ -6,5 +6,5 @@ Provide hardware and software benchmarks for AI systems. 
""" -__version__ = '0.3.0' +__version__ = '0.4.0' __author__ = 'Microsoft' diff --git a/superbench/analyzer/data_diagnosis.py b/superbench/analyzer/data_diagnosis.py index 160a37cb..5c0c8dc0 100644 --- a/superbench/analyzer/data_diagnosis.py +++ b/superbench/analyzer/data_diagnosis.py @@ -5,12 +5,13 @@ import re from typing import Callable +from pathlib import Path import pandas as pd from superbench.common.utils import logger from superbench.analyzer.diagnosis_rule_op import RuleOp, DiagnosisRuleType -import superbench.analyzer.file_handler as file_handler +from superbench.analyzer import file_handler class DataDiagnosis(): @@ -31,10 +32,15 @@ class DataDiagnosis(): """ benchmarks_metrics = {} for metric in metrics_list: - benchmark = metric.split('/')[0] - if benchmark not in benchmarks_metrics: - benchmarks_metrics[benchmark] = set() - benchmarks_metrics[benchmark].add(metric) + if '/' not in metric: + logger.warning( + 'DataDiagnosis: get_metrics_by_benchmarks - {} does not have benchmark_name'.format(metric) + ) + else: + benchmark = metric.split('/')[0] + if benchmark not in benchmarks_metrics: + benchmarks_metrics[benchmark] = set() + benchmarks_metrics[benchmark].add(metric) return benchmarks_metrics def _check_rules(self, rule, name): @@ -133,6 +139,7 @@ class DataDiagnosis(): if re.search(metric_regex, metric): self._sb_rules[rule]['metrics'][metric] = self._get_baseline_of_metric(baseline, metric) self._enable_metrics.append(metric) + self._enable_metrics.sort() except Exception as e: logger.error('DataDiagnosis: get criteria failed - {}'.format(str(e))) return False @@ -171,8 +178,8 @@ class DataDiagnosis(): issue_label = True if issue_label: # Add category information - general_cat_str = ','.join(categories) - details_cat_str = ','.join(details) + general_cat_str = ','.join(sorted(list(categories))) + details_cat_str = ','.join(sorted((details))) details_row = [general_cat_str, details_cat_str] return details_row, summary_data_row @@ -236,15 +243,15 @@ 
class DataDiagnosis(): try: self._raw_data_df = file_handler.read_raw_data(raw_data_file) self._metrics = self._get_metrics_by_benchmarks(list(self._raw_data_df.columns)) - logger.info('DataDiagnosis: Begin to processe {} nodes'.format(len(self._raw_data_df))) + logger.info('DataDiagnosis: Begin to process {} nodes'.format(len(self._raw_data_df))) data_not_accept_df, label_df = self.run_diagnosis_rules(rule_file, baseline_file) logger.info('DataDiagnosis: Processed finished') - outpout_path = '' + output_path = '' if output_format == 'excel': - output_path = output_dir + '/diagnosis_summary.xlsx' - file_handler.output_excel(self._raw_data_df, data_not_accept_df, outpout_path, self._sb_rules) + output_path = str(Path(output_dir) / 'diagnosis_summary.xlsx') + file_handler.output_excel(self._raw_data_df, data_not_accept_df, output_path, self._sb_rules) elif output_format == 'json': - output_path = output_dir + '/diagnosis_summary.jsonl' + output_path = str(Path(output_dir) / 'diagnosis_summary.jsonl') file_handler.output_json_data_not_accept(data_not_accept_df, output_path) else: logger.error('DataDiagnosis: output failed - unsupported output format') diff --git a/superbench/benchmarks/micro_benchmarks/_export_torch_to_onnx.py b/superbench/benchmarks/micro_benchmarks/_export_torch_to_onnx.py index 8d3547ac..cd7c8b13 100644 --- a/superbench/benchmarks/micro_benchmarks/_export_torch_to_onnx.py +++ b/superbench/benchmarks/micro_benchmarks/_export_torch_to_onnx.py @@ -129,10 +129,11 @@ class torch2onnxExporter(): if not self.check_torchvision_model(model_name): return '' file_name = str(self._onnx_model_path / (model_name + '.onnx')) - input_shape = (batch_size, 3, 224, 224) + model = getattr(torchvision.models, model_name)(pretrained=False).eval().cuda() + dummy_input = torch.randn((batch_size, 3, 224, 224), device='cuda') torch.onnx.export( - getattr(torchvision.models, model_name)(pretrained=False).eval().cuda(), - torch.randn(input_shape, device='cuda'), + model, + 
dummy_input, file_name, opset_version=10, operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK, @@ -147,6 +148,10 @@ class torch2onnxExporter(): } }, ) + + del model + del dummy_input + torch.cuda.empty_cache() return file_name def export_benchmark_model(self, model_name, batch_size=1, seq_length=512): @@ -163,13 +168,13 @@ class torch2onnxExporter(): if not self.check_benchmark_model(model_name): return file_name = str(self._onnx_model_path / (model_name + '.onnx')) - input_shape, dtype = (batch_size, seq_length), torch.int64 + model = self.benchmark_models[model_name]().eval().cuda() + dummy_input = torch.ones((batch_size, seq_length), dtype=torch.int64, device='cuda') if model_name == 'lstm': - input_shape += (self.lstm_input_size, ) - dtype = None + dummy_input = torch.ones((batch_size, seq_length, self.lstm_input_size), device='cuda') torch.onnx.export( - self.benchmark_models[model_name]().eval().cuda(), - torch.ones(input_shape, dtype=dtype, device='cuda'), + model, + dummy_input, file_name, opset_version=10, do_constant_folding=True, @@ -185,4 +190,8 @@ class torch2onnxExporter(): } }, ) + + del model + del dummy_input + torch.cuda.empty_cache() return file_name diff --git a/superbench/benchmarks/micro_benchmarks/cublas_function.py b/superbench/benchmarks/micro_benchmarks/cublas_function.py index 2a3414b7..4ceafde7 100644 --- a/superbench/benchmarks/micro_benchmarks/cublas_function.py +++ b/superbench/benchmarks/micro_benchmarks/cublas_function.py @@ -291,8 +291,8 @@ class CublasBenchmark(MicroBenchmarkWithInvoke): raw_data = raw_data.split(',') raw_data.pop() raw_data = [float(item) for item in raw_data] - self._result.add_result(metric, statistics.mean(raw_data)) - self._result.add_raw_data(metric, raw_data) + self._result.add_result(metric.lower() + '_time', statistics.mean(raw_data)) + self._result.add_raw_data(metric.lower() + '_time', raw_data) if 'Error' in line: error = True except BaseException as e: diff --git 
a/superbench/benchmarks/micro_benchmarks/cudnn_function.py b/superbench/benchmarks/micro_benchmarks/cudnn_function.py index a5b1f7b4..426b04f9 100644 --- a/superbench/benchmarks/micro_benchmarks/cudnn_function.py +++ b/superbench/benchmarks/micro_benchmarks/cudnn_function.py @@ -6,6 +6,7 @@ import os import json import yaml +import statistics from superbench.common.utils import logger from superbench.benchmarks import Platform, BenchmarkRegistry, ReturnCode @@ -424,8 +425,8 @@ class CudnnBenchmark(MicroBenchmarkWithInvoke): raw_data = raw_data.split(',') raw_data.pop() raw_data = [float(item) for item in raw_data] - self._result.add_result(metric, sum(raw_data) / len(raw_data)) - self._result.add_raw_data(metric, raw_data) + self._result.add_result(metric.lower() + '_time', statistics.mean(raw_data) * 1000) + self._result.add_raw_data(metric.lower() + '_time', raw_data) if 'Error' in line: error = True except BaseException as e: diff --git a/superbench/benchmarks/micro_benchmarks/ib_validation_performance.py b/superbench/benchmarks/micro_benchmarks/ib_validation_performance.py index e2a17f34..b3f5e397 100644 --- a/superbench/benchmarks/micro_benchmarks/ib_validation_performance.py +++ b/superbench/benchmarks/micro_benchmarks/ib_validation_performance.py @@ -249,7 +249,7 @@ class IBBenchmark(MicroBenchmarkWithInvoke): msg_size = '-s ' + str(self._args.msg_size) # Add GPUDirect for ib command gpu_enable = '' - if self._args.gpu_index: + if self._args.gpu_index is not None: gpu = GPU() if gpu.vendor == 'nvidia': gpu_enable = ' --use_cuda={gpu_index}'.format(gpu_index=str(self._args.gpu_index)) diff --git a/superbench/config/amd_mi100_hpe.yaml b/superbench/config/amd_mi100_hpe.yaml index f5ddb6cd..202d7dbb 100644 --- a/superbench/config/amd_mi100_hpe.yaml +++ b/superbench/config/amd_mi100_hpe.yaml @@ -3,7 +3,7 @@ # Server: # - Product: HPE Apollo 6500 -version: v0.3 +version: v0.4 superbench: enable: null var: @@ -99,9 +99,31 @@ superbench: copy_type: - sm - dma - 
ort-inference: - <<: *default_local_mode + ib-traffic: enable: false + modes: + - name: mpi + proc_num: 1 + mca: + btl: tcp,self + pml: ob1 + btl_tcp_if_include: ens17f0 + gpcnet-network-test: + enable: false + modes: + - name: mpi + proc_num: 1 + mca: + pml: ucx + btl: ^uct + btl_tcp_if_include: ens17f0 + tcp-connectivity: + enable: false + modes: + - name: local + parallel: no + parameters: + port: 22 ort-models: enable: false modes: diff --git a/superbench/config/amd_mi100_z53.yaml b/superbench/config/amd_mi100_z53.yaml index 8101d74a..0c079a2f 100644 --- a/superbench/config/amd_mi100_z53.yaml +++ b/superbench/config/amd_mi100_z53.yaml @@ -4,7 +4,7 @@ # - Product: G482-Z53 # - Link: https://www.gigabyte.cn/FileUpload/Global/MicroSite/553/G482-Z53.html -version: v0.3 +version: v0.4 superbench: enable: null var: diff --git a/superbench/config/azure_ndmv4.yaml b/superbench/config/azure_ndmv4.yaml index 4b2cca60..5f70c958 100644 --- a/superbench/config/azure_ndmv4.yaml +++ b/superbench/config/azure_ndmv4.yaml @@ -3,9 +3,13 @@ # Azure NDm A100 v4 # reference: https://docs.microsoft.com/en-us/azure/virtual-machines/ndm-a100-v4-series -version: v0.3 +version: v0.4 superbench: enable: null + monitor: + enable: true + sample_duration: 1 + sample_interval: 10 var: default_local_mode: &default_local_mode enable: true @@ -123,6 +127,52 @@ superbench: <<: *default_pytorch_mode computation-communication-overlap: <<: *default_pytorch_mode + ib-traffic: + enable: false + modes: + - name: mpi + proc_num: 1 + gpcnet-network-test: + enable: false + modes: + - name: mpi + proc_num: 1 + mca: + pml: ucx + btl: ^uct + btl_tcp_if_include: eth0 + gpcnet-network-load-test: + enable: false + modes: + - name: mpi + proc_num: 1 + mca: + pml: ucx + btl: ^uct + btl_tcp_if_include: eth0 + tcp-connectivity: + enable: false + modes: + - name: local + parallel: no + parameters: + port: 22 + ort-inference: + <<: *default_local_mode + tensorrt-inference: + <<: *default_local_mode + parameters: + 
pytorch_models: + - resnet50 + - resnet101 + - resnet152 + - densenet169 + - densenet201 + - bert-base + - bert-large + seq_length: 224 + batch_size: 32 + precision: int8 gpt_models: <<: *default_pytorch_mode models: diff --git a/superbench/config/azure_ndv4.yaml b/superbench/config/azure_ndv4.yaml index 97ca668d..53acd3af 100644 --- a/superbench/config/azure_ndv4.yaml +++ b/superbench/config/azure_ndv4.yaml @@ -1,9 +1,9 @@ # SuperBench Config -version: v0.3 +version: v0.4 superbench: enable: null monitor: - enable: false + enable: true sample_duration: 1 sample_interval: 10 var: @@ -109,9 +109,52 @@ superbench: <<: *default_pytorch_mode computation-communication-overlap: <<: *default_pytorch_mode + ib-traffic: + enable: false + modes: + - name: mpi + proc_num: 1 + gpcnet-network-test: + enable: false + modes: + - name: mpi + proc_num: 1 + mca: + pml: ucx + btl: ^uct + btl_tcp_if_include: eth0 + gpcnet-network-load-test: + enable: false + modes: + - name: mpi + proc_num: 1 + mca: + pml: ucx + btl: ^uct + btl_tcp_if_include: eth0 + tcp-connectivity: + enable: false + modes: + - name: local + parallel: no + parameters: + port: 22 ort-inference: <<: *default_local_mode - enable: false + tensorrt-inference: + <<: *default_local_mode + parameters: + pytorch_models: + - resnet50 + - resnet101 + - resnet152 + - densenet169 + - densenet201 + - bert-base + - bert-large + seq_length: 224 + batch_size: 32 + precision: int8 gpt_models: <<: *default_pytorch_mode models: diff --git a/superbench/config/default.yaml b/superbench/config/default.yaml index d2cdf74e..d0da30e1 100644 --- a/superbench/config/default.yaml +++ b/superbench/config/default.yaml @@ -1,9 +1,9 @@ # SuperBench Config -version: v0.3 +version: v0.4 superbench: enable: null monitor: - enable: false + enable: true sample_duration: 1 sample_interval: 10 var: @@ -107,9 +107,56 @@ superbench: <<: *default_pytorch_mode computation-communication-overlap: <<: *default_pytorch_mode + ib-traffic: + enable: false + modes: 
+ - name: mpi + proc_num: 1 + gpcnet-network-test: + enable: false + modes: + - name: mpi + proc_num: 1 + mca: + pml: ucx + btl: ^uct + btl_tcp_if_include: eth0 + env: + UCX_NET_DEVICES: mlx5_0:1 + gpcnet-network-load-test: + enable: false + modes: + - name: mpi + proc_num: 1 + mca: + pml: ucx + btl: ^uct + btl_tcp_if_include: eth0 + env: + UCX_NET_DEVICES: mlx5_0:1 + tcp-connectivity: + enable: false + modes: + - name: local + parallel: no + parameters: + port: 22 ort-inference: <<: *default_local_mode - enable: false + tensorrt-inference: + <<: *default_local_mode + parameters: + pytorch_models: + - resnet50 + - resnet101 + - resnet152 + - densenet169 + - densenet201 + - bert-base + - bert-large + seq_length: 224 + batch_size: 32 + precision: int8 gpt_models: <<: *default_pytorch_mode models: diff --git a/superbench/runner/ansible.py b/superbench/runner/ansible.py index 15ddb48d..f9f7b0fe 100644 --- a/superbench/runner/ansible.py +++ b/superbench/runner/ansible.py @@ -3,6 +3,7 @@ """SuperBench Ansible Client.""" +import tempfile from pathlib import Path import ansible_runner @@ -22,10 +23,10 @@ class AnsibleClient(): """ self._playbook_path = Path(__file__).parent / 'playbooks' self._config = { - 'private_data_dir': None, 'host_pattern': 'localhost', 'cmdline': '--forks 128', } + self._head_host = None if config: inventory_file = getattr(config, 'host_file', None) inventory_list = getattr(config, 'host_list', None) @@ -34,9 +35,10 @@ class AnsibleClient(): if inventory_file or inventory_list: self._config['host_pattern'] = 'all' inventory = InventoryManager(loader=DataLoader(), sources=inventory_file or f'{inventory_list},') - host_list = inventory.get_groups_dict()['all'] + host_list = inventory.get_hosts(pattern='all', order='sorted') if len(host_list) > 0: self._config['cmdline'] = '--forks {}'.format(len(host_list)) + self._head_host = host_list[0].get_name() if inventory_list in ['localhost', '127.0.0.1']: self._config['cmdline'] += ' --connection local' 
self._config['cmdline'] += ' --inventory {}'.format(inventory_file or f'{inventory_list},') @@ -69,12 +71,13 @@ class AnsibleClient(): if sudo: logger.info('Run as sudo ...') ansible_config['cmdline'] += ' --become' - r = ansible_runner.run(**ansible_config) + with tempfile.TemporaryDirectory(prefix='ansible') as tmpdir: + r = ansible_runner.run(private_data_dir=tmpdir, **ansible_config) + logger.debug(r.stats) if r.rc == 0: logger.info('Run succeed, return code {}.'.format(r.rc)) else: logger.warning('Run failed, return code {}.'.format(r.rc)) - logger.debug(r.stats) return r.rc def update_mpi_config(self, ansible_config): @@ -86,7 +89,10 @@ class AnsibleClient(): Returns: dict: Updated Ansible config dict. """ - ansible_config['host_pattern'] += '[0]' + if not self._head_host: + ansible_config['host_pattern'] += '[0]' + else: + ansible_config['host_pattern'] = self._head_host return ansible_config def get_shell_config(self, cmd): diff --git a/superbench/runner/playbooks/fetch_results.yaml b/superbench/runner/playbooks/fetch_results.yaml index d1661fab..74f29c3d 100644 --- a/superbench/runner/playbooks/fetch_results.yaml +++ b/superbench/runner/playbooks/fetch_results.yaml @@ -1,11 +1,13 @@ - name: Fetch Results hosts: all gather_facts: true + vars: + workspace: '{{ ansible_user_dir }}/sb-workspace' tasks: - name: Synchronize Output Directory ansible.posix.synchronize: mode: pull - src: '{{ sb_output_dir }}/' + src: '{{ sb_output_dir if sb_output_dir.startswith("/") else workspace + "/" + sb_output_dir }}/' dest: '{{ absolute_output_dir }}/nodes/{{ ansible_hostname }}' rsync_opts: - --exclude=nodes diff --git a/superbench/runner/runner.py b/superbench/runner/runner.py index c8e7a0d3..d6fc63d9 100644 --- a/superbench/runner/runner.py +++ b/superbench/runner/runner.py @@ -39,7 +39,7 @@ class SuperBenchRunner(): self._ansible_client = AnsibleClient(ansible_config) self.__set_logger('sb-run.log') - logger.info('Runner uses config: %s.', pformat(self._sb_config)) + 
logger.info('Runner uses config: %s.', pformat(OmegaConf.to_container(self._sb_config, resolve=True))) logger.info('Runner writes to: %s.', str(self._output_path)) self._sb_benchmarks = self._sb_config.superbench.benchmarks @@ -336,7 +336,8 @@ class SuperBenchRunner(): for pattern, reduce_type in MonitorRecord.reduce_ops.items(): if pattern in metric: reduce_func = Reducer.get_reduce_func(reduce_type) - metrics_summary[metric] = reduce_func(values) + metric_name = 'monitor/{}'.format(metric) + metrics_summary[metric_name] = reduce_func(values) continue return metrics_summary diff --git a/tests/analylzer/test_data_analysis.py b/tests/analyzer/test_data_analysis.py similarity index 100% rename from tests/analylzer/test_data_analysis.py rename to tests/analyzer/test_data_analysis.py diff --git a/tests/analyzer/test_data_diagnosis.py b/tests/analyzer/test_data_diagnosis.py index 3ee2b181..0468732e 100644 --- a/tests/analyzer/test_data_diagnosis.py +++ b/tests/analyzer/test_data_diagnosis.py @@ -18,9 +18,10 @@ class TestDataDiagnosis(unittest.TestCase): """Test for DataDiagnosis class.""" def setUp(self): """Method called to prepare the test fixture.""" - self.output_excel_file = str(Path(__file__).parent.resolve()) + '/diagnosis_summary.xlsx' - self.test_rule_file_fake = str(Path(__file__).parent.resolve()) + '/test_rules_fake.yaml' - self.output_json_file = str(Path(__file__).parent.resolve()) + '/diagnosis_summary.jsonl' + self.parent_path = Path(__file__).parent + self.output_excel_file = str(self.parent_path / 'diagnosis_summary.xlsx') + self.test_rule_file_fake = str(self.parent_path / 'test_rules_fake.yaml') + self.output_json_file = str(self.parent_path / 'diagnosis_summary.jsonl') def tearDown(self): """Method called after the test method has been called and the result recorded.""" @@ -33,21 +34,31 @@ class TestDataDiagnosis(unittest.TestCase): """Test for rule-based data diagnosis.""" # Test - read_raw_data and get_metrics_from_raw_data # Positive case - 
test_raw_data = str(Path(__file__).parent.resolve()) + '/test_results.jsonl' - test_rule_file = str(Path(__file__).parent.resolve()) + '/test_rules.yaml' - test_baseline_file = str(Path(__file__).parent.resolve()) + '/test_baseline.json' + test_raw_data = str(self.parent_path / 'test_results.jsonl') + test_rule_file = str(self.parent_path / 'test_rules.yaml') + test_baseline_file = str(self.parent_path / 'test_baseline.json') diag1 = DataDiagnosis() diag1._raw_data_df = file_handler.read_raw_data(test_raw_data) diag1._metrics = diag1._get_metrics_by_benchmarks(list(diag1._raw_data_df)) assert (len(diag1._raw_data_df) == 3) # Negative case - test_raw_data_fake = str(Path(__file__).parent.resolve()) + '/test_results_fake.jsonl' - test_rule_file_fake = str(Path(__file__).parent.resolve()) + '/test_rules_fake.yaml' + test_raw_data_fake = str(self.parent_path / 'test_results_fake.jsonl') + test_rule_file_fake = str(self.parent_path / 'test_rules_fake.yaml') diag2 = DataDiagnosis() diag2._raw_data_df = file_handler.read_raw_data(test_raw_data_fake) diag2._metrics = diag2._get_metrics_by_benchmarks(list(diag2._raw_data_df)) assert (len(diag2._raw_data_df) == 0) assert (len(diag2._metrics) == 0) + metric_list = [ + 'gpu_temperature', 'gpu_power_limit', 'gemm-flops/FP64', + 'bert_models/pytorch-bert-base/steptime_train_float32' + ] + self.assertDictEqual( + diag2._get_metrics_by_benchmarks(metric_list), { + 'gemm-flops': {'gemm-flops/FP64'}, + 'bert_models': {'bert_models/pytorch-bert-base/steptime_train_float32'} + } + ) # Test - read rules rules = file_handler.read_rules(test_rule_file_fake) assert (not rules) @@ -176,3 +187,27 @@ class TestDataDiagnosis(unittest.TestCase): assert ('Category' in line) assert ('Defective Details' in line) assert ('Index' in line) + + def test_data_diagnosis_run(self): + """Test for the run process of rule-based data diagnosis.""" + test_raw_data = str(self.parent_path / 'test_results.jsonl') + test_rule_file = str(self.parent_path / 
'test_rules.yaml') + test_baseline_file = str(self.parent_path / 'test_baseline.json') + + # Test - output in excel + DataDiagnosis().run(test_raw_data, test_rule_file, test_baseline_file, str(self.parent_path), 'excel') + excel_file = pd.ExcelFile(self.output_excel_file, engine='openpyxl') + data_sheet_name = 'Not Accept' + data_not_accept_read_from_excel = excel_file.parse(data_sheet_name) + expect_result_file = pd.ExcelFile(str(self.parent_path / '../data/diagnosis_summary.xlsx'), engine='openpyxl') + expect_result = expect_result_file.parse(data_sheet_name) + pd.util.testing.assert_frame_equal(data_not_accept_read_from_excel, expect_result) + # Test - output in json + DataDiagnosis().run(test_raw_data, test_rule_file, test_baseline_file, str(self.parent_path), 'json') + assert (Path(self.output_json_file).is_file()) + with Path(self.output_json_file).open() as f: + data_not_accept_read_from_json = f.read() + expect_result_file = self.parent_path / '../data/diagnosis_summary.jsonl' + with Path(expect_result_file).open() as f: + expect_result = f.read() + assert (data_not_accept_read_from_json == expect_result) diff --git a/tests/analyzer/test_rules.yaml b/tests/analyzer/test_rules.yaml index 97e8bc5c..e1a01ec1 100644 --- a/tests/analyzer/test_rules.yaml +++ b/tests/analyzer/test_rules.yaml @@ -1,5 +1,5 @@ # SuperBench rules -version: v0.3 +version: v0.4 superbench: rules: rule0: diff --git a/tests/ansible/tests/test_deploy.yaml b/tests/ansible/tests/test_deploy.yaml index 1284ed08..84d9e01a 100644 --- a/tests/ansible/tests/test_deploy.yaml +++ b/tests/ansible/tests/test_deploy.yaml @@ -14,4 +14,5 @@ vars: ssh_port: 12345 output_dir: /tmp/test_ansible - docker_image: superbench/superbench + # use a mock superbench image (requires `sb` binary inside) + docker_image: superbench/superbench:v0.3.0-cuda11.1.1 diff --git a/tests/benchmarks/micro_benchmarks/test_cpu_memory_bw_latency_performance.py 
b/tests/benchmarks/micro_benchmarks/test_cpu_memory_bw_latency_performance.py index ca97fcec..0a2d2a21 100644 --- a/tests/benchmarks/micro_benchmarks/test_cpu_memory_bw_latency_performance.py +++ b/tests/benchmarks/micro_benchmarks/test_cpu_memory_bw_latency_performance.py @@ -3,29 +3,20 @@ """Tests for cpu-memory-bw-latency benchmark.""" -from pathlib import Path -import os import unittest +from tests.helper.testcase import BenchmarkTestCase from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform -class CpuMemBwLatencyBenchmarkTest(unittest.TestCase): +class CpuMemBwLatencyBenchmarkTest(BenchmarkTestCase, unittest.TestCase): """Test class for cpu-memory-bw-latency benchmark.""" - def setUp(self): - """Method called to prepare the test fixture.""" - # Create fake binary file just for testing. - self.__curr_micro_path = os.environ.get('SB_MICRO_PATH', '') - os.environ['SB_MICRO_PATH'] = '/tmp/superbench/' - binary_path = Path(os.getenv('SB_MICRO_PATH'), 'bin') - binary_path.mkdir(parents=True, exist_ok=True) - self.__binary_file = binary_path / 'mlc' - self.__binary_file.touch(mode=0o755, exist_ok=True) - - def tearDown(self): - """Method called after the test method has been called and the result recorded.""" - self.__binary_file.unlink() - os.environ['SB_MICRO_PATH'] = self.__curr_micro_path + @classmethod + def setUpClass(cls): + """Hook method for setting up class fixture before running tests in the class.""" + super().setUpClass() + cls.createMockEnvs(cls) + cls.createMockFiles(cls, ['bin/mlc']) def test_cpu_mem_bw_latency_benchmark_empty_param(self): """Test cpu-memory-bw-latency benchmark command generation with empty parameter.""" diff --git a/tests/benchmarks/micro_benchmarks/test_cuda_gemm_flops_performance.py b/tests/benchmarks/micro_benchmarks/test_cuda_gemm_flops_performance.py index caf38709..71ed3a5d 100644 --- a/tests/benchmarks/micro_benchmarks/test_cuda_gemm_flops_performance.py +++ 
b/tests/benchmarks/micro_benchmarks/test_cuda_gemm_flops_performance.py @@ -3,29 +3,22 @@ """Tests for gemm-flops benchmark.""" -import os import unittest -from pathlib import Path from tests.helper import decorator +from tests.helper.testcase import BenchmarkTestCase from superbench.common.utils import device_manager as dm from superbench.benchmarks import BenchmarkRegistry, ReturnCode, Platform, BenchmarkType -class CudaGemmFlopsBenchmarkTest(unittest.TestCase): +class CudaGemmFlopsBenchmarkTest(BenchmarkTestCase, unittest.TestCase): """Tests for CudaGemmFlopsBenchmark benchmark.""" - def setUp(self): - """Method called to prepare the test fixture.""" - # Create fake binary file just for testing. - os.environ['SB_MICRO_PATH'] = '/tmp/superbench/' - binary_path = os.path.join(os.getenv('SB_MICRO_PATH'), 'bin') - Path(binary_path).mkdir(parents=True, exist_ok=True) - self.__binary_file = Path(os.path.join(binary_path, 'cutlass_profiler')) - self.__binary_file.touch(mode=0o755, exist_ok=True) - - def tearDown(self): - """Method called after the test method has been called and the result recorded.""" - self.__binary_file.unlink() + @classmethod + def setUpClass(cls): + """Hook method for setting up class fixture before running tests in the class.""" + super().setUpClass() + cls.createMockEnvs(cls) + cls.createMockFiles(cls, ['bin/cutlass_profiler']) @decorator.cuda_test def test_flops_performance_cuda(self): diff --git a/tests/benchmarks/micro_benchmarks/test_cuda_memory_bw_performance.py b/tests/benchmarks/micro_benchmarks/test_cuda_memory_bw_performance.py index d72070e1..d38766bd 100644 --- a/tests/benchmarks/micro_benchmarks/test_cuda_memory_bw_performance.py +++ b/tests/benchmarks/micro_benchmarks/test_cuda_memory_bw_performance.py @@ -4,29 +4,26 @@ """Tests for mem-bw benchmark.""" import numbers -from pathlib import Path -import os import unittest +from tests.helper import decorator +from tests.helper.testcase import BenchmarkTestCase from 
superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform -class CudaMemBwTest(unittest.TestCase): +class CudaMemBwTest(BenchmarkTestCase, unittest.TestCase): """Test class for cuda mem-bw benchmark.""" - def setUp(self): - """Method called to prepare the test fixture.""" - # Create fake binary file just for testing. - os.environ['SB_MICRO_PATH'] = '/tmp/superbench/' - binary_path = os.path.join(os.getenv('SB_MICRO_PATH'), 'bin') - Path(os.getenv('SB_MICRO_PATH'), 'bin').mkdir(parents=True, exist_ok=True) - self.__binary_file = Path(binary_path, 'bandwidthTest') - self.__binary_file.touch(mode=0o755, exist_ok=True) + @classmethod + def setUpClass(cls): + """Hook method for setting up class fixture before running tests in the class.""" + super().setUpClass() + cls.createMockEnvs(cls) + cls.createMockFiles(cls, ['bin/bandwidthTest']) - def tearDown(self): - """Method called after the test method has been called and the result recorded.""" - self.__binary_file.unlink() - - def test_cuda_memory_bw_performance(self): + @decorator.load_data('tests/data/cuda_memory_h2d_bw.log') + @decorator.load_data('tests/data/cuda_memory_d2h_bw.log') + @decorator.load_data('tests/data/cuda_memory_d2d_bw.log') + def test_cuda_memory_bw_performance(self, raw_output_h2d, raw_output_d2h, raw_output_d2d): """Test cuda mem-bw benchmark.""" benchmark_name = 'mem-bw' (benchmark_class, @@ -54,280 +51,7 @@ class CudaMemBwTest(unittest.TestCase): assert (command == expected_command[i]) # Check results and metrics. - raw_output = {} - raw_output[0] = """ -[CUDA Bandwidth Test] - Starting... -Running on... - - Device 0: Tesla V100-PCIE-32GB - Shmoo Mode - -................................................................................. 
-bandwidthTest-H2D-Pinned, Bandwidth = 0.4 GB/s, Time = 0.00000 s, Size = 1000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 0.7 GB/s, Time = 0.00000 s, Size = 2000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 1.0 GB/s, Time = 0.00000 s, Size = 3000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 1.4 GB/s, Time = 0.00000 s, Size = 4000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 1.7 GB/s, Time = 0.00000 s, Size = 5000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 2.0 GB/s, Time = 0.00000 s, Size = 6000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 2.3 GB/s, Time = 0.00000 s, Size = 7000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 2.5 GB/s, Time = 0.00000 s, Size = 8000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 2.7 GB/s, Time = 0.00000 s, Size = 9000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 2.9 GB/s, Time = 0.00000 s, Size = 10000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 3.2 GB/s, Time = 0.00000 s, Size = 11000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 3.4 GB/s, Time = 0.00000 s, Size = 12000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 3.5 GB/s, Time = 0.00000 s, Size = 13000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 3.5 GB/s, Time = 0.00000 s, Size = 14000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 3.8 GB/s, Time = 0.00000 s, Size = 15000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 4.0 GB/s, Time = 0.00000 s, Size = 16000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 4.1 GB/s, Time = 0.00000 s, Size = 17000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 4.3 GB/s, Time = 0.00000 s, Size = 18000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 4.4 GB/s, Time = 0.00000 s, Size = 19000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 4.6 GB/s, 
Time = 0.00000 s, Size = 20000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 4.8 GB/s, Time = 0.00000 s, Size = 22000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 5.0 GB/s, Time = 0.00000 s, Size = 24000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 5.2 GB/s, Time = 0.00000 s, Size = 26000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 5.4 GB/s, Time = 0.00001 s, Size = 28000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 5.7 GB/s, Time = 0.00001 s, Size = 30000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 5.9 GB/s, Time = 0.00001 s, Size = 32000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 6.1 GB/s, Time = 0.00001 s, Size = 34000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 6.3 GB/s, Time = 0.00001 s, Size = 36000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 6.4 GB/s, Time = 0.00001 s, Size = 38000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 6.6 GB/s, Time = 0.00001 s, Size = 40000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 6.7 GB/s, Time = 0.00001 s, Size = 42000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 6.9 GB/s, Time = 0.00001 s, Size = 44000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 7.0 GB/s, Time = 0.00001 s, Size = 46000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 7.1 GB/s, Time = 0.00001 s, Size = 48000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 7.3 GB/s, Time = 0.00001 s, Size = 50000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 7.8 GB/s, Time = 0.00001 s, Size = 60000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 8.2 GB/s, Time = 0.00001 s, Size = 70000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 8.6 GB/s, Time = 0.00001 s, Size = 80000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 8.9 GB/s, Time = 0.00001 s, Size = 90000 bytes, 
NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 9.2 GB/s, Time = 0.00001 s, Size = 100000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 10.5 GB/s, Time = 0.00002 s, Size = 200000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 11.1 GB/s, Time = 0.00003 s, Size = 300000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 11.4 GB/s, Time = 0.00004 s, Size = 400000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 11.6 GB/s, Time = 0.00004 s, Size = 500000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 11.7 GB/s, Time = 0.00005 s, Size = 600000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 11.8 GB/s, Time = 0.00006 s, Size = 700000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 11.9 GB/s, Time = 0.00007 s, Size = 800000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 11.9 GB/s, Time = 0.00008 s, Size = 900000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 11.7 GB/s, Time = 0.00009 s, Size = 1000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.1 GB/s, Time = 0.00016 s, Size = 2000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.3 GB/s, Time = 0.00024 s, Size = 3000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.3 GB/s, Time = 0.00033 s, Size = 4000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 11.5 GB/s, Time = 0.00043 s, Size = 5000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.3 GB/s, Time = 0.00049 s, Size = 6000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.3 GB/s, Time = 0.00057 s, Size = 7000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.3 GB/s, Time = 0.00065 s, Size = 8000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.3 GB/s, Time = 0.00073 s, Size = 9000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00081 s, Size = 10000000 
bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00089 s, Size = 11000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00097 s, Size = 12000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00105 s, Size = 13000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00113 s, Size = 14000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00121 s, Size = 15000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00129 s, Size = 16000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00145 s, Size = 18000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00162 s, Size = 20000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00178 s, Size = 22000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00194 s, Size = 24000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00210 s, Size = 26000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00226 s, Size = 28000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00242 s, Size = 30000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 10.5 GB/s, Time = 0.00304 s, Size = 32000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.2 GB/s, Time = 0.00295 s, Size = 36000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 10.8 GB/s, Time = 0.00369 s, Size = 40000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00355 s, Size = 44000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00387 s, Size = 48000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.1 GB/s, 
Time = 0.00431 s, Size = 52000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 11.7 GB/s, Time = 0.00480 s, Size = 56000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00484 s, Size = 60000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.1 GB/s, Time = 0.00528 s, Size = 64000000 bytes, NumDevsUsed = 1 -bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00549 s, Size = 68000000 bytes, NumDevsUsed = 1 -Result = PASS - """ - raw_output[1] = """ -[CUDA Bandwidth Test] - Starting... -Running on... - - Device 0: Tesla V100-PCIE-32GB - Shmoo Mode - -................................................................................. -bandwidthTest-D2H-Pinned, Bandwidth = 0.4 GB/s, Time = 0.00000 s, Size = 1000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 0.5 GB/s, Time = 0.00000 s, Size = 2000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 0.9 GB/s, Time = 0.00000 s, Size = 3000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 1.1 GB/s, Time = 0.00000 s, Size = 4000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 1.4 GB/s, Time = 0.00000 s, Size = 5000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 1.9 GB/s, Time = 0.00000 s, Size = 6000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 2.6 GB/s, Time = 0.00000 s, Size = 7000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 2.9 GB/s, Time = 0.00000 s, Size = 8000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 3.3 GB/s, Time = 0.00000 s, Size = 9000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 3.7 GB/s, Time = 0.00000 s, Size = 10000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 4.0 GB/s, Time = 0.00000 s, Size = 11000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 4.5 GB/s, Time = 0.00000 s, Size = 12000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 4.9 GB/s, Time = 
0.00000 s, Size = 13000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 5.3 GB/s, Time = 0.00000 s, Size = 14000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 5.3 GB/s, Time = 0.00000 s, Size = 15000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 5.6 GB/s, Time = 0.00000 s, Size = 16000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 5.7 GB/s, Time = 0.00000 s, Size = 17000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 6.0 GB/s, Time = 0.00000 s, Size = 18000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 6.2 GB/s, Time = 0.00000 s, Size = 19000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 6.3 GB/s, Time = 0.00000 s, Size = 20000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 6.5 GB/s, Time = 0.00000 s, Size = 22000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 6.9 GB/s, Time = 0.00000 s, Size = 24000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 7.1 GB/s, Time = 0.00000 s, Size = 26000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 7.4 GB/s, Time = 0.00000 s, Size = 28000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 7.6 GB/s, Time = 0.00000 s, Size = 30000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 7.9 GB/s, Time = 0.00000 s, Size = 32000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 8.0 GB/s, Time = 0.00000 s, Size = 34000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 8.3 GB/s, Time = 0.00000 s, Size = 36000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 8.5 GB/s, Time = 0.00000 s, Size = 38000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 8.6 GB/s, Time = 0.00000 s, Size = 40000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 8.7 GB/s, Time = 0.00000 s, Size = 42000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 9.3 GB/s, Time = 0.00000 s, Size = 44000 bytes, NumDevsUsed 
= 1 -bandwidthTest-D2H-Pinned, Bandwidth = 9.4 GB/s, Time = 0.00000 s, Size = 46000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 9.5 GB/s, Time = 0.00001 s, Size = 48000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 9.5 GB/s, Time = 0.00001 s, Size = 50000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 10.1 GB/s, Time = 0.00001 s, Size = 60000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 10.4 GB/s, Time = 0.00001 s, Size = 70000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 10.6 GB/s, Time = 0.00001 s, Size = 80000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 10.9 GB/s, Time = 0.00001 s, Size = 90000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 11.1 GB/s, Time = 0.00001 s, Size = 100000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 12.0 GB/s, Time = 0.00002 s, Size = 200000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00002 s, Size = 300000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 12.6 GB/s, Time = 0.00003 s, Size = 400000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 12.6 GB/s, Time = 0.00004 s, Size = 500000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 12.7 GB/s, Time = 0.00005 s, Size = 600000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 12.7 GB/s, Time = 0.00006 s, Size = 700000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 12.8 GB/s, Time = 0.00006 s, Size = 800000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 12.9 GB/s, Time = 0.00007 s, Size = 900000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 12.8 GB/s, Time = 0.00008 s, Size = 1000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.0 GB/s, Time = 0.00015 s, Size = 2000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.0 GB/s, Time = 0.00023 s, Size = 3000000 bytes, NumDevsUsed = 1 
-bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00031 s, Size = 4000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00038 s, Size = 5000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00046 s, Size = 6000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00053 s, Size = 7000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00061 s, Size = 8000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 12.5 GB/s, Time = 0.00072 s, Size = 9000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00076 s, Size = 10000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00084 s, Size = 11000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00091 s, Size = 12000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00099 s, Size = 13000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00106 s, Size = 14000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00114 s, Size = 15000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00122 s, Size = 16000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00137 s, Size = 18000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00152 s, Size = 20000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00167 s, Size = 22000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00183 s, Size = 24000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 12.9 GB/s, Time = 0.00202 s, Size = 26000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00213 s, Size = 
28000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00228 s, Size = 30000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00243 s, Size = 32000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00273 s, Size = 36000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00304 s, Size = 40000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00334 s, Size = 44000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00364 s, Size = 48000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00395 s, Size = 52000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00425 s, Size = 56000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00455 s, Size = 60000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00487 s, Size = 64000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00520 s, Size = 68000000 bytes, NumDevsUsed = 1 -Result = PASS - """ - raw_output[2] = """ -[CUDA Bandwidth Test] - Starting... -Running on... - - Device 0: Tesla V100-PCIE-32GB - Shmoo Mode - -................................................................................. 
-bandwidthTest-D2D, Bandwidth = 0.4 GB/s, Time = 0.00000 s, Size = 1000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 0.1 GB/s, Time = 0.00004 s, Size = 2000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 0.8 GB/s, Time = 0.00000 s, Size = 3000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 1.2 GB/s, Time = 0.00000 s, Size = 4000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 0.4 GB/s, Time = 0.00001 s, Size = 5000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 1.7 GB/s, Time = 0.00000 s, Size = 6000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 7.0 GB/s, Time = 0.00000 s, Size = 7000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 8.0 GB/s, Time = 0.00000 s, Size = 8000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 9.0 GB/s, Time = 0.00000 s, Size = 9000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 10.0 GB/s, Time = 0.00000 s, Size = 10000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 6.1 GB/s, Time = 0.00000 s, Size = 11000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 12.0 GB/s, Time = 0.00000 s, Size = 12000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 13.1 GB/s, Time = 0.00000 s, Size = 13000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 5.3 GB/s, Time = 0.00000 s, Size = 14000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 8.0 GB/s, Time = 0.00000 s, Size = 15000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 8.9 GB/s, Time = 0.00000 s, Size = 16000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 9.5 GB/s, Time = 0.00000 s, Size = 17000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 9.8 GB/s, Time = 0.00000 s, Size = 18000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 19.0 GB/s, Time = 0.00000 s, Size = 19000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 5.3 GB/s, Time = 0.00000 s, Size = 20000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 22.0 GB/s, Time = 0.00000 s, Size = 22000 bytes, 
NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 6.3 GB/s, Time = 0.00000 s, Size = 24000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 0.7 GB/s, Time = 0.00004 s, Size = 26000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 28.1 GB/s, Time = 0.00000 s, Size = 28000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 30.1 GB/s, Time = 0.00000 s, Size = 30000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 32.0 GB/s, Time = 0.00000 s, Size = 32000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 14.6 GB/s, Time = 0.00000 s, Size = 34000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 20.9 GB/s, Time = 0.00000 s, Size = 36000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 22.7 GB/s, Time = 0.00000 s, Size = 38000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 23.5 GB/s, Time = 0.00000 s, Size = 40000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 24.8 GB/s, Time = 0.00000 s, Size = 42000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 44.1 GB/s, Time = 0.00000 s, Size = 44000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 27.2 GB/s, Time = 0.00000 s, Size = 46000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 48.0 GB/s, Time = 0.00000 s, Size = 48000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 28.5 GB/s, Time = 0.00000 s, Size = 50000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 60.2 GB/s, Time = 0.00000 s, Size = 60000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 42.7 GB/s, Time = 0.00000 s, Size = 70000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 8.4 GB/s, Time = 0.00001 s, Size = 80000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 55.6 GB/s, Time = 0.00000 s, Size = 90000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 59.6 GB/s, Time = 0.00000 s, Size = 100000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 127.9 GB/s, Time = 0.00000 s, Size = 200000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 183.1 GB/s, 
Time = 0.00000 s, Size = 300000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 270.2 GB/s, Time = 0.00000 s, Size = 400000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 15.5 GB/s, Time = 0.00003 s, Size = 500000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 399.2 GB/s, Time = 0.00000 s, Size = 600000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 172.1 GB/s, Time = 0.00000 s, Size = 700000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 27.5 GB/s, Time = 0.00003 s, Size = 800000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 71.3 GB/s, Time = 0.00001 s, Size = 900000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 502.2 GB/s, Time = 0.00000 s, Size = 1000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 59.4 GB/s, Time = 0.00003 s, Size = 2000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 348.7 GB/s, Time = 0.00001 s, Size = 3000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 519.4 GB/s, Time = 0.00001 s, Size = 4000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 422.3 GB/s, Time = 0.00001 s, Size = 5000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 447.9 GB/s, Time = 0.00001 s, Size = 6000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 225.3 GB/s, Time = 0.00003 s, Size = 7000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 146.0 GB/s, Time = 0.00005 s, Size = 8000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 190.9 GB/s, Time = 0.00005 s, Size = 9000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 301.1 GB/s, Time = 0.00003 s, Size = 10000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 192.8 GB/s, Time = 0.00006 s, Size = 11000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 243.9 GB/s, Time = 0.00005 s, Size = 12000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 328.7 GB/s, Time = 0.00004 s, Size = 13000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 621.2 GB/s, Time = 
0.00002 s, Size = 14000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 682.5 GB/s, Time = 0.00002 s, Size = 15000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 686.3 GB/s, Time = 0.00002 s, Size = 16000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 693.1 GB/s, Time = 0.00003 s, Size = 18000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 707.0 GB/s, Time = 0.00003 s, Size = 20000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 714.4 GB/s, Time = 0.00003 s, Size = 22000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 719.4 GB/s, Time = 0.00003 s, Size = 24000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 723.2 GB/s, Time = 0.00004 s, Size = 26000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 726.7 GB/s, Time = 0.00004 s, Size = 28000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 728.8 GB/s, Time = 0.00004 s, Size = 30000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 724.2 GB/s, Time = 0.00004 s, Size = 32000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 735.3 GB/s, Time = 0.00005 s, Size = 36000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 741.1 GB/s, Time = 0.00005 s, Size = 40000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 748.9 GB/s, Time = 0.00006 s, Size = 44000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 748.9 GB/s, Time = 0.00006 s, Size = 48000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 754.1 GB/s, Time = 0.00007 s, Size = 52000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 757.4 GB/s, Time = 0.00007 s, Size = 56000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 758.5 GB/s, Time = 0.00008 s, Size = 60000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 772.0 GB/s, Time = 0.00008 s, Size = 64000000 bytes, NumDevsUsed = 1 -bandwidthTest-D2D, Bandwidth = 762.8 GB/s, Time = 0.00009 s, Size = 68000000 bytes, NumDevsUsed = 1 -Result = PASS - """ + 
raw_output = [raw_output_h2d, raw_output_d2h, raw_output_d2d] for i, metric in enumerate(['h2d_bw', 'd2h_bw', 'd2d_bw']): assert (benchmark._process_raw_result(i, raw_output[i])) assert (metric in benchmark.result) diff --git a/tests/benchmarks/micro_benchmarks/test_cuda_nccl_bw_performance.py b/tests/benchmarks/micro_benchmarks/test_cuda_nccl_bw_performance.py index 25203ff9..30ab2e11 100644 --- a/tests/benchmarks/micro_benchmarks/test_cuda_nccl_bw_performance.py +++ b/tests/benchmarks/micro_benchmarks/test_cuda_nccl_bw_performance.py @@ -3,36 +3,41 @@ """Tests for nccl-bw benchmark.""" -import os import numbers import unittest -from pathlib import Path +from tests.helper import decorator +from tests.helper.testcase import BenchmarkTestCase from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform -class CudaNcclBwBenchmarkTest(unittest.TestCase): +class CudaNcclBwBenchmarkTest(BenchmarkTestCase, unittest.TestCase): """Tests for CudaNcclBwBenchmark benchmark.""" - def setUp(self): - """Method called to prepare the test fixture.""" - # Create fake binary file just for testing. 
- os.environ['SB_MICRO_PATH'] = '/tmp/superbench/' - binary_path = os.path.join(os.getenv('SB_MICRO_PATH'), 'bin') - Path(binary_path).mkdir(parents=True, exist_ok=True) - self.__binary_files = [] - for bin_name in [ - 'all_reduce_perf', 'all_gather_perf', 'broadcast_perf', 'reduce_perf', 'reduce_scatter_perf', - 'alltoall_perf' - ]: - self.__binary_files.append(Path(binary_path, bin_name)) - Path(binary_path, bin_name).touch(mode=0o755, exist_ok=True) + @classmethod + def setUpClass(cls): + """Hook method for setting up class fixture before running tests in the class.""" + super().setUpClass() + cls.createMockEnvs(cls) + cls.createMockFiles( + cls, [ + f'bin/{name}' for name in [ + 'all_reduce_perf', + 'all_gather_perf', + 'broadcast_perf', + 'reduce_perf', + 'reduce_scatter_perf', + 'alltoall_perf', + ] + ] + ) - def tearDown(self): - """Method called after the test method has been called and the result recorded.""" - for binary_file in self.__binary_files: - binary_file.unlink() - - def test_nccl_bw_performance(self): + @decorator.load_data('tests/data/nccl_allgather.log') + @decorator.load_data('tests/data/nccl_allreduce.log') + @decorator.load_data('tests/data/nccl_reduce.log') + @decorator.load_data('tests/data/nccl_broadcast.log') + @decorator.load_data('tests/data/nccl_reducescatter.log') + @decorator.load_data('tests/data/nccl_alltoall.log') + def test_nccl_bw_performance(self, allgather, allreduce, reduce, broadcast, reducescatter, alltoall): """Test nccl-bw benchmark.""" benchmark_name = 'nccl-bw' (benchmark_class, @@ -75,336 +80,14 @@ class CudaNcclBwBenchmarkTest(unittest.TestCase): assert (benchmark._process_raw_result(0, '') is False) # Case with valid raw_output - raw_output = {} - raw_output['allgather'] = """ -# nThread 1 nGpus 8 minBytes 1 maxBytes 8589934592 step: 2(factor) warmup iters: 5 iters: 20 validation: 0 -# -# Using devices -# Rank 0 Pid 112372 on localhost device 0 [0x00] A100-SXM4-40GB -# Rank 1 Pid 112372 on localhost device 1 [0x00] 
A100-SXM4-40GB -# Rank 2 Pid 112372 on localhost device 2 [0x00] A100-SXM4-40GB -# Rank 3 Pid 112372 on localhost device 3 [0x00] A100-SXM4-40GB -# Rank 4 Pid 112372 on localhost device 4 [0x00] A100-SXM4-40GB -# Rank 5 Pid 112372 on localhost device 5 [0x00] A100-SXM4-40GB -# Rank 6 Pid 112372 on localhost device 6 [0x00] A100-SXM4-40GB -# Rank 7 Pid 112372 on localhost device 7 [0x00] A100-SXM4-40GB -# -# out-of-place in-place -# size count type time algbw busbw error time algbw busbw error -# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) -hostname:3442:3442 [0] NCCL INFO Launch mode Parallel - 0 0 float 34.27 0.00 0.00 N/A 33.57 0.00 0.00 N/A - 0 0 float 33.41 0.00 0.00 N/A 33.62 0.00 0.00 N/A - 0 0 float 33.94 0.00 0.00 N/A 33.48 0.00 0.00 N/A - 0 0 float 33.83 0.00 0.00 N/A 33.62 0.00 0.00 N/A - 0 0 float 33.82 0.00 0.00 N/A 33.57 0.00 0.00 N/A - 32 1 float 35.03 0.00 0.00 N/A 34.15 0.00 0.00 N/A - 64 2 float 34.36 0.00 0.00 N/A 33.83 0.00 0.00 N/A - 128 4 float 33.94 0.00 0.00 N/A 35.22 0.00 0.00 N/A - 256 8 float 34.44 0.01 0.01 N/A 34.82 0.01 0.01 N/A - 512 16 float 34.84 0.01 0.01 N/A 34.76 0.01 0.01 N/A - 1024 32 float 35.38 0.03 0.03 N/A 34.53 0.03 0.03 N/A - 2048 64 float 34.67 0.06 0.05 N/A 34.91 0.06 0.05 N/A - 4096 128 float 34.62 0.12 0.10 N/A 34.81 0.12 0.10 N/A - 8192 256 float 34.76 0.24 0.21 N/A 35.03 0.23 0.20 N/A - 16384 512 float 34.80 0.47 0.41 N/A 34.90 0.47 0.41 N/A - 32768 1024 float 34.54 0.95 0.83 N/A 35.23 0.93 0.81 N/A - 65536 2048 float 36.34 1.80 1.58 N/A 36.01 1.82 1.59 N/A - 131072 4096 float 40.18 3.26 2.85 N/A 39.43 3.32 2.91 N/A - 262144 8192 float 46.45 5.64 4.94 N/A 46.27 5.67 4.96 N/A - 524288 16384 float 58.48 8.96 7.84 N/A 60.40 8.68 7.60 N/A - 1048576 32768 float 72.95 14.37 12.58 N/A 73.07 14.35 12.56 N/A - 2097152 65536 float 77.28 27.14 23.75 N/A 75.84 27.65 24.20 N/A - 4194304 131072 float 100.7 41.64 36.43 N/A 99.56 42.13 36.86 N/A - 8388608 262144 float 123.5 67.94 59.44 N/A 120.7 69.51 60.82 N/A - 16777216 
524288 float 167.7 100.03 87.52 N/A 164.6 101.94 89.20 N/A - 33554432 1048576 float 265.8 126.24 110.46 N/A 257.5 130.33 114.04 N/A - 67108864 2097152 float 379.7 176.74 154.65 N/A 367.6 182.57 159.75 N/A - 134217728 4194304 float 698.6 192.13 168.12 N/A 657.3 204.20 178.67 N/A - 268435456 8388608 float 1192.2 225.16 197.01 N/A 1136.0 236.29 206.76 N/A - 536870912 16777216 float 2304.1 233.01 203.88 N/A 2227.9 240.98 210.85 N/A - 1073741824 33554432 float 4413.4 243.29 212.88 N/A 4258.8 252.12 220.61 N/A - 2147483648 67108864 float 8658.8 248.01 217.01 N/A 8389.4 255.98 223.98 N/A - 4294967296 134217728 float 17016 252.40 220.85 N/A 16474 260.71 228.12 N/A - 8589934592 268435456 float 33646 255.31 223.39 N/A 32669 262.94 230.07 N/A -# Out of bounds values : 0 OK -# Avg bus bandwidth : 58.2651 -# -""" - raw_output['allreduce'] = """ -# nThread 1 nGpus 8 minBytes 1 maxBytes 8589934592 step: 2(factor) warmup iters: 5 iters: 20 validation: 0 -# -# Using devices -# Rank 0 Pid 112424 on localhost device 0 [0x00] A100-SXM4-40GB -# Rank 1 Pid 112424 on localhost device 1 [0x00] A100-SXM4-40GB -# Rank 2 Pid 112424 on localhost device 2 [0x00] A100-SXM4-40GB -# Rank 3 Pid 112424 on localhost device 3 [0x00] A100-SXM4-40GB -# Rank 4 Pid 112424 on localhost device 4 [0x00] A100-SXM4-40GB -# Rank 5 Pid 112424 on localhost device 5 [0x00] A100-SXM4-40GB -# Rank 6 Pid 112424 on localhost device 6 [0x00] A100-SXM4-40GB -# Rank 7 Pid 112424 on localhost device 7 [0x00] A100-SXM4-40GB -# -# out-of-place in-place -# size count type redop time algbw busbw error time algbw busbw error -# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) -hostname:3442:3442 [0] NCCL INFO Launch mode Parallel - 0 0 float sum 35.20 0.00 0.00 N/A 34.05 0.00 0.00 N/A - 0 0 float sum 34.18 0.00 0.00 N/A 33.50 0.00 0.00 N/A - 4 1 float sum 34.73 0.00 0.00 N/A 35.30 0.00 0.00 N/A - 8 2 float sum 34.66 0.00 0.00 N/A 34.84 0.00 0.00 N/A - 16 4 float sum 35.00 0.00 0.00 N/A 35.61 0.00 0.00 N/A - 32 8 float sum 
35.60 0.00 0.00 N/A 35.27 0.00 0.00 N/A - 64 16 float sum 34.83 0.00 0.00 N/A 34.61 0.00 0.00 N/A - 128 32 float sum 34.53 0.00 0.01 N/A 43.78 0.00 0.01 N/A - 256 64 float sum 34.56 0.01 0.01 N/A 34.95 0.01 0.01 N/A - 512 128 float sum 34.94 0.01 0.03 N/A 35.20 0.01 0.03 N/A - 1024 256 float sum 36.07 0.03 0.05 N/A 35.77 0.03 0.05 N/A - 2048 512 float sum 35.42 0.06 0.10 N/A 35.89 0.06 0.10 N/A - 4096 1024 float sum 35.92 0.11 0.20 N/A 36.11 0.11 0.20 N/A - 8192 2048 float sum 35.91 0.23 0.40 N/A 36.07 0.23 0.40 N/A - 16384 4096 float sum 36.18 0.45 0.79 N/A 35.87 0.46 0.80 N/A - 32768 8192 float sum 36.65 0.89 1.56 N/A 35.73 0.92 1.60 N/A - 65536 16384 float sum 37.82 1.73 3.03 N/A 37.25 1.76 3.08 N/A - 131072 32768 float sum 41.19 3.18 5.57 N/A 41.11 3.19 5.58 N/A - 262144 65536 float sum 47.53 5.52 9.65 N/A 47.94 5.47 9.57 N/A - 524288 131072 float sum 60.32 8.69 15.21 N/A 60.52 8.66 15.16 N/A - 1048576 262144 float sum 74.78 14.02 24.54 N/A 76.17 13.77 24.09 N/A - 2097152 524288 float sum 93.48 22.43 39.26 N/A 96.10 21.82 38.19 N/A - 4194304 1048576 float sum 112.0 37.44 65.52 N/A 110.2 38.06 66.60 N/A - 8388608 2097152 float sum 162.0 51.79 90.63 N/A 160.0 52.44 91.77 N/A - 16777216 4194304 float sum 226.0 74.23 129.90 N/A 225.0 74.57 130.49 N/A - 33554432 8388608 float sum 374.3 89.65 156.89 N/A 372.8 90.00 157.50 N/A - 67108864 16777216 float sum 584.5 114.81 200.91 N/A 581.9 115.33 201.82 N/A - 134217728 33554432 float sum 1162.2 115.49 202.11 N/A 1162.5 115.46 202.05 N/A - 268435456 67108864 float sum 2112.2 127.09 222.40 N/A 2111.8 127.11 222.45 N/A - 536870912 134217728 float sum 4200.3 127.82 223.68 N/A 4184.0 128.32 224.55 N/A - 1073741824 268435456 float sum 8159.5 131.59 230.29 N/A 8176.5 131.32 229.81 N/A - 2147483648 536870912 float sum 16215 132.44 231.76 N/A 16203 132.53 231.93 N/A - 4294967296 1073741824 float sum 32070 133.92 234.37 N/A 32052 134.00 234.50 N/A - 8589934592 2147483648 float sum 63896 134.44 235.26 N/A 63959 134.30 235.03 N/A -# 
Out of bounds values : 0 OK -# Avg bus bandwidth : 68.4048 -# -""" - raw_output['reduce'] = """ -# nThread 1 nGpus 8 minBytes 1 maxBytes 8589934592 step: 2(factor) warmup iters: 5 iters: 20 validation: 0 -# -# Using devices -# Rank 0 Pid 112476 on localhost device 0 [0x00] A100-SXM4-40GB -# Rank 1 Pid 112476 on localhost device 1 [0x00] A100-SXM4-40GB -# Rank 2 Pid 112476 on localhost device 2 [0x00] A100-SXM4-40GB -# Rank 3 Pid 112476 on localhost device 3 [0x00] A100-SXM4-40GB -# Rank 4 Pid 112476 on localhost device 4 [0x00] A100-SXM4-40GB -# Rank 5 Pid 112476 on localhost device 5 [0x00] A100-SXM4-40GB -# Rank 6 Pid 112476 on localhost device 6 [0x00] A100-SXM4-40GB -# Rank 7 Pid 112476 on localhost device 7 [0x00] A100-SXM4-40GB -# -# out-of-place in-place -# size count type redop root time algbw busbw error time algbw busbw error -# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) -hostname:3442:3442 [0] NCCL INFO Launch mode Parallel - 0 0 float sum 0 36.90 0.00 0.00 N/A 36.47 0.00 0.00 N/A - 0 0 float sum 0 34.18 0.00 0.00 N/A 35.70 0.00 0.00 N/A - 4 1 float sum 0 35.40 0.00 0.00 N/A 35.59 0.00 0.00 N/A - 8 2 float sum 0 36.35 0.00 0.00 N/A 35.74 0.00 0.00 N/A - 16 4 float sum 0 35.47 0.00 0.00 N/A 34.27 0.00 0.00 N/A - 32 8 float sum 0 36.16 0.00 0.00 N/A 36.19 0.00 0.00 N/A - 64 16 float sum 0 35.61 0.00 0.00 N/A 35.45 0.00 0.00 N/A - 128 32 float sum 0 34.78 0.00 0.00 N/A 35.80 0.00 0.00 N/A - 256 64 float sum 0 35.37 0.01 0.01 N/A 35.89 0.01 0.01 N/A - 512 128 float sum 0 35.49 0.01 0.01 N/A 35.53 0.01 0.01 N/A - 1024 256 float sum 0 35.38 0.03 0.03 N/A 35.52 0.03 0.03 N/A - 2048 512 float sum 0 35.97 0.06 0.06 N/A 35.13 0.06 0.06 N/A - 4096 1024 float sum 0 36.03 0.11 0.11 N/A 35.82 0.11 0.11 N/A - 8192 2048 float sum 0 36.80 0.22 0.22 N/A 36.71 0.22 0.22 N/A - 16384 4096 float sum 0 35.37 0.46 0.46 N/A 36.79 0.45 0.45 N/A - 32768 8192 float sum 0 35.16 0.93 0.93 N/A 35.72 0.92 0.92 N/A - 65536 16384 float sum 0 38.08 1.72 1.72 N/A 37.74 1.74 1.74 
N/A - 131072 32768 float sum 0 43.07 3.04 3.04 N/A 41.59 3.15 3.15 N/A - 262144 65536 float sum 0 52.16 5.03 5.03 N/A 50.49 5.19 5.19 N/A - 524288 131072 float sum 0 67.58 7.76 7.76 N/A 66.57 7.88 7.88 N/A - 1048576 262144 float sum 0 76.74 13.66 13.66 N/A 80.47 13.03 13.03 N/A - 2097152 524288 float sum 0 78.51 26.71 26.71 N/A 78.76 26.63 26.63 N/A - 4194304 1048576 float sum 0 81.47 51.48 51.48 N/A 80.30 52.23 52.23 N/A - 8388608 2097152 float sum 0 94.72 88.57 88.57 N/A 94.06 89.19 89.19 N/A - 16777216 4194304 float sum 0 137.7 121.83 121.83 N/A 139.6 120.17 120.17 N/A - 33554432 8388608 float sum 0 218.3 153.70 153.70 N/A 218.1 153.83 153.83 N/A - 67108864 16777216 float sum 0 370.8 180.96 180.96 N/A 369.8 181.49 181.49 N/A - 134217728 33554432 float sum 0 661.0 203.06 203.06 N/A 659.9 203.39 203.39 N/A - 268435456 67108864 float sum 0 1251.4 214.52 214.52 N/A 1268.1 211.68 211.68 N/A - 536870912 134217728 float sum 0 2421.6 221.70 221.70 N/A 2413.4 222.45 222.45 N/A - 1073741824 268435456 float sum 0 4736.0 226.72 226.72 N/A 4757.9 225.68 225.68 N/A - 2147483648 536870912 float sum 0 9323.5 230.33 230.33 N/A 9354.0 229.58 229.58 N/A - 4294967296 1073741824 float sum 0 18594 230.99 230.99 N/A 18570 231.28 231.28 N/A - 8589934592 2147483648 float sum 0 37613 228.38 228.38 N/A 37539 228.83 228.83 N/A -# Out of bounds values : 0 OK -# Avg bus bandwidth : 65.018 -# -""" - raw_output['broadcast'] = """ -# nThread 1 nGpus 8 minBytes 1 maxBytes 8589934592 step: 2(factor) warmup iters: 5 iters: 20 validation: 0 -# -# Using devices -# Rank 0 Pid 112528 on localhost device 0 [0x00] A100-SXM4-40GB -# Rank 1 Pid 112528 on localhost device 1 [0x00] A100-SXM4-40GB -# Rank 2 Pid 112528 on localhost device 2 [0x00] A100-SXM4-40GB -# Rank 3 Pid 112528 on localhost device 3 [0x00] A100-SXM4-40GB -# Rank 4 Pid 112528 on localhost device 4 [0x00] A100-SXM4-40GB -# Rank 5 Pid 112528 on localhost device 5 [0x00] A100-SXM4-40GB -# Rank 6 Pid 112528 on localhost device 6 [0x00] 
A100-SXM4-40GB -# Rank 7 Pid 112528 on localhost device 7 [0x00] A100-SXM4-40GB -# -# out-of-place in-place -# size count type root time algbw busbw error time algbw busbw error -# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) -hostname:3442:3442 [0] NCCL INFO Launch mode Parallel - 0 0 float 0 34.61 0.00 0.00 N/A 34.33 0.00 0.00 N/A - 0 0 float 0 34.43 0.00 0.00 N/A 35.06 0.00 0.00 N/A - 4 1 float 0 33.96 0.00 0.00 N/A 33.80 0.00 0.00 N/A - 8 2 float 0 34.16 0.00 0.00 N/A 34.32 0.00 0.00 N/A - 16 4 float 0 34.47 0.00 0.00 N/A 34.85 0.00 0.00 N/A - 32 8 float 0 35.24 0.00 0.00 N/A 34.75 0.00 0.00 N/A - 64 16 float 0 35.12 0.00 0.00 N/A 34.89 0.00 0.00 N/A - 128 32 float 0 34.67 0.00 0.00 N/A 34.36 0.00 0.00 N/A - 256 64 float 0 34.23 0.01 0.01 N/A 34.42 0.01 0.01 N/A - 512 128 float 0 34.26 0.01 0.01 N/A 35.20 0.01 0.01 N/A - 1024 256 float 0 34.87 0.03 0.03 N/A 34.80 0.03 0.03 N/A - 2048 512 float 0 34.90 0.06 0.06 N/A 35.27 0.06 0.06 N/A - 4096 1024 float 0 35.37 0.12 0.12 N/A 34.59 0.12 0.12 N/A - 8192 2048 float 0 34.95 0.23 0.23 N/A 34.79 0.24 0.24 N/A - 16384 4096 float 0 34.94 0.47 0.47 N/A 34.94 0.47 0.47 N/A - 32768 8192 float 0 35.03 0.94 0.94 N/A 34.71 0.94 0.94 N/A - 65536 16384 float 0 36.04 1.82 1.82 N/A 36.48 1.80 1.80 N/A - 131072 32768 float 0 40.09 3.27 3.27 N/A 39.92 3.28 3.28 N/A - 262144 65536 float 0 46.58 5.63 5.63 N/A 45.89 5.71 5.71 N/A - 524288 131072 float 0 58.37 8.98 8.98 N/A 59.67 8.79 8.79 N/A - 1048576 262144 float 0 76.02 13.79 13.79 N/A 78.43 13.37 13.37 N/A - 2097152 524288 float 0 78.12 26.85 26.85 N/A 78.84 26.60 26.60 N/A - 4194304 1048576 float 0 81.06 51.74 51.74 N/A 80.39 52.17 52.17 N/A - 8388608 2097152 float 0 97.20 86.30 86.30 N/A 96.09 87.30 87.30 N/A - 16777216 4194304 float 0 143.1 117.22 117.22 N/A 142.1 118.06 118.06 N/A - 33554432 8388608 float 0 223.4 150.21 150.21 N/A 221.3 151.61 151.61 N/A - 67108864 16777216 float 0 374.8 179.05 179.05 N/A 374.4 179.23 179.23 N/A - 134217728 33554432 float 0 672.2 
199.67 199.67 N/A 670.0 200.34 200.34 N/A - 268435456 67108864 float 0 1271.5 211.11 211.11 N/A 1264.5 212.28 212.28 N/A - 536870912 134217728 float 0 2436.3 220.37 220.37 N/A 2434.5 220.53 220.53 N/A - 1073741824 268435456 float 0 4769.2 225.14 225.14 N/A 4697.5 228.58 228.58 N/A - 2147483648 536870912 float 0 9314.2 230.56 230.56 N/A 9248.3 232.20 232.20 N/A - 4294967296 1073741824 float 0 18487 232.33 232.33 N/A 18381 233.66 233.66 N/A - 8589934592 2147483648 float 0 36896 232.81 232.81 N/A 36599 234.70 234.70 N/A -# Out of bounds values : 0 OK -# Avg bus bandwidth : 64.8653 -# -""" - raw_output['reducescatter'] = """ -# nThread 1 nGpus 8 minBytes 1 maxBytes 8589934592 step: 2(factor) warmup iters: 5 iters: 20 validation: 0 -# -# Using devices -# Rank 0 Pid 112580 on localhost device 0 [0x00] A100-SXM4-40GB -# Rank 1 Pid 112580 on localhost device 1 [0x00] A100-SXM4-40GB -# Rank 2 Pid 112580 on localhost device 2 [0x00] A100-SXM4-40GB -# Rank 3 Pid 112580 on localhost device 3 [0x00] A100-SXM4-40GB -# Rank 4 Pid 112580 on localhost device 4 [0x00] A100-SXM4-40GB -# Rank 5 Pid 112580 on localhost device 5 [0x00] A100-SXM4-40GB -# Rank 6 Pid 112580 on localhost device 6 [0x00] A100-SXM4-40GB -# Rank 7 Pid 112580 on localhost device 7 [0x00] A100-SXM4-40GB -# -# out-of-place in-place -# size count type redop time algbw busbw error time algbw busbw error -# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) -hostname:3442:3442 [0] NCCL INFO Launch mode Parallel - 0 0 float sum 34.88 0.00 0.00 N/A 33.65 0.00 0.00 N/A - 0 0 float sum 33.54 0.00 0.00 N/A 33.72 0.00 0.00 N/A - 0 0 float sum 33.45 0.00 0.00 N/A 33.44 0.00 0.00 N/A - 0 0 float sum 34.07 0.00 0.00 N/A 33.44 0.00 0.00 N/A - 0 0 float sum 33.55 0.00 0.00 N/A 33.43 0.00 0.00 N/A - 32 1 float sum 35.06 0.00 0.00 N/A 35.14 0.00 0.00 N/A - 64 2 float sum 34.82 0.00 0.00 N/A 34.76 0.00 0.00 N/A - 128 4 float sum 34.38 0.00 0.00 N/A 34.52 0.00 0.00 N/A - 256 8 float sum 34.75 0.01 0.01 N/A 34.32 0.01 0.01 N/A - 
512 16 float sum 34.71 0.01 0.01 N/A 35.43 0.01 0.01 N/A - 1024 32 float sum 35.16 0.03 0.03 N/A 34.75 0.03 0.03 N/A - 2048 64 float sum 35.43 0.06 0.05 N/A 35.29 0.06 0.05 N/A - 4096 128 float sum 35.49 0.12 0.10 N/A 35.17 0.12 0.10 N/A - 8192 256 float sum 35.18 0.23 0.20 N/A 35.77 0.23 0.20 N/A - 16384 512 float sum 35.27 0.46 0.41 N/A 35.49 0.46 0.40 N/A - 32768 1024 float sum 35.00 0.94 0.82 N/A 35.09 0.93 0.82 N/A - 65536 2048 float sum 36.78 1.78 1.56 N/A 36.92 1.77 1.55 N/A - 131072 4096 float sum 40.71 3.22 2.82 N/A 39.78 3.29 2.88 N/A - 262144 8192 float sum 48.12 5.45 4.77 N/A 46.65 5.62 4.92 N/A - 524288 16384 float sum 59.81 8.77 7.67 N/A 58.88 8.90 7.79 N/A - 1048576 32768 float sum 72.37 14.49 12.68 N/A 74.95 13.99 12.24 N/A - 2097152 65536 float sum 80.64 26.01 22.76 N/A 79.62 26.34 23.05 N/A - 4194304 131072 float sum 108.9 38.53 33.72 N/A 109.3 38.37 33.57 N/A - 8388608 262144 float sum 147.3 56.96 49.84 N/A 166.8 50.28 44.00 N/A - 16777216 524288 float sum 152.4 110.11 96.34 N/A 152.8 109.82 96.09 N/A - 33554432 1048576 float sum 240.5 139.50 122.06 N/A 240.8 139.33 121.91 N/A - 67108864 2097152 float sum 356.1 188.45 164.89 N/A 352.1 190.57 166.75 N/A - 134217728 4194304 float sum 618.1 217.15 190.01 N/A 615.2 218.18 190.90 N/A - 268435456 8388608 float sum 1108.7 242.11 211.84 N/A 1112.6 241.27 211.11 N/A - 536870912 16777216 float sum 2169.0 247.52 216.58 N/A 2181.8 246.07 215.31 N/A - 1073741824 33554432 float sum 4203.0 255.47 223.54 N/A 4206.3 255.27 223.36 N/A - 2147483648 67108864 float sum 8356.9 256.97 224.85 N/A 8323.5 258.00 225.75 N/A - 4294967296 134217728 float sum 16400 261.89 229.15 N/A 16402 261.86 229.13 N/A - 8589934592 268435456 float sum 32464 264.60 231.52 N/A 32502 264.29 231.25 N/A -# Out of bounds values : 0 OK -# Avg bus bandwidth : 60.168 -# -""" - raw_output['alltoall'] = """ -# nThread 1 nGpus 8 minBytes 1 maxBytes 8589934592 step: 2(factor) warmup iters: 5 iters: 20 validation: 0 -# -# Using devices -# Rank 0 Pid 
167261 on localhost device 0 [0x00] A100-SXM4-40GB -# Rank 1 Pid 167261 on localhost device 1 [0x00] A100-SXM4-40GB -# Rank 2 Pid 167261 on localhost device 2 [0x00] A100-SXM4-40GB -# Rank 3 Pid 167261 on localhost device 3 [0x00] A100-SXM4-40GB -# Rank 4 Pid 167261 on localhost device 4 [0x00] A100-SXM4-40GB -# Rank 5 Pid 167261 on localhost device 5 [0x00] A100-SXM4-40GB -# Rank 6 Pid 167261 on localhost device 6 [0x00] A100-SXM4-40GB -# Rank 7 Pid 167261 on localhost device 7 [0x00] A100-SXM4-40GB -# -# out-of-place in-place -# size count type redop time algbw busbw error time algbw busbw error -# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) - 0 0 float 1.63 0.00 0.00 N/A 1.38 0.00 0.00 N/A - 0 0 float 1.35 0.00 0.00 N/A 1.34 0.00 0.00 N/A - 0 0 float 1.35 0.00 0.00 N/A 1.77 0.00 0.00 N/A - 0 0 float 1.37 0.00 0.00 N/A 1.39 0.00 0.00 N/A - 0 0 float 1.34 0.00 0.00 N/A 1.33 0.00 0.00 N/A - 32 1 float 89.00 0.00 0.00 N/A 85.13 0.00 0.00 N/A - 64 2 float 86.83 0.00 0.00 N/A 85.77 0.00 0.00 N/A - 128 4 float 86.02 0.00 0.00 N/A 85.30 0.00 0.00 N/A - 256 8 float 87.20 0.00 0.00 N/A 86.21 0.00 0.00 N/A - 512 16 float 87.33 0.01 0.01 N/A 88.47 0.01 0.01 N/A - 1024 32 float 88.17 0.01 0.01 N/A 88.98 0.01 0.01 N/A - 2048 64 float 86.44 0.02 0.02 N/A 86.65 0.02 0.02 N/A - 4096 128 float 86.75 0.05 0.04 N/A 86.68 0.05 0.04 N/A - 8192 256 float 88.78 0.09 0.08 N/A 87.05 0.09 0.08 N/A - 16384 512 float 87.71 0.19 0.16 N/A 86.76 0.19 0.17 N/A - 32768 1024 float 86.26 0.38 0.33 N/A 88.92 0.37 0.32 N/A - 65536 2048 float 87.67 0.75 0.65 N/A 89.16 0.74 0.64 N/A - 131072 4096 float 87.35 1.50 1.31 N/A 86.76 1.51 1.32 N/A - 262144 8192 float 87.02 3.01 2.64 N/A 87.98 2.98 2.61 N/A - 524288 16384 float 86.58 6.06 5.30 N/A 89.33 5.87 5.14 N/A - 1048576 32768 float 87.42 11.99 10.50 N/A 88.90 11.79 10.32 N/A - 2097152 65536 float 89.61 23.40 20.48 N/A 90.10 23.27 20.37 N/A - 4194304 131072 float 96.44 43.49 38.05 N/A 99.62 42.10 36.84 N/A - 8388608 262144 float 121.1 69.28 
60.62 N/A 120.6 69.56 60.87 N/A - 16777216 524288 float 160.4 104.62 91.55 N/A 158.8 105.64 92.43 N/A - 33554432 1048576 float 237.5 141.30 123.64 N/A 234.5 143.11 125.22 N/A - 67108864 2097152 float 396.8 169.13 147.99 N/A 387.0 173.41 151.73 N/A - 134217728 4194304 float 633.6 211.83 185.35 N/A 620.9 216.17 189.15 N/A - 268435456 8388608 float 1189.1 225.75 197.53 N/A 1167.8 229.86 201.13 N/A - 536870912 16777216 float 2236.6 240.04 210.03 N/A 2197.4 244.32 213.78 N/A - 1073741824 33554432 float 4335.5 247.66 216.71 N/A 4274.2 251.22 219.81 N/A - 2147483648 67108864 float 8510.4 252.34 220.79 N/A 8405.3 255.49 223.56 N/A - 4294967296 134217728 float 16860 254.74 222.90 N/A 16678 257.53 225.34 N/A - 8589934592 268435456 float 33508 256.36 224.31 N/A 33234 258.47 226.16 N/A -# Out of bounds values : 0 OK -# Avg bus bandwidth : 58.6481 - -""" + raw_output = { + 'allgather': allgather, + 'allreduce': allreduce, + 'reduce': reduce, + 'broadcast': broadcast, + 'reducescatter': reducescatter, + 'alltoall': alltoall, + } for op in raw_output.keys(): benchmark._args.operation = op diff --git a/tests/benchmarks/micro_benchmarks/test_disk_performance.py b/tests/benchmarks/micro_benchmarks/test_disk_performance.py index 710f5a23..57b1da9a 100644 --- a/tests/benchmarks/micro_benchmarks/test_disk_performance.py +++ b/tests/benchmarks/micro_benchmarks/test_disk_performance.py @@ -3,28 +3,22 @@ """Tests for disk-performance benchmark.""" -from pathlib import Path -from unittest import mock -import os import unittest +from unittest import mock +from tests.helper import decorator +from tests.helper.testcase import BenchmarkTestCase from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform -class DiskBenchmarkTest(unittest.TestCase): +class DiskBenchmarkTest(BenchmarkTestCase, unittest.TestCase): """Test class for disk-performance benchmark.""" - def setUp(self): - """Method called to prepare the test fixture.""" - # Create fake binary file just for 
testing. - os.environ['SB_MICRO_PATH'] = '/tmp/superbench/' - binary_path = Path(os.getenv('SB_MICRO_PATH'), 'bin') - binary_path.mkdir(parents=True, exist_ok=True) - self.__binary_file = binary_path / 'fio' - self.__binary_file.touch(mode=0o755, exist_ok=True) - - def tearDown(self): - """Method called after the test method has been called and the result recorded.""" - self.__binary_file.unlink() + @classmethod + def setUpClass(cls): + """Hook method for setting up class fixture before running tests in the class.""" + super().setUpClass() + cls.createMockEnvs(cls) + cls.createMockFiles(cls, ['bin/fio']) def test_disk_performance_empty_param(self): """Test disk-performance benchmark command generation with empty parameter.""" @@ -178,7 +172,8 @@ class DiskBenchmarkTest(unittest.TestCase): assert ('--rwmixread=%d' % default_rwmixread in benchmark._commands[command_idx]) command_idx += 1 - def test_disk_performance_result_parsing(self): + @decorator.load_data('tests/data/disk_performance.log') + def test_disk_performance_result_parsing(self, test_raw_output): """Test disk-performance benchmark result parsing.""" benchmark_name = 'disk-benchmark' (benchmark_class, @@ -193,317 +188,6 @@ class DiskBenchmarkTest(unittest.TestCase): assert (benchmark.type == BenchmarkType.MICRO) # Positive case - valid raw output. 
- test_raw_output = """ -{ - "fio version" : "fio-3.16", - "timestamp" : 1626763278, - "timestamp_ms" : 1626763278577, - "time" : "Tue Jul 20 06:41:18 2021", - "global options" : { - "filename" : "/dev/nvme0n1", - "ramp_time" : "10s", - "runtime" : "30s", - "iodepth" : "64", - "numjobs" : "4", - "randrepeat" : "1", - "thread" : "1", - "ioengine" : "libaio", - "direct" : "1", - "norandommap" : "1", - "lat_percentiles" : "1", - "group_reporting" : "1" - }, - "jobs" : [ - { - "jobname" : "rand_read_write", - "groupid" : 0, - "error" : 0, - "eta" : 0, - "elapsed" : 41, - "job options" : { - "name" : "rand_read", - "rw" : "randrw", - "bs" : "4096", - "time_based" : "1" - }, - "read" : { - "io_bytes" : 10463010816, - "io_kbytes" : 10217784, - "bw_bytes" : 348743777, - "bw" : 340570, - "iops" : 85138.890741, - "runtime" : 30002, - "total_ios" : 2554337, - "short_ios" : 0, - "drop_ios" : 0, - "slat_ns" : { - "min" : 1332, - "max" : 48691, - "mean" : 2032.588341, - "stddev" : 864.921965 - }, - "clat_ns" : { - "min" : 278533, - "max" : 10175655, - "mean" : 1444476.063469, - "stddev" : 300748.583131 - }, - "lat_ns" : { - "min" : 280646, - "max" : 10177629, - "mean" : 1446562.147113, - "stddev" : 300723.879349, - "percentile" : { - "1.000000" : 872448, - "5.000000" : 1036288, - "10.000000" : 1122304, - "20.000000" : 1220608, - "30.000000" : 1286144, - "40.000000" : 1351680, - "50.000000" : 1417216, - "60.000000" : 1482752, - "70.000000" : 1564672, - "80.000000" : 1662976, - "90.000000" : 1810432, - "95.000000" : 1941504, - "99.000000" : 2244608, - "99.500000" : 2408448, - "99.900000" : 3620864, - "99.950000" : 4358144, - "99.990000" : 6062080 - } - }, - "bw_min" : 291288, - "bw_max" : 380288, - "bw_agg" : 99.999134, - "bw_mean" : 340567.050000, - "bw_dev" : 6222.338382, - "bw_samples" : 240, - "iops_min" : 72822, - "iops_max" : 95072, - "iops_mean" : 85141.733333, - "iops_stddev" : 1555.582888, - "iops_samples" : 240 - }, - "write" : { - "io_bytes" : 10454208512, - "io_kbytes" 
: 10209188, - "bw_bytes" : 348450387, - "bw" : 340283, - "iops" : 85066.128925, - "runtime" : 30002, - "total_ios" : 2552154, - "short_ios" : 0, - "drop_ios" : 0, - "slat_ns" : { - "min" : 1383, - "max" : 315361, - "mean" : 2182.824623, - "stddev" : 919.625590 - }, - "clat_ns" : { - "min" : 433904, - "max" : 6300941, - "mean" : 1558511.433458, - "stddev" : 207734.850159 - }, - "lat_ns" : { - "min" : 441909, - "max" : 6302845, - "mean" : 1560749.444938, - "stddev" : 207695.144244, - "percentile" : { - "1.000000" : 1155072, - "5.000000" : 1269760, - "10.000000" : 1318912, - "20.000000" : 1384448, - "30.000000" : 1449984, - "40.000000" : 1499136, - "50.000000" : 1531904, - "60.000000" : 1597440, - "70.000000" : 1646592, - "80.000000" : 1728512, - "90.000000" : 1826816, - "95.000000" : 1908736, - "99.000000" : 2072576, - "99.500000" : 2179072, - "99.900000" : 2605056, - "99.950000" : 3031040, - "99.990000" : 4358144 - } - }, - "bw_min" : 288464, - "bw_max" : 380080, - "bw_agg" : 99.998134, - "bw_mean" : 340276.650000, - "bw_dev" : 6293.894521, - "bw_samples" : 240, - "iops_min" : 72116, - "iops_max" : 95020, - "iops_mean" : 85069.133333, - "iops_stddev" : 1573.475038, - "iops_samples" : 240 - }, - "trim" : { - "io_bytes" : 0, - "io_kbytes" : 0, - "bw_bytes" : 0, - "bw" : 0, - "iops" : 0.000000, - "runtime" : 0, - "total_ios" : 0, - "short_ios" : 0, - "drop_ios" : 0, - "slat_ns" : { - "min" : 0, - "max" : 0, - "mean" : 0.000000, - "stddev" : 0.000000 - }, - "clat_ns" : { - "min" : 0, - "max" : 0, - "mean" : 0.000000, - "stddev" : 0.000000 - }, - "lat_ns" : { - "min" : 0, - "max" : 0, - "mean" : 0.000000, - "stddev" : 0.000000, - "percentile" : { - "1.000000" : 0, - "5.000000" : 0, - "10.000000" : 0, - "20.000000" : 0, - "30.000000" : 0, - "40.000000" : 0, - "50.000000" : 0, - "60.000000" : 0, - "70.000000" : 0, - "80.000000" : 0, - "90.000000" : 0, - "95.000000" : 0, - "99.000000" : 0, - "99.500000" : 0, - "99.900000" : 0, - "99.950000" : 0, - "99.990000" : 0 - } - }, - 
"bw_min" : 0, - "bw_max" : 0, - "bw_agg" : 0.000000, - "bw_mean" : 0.000000, - "bw_dev" : 0.000000, - "bw_samples" : 0, - "iops_min" : 0, - "iops_max" : 0, - "iops_mean" : 0.000000, - "iops_stddev" : 0.000000, - "iops_samples" : 0 - }, - "sync" : { - "lat_ns" : { - "min" : 0, - "max" : 0, - "mean" : 0.000000, - "stddev" : 0.000000 - }, - "total_ios" : 0 - }, - "job_runtime" : 120004, - "usr_cpu" : 4.833172, - "sys_cpu" : 20.800973, - "ctx" : 3542118, - "majf" : 0, - "minf" : 1263, - "iodepth_level" : { - "1" : 0.000000, - "2" : 0.000000, - "4" : 0.000000, - "8" : 0.000000, - "16" : 0.000000, - "32" : 0.000000, - ">=64" : 100.000000 - }, - "iodepth_submit" : { - "0" : 0.000000, - "4" : 100.000000, - "8" : 0.000000, - "16" : 0.000000, - "32" : 0.000000, - "64" : 0.000000, - ">=64" : 0.000000 - }, - "iodepth_complete" : { - "0" : 0.000000, - "4" : 99.999922, - "8" : 0.000000, - "16" : 0.000000, - "32" : 0.000000, - "64" : 0.100000, - ">=64" : 0.000000 - }, - "latency_ns" : { - "2" : 0.000000, - "4" : 0.000000, - "10" : 0.000000, - "20" : 0.000000, - "50" : 0.000000, - "100" : 0.000000, - "250" : 0.000000, - "500" : 0.000000, - "750" : 0.000000, - "1000" : 0.000000 - }, - "latency_us" : { - "2" : 0.000000, - "4" : 0.000000, - "10" : 0.000000, - "20" : 0.000000, - "50" : 0.000000, - "100" : 0.000000, - "250" : 0.000000, - "500" : 0.010000, - "750" : 0.070126, - "1000" : 1.756079 - }, - "latency_ms" : { - "2" : 95.414131, - "4" : 2.722457, - "10" : 0.040830, - "20" : 0.010000, - "50" : 0.000000, - "100" : 0.000000, - "250" : 0.000000, - "500" : 0.000000, - "750" : 0.000000, - "1000" : 0.000000, - "2000" : 0.000000, - ">=2000" : 0.000000 - }, - "latency_depth" : 64, - "latency_target" : 0, - "latency_percentile" : 100.000000, - "latency_window" : 0 - } - ], - "disk_util" : [ - { - "name" : "nvme0n1", - "read_ios" : 3004914, - "write_ios" : 3003760, - "read_merges" : 0, - "write_merges" : 0, - "read_ticks" : 4269143, - "write_ticks" : 4598453, - "in_queue" : 11104, - 
"util" : 99.840351 - } - ] -} -""" jobname_prefix = 'nvme0n1_rand_read_write' assert (benchmark._process_raw_result(0, test_raw_output)) assert (benchmark.return_code == ReturnCode.SUCCESS) diff --git a/tests/benchmarks/micro_benchmarks/test_gpcnet_performance.py b/tests/benchmarks/micro_benchmarks/test_gpcnet_performance.py index f5864701..31f3acec 100644 --- a/tests/benchmarks/micro_benchmarks/test_gpcnet_performance.py +++ b/tests/benchmarks/micro_benchmarks/test_gpcnet_performance.py @@ -3,66 +3,27 @@ """Tests for GPCNet benchmark.""" -import os import numbers import unittest -from pathlib import Path +from tests.helper import decorator +from tests.helper.testcase import BenchmarkTestCase from superbench.benchmarks import BenchmarkRegistry, Platform, BenchmarkType -class GPCNetBenchmarkTest(unittest.TestCase): # noqa: E501 +class GPCNetBenchmarkTest(BenchmarkTestCase, unittest.TestCase): """Tests for GPCNetBenchmark benchmark.""" - def setUp(self): - """Method called to prepare the test fixture.""" - # Create fake binary file just for testing. 
- os.environ['SB_MICRO_PATH'] = '/tmp/superbench' - binary_path = os.path.join(os.getenv('SB_MICRO_PATH'), 'bin') - Path(binary_path).mkdir(parents=True, exist_ok=True) - self.__binary_files = [] - for bin_name in ['network_test', 'network_load_test']: - self.__binary_files.append(Path(binary_path, bin_name)) - Path(binary_path, bin_name).touch(mode=0o755, exist_ok=True) + @classmethod + def setUpClass(cls): + """Hook method for setting up class fixture before running tests in the class.""" + super().setUpClass() + cls.createMockEnvs(cls) + cls.createMockFiles(cls, ['bin/network_test', 'bin/network_load_test']) - def tearDown(self): - """Method called after the test method has been called and the result recorded.""" - for bin_file in self.__binary_files: - bin_file.unlink() - - def test_gpcnet_network_test(self): + @decorator.load_data('tests/data/gpcnet_network_test.log') + @decorator.load_data('tests/data/gpcnet_network_test_error.log') + def test_gpcnet_network_test(self, raw_output, raw_output_no_execution): """Test gpcnet-network-test benchmark.""" - raw_output = """# noqa: E501 -Network Tests v1.3 - Test with 2 MPI ranks (2 nodes) - - Legend - RR = random ring communication pattern - Nat = natural ring communication pattern - Lat = latency - BW = bandwidth - BW+Sync = bandwidth with barrier -+------------------------------------------------------------------------------+ -| Isolated Network Tests | -+---------------------------------+--------------+--------------+--------------+ -| Name | Avg | 99% | Units | -+---------------------------------+--------------+--------------+--------------+ -| RR Two-sided Lat (8 B) | 10000.0 | 10000.0 | usec | -+---------------------------------+--------------+--------------+--------------+ -| RR Get Lat (8 B) | 10000.0 | 10000.0 | usec | -+---------------------------------+--------------+--------------+--------------+ -| RR Two-sided BW (131072 B) | 10000.0 | 10000.0 | MiB/s/rank | 
-+---------------------------------+--------------+--------------+--------------+ -| RR Put BW (131072 B) | 10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+ -| RR Two-sided BW+Sync (131072 B) | 10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+ -| Nat Two-sided BW (131072 B) | 10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+ -| Multiple Allreduce (8 B) | 10000.0 | 10000.0 | usec | -+---------------------------------+--------------+--------------+--------------+ -| Multiple Alltoall (4096 B) | 10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+ -""" # Check registry. benchmark_name = 'gpcnet-network-test' (benchmark_class, @@ -78,20 +39,6 @@ Network Tests v1.3 command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1] assert (command == expect_command) - raw_output_no_execution = """ -ERROR: this application must be run on at least 2 nodes --------------------------------------------------------------------------- -Primary job terminated normally, but 1 process returned -a non-zero exit code. Per user-direction, the job has been aborted. --------------------------------------------------------------------------- --------------------------------------------------------------------------- -mpirun detected that one or more processes exited with non-zero status, thus causing -the job to be terminated. The first process to do so was: - - Process name: [[63697,1],0] - Exit code: 1 --------------------------------------------------------------------------- -""" assert (benchmark._process_raw_result(0, raw_output_no_execution)) assert (len(benchmark.result) == benchmark.default_metric_count) @@ -123,107 +70,10 @@ the job to be terminated. 
The first process to do so was: assert (benchmark.type == BenchmarkType.MICRO) assert (benchmark._bin_name == 'network_test') - def test_gpcnet_network_load(self): # noqa: C901 + @decorator.load_data('tests/data/gpcnet_network_load.log') + @decorator.load_data('tests/data/gpcnet_network_load_error.log') + def test_gpcnet_network_load(self, raw_output, raw_output_no_execution): """Test gpcnet-network-load-test benchmark.""" - raw_output = """# noqa: E501 -NetworkLoad Tests v1.3 - Test with 10 MPI ranks (10 nodes) - 2 nodes running Network Tests - 8 nodes running Congestion Tests (min 100 nodes per congestor) - - Legend - RR = random ring communication pattern - Lat = latency - BW = bandwidth - BW+Sync = bandwidth with barrier -+------------------------------------------------------------------------------------------------------------------------------------------+ -| Isolated Network Tests | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| RR Two-sided Lat (8 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | usec | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| RR Two-sided BW+Sync (131072 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Multiple Allreduce (8 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | usec | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ - 
-+------------------------------------------------------------------------------------------------------------------------------------------+ -| Isolated Congestion Tests | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Alltoall (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Two-sided Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Put Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Get Bcast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ - -+------------------------------------------------------------------------------------------------------------------------------------------+ -| Network Tests running with Congestion Tests ( RR Two-sided Lat Network Test) | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units | 
-+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| RR Two-sided Lat (8 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | usec | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Alltoall (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Two-sided Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Put Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Get Bcast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ - -+------------------------------------------------------------------------------------------------------------------------------------------+ -| Network Tests running with Congestion Tests (RR Two-sided BW+Sync Network Test) | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| RR Two-sided BW+Sync (131072 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 
10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Alltoall (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Two-sided Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Put Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Get Bcast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ - -+------------------------------------------------------------------------------------------------------------------------------------------+ -| Network Tests running with Congestion Tests ( Multiple Allreduce Network Test) | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Multiple Allreduce (8 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | usec | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Alltoall (4096 B) | 10000.0 | 10000.0 | 
10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Two-sided Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Put Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ -| Get Bcast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | -+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ - -+------------------------------------------------------------------------------+ -| Network Tests running with Congestion Tests - Key Results | -+---------------------------------+--------------------------------------------+ -| Name | Congestion Impact Factor | -+---------------------------------+----------------------+---------------------+ -| | Avg | 99% | -+---------------------------------+----------------------+---------------------+ -| RR Two-sided Lat (8 B) | 0.0X | 0.0X | -+---------------------------------+----------------------+---------------------+ -| RR Two-sided BW+Sync (131072 B) | 0.0X | 0.0X | -+---------------------------------+----------------------+---------------------+ -| Multiple Allreduce (8 B) | 0.0X | 0.0X | -+---------------------------------+----------------------+---------------------+ -""" # Check registry. benchmark_name = 'gpcnet-network-load-test' (benchmark_class, @@ -240,20 +90,6 @@ NetworkLoad Tests v1.3 assert (command == expect_command) # Check function process_raw_data. 
- raw_output_no_execution = """ -ERROR: this application must be run on at least 10 nodes --------------------------------------------------------------------------- -Primary job terminated normally, but 1 process returned -a non-zero exit code. Per user-direction, the job has been aborted. --------------------------------------------------------------------------- --------------------------------------------------------------------------- -mpirun detected that one or more processes exited with non-zero status, thus causing -the job to be terminated. The first process to do so was: - - Process name: [[63697,1],0] - Exit code: 1 --------------------------------------------------------------------------- -""" assert (benchmark._process_raw_result(0, raw_output_no_execution)) assert (len(benchmark.result) == benchmark.default_metric_count) # Positive case - valid raw output. diff --git a/tests/benchmarks/micro_benchmarks/test_gpu_copy_bw_performance.py b/tests/benchmarks/micro_benchmarks/test_gpu_copy_bw_performance.py index 56047916..8b103deb 100644 --- a/tests/benchmarks/micro_benchmarks/test_gpu_copy_bw_performance.py +++ b/tests/benchmarks/micro_benchmarks/test_gpu_copy_bw_performance.py @@ -3,29 +3,22 @@ """Tests for gpu-copy-bw benchmark.""" -from pathlib import Path import numbers -import os import unittest from tests.helper import decorator +from tests.helper.testcase import BenchmarkTestCase from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform -class GpuCopyBwBenchmarkTest(unittest.TestCase): +class GpuCopyBwBenchmarkTest(BenchmarkTestCase, unittest.TestCase): """Test class for gpu-copy-bw benchmark.""" - def setUp(self): - """Method called to prepare the test fixture.""" - # Create fake binary file just for testing. 
- os.environ['SB_MICRO_PATH'] = '/tmp/superbench/' - binary_path = Path(os.getenv('SB_MICRO_PATH'), 'bin') - binary_path.mkdir(parents=True, exist_ok=True) - self.__binary_file = binary_path / 'gpu_copy' - self.__binary_file.touch(mode=0o755, exist_ok=True) - - def tearDown(self): - """Method called after the test method has been called and the result recorded.""" - self.__binary_file.unlink() + @classmethod + def setUpClass(cls): + """Hook method for setting up class fixture before running tests in the class.""" + super().setUpClass() + cls.createMockEnvs(cls) + cls.createMockFiles(cls, ['bin/gpu_copy']) def _test_gpu_copy_bw_performance_command_generation(self, platform): """Test gpu-copy benchmark command generation.""" diff --git a/tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py b/tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py index b325508a..ca2316b2 100644 --- a/tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py +++ b/tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py @@ -6,113 +6,42 @@ import os import numbers import unittest -from pathlib import Path from unittest import mock +from tests.helper import decorator +from tests.helper.testcase import BenchmarkTestCase from superbench.benchmarks import BenchmarkRegistry, Platform, BenchmarkType, ReturnCode from superbench.common.utils import network from superbench.benchmarks.micro_benchmarks import ib_loopback_performance -class IBLoopbackBenchmarkTest(unittest.TestCase): +class IBLoopbackBenchmarkTest(BenchmarkTestCase, unittest.TestCase): """Tests for IBLoopbackBenchmark benchmark.""" - def setUp(self): - """Method called to prepare the test fixture.""" - if (len(network.get_ib_devices()) < 1): - # Create fake binary file just for testing. 
- os.environ['SB_MICRO_PATH'] = '/tmp/superbench' - binary_path = Path(os.getenv('SB_MICRO_PATH'), 'bin') - binary_path.mkdir(parents=True, exist_ok=True) - self.__binary_file = Path(binary_path, 'run_perftest_loopback') - self.__binary_file.touch(mode=0o755, exist_ok=True) - - def tearDown(self): - """Method called after the test method has been called and the result recorded.""" - if (len(network.get_ib_devices()) < 1): - self.__binary_file.unlink() + @classmethod + def setUpClass(cls): + """Hook method for setting up class fixture before running tests in the class.""" + super().setUpClass() + cls.createMockEnvs(cls) + cls.createMockFiles(cls, ['bin/run_perftest_loopback']) def test_ib_loopback_util(self): """Test util functions 'get_numa_cores' and 'get_free_port' used in ib-loopback benchmark.""" port = network.get_free_port() assert (isinstance(port, numbers.Number)) numa_cores = ib_loopback_performance.get_numa_cores(0) + if numa_cores is None: + # in case no NUMA support available on test system + return assert (len(numa_cores) >= 2) for i in range(len(numa_cores)): assert (isinstance(numa_cores[i], numbers.Number)) + @decorator.load_data('tests/data/ib_loopback_all_sizes.log') @mock.patch('superbench.common.utils.network.get_free_port') @mock.patch('superbench.benchmarks.micro_benchmarks.ib_loopback_performance.get_numa_cores') @mock.patch('superbench.common.utils.network.get_ib_devices') - def test_ib_loopback_all_sizes(self, mock_ib_devices, mock_numa_cores, mock_port): + def test_ib_loopback_all_sizes(self, raw_output, mock_ib_devices, mock_numa_cores, mock_port): """Test ib-loopback benchmark for all sizes.""" - raw_output = """ -************************************ -* Waiting for client to connect... 
* -************************************ ---------------------------------------------------------------------------------------- - RDMA_Write BW Test -Dual-port : OFF Device : ibP257p0s0 -Number of qps : 1 Transport type : IB -Connection type : RC Using SRQ : OFF -PCIe relax order: ON ---------------------------------------------------------------------------------------- - RDMA_Write BW Test -Dual-port : OFF Device : ibP257p0s0 -Number of qps : 1 Transport type : IB -Connection type : RC Using SRQ : OFF -PCIe relax order: ON -ibv_wr* API : ON -TX depth : 128 -CQ Moderation : 100 -Mtu : 4096[B] -Link type : IB -Max inline data : 0[B] -rdma_cm QPs : OFF -Data ex. method : Ethernet ---------------------------------------------------------------------------------------- -ibv_wr* API : ON -CQ Moderation : 100 -Mtu : 4096[B] -Link type : IB -Max inline data : 0[B] -rdma_cm QPs : OFF -Data ex. method : Ethernet ---------------------------------------------------------------------------------------- -local address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97ff50000 -local address: LID 0xd06 QPN 0x092e PSN 0x3eb82d RKey 0x080228 VAddr 0x007f19adcbf000 -remote address: LID 0xd06 QPN 0x092e PSN 0x3eb82d RKey 0x080228 VAddr 0x007f19adcbf000 -remote address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97ff50000 ---------------------------------------------------------------------------------------- ---------------------------------------------------------------------------------------- -#bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps] -#bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps] -2 2000 5.32 5.30 2.778732 -4 2000 10.65 10.64 2.788833 -8 2000 21.30 21.27 2.787609 -16 2000 42.60 42.55 2.788268 -32 2000 84.90 82.82 2.713896 -64 2000 173.55 171.66 2.812504 -128 2000 362.27 353.83 2.898535 -256 2000 687.82 679.37 2.782698 -512 2000 1337.12 1311.59 2.686135 -1024 2000 2674.25 2649.39 2.712980 -2048 2000 
5248.56 5118.18 2.620509 -4096 2000 10034.02 9948.41 2.546793 -8192 2000 18620.51 12782.56 1.636168 -16384 2000 23115.27 16782.50 1.074080 -32768 2000 22927.94 18586.03 0.594753 -65536 2000 23330.56 21167.79 0.338685 -131072 2000 22750.35 21443.14 0.171545 -262144 2000 22673.63 22411.35 0.089645 -524288 2000 22679.02 22678.86 0.045358 -1048576 2000 22817.06 22816.86 0.022817 -2097152 2000 22919.37 22919.27 0.011460 -4194304 2000 23277.93 23277.91 0.005819 -8388608 2000 23240.68 23240.68 0.002905 ---------------------------------------------------------------------------------------- -8388608 2000 23240.68 23240.68 0.002905 ---------------------------------------------------------------------------------------- - """ # Test without ib devices # Check registry. benchmark_name = 'ib-loopback' @@ -179,56 +108,12 @@ remote address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97f assert (benchmark._args.iters == 2000) assert (benchmark._args.commands == ['write']) + @decorator.load_data('tests/data/ib_loopback_8M_size.log') @mock.patch('superbench.common.utils.network.get_free_port') @mock.patch('superbench.benchmarks.micro_benchmarks.ib_loopback_performance.get_numa_cores') @mock.patch('superbench.common.utils.network.get_ib_devices') - def test_ib_loopback_8M_size(self, mock_ib_devices, mock_numa_cores, mock_port): + def test_ib_loopback_8M_size(self, raw_output, mock_ib_devices, mock_numa_cores, mock_port): """Test ib-loopback benchmark for 8M size.""" - raw_output = """ - RDMA_Write BW Test - Dual-port : OFF Device : ibP257p0s0 - Number of qps : 1 Transport type : IB - Connection type : RC Using SRQ : OFF - PCIe relax order: ON - TX depth : 128 - CQ Moderation : 1 - Mtu : 4096[B] - Link type : IB - Max inline data : 0[B] - rdma_cm QPs : OFF - Data ex. 
method : Ethernet ---------------------------------------------------------------------------------------- - local address: LID 0xd06 QPN 0x095f PSN 0x3c9e82 RKey 0x080359 VAddr 0x007f9fc479c000 - remote address: LID 0xd06 QPN 0x095e PSN 0xbd024b RKey 0x080258 VAddr 0x007fe62504b000 ---------------------------------------------------------------------------------------- - #bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps] - 8388608 20000 24056.74 24056.72 0.003007 -************************************ -* Waiting for client to connect... * -************************************ ---------------------------------------------------------------------------------------- - RDMA_Write BW Test - Dual-port : OFF Device : ibP257p0s0 - Number of qps : 1 Transport type : IB - Connection type : RC Using SRQ : OFF - PCIe relax order: ON - CQ Moderation : 1 - Mtu : 4096[B] - Link type : IB - Max inline data : 0[B] - rdma_cm QPs : OFF - Data ex. method : Ethernet ---------------------------------------------------------------------------------------- - local address: LID 0xd06 QPN 0x095e PSN 0xbd024b RKey 0x080258 VAddr 0x007fe62504b000 - remote address: LID 0xd06 QPN 0x095f PSN 0x3c9e82 RKey 0x080359 VAddr 0x007f9fc479c000 ---------------------------------------------------------------------------------------- - #bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps] - 8388608 20000 24056.74 24056.72 0.003007 ---------------------------------------------------------------------------------------- - ---------------------------------------------------------------------------------------- ---------------------------------------------------------------------------------------- -""" # Test without ib devices # Check registry. 
benchmark_name = 'ib-loopback' diff --git a/tests/benchmarks/micro_benchmarks/test_ib_traffic_performance.py b/tests/benchmarks/micro_benchmarks/test_ib_traffic_performance.py index 89271bb8..aa0d8367 100644 --- a/tests/benchmarks/micro_benchmarks/test_ib_traffic_performance.py +++ b/tests/benchmarks/micro_benchmarks/test_ib_traffic_performance.py @@ -10,26 +10,26 @@ from pathlib import Path from unittest import mock from collections import defaultdict +from tests.helper.testcase import BenchmarkTestCase from superbench.benchmarks import BenchmarkRegistry, Platform, BenchmarkType, ReturnCode -class IBBenchmarkTest(unittest.TestCase): +class IBBenchmarkTest(BenchmarkTestCase, unittest.TestCase): """Tests for IBBenchmark benchmark.""" - def setUp(self): - """Method called to prepare the test fixture.""" - # Create fake binary file just for testing. - os.environ['SB_MICRO_PATH'] = '/tmp/superbench' - binary_path = Path(os.getenv('SB_MICRO_PATH'), 'bin') - binary_path.mkdir(parents=True, exist_ok=True) - self.__binary_file = Path(binary_path, 'ib_validation') - self.__binary_file.touch(mode=0o755, exist_ok=True) + @classmethod + def setUpClass(cls): + """Hook method for setting up class fixture before running tests in the class.""" + super().setUpClass() + cls.createMockEnvs(cls) + cls.createMockFiles(cls, ['bin/ib_validation']) - def tearDown(self): - """Method called after the test method has been called and the result recorded.""" - self.__binary_file.unlink() + @classmethod + def tearDownClass(cls): + """Hook method for deconstructing the class fixture after running all tests in the class.""" p = Path('hostfile') if p.is_file(): p.unlink() + super().tearDownClass() def test_generate_config(self): # noqa: C901 """Test util functions .""" @@ -117,8 +117,9 @@ class IBBenchmarkTest(unittest.TestCase): Path(test_config_file).unlink() + @mock.patch('superbench.common.devices.GPU.vendor', new_callable=mock.PropertyMock) 
@mock.patch('superbench.common.utils.network.get_ib_devices') - def test_ib_traffic_performance(self, mock_ib_devices): + def test_ib_traffic_performance(self, mock_ib_devices, mock_gpu): """Test ib-traffic benchmark.""" # Test without ib devices # Check registry. @@ -168,6 +169,22 @@ class IBBenchmarkTest(unittest.TestCase): command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1] assert (command == expect_command) + parameters = '--ib_index 0 --iters 2000 --pattern one-to-one --hostfile hostfile --gpu_index 0' + mock_gpu.return_value = 'nvidia' + benchmark = benchmark_class(benchmark_name, parameters=parameters) + ret = benchmark._preprocess() + expect_command = 'ib_validation --hostfile hostfile --cmd_prefix "ib_write_bw -F ' + \ + '--iters=2000 -d mlx5_0 -a --use_cuda=0" --input_config ' + os.getcwd() + '/config.txt' + command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1] + assert (command == expect_command) + mock_gpu.return_value = 'amd' + benchmark = benchmark_class(benchmark_name, parameters=parameters) + ret = benchmark._preprocess() + expect_command = 'ib_validation --hostfile hostfile --cmd_prefix "ib_write_bw -F ' + \ + '--iters=2000 -d mlx5_0 -a --use_rocm=0" --input_config ' + os.getcwd() + '/config.txt' + command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1] + assert (command == expect_command) + # Custom config config = ['0,1', '1,0;0,1', '0,1;1,0', '1,0;0,1'] with open('test_config.txt', 'w') as f: diff --git a/tests/benchmarks/micro_benchmarks/test_rocm_gemm_flops_performance.py b/tests/benchmarks/micro_benchmarks/test_rocm_gemm_flops_performance.py index 0433bc8d..010a305a 100644 --- a/tests/benchmarks/micro_benchmarks/test_rocm_gemm_flops_performance.py +++ b/tests/benchmarks/micro_benchmarks/test_rocm_gemm_flops_performance.py @@ -3,27 +3,20 @@ """Tests for gemm-flops benchmark.""" -import os import unittest -from pathlib import Path +from 
tests.helper.testcase import BenchmarkTestCase from superbench.benchmarks import BenchmarkRegistry, ReturnCode, Platform, BenchmarkType -class RocmGemmFlopsTest(unittest.TestCase): +class RocmGemmFlopsTest(BenchmarkTestCase, unittest.TestCase): """Tests for RocmGemmFlops benchmark.""" - def setUp(self): - """Method called to prepare the test fixture.""" - # Create fake binary file just for testing. - os.environ['SB_MICRO_PATH'] = '/tmp/superbench/' - binary_path = os.path.join(os.getenv('SB_MICRO_PATH'), 'bin') - Path(binary_path).mkdir(parents=True, exist_ok=True) - self.__binary_file = Path(os.path.join(binary_path, 'rocblas-bench')) - self.__binary_file.touch(mode=0o755, exist_ok=True) - - def tearDown(self): - """Method called after the test method has been called and the result recorded.""" - self.__binary_file.unlink() + @classmethod + def setUpClass(cls): + """Hook method for setting up class fixture before running tests in the class.""" + super().setUpClass() + cls.createMockEnvs(cls) + cls.createMockFiles(cls, ['bin/rocblas-bench']) def test_rocm_flops_performance(self): """Test gemm-flops benchmark.""" diff --git a/tests/benchmarks/micro_benchmarks/test_rocm_memory_bw_performance.py b/tests/benchmarks/micro_benchmarks/test_rocm_memory_bw_performance.py index 77bdf714..139716cd 100644 --- a/tests/benchmarks/micro_benchmarks/test_rocm_memory_bw_performance.py +++ b/tests/benchmarks/micro_benchmarks/test_rocm_memory_bw_performance.py @@ -4,29 +4,25 @@ """Tests for mem-bw benchmark.""" import numbers -from pathlib import Path -import os import unittest +from tests.helper import decorator +from tests.helper.testcase import BenchmarkTestCase from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform -class RocmMemBwTest(unittest.TestCase): +class RocmMemBwTest(BenchmarkTestCase, unittest.TestCase): """Test class for rocm mem-bw benchmark.""" - def setUp(self): - """Method called to prepare the test fixture.""" - # Create fake 
binary file just for testing. - os.environ['SB_MICRO_PATH'] = '/tmp/superbench/' - binary_path = os.path.join(os.getenv('SB_MICRO_PATH'), 'bin') - Path(os.getenv('SB_MICRO_PATH'), 'bin').mkdir(parents=True, exist_ok=True) - self.__binary_file = Path(binary_path, 'hipBusBandwidth') - self.__binary_file.touch(mode=0o755, exist_ok=True) + @classmethod + def setUpClass(cls): + """Hook method for setting up class fixture before running tests in the class.""" + super().setUpClass() + cls.createMockEnvs(cls) + cls.createMockFiles(cls, ['bin/hipBusBandwidth']) - def tearDown(self): - """Method called after the test method has been called and the result recorded.""" - self.__binary_file.unlink() - - def test_rocm_memory_bw_performance(self): + @decorator.load_data('tests/data/rocm_memory_h2d_bw.log') + @decorator.load_data('tests/data/rocm_memory_d2h_bw.log') + def test_rocm_memory_bw_performance(self, raw_output_h2d, raw_output_d2h): """Test rocm mem-bw benchmark.""" benchmark_name = 'mem-bw' (benchmark_class, @@ -51,114 +47,7 @@ class RocmMemBwTest(unittest.TestCase): assert (commnad == expected_command[i]) # Check results and metrics. 
- raw_output = {} - raw_output[0] = """ -Device:Device 738c Mem=32.0GB #CUs=120 Freq=1502Mhz MallocMode=pinned - test atts units median mean stddev min max - H2D_Bandwidth_pinned +064By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000 - H2D_Bandwidth_pinned +256By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000 - H2D_Bandwidth_pinned +512By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000 - H2D_Bandwidth_pinned 1kB GB/sec 0.0414 0.0411 0.0017 0.0189 0.0434 - H2D_Bandwidth_pinned 2kB GB/sec 0.0828 0.0824 0.0018 0.0683 0.0862 - H2D_Bandwidth_pinned 4kB GB/sec 0.1656 0.1652 0.0032 0.1374 0.1724 - H2D_Bandwidth_pinned 8kB GB/sec 0.3268 0.3251 0.0117 0.1880 0.3425 - H2D_Bandwidth_pinned 16kB GB/sec 0.6410 0.6365 0.0259 0.3597 0.6757 - H2D_Bandwidth_pinned 32kB GB/sec 1.2422 1.2432 0.0278 0.9346 1.2987 - H2D_Bandwidth_pinned 64kB GB/sec 2.3968 2.4161 0.1486 0.7242 2.6042 - H2D_Bandwidth_pinned 128kB GB/sec 4.6786 4.6339 0.1310 4.1143 4.8162 - H2D_Bandwidth_pinned 256kB GB/sec 7.8349 7.8369 0.1150 6.9093 8.0270 - H2D_Bandwidth_pinned 512kB GB/sec 11.9963 11.9828 0.1287 11.2158 12.2201 - H2D_Bandwidth_pinned 1024kB GB/sec 16.3342 16.3315 0.0956 16.0147 16.5823 - H2D_Bandwidth_pinned 2048kB GB/sec 19.9790 19.9770 0.0853 19.7681 20.1635 - H2D_Bandwidth_pinned 4096kB GB/sec 22.2706 22.2642 0.0552 22.0644 22.3847 - H2D_Bandwidth_pinned 8192kB GB/sec 22.8232 22.7881 0.1669 21.3196 22.8930 - H2D_Bandwidth_pinned 16384kB GB/sec 24.1521 24.1411 0.0429 24.0165 24.2162 - H2D_Bandwidth_pinned 32768kB GB/sec 24.8695 24.7086 0.7491 20.6288 24.9035 - H2D_Bandwidth_pinned 65536kB GB/sec 24.4840 24.0101 2.5769 6.1754 24.5292 - H2D_Bandwidth_pinned 131072kB GB/sec 25.0487 24.9593 0.2601 24.1286 25.0711 - H2D_Bandwidth_pinned 262144kB GB/sec 25.3280 25.2351 0.1788 24.8746 25.3498 - H2D_Bandwidth_pinned 524288kB GB/sec 24.7523 24.6708 0.1586 24.3154 24.7880 - H2D_Timepinned +064By ms 0.0245 0.0253 0.0240 0.0232 0.7821 - H2D_Timepinned +256By ms 0.0243 0.0244 0.0013 0.0232 0.0546 - H2D_Timepinned +512By ms 
0.0243 0.0244 0.0014 0.0230 0.0566 - H2D_Timepinned 1kB ms 0.0242 0.0244 0.0016 0.0230 0.0530 - H2D_Timepinned 2kB ms 0.0242 0.0243 0.0005 0.0232 0.0293 - H2D_Timepinned 4kB ms 0.0242 0.0242 0.0005 0.0232 0.0291 - H2D_Timepinned 8kB ms 0.0245 0.0247 0.0013 0.0234 0.0426 - H2D_Timepinned 16kB ms 0.0250 0.0252 0.0015 0.0237 0.0445 - H2D_Timepinned 32kB ms 0.0258 0.0258 0.0006 0.0246 0.0342 - H2D_Timepinned 64kB ms 0.0271 0.0272 0.0045 0.0250 0.0898 - H2D_Timepinned 128kB ms 0.0280 0.0283 0.0008 0.0272 0.0318 - H2D_Timepinned 256kB ms 0.0334 0.0334 0.0005 0.0326 0.0379 - H2D_Timepinned 512kB ms 0.0437 0.0437 0.0005 0.0429 0.0467 - H2D_Timepinned 1024kB ms 0.0642 0.0642 0.0004 0.0632 0.0654 - H2D_Timepinned 2048kB ms 0.1050 0.1050 0.0004 0.1040 0.1061 - H2D_Timepinned 4096kB ms 0.1883 0.1884 0.0005 0.1874 0.1901 - H2D_Timepinned 8192kB ms 0.3675 0.3681 0.0028 0.3664 0.3934 - H2D_Timepinned 16384kB ms 0.6946 0.6950 0.0012 0.6928 0.6986 - H2D_Timepinned 32768kB ms 1.3492 1.3595 0.0482 1.3474 1.6266 - H2D_Timepinned 65536kB ms 2.7409 2.9163 1.1368 2.7358 10.8670 - H2D_Timepinned 131072kB ms 5.3582 5.3780 0.0576 5.3534 5.5626 - H2D_Timepinned 262144kB ms 10.5983 10.6379 0.0761 10.5892 10.7915 - H2D_Timepinned 524288kB ms 21.6897 21.7622 0.1411 21.6585 22.0794 - -Note: results marked with (*) had missing values such as -might occur with a mixture of architectural capabilities. 
- """ - raw_output[1] = """ -Device:Device 738c Mem=32.0GB #CUs=120 Freq=1502Mhz MallocMode=pinned - test atts units median mean stddev min max - D2H_Bandwidth_pinned +064By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000 - D2H_Bandwidth_pinned +256By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000 - D2H_Bandwidth_pinned +512By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000 - D2H_Bandwidth_pinned 1kB GB/sec 0.0428 0.0426 0.0019 0.0114 0.0446 - D2H_Bandwidth_pinned 2kB GB/sec 0.0850 0.0844 0.0034 0.0415 0.0893 - D2H_Bandwidth_pinned 4kB GB/sec 0.1701 0.1687 0.0084 0.0504 0.1773 - D2H_Bandwidth_pinned 8kB GB/sec 0.3378 0.3348 0.0168 0.1085 0.3546 - D2H_Bandwidth_pinned 16kB GB/sec 0.6667 0.6606 0.0218 0.5618 0.6897 - D2H_Bandwidth_pinned 32kB GB/sec 1.3072 1.2954 0.0663 0.5682 1.3605 - D2H_Bandwidth_pinned 64kB GB/sec 2.5550 2.5339 0.0955 2.1382 2.6904 - D2H_Bandwidth_pinned 128kB GB/sec 4.8162 4.7807 0.2331 2.0940 4.9621 - D2H_Bandwidth_pinned 256kB GB/sec 8.2286 8.2192 0.1671 7.2456 8.5286 - D2H_Bandwidth_pinned 512kB GB/sec 12.7930 12.7062 0.4407 7.1196 13.0478 - D2H_Bandwidth_pinned 1024kB GB/sec 17.5603 17.4938 0.3921 12.7184 17.7989 - D2H_Bandwidth_pinned 2048kB GB/sec 21.6275 21.5591 0.2233 20.6073 21.8076 - D2H_Bandwidth_pinned 4096kB GB/sec 24.2708 24.2556 0.0942 23.5724 24.4292 - D2H_Bandwidth_pinned 8192kB GB/sec 24.9287 24.9093 0.0733 24.7171 25.0359 - D2H_Bandwidth_pinned 16384kB GB/sec 26.4588 26.1976 2.4387 1.9387 26.5191 - D2H_Bandwidth_pinned 32768kB GB/sec 27.2939 27.1202 0.7941 23.2086 27.3277 - D2H_Bandwidth_pinned 65536kB GB/sec 26.8278 26.7238 0.3894 24.7946 26.9000 - D2H_Bandwidth_pinned 131072kB GB/sec 27.4751 27.3457 0.3968 25.4168 27.5098 - D2H_Bandwidth_pinned 262144kB GB/sec 27.8236 27.7173 0.3072 26.7977 27.8525 - D2H_Bandwidth_pinned 524288kB GB/sec 28.0193 27.9348 0.1912 27.4707 28.0314 - D2H_Time_pinned +064By ms 0.0229 0.0246 0.0457 0.0216 1.4690 - D2H_Time_pinned +256By ms 0.0232 0.0234 0.0013 0.0221 0.0378 - D2H_Time_pinned +512By ms 0.0234 0.0238 
0.0063 0.0224 0.2091 - D2H_Time_pinned 1kB ms 0.0234 0.0236 0.0028 0.0224 0.0875 - D2H_Time_pinned 2kB ms 0.0235 0.0237 0.0014 0.0224 0.0482 - D2H_Time_pinned 4kB ms 0.0235 0.0239 0.0031 0.0226 0.0794 - D2H_Time_pinned 8kB ms 0.0237 0.0240 0.0027 0.0226 0.0738 - D2H_Time_pinned 16kB ms 0.0240 0.0242 0.0009 0.0232 0.0285 - D2H_Time_pinned 32kB ms 0.0245 0.0248 0.0021 0.0235 0.0563 - D2H_Time_pinned 64kB ms 0.0254 0.0257 0.0011 0.0242 0.0304 - D2H_Time_pinned 128kB ms 0.0272 0.0275 0.0026 0.0264 0.0626 - D2H_Time_pinned 256kB ms 0.0318 0.0319 0.0007 0.0307 0.0362 - D2H_Time_pinned 512kB ms 0.0410 0.0413 0.0024 0.0402 0.0736 - D2H_Time_pinned 1024kB ms 0.0597 0.0599 0.0017 0.0589 0.0824 - D2H_Time_pinned 2048kB ms 0.0970 0.0973 0.0010 0.0962 0.1018 - D2H_Time_pinned 4096kB ms 0.1728 0.1729 0.0007 0.1717 0.1779 - D2H_Time_pinned 8192kB ms 0.3365 0.3367 0.0010 0.3350 0.3394 - D2H_Time_pinned 16384kB ms 0.6341 0.7147 0.7979 0.6326 8.6538 - D2H_Time_pinned 32768kB ms 1.2294 1.2385 0.0420 1.2278 1.4458 - D2H_Time_pinned 65536kB ms 2.5014 2.5117 0.0391 2.4947 2.7066 - D2H_Time_pinned 131072kB ms 4.8850 4.9092 0.0748 4.8789 5.2806 - D2H_Time_pinned 262144kB ms 9.6478 9.6860 0.1106 9.6377 10.0171 - D2H_Time_pinned 524288kB ms 19.1607 19.2196 0.1333 19.1525 19.5434 - -Note: results marked with (*) had missing values such as -might occur with a mixture of architectural capabilities. 
- """ - + raw_output = [raw_output_h2d, raw_output_d2h] for i, metric in enumerate(['h2d_bw', 'd2h_bw']): assert (benchmark._process_raw_result(i, raw_output[i])) assert (metric in benchmark.result) diff --git a/tests/benchmarks/micro_benchmarks/test_tensorrt_inference_performance.py b/tests/benchmarks/micro_benchmarks/test_tensorrt_inference_performance.py index 7db00d79..fe41a219 100644 --- a/tests/benchmarks/micro_benchmarks/test_tensorrt_inference_performance.py +++ b/tests/benchmarks/micro_benchmarks/test_tensorrt_inference_performance.py @@ -3,36 +3,29 @@ """Tests for tensorrt-inference benchmark.""" -import os -import shutil -import tempfile import unittest from pathlib import Path from types import SimpleNamespace from tests.helper import decorator +from tests.helper.testcase import BenchmarkTestCase from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform from superbench.benchmarks.result import BenchmarkResult -class TensorRTInferenceBenchmarkTestCase(unittest.TestCase): +class TensorRTInferenceBenchmarkTestCase(BenchmarkTestCase, unittest.TestCase): """Class for tensorrt-inferencee benchmark test cases.""" - def setUp(self): - """Hook method for setting up the test fixture before exercising it.""" - self.benchmark_name = 'tensorrt-inference' - self.__tmp_dir = tempfile.mkdtemp() - self.__model_path = Path(self.__tmp_dir) / 'hub' / 'onnx' - self.__curr_micro_path = os.environ.get('SB_MICRO_PATH', '') - os.environ['TORCH_HOME'] = self.__tmp_dir - os.environ['SB_MICRO_PATH'] = self.__tmp_dir - (Path(self.__tmp_dir) / 'bin').mkdir(parents=True, exist_ok=True) - (Path(self.__tmp_dir) / 'bin' / 'trtexec').touch(mode=0o755, exist_ok=True) - - def tearDown(self): - """Hook method for deconstructing the test fixture after testing it.""" - shutil.rmtree(self.__tmp_dir) - os.environ['SB_MICRO_PATH'] = self.__curr_micro_path - del os.environ['TORCH_HOME'] + @classmethod + def setUpClass(cls): + """Hook method for setting up class 
fixture before running tests in the class.""" + super().setUpClass() + cls.benchmark_name = 'tensorrt-inference' + cls._model_path = Path(cls._tmp_dir) / 'hub' / 'onnx' + cls.createMockEnvs(cls, { + 'TORCH_HOME': cls._tmp_dir, + 'SB_MICRO_PATH': cls._tmp_dir, + }) + cls.createMockFiles(cls, ['bin/trtexec']) def test_tensorrt_inference_cls(self): """Test tensorrt-inference benchmark class.""" @@ -116,7 +109,7 @@ class TensorRTInferenceBenchmarkTestCase(unittest.TestCase): # Check models for model in benchmark._args.pytorch_models: - self.assertTrue((self.__model_path / f'{model}.onnx').is_file()) + self.assertTrue((self._model_path / f'{model}.onnx').is_file()) # Command list should equal to default model number self.assertEqual( diff --git a/tests/data/cuda_memory_d2d_bw.log b/tests/data/cuda_memory_d2d_bw.log new file mode 100644 index 00000000..afaac871 --- /dev/null +++ b/tests/data/cuda_memory_d2d_bw.log @@ -0,0 +1,89 @@ +[CUDA Bandwidth Test] - Starting... +Running on... + + Device 0: Tesla V100-PCIE-32GB + Shmoo Mode + +................................................................................. 
+bandwidthTest-D2D, Bandwidth = 0.4 GB/s, Time = 0.00000 s, Size = 1000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 0.1 GB/s, Time = 0.00004 s, Size = 2000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 0.8 GB/s, Time = 0.00000 s, Size = 3000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 1.2 GB/s, Time = 0.00000 s, Size = 4000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 0.4 GB/s, Time = 0.00001 s, Size = 5000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 1.7 GB/s, Time = 0.00000 s, Size = 6000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 7.0 GB/s, Time = 0.00000 s, Size = 7000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 8.0 GB/s, Time = 0.00000 s, Size = 8000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 9.0 GB/s, Time = 0.00000 s, Size = 9000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 10.0 GB/s, Time = 0.00000 s, Size = 10000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 6.1 GB/s, Time = 0.00000 s, Size = 11000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 12.0 GB/s, Time = 0.00000 s, Size = 12000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 13.1 GB/s, Time = 0.00000 s, Size = 13000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 5.3 GB/s, Time = 0.00000 s, Size = 14000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 8.0 GB/s, Time = 0.00000 s, Size = 15000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 8.9 GB/s, Time = 0.00000 s, Size = 16000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 9.5 GB/s, Time = 0.00000 s, Size = 17000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 9.8 GB/s, Time = 0.00000 s, Size = 18000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 19.0 GB/s, Time = 0.00000 s, Size = 19000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 5.3 GB/s, Time = 0.00000 s, Size = 20000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 22.0 GB/s, Time = 0.00000 s, Size = 22000 bytes, 
NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 6.3 GB/s, Time = 0.00000 s, Size = 24000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 0.7 GB/s, Time = 0.00004 s, Size = 26000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 28.1 GB/s, Time = 0.00000 s, Size = 28000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 30.1 GB/s, Time = 0.00000 s, Size = 30000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 32.0 GB/s, Time = 0.00000 s, Size = 32000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 14.6 GB/s, Time = 0.00000 s, Size = 34000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 20.9 GB/s, Time = 0.00000 s, Size = 36000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 22.7 GB/s, Time = 0.00000 s, Size = 38000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 23.5 GB/s, Time = 0.00000 s, Size = 40000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 24.8 GB/s, Time = 0.00000 s, Size = 42000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 44.1 GB/s, Time = 0.00000 s, Size = 44000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 27.2 GB/s, Time = 0.00000 s, Size = 46000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 48.0 GB/s, Time = 0.00000 s, Size = 48000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 28.5 GB/s, Time = 0.00000 s, Size = 50000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 60.2 GB/s, Time = 0.00000 s, Size = 60000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 42.7 GB/s, Time = 0.00000 s, Size = 70000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 8.4 GB/s, Time = 0.00001 s, Size = 80000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 55.6 GB/s, Time = 0.00000 s, Size = 90000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 59.6 GB/s, Time = 0.00000 s, Size = 100000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 127.9 GB/s, Time = 0.00000 s, Size = 200000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 183.1 GB/s, 
Time = 0.00000 s, Size = 300000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 270.2 GB/s, Time = 0.00000 s, Size = 400000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 15.5 GB/s, Time = 0.00003 s, Size = 500000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 399.2 GB/s, Time = 0.00000 s, Size = 600000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 172.1 GB/s, Time = 0.00000 s, Size = 700000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 27.5 GB/s, Time = 0.00003 s, Size = 800000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 71.3 GB/s, Time = 0.00001 s, Size = 900000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 502.2 GB/s, Time = 0.00000 s, Size = 1000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 59.4 GB/s, Time = 0.00003 s, Size = 2000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 348.7 GB/s, Time = 0.00001 s, Size = 3000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 519.4 GB/s, Time = 0.00001 s, Size = 4000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 422.3 GB/s, Time = 0.00001 s, Size = 5000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 447.9 GB/s, Time = 0.00001 s, Size = 6000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 225.3 GB/s, Time = 0.00003 s, Size = 7000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 146.0 GB/s, Time = 0.00005 s, Size = 8000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 190.9 GB/s, Time = 0.00005 s, Size = 9000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 301.1 GB/s, Time = 0.00003 s, Size = 10000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 192.8 GB/s, Time = 0.00006 s, Size = 11000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 243.9 GB/s, Time = 0.00005 s, Size = 12000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 328.7 GB/s, Time = 0.00004 s, Size = 13000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 621.2 GB/s, Time = 
0.00002 s, Size = 14000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 682.5 GB/s, Time = 0.00002 s, Size = 15000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 686.3 GB/s, Time = 0.00002 s, Size = 16000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 693.1 GB/s, Time = 0.00003 s, Size = 18000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 707.0 GB/s, Time = 0.00003 s, Size = 20000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 714.4 GB/s, Time = 0.00003 s, Size = 22000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 719.4 GB/s, Time = 0.00003 s, Size = 24000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 723.2 GB/s, Time = 0.00004 s, Size = 26000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 726.7 GB/s, Time = 0.00004 s, Size = 28000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 728.8 GB/s, Time = 0.00004 s, Size = 30000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 724.2 GB/s, Time = 0.00004 s, Size = 32000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 735.3 GB/s, Time = 0.00005 s, Size = 36000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 741.1 GB/s, Time = 0.00005 s, Size = 40000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 748.9 GB/s, Time = 0.00006 s, Size = 44000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 748.9 GB/s, Time = 0.00006 s, Size = 48000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 754.1 GB/s, Time = 0.00007 s, Size = 52000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 757.4 GB/s, Time = 0.00007 s, Size = 56000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 758.5 GB/s, Time = 0.00008 s, Size = 60000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 772.0 GB/s, Time = 0.00008 s, Size = 64000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2D, Bandwidth = 762.8 GB/s, Time = 0.00009 s, Size = 68000000 bytes, NumDevsUsed = 1 +Result = PASS diff --git 
a/tests/data/cuda_memory_d2h_bw.log b/tests/data/cuda_memory_d2h_bw.log new file mode 100644 index 00000000..48dbf477 --- /dev/null +++ b/tests/data/cuda_memory_d2h_bw.log @@ -0,0 +1,89 @@ +[CUDA Bandwidth Test] - Starting... +Running on... + + Device 0: Tesla V100-PCIE-32GB + Shmoo Mode + +................................................................................. +bandwidthTest-D2H-Pinned, Bandwidth = 0.4 GB/s, Time = 0.00000 s, Size = 1000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 0.5 GB/s, Time = 0.00000 s, Size = 2000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 0.9 GB/s, Time = 0.00000 s, Size = 3000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 1.1 GB/s, Time = 0.00000 s, Size = 4000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 1.4 GB/s, Time = 0.00000 s, Size = 5000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 1.9 GB/s, Time = 0.00000 s, Size = 6000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 2.6 GB/s, Time = 0.00000 s, Size = 7000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 2.9 GB/s, Time = 0.00000 s, Size = 8000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 3.3 GB/s, Time = 0.00000 s, Size = 9000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 3.7 GB/s, Time = 0.00000 s, Size = 10000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 4.0 GB/s, Time = 0.00000 s, Size = 11000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 4.5 GB/s, Time = 0.00000 s, Size = 12000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 4.9 GB/s, Time = 0.00000 s, Size = 13000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 5.3 GB/s, Time = 0.00000 s, Size = 14000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 5.3 GB/s, Time = 0.00000 s, Size = 15000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 5.6 GB/s, Time = 0.00000 s, Size = 16000 bytes, 
NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 5.7 GB/s, Time = 0.00000 s, Size = 17000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 6.0 GB/s, Time = 0.00000 s, Size = 18000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 6.2 GB/s, Time = 0.00000 s, Size = 19000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 6.3 GB/s, Time = 0.00000 s, Size = 20000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 6.5 GB/s, Time = 0.00000 s, Size = 22000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 6.9 GB/s, Time = 0.00000 s, Size = 24000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 7.1 GB/s, Time = 0.00000 s, Size = 26000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 7.4 GB/s, Time = 0.00000 s, Size = 28000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 7.6 GB/s, Time = 0.00000 s, Size = 30000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 7.9 GB/s, Time = 0.00000 s, Size = 32000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 8.0 GB/s, Time = 0.00000 s, Size = 34000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 8.3 GB/s, Time = 0.00000 s, Size = 36000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 8.5 GB/s, Time = 0.00000 s, Size = 38000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 8.6 GB/s, Time = 0.00000 s, Size = 40000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 8.7 GB/s, Time = 0.00000 s, Size = 42000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 9.3 GB/s, Time = 0.00000 s, Size = 44000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 9.4 GB/s, Time = 0.00000 s, Size = 46000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 9.5 GB/s, Time = 0.00001 s, Size = 48000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 9.5 GB/s, Time = 0.00001 s, Size = 50000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, 
Bandwidth = 10.1 GB/s, Time = 0.00001 s, Size = 60000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 10.4 GB/s, Time = 0.00001 s, Size = 70000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 10.6 GB/s, Time = 0.00001 s, Size = 80000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 10.9 GB/s, Time = 0.00001 s, Size = 90000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 11.1 GB/s, Time = 0.00001 s, Size = 100000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 12.0 GB/s, Time = 0.00002 s, Size = 200000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00002 s, Size = 300000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 12.6 GB/s, Time = 0.00003 s, Size = 400000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 12.6 GB/s, Time = 0.00004 s, Size = 500000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 12.7 GB/s, Time = 0.00005 s, Size = 600000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 12.7 GB/s, Time = 0.00006 s, Size = 700000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 12.8 GB/s, Time = 0.00006 s, Size = 800000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 12.9 GB/s, Time = 0.00007 s, Size = 900000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 12.8 GB/s, Time = 0.00008 s, Size = 1000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.0 GB/s, Time = 0.00015 s, Size = 2000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.0 GB/s, Time = 0.00023 s, Size = 3000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00031 s, Size = 4000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00038 s, Size = 5000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00046 s, Size = 6000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, 
Bandwidth = 13.1 GB/s, Time = 0.00053 s, Size = 7000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00061 s, Size = 8000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 12.5 GB/s, Time = 0.00072 s, Size = 9000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00076 s, Size = 10000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00084 s, Size = 11000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00091 s, Size = 12000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00099 s, Size = 13000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00106 s, Size = 14000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00114 s, Size = 15000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00122 s, Size = 16000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00137 s, Size = 18000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00152 s, Size = 20000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00167 s, Size = 22000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00183 s, Size = 24000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 12.9 GB/s, Time = 0.00202 s, Size = 26000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00213 s, Size = 28000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00228 s, Size = 30000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00243 s, Size = 32000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00273 s, Size = 36000000 bytes, NumDevsUsed 
= 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00304 s, Size = 40000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00334 s, Size = 44000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00364 s, Size = 48000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00395 s, Size = 52000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00425 s, Size = 56000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00455 s, Size = 60000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00487 s, Size = 64000000 bytes, NumDevsUsed = 1 +bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00520 s, Size = 68000000 bytes, NumDevsUsed = 1 +Result = PASS diff --git a/tests/data/cuda_memory_h2d_bw.log b/tests/data/cuda_memory_h2d_bw.log new file mode 100644 index 00000000..bf33ab98 --- /dev/null +++ b/tests/data/cuda_memory_h2d_bw.log @@ -0,0 +1,89 @@ +[CUDA Bandwidth Test] - Starting... +Running on... + + Device 0: Tesla V100-PCIE-32GB + Shmoo Mode + +................................................................................. 
+bandwidthTest-H2D-Pinned, Bandwidth = 0.4 GB/s, Time = 0.00000 s, Size = 1000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 0.7 GB/s, Time = 0.00000 s, Size = 2000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 1.0 GB/s, Time = 0.00000 s, Size = 3000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 1.4 GB/s, Time = 0.00000 s, Size = 4000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 1.7 GB/s, Time = 0.00000 s, Size = 5000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 2.0 GB/s, Time = 0.00000 s, Size = 6000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 2.3 GB/s, Time = 0.00000 s, Size = 7000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 2.5 GB/s, Time = 0.00000 s, Size = 8000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 2.7 GB/s, Time = 0.00000 s, Size = 9000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 2.9 GB/s, Time = 0.00000 s, Size = 10000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 3.2 GB/s, Time = 0.00000 s, Size = 11000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 3.4 GB/s, Time = 0.00000 s, Size = 12000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 3.5 GB/s, Time = 0.00000 s, Size = 13000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 3.5 GB/s, Time = 0.00000 s, Size = 14000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 3.8 GB/s, Time = 0.00000 s, Size = 15000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 4.0 GB/s, Time = 0.00000 s, Size = 16000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 4.1 GB/s, Time = 0.00000 s, Size = 17000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 4.3 GB/s, Time = 0.00000 s, Size = 18000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 4.4 GB/s, Time = 0.00000 s, Size = 19000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 4.6 GB/s, 
Time = 0.00000 s, Size = 20000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 4.8 GB/s, Time = 0.00000 s, Size = 22000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 5.0 GB/s, Time = 0.00000 s, Size = 24000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 5.2 GB/s, Time = 0.00000 s, Size = 26000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 5.4 GB/s, Time = 0.00001 s, Size = 28000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 5.7 GB/s, Time = 0.00001 s, Size = 30000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 5.9 GB/s, Time = 0.00001 s, Size = 32000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 6.1 GB/s, Time = 0.00001 s, Size = 34000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 6.3 GB/s, Time = 0.00001 s, Size = 36000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 6.4 GB/s, Time = 0.00001 s, Size = 38000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 6.6 GB/s, Time = 0.00001 s, Size = 40000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 6.7 GB/s, Time = 0.00001 s, Size = 42000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 6.9 GB/s, Time = 0.00001 s, Size = 44000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 7.0 GB/s, Time = 0.00001 s, Size = 46000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 7.1 GB/s, Time = 0.00001 s, Size = 48000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 7.3 GB/s, Time = 0.00001 s, Size = 50000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 7.8 GB/s, Time = 0.00001 s, Size = 60000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 8.2 GB/s, Time = 0.00001 s, Size = 70000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 8.6 GB/s, Time = 0.00001 s, Size = 80000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 8.9 GB/s, Time = 0.00001 s, Size = 90000 bytes, 
NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 9.2 GB/s, Time = 0.00001 s, Size = 100000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 10.5 GB/s, Time = 0.00002 s, Size = 200000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 11.1 GB/s, Time = 0.00003 s, Size = 300000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 11.4 GB/s, Time = 0.00004 s, Size = 400000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 11.6 GB/s, Time = 0.00004 s, Size = 500000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 11.7 GB/s, Time = 0.00005 s, Size = 600000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 11.8 GB/s, Time = 0.00006 s, Size = 700000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 11.9 GB/s, Time = 0.00007 s, Size = 800000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 11.9 GB/s, Time = 0.00008 s, Size = 900000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 11.7 GB/s, Time = 0.00009 s, Size = 1000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.1 GB/s, Time = 0.00016 s, Size = 2000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.3 GB/s, Time = 0.00024 s, Size = 3000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.3 GB/s, Time = 0.00033 s, Size = 4000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 11.5 GB/s, Time = 0.00043 s, Size = 5000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.3 GB/s, Time = 0.00049 s, Size = 6000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.3 GB/s, Time = 0.00057 s, Size = 7000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.3 GB/s, Time = 0.00065 s, Size = 8000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.3 GB/s, Time = 0.00073 s, Size = 9000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00081 s, Size = 10000000 
bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00089 s, Size = 11000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00097 s, Size = 12000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00105 s, Size = 13000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00113 s, Size = 14000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00121 s, Size = 15000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00129 s, Size = 16000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00145 s, Size = 18000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00162 s, Size = 20000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00178 s, Size = 22000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00194 s, Size = 24000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00210 s, Size = 26000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00226 s, Size = 28000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00242 s, Size = 30000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 10.5 GB/s, Time = 0.00304 s, Size = 32000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.2 GB/s, Time = 0.00295 s, Size = 36000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 10.8 GB/s, Time = 0.00369 s, Size = 40000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00355 s, Size = 44000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00387 s, Size = 48000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.1 GB/s, 
Time = 0.00431 s, Size = 52000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 11.7 GB/s, Time = 0.00480 s, Size = 56000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00484 s, Size = 60000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.1 GB/s, Time = 0.00528 s, Size = 64000000 bytes, NumDevsUsed = 1 +bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00549 s, Size = 68000000 bytes, NumDevsUsed = 1 +Result = PASS diff --git a/tests/data/diagnosis_summary.jsonl b/tests/data/diagnosis_summary.jsonl new file mode 100644 index 00000000..dc153009 --- /dev/null +++ b/tests/data/diagnosis_summary.jsonl @@ -0,0 +1,2 @@ +{"Category": "KernelLaunch", "Defective Details": "kernel-launch/event_overhead:0(B/L: 0.0060 VAL: 0.1000 VAR: 1577.85% Rule:lambda x:x>0.05)", "kernel-launch/event_overhead:0": 15.7785234899, "kernel-launch/event_overhead:1": -0.0016778523, "kernel-launch/event_overhead:2": -0.0654362416, "kernel-launch/event_overhead:3": -0.0771812081, "kernel-launch/event_overhead:4": -0.0067114094, "kernel-launch/event_overhead:5": -0.0117449664, "kernel-launch/event_overhead:6": -0.0402684564, "kernel-launch/event_overhead:7": -0.0100671141, "kernel-launch/return_code": 0.0, "kernel-launch/wall_overhead:0": 0.0, "kernel-launch/wall_overhead:1": 0.0, "kernel-launch/wall_overhead:2": 0.0194931774, "kernel-launch/wall_overhead:3": 0.022417154, "kernel-launch/wall_overhead:4": 0.0360623782, "kernel-launch/wall_overhead:5": -0.0194931774, "kernel-launch/wall_overhead:6": 0.0185185185, "kernel-launch/wall_overhead:7": 0.0438596491, "mem-bw/D2H_Mem_BW:0": 0.0, "mem-bw/D2H_Mem_BW:1": 0.012345679, "mem-bw/D2H_Mem_BW:2": 0.0082304527, "mem-bw/D2H_Mem_BW:3": 0.012345679, "mem-bw/D2H_Mem_BW:4": 0.0, "mem-bw/D2H_Mem_BW:5": 0.0, "mem-bw/D2H_Mem_BW:6": -0.0164609053, "mem-bw/D2H_Mem_BW:7": 0.012345679, "mem-bw/H2D_Mem_BW:0": 0.0, "mem-bw/H2D_Mem_BW:1": 0.0078125, "mem-bw/H2D_Mem_BW:2": 0.015625, 
"mem-bw/H2D_Mem_BW:3": 0.01953125, "mem-bw/H2D_Mem_BW:4": 0.0234375, "mem-bw/H2D_Mem_BW:5": 0.0078125, "mem-bw/H2D_Mem_BW:6": -0.01171875, "mem-bw/H2D_Mem_BW:7": 0.01953125, "mem-bw/return_code": 0.0, "Index": "sb-validation-01"} +{"Category": "FailedTest,Mem", "Defective Details": "mem-bw/D2H_Mem_BW:0_miss,mem-bw/D2H_Mem_BW:1_miss,mem-bw/D2H_Mem_BW:2_miss,mem-bw/D2H_Mem_BW:3_miss,mem-bw/D2H_Mem_BW:4_miss,mem-bw/D2H_Mem_BW:5_miss,mem-bw/D2H_Mem_BW:6_miss,mem-bw/D2H_Mem_BW:7_miss,mem-bw/H2D_Mem_BW:0_miss,mem-bw/H2D_Mem_BW:1_miss,mem-bw/H2D_Mem_BW:2_miss,mem-bw/H2D_Mem_BW:3_miss,mem-bw/H2D_Mem_BW:4_miss,mem-bw/H2D_Mem_BW:5_miss,mem-bw/H2D_Mem_BW:6_miss,mem-bw/H2D_Mem_BW:7_miss,mem-bw/return_code(VAL: 1.0000 Rule:lambda x:x>0)", "kernel-launch/event_overhead:0": 0.0, "kernel-launch/event_overhead:1": -0.0016778523, "kernel-launch/event_overhead:2": -0.0654362416, "kernel-launch/event_overhead:3": -0.0771812081, "kernel-launch/event_overhead:4": -0.0067114094, "kernel-launch/event_overhead:5": -0.0117449664, "kernel-launch/event_overhead:6": -0.0402684564, "kernel-launch/event_overhead:7": -0.0100671141, "kernel-launch/return_code": 0.0, "kernel-launch/wall_overhead:0": 0.0, "kernel-launch/wall_overhead:1": 0.0, "kernel-launch/wall_overhead:2": 0.0194931774, "kernel-launch/wall_overhead:3": 0.022417154, "kernel-launch/wall_overhead:4": 0.0360623782, "kernel-launch/wall_overhead:5": -0.0194931774, "kernel-launch/wall_overhead:6": 0.0185185185, "kernel-launch/wall_overhead:7": 0.0438596491, "mem-bw/D2H_Mem_BW:0": null, "mem-bw/D2H_Mem_BW:1": null, "mem-bw/D2H_Mem_BW:2": null, "mem-bw/D2H_Mem_BW:3": null, "mem-bw/D2H_Mem_BW:4": null, "mem-bw/D2H_Mem_BW:5": null, "mem-bw/D2H_Mem_BW:6": null, "mem-bw/D2H_Mem_BW:7": null, "mem-bw/H2D_Mem_BW:0": null, "mem-bw/H2D_Mem_BW:1": null, "mem-bw/H2D_Mem_BW:2": null, "mem-bw/H2D_Mem_BW:3": null, "mem-bw/H2D_Mem_BW:4": null, "mem-bw/H2D_Mem_BW:5": null, "mem-bw/H2D_Mem_BW:6": null, "mem-bw/H2D_Mem_BW:7": null, "mem-bw/return_code": 
1.0, "Index": "sb-validation-03"} diff --git a/tests/data/diagnosis_summary.xlsx b/tests/data/diagnosis_summary.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..88f99a332b7bcb60b14beb1b74a17f8f51f06e93 GIT binary patch literal 19618 zcmc({1yo#3)-H^N;O-DCSc1E|dq@Zb50-}D?hQc$1PKI}MuG=Oa0>)?8iEDa;7;TA zpH61xoyj-fx9&gp-gQ~4pHpYo-cRk?wNF)Xy3cu{ii|>pfPjDod?XR{8k-!n5fKnb z?;#)%0ulpR2YXjbdsh=}FGoukBMwhH+s+UD_HA5v@@F>@Ju{C5J{J~8e9MWkjMTnF zEEA_AiMI1X#qc*_MjX>&$9*%V{>gK1)}^~bjH>)~8arLiVcAQ+d2@21pVdy4fnU`j zM+S*>$_05RI2bjk7Aqs;jtw{gdavDIe2!q5MSoKqs84@}XjuF~Vj}LlLwY#vxZ#i# zmiJf6?52l~AD*qWDzx8c`PugLv`%HlC>D)G&wT-%Qm54dZGuI)h$B(sE^Cz%Ni(Aq zm6h;71U>mYVOaYX)%Mc(d#_wBU0RP4v_j)?_SaN7nkNk63;Rc^Y9@&a(nl}Gib7-w zm4b$3D}t3!-BZpg5@H-he0>zu&)#<=5G&bWdY%@N(JMs}BaA0{p`nSC(HED>!OzXQx}kA^8-!c&vE!L)=n6e(KRagWPS~A zw^85{<;lMc!VyGydJ_n~DiQ($CU7)ywzPHO08?VI+GL?q2vue_<@ShcFEcffTxKYg*p|3;Vsmj#e)l;R01U~f8LpQ(X z`OYjj?2GWP5vj`U=Qsc&p@58lKmi;*Z8?7j#lykb#>~ON2A-I|0`xU*@O3*EUhuvj z#+Nhwn$g6k^y^QN>T5QU(QNEee#8yi`(K|DqpLqa%fCMwpG+)z{AIxVm>;%`{LpRw z5laOXO2c&8P&LN<>CB8GUdJ@~oKF~r0>ck&!EMqV{(=V;MzrtMs2+VDSW;FgX2(h8 zp&8DDbahL+#C=JSp1ePo!*+Zv-Bom7n>5o5MAVV-NNT4$!2j@6eRTYMXZsBCfk}8Q zM1s19-kE3oi@c5_^?NZ1`9wo$P(YT6bHN{~D)B;c1Q_76b&Yp9lzKzvBdtkc*Y2rK=0) zuLCzcR8bjvE)N9bLKfC_Sq{n$vNy{Ay;( zzgD-rS#rVu>f|=2+W}Y7;@vhmLM^5*2MC6jv3|Hf$q;@>nhS_QsYznOH<<>bGH z!S0Gy4r>>hZ;!aVu7580OZfVLi&i(6Vy-}E=L;>qKvUy0f-PXTFU{iRnbhE`i!G6> z&GY6a@0-)X7z6+F%Y(DCjRh0&g}dXmrI;|2Od7BodY$)p?G7X+=58xU^{k+(1y)_} ze!5bGqgf92JKw!s@E2?Gx%N3cI632lmc^;;3)K5J`(LeX9`5UEN0{?&KB8Hi5k5kix>1XeaxX7eCRf_$*D}Sq z_{tKcuS5gI( ztN|q+c0j3sc-2eJRbM(&@qZ|GOsMx9zul+`6pMMQ!TPJ%B}SMrkqpWir8rh|)i=)! 
z*RUDj9kGt`ViGBZ)A9ojR{cW>6;PT69MqtMD+K{cc3glGHeBhASPUTVIZpnUV&^XR zh&84cHDN|g?D~HxRvK}@O*;W6YT!^=YIfKY6q0u;Yv9FqLgTs z6aJ)ck1=nj=g%Ykk8==xj zekSiDWNJ)|y&PQsyy`9Cn(hdLMzXnlnh@RP;4q8E+W=nI2*UiARd9g8fiLn|{!3Rl zRKWoT2flZ3862wM0D}Wx6kG;}DmcL4z!wdd!J!Hcu;^#r--F3P7z$@XmKX|GaJYj5 zvM>Pm;eZJTTsRQIffNpu!sZwXG;nwb2WB|1!hsVGyl@bNgD4y%;UEhKML4L!K@$$o z;Ghc!12~w#!CW|(xhvTTa4(S;a1RcANdUOQp$ZN#IPfLA05UjK!2t#bz7)6&4pne~ z!GSLoE`viA9AI$ZOM}bcPz47V9Qe}VGB{Mh0haEPvq&Yrp3Wkq2o)}H0CTPE%;9DGR{|~V=QF`vWA>O@p zKwGX=`0v{PiJ0pi!1V86Sg;NrGg!Ln|8royBuBOI?Ie1a>GJ;y2H}nW1?nw%Z4QVj zfg>(u|NkL85EGVele35B@6cN&!dmQzy( z?;-4GT>lBM!_EGe;ij0JcL*NJ{{M5(4X64KL2(3SKnUK02mc!&4!8XeK+6#r8BHQX zU`z<_C2a_w$+a>9L_Z_BR;lZhJe^DX-lbB0|RDAsnOPw6lixz`p#D)58peii=e$FD0 zBfrHjF%+O)|1)I8(!NT6LEo<*J73A2Cp{j%oa% z6+A@^pX|T0l~z(Tx?ewi?9lMhPDD4H_j`(bfU!!o{P!B8ggm?V{6-rj!<#PG_d3D`gz3`)P68on+hEM4TYYW`?Iqn|VH!MIBH@C`q zZ44tMIzpyD6;jr|OzT+4_|RuWGK*&8=yvYu-Co>t+-6CQ0^6*5V?6_EZneCs#A@X$ znoF!I?rRp{qnVrpi-EkJo@Qj!QxC8nALG*RR;XZjw3od;!olS)lb`ZRT{U`7D*xp~7yTKCi*vVzH zZ^jWifwExp*>(WgZgS85FMu~_Z)UppoDbmyH`9dJXtt!tGow{aq&>Xm}d zpvG`1?88J{h6EeX-BM|K>c&M>#Ud%cPHt1c)*ecB9XIR@#2C&he-h{)-^lI2K(30C zu+E6O;Qj#hHj0?JJ#?$1*<0y)7|x#{4eizk*?`}f=FD^ukfwg2ne|Fb*~jYUtuNya zvs$U2utsP(94eNkh5(~NTbD2=yv zp)EK0i*4`h1R6A{s{-G~xt%q}@@m}mLGv-$mD8Mq-GQTp@59OZf>wcZr(JiywZY5t zuN_+xtD`a7L)eU?{vOUouOxQJ{6B1~m@jmL4I9-J!Y?WP6Ga-Qhp#7}f^3XW)h!G? 
zp2z9b&rmen%PV$L6x6DXw-~~%Q%^4WRHh|OI7Q@U`Ltp1dCStHOFvekW8{ThYHvX; z_aPslNXtxhteBaX{2OM>#wdFV3Bu6yp6fk#X2$8j@PU1M%w)4@W>#zw&Vb-c{Sr6p zt_7qY8_4{n$^$xSPt$O3nHvi;x}^4U-E(r7!-h4EANx3uWlgoJ&xgF4$WIZ>PGPsl zXRggo$+(~kJDZLx?yO~x$JOFJeBtCwnyQy(f_hHo#{Ags=#gtdYa=s6R+Q2g%JnsR zIvhkfsx_8z&SN8jv%7OZJm;FzD%R=cAQII;c(yjGH>QE?3(ei@2!d?Vzb?4I53=O5 zb&|;_%en2K`qW;?U%5LLbJi5I%#vcCOFQw{fTA|s_Hb}w4EJo_ryBL0q}AsQHP#pk zH3P%^{Qb6LKC9OSf?9a%#hZ3$iA>V_xmg!W!MICro*oCGdt_%Yf^oTS3R}PX_VV6* zY-Q`5GP@K8>j!6*%MLSu1tPq$S~r|N(4wj540ZhA6SEF!_Y!CvPO9`8Q%JXX6dCGgFbaD#?YGVhl3n$HJA^q=ZywWwH^+ZGf?Gpx2E{Hzq{;P z)yQ|R#np6NPQA!BUL7Jz$v@9F7QE?Y?r_CjIL$|EluIu*u=?H+yUsJ=BDl{i^l-@H zGpY461}0^m>A9l`h7FxpBC|79Ji95|o*qjmMs0@oeGSIZGh>F_dTNKzS5i)Wb4F@Z z4iI!gzqlcWoHT=xInO%K(_L~5 zaW^lfZo4j=bnEtLZu>6$ys;LOTL`mCTMTvM;3<6ieO7c+A_>#Q>Q1Wx>V~M1;4-yT zr&pyuP>$6@(p@U$WwvNBA~ipjF!D`?f3bmu8=>xX*6o)<8CQaa^2!nGC%Q?CMH@2m zi?+p^mz`odq+t2V;msB|4&((IHrLxbC#4Y0YK*x_vP1TX$IW0t6zTGyHufI$y&F=Fudi#fa++ zgnWgV<5DC1uT+c+QPWM-hLhMOf}=g=f-*b`MnnWbqmePvwJ8t!ATsOqx2ygv{ek+qQ!o>di* zr7~yLG5-vo&mNIyW!XGd$xi#LHh$d$-#`c*M?1LC|9!Aw!Csh1?R)+g`z(n`rl4-= z)FN_H))8qv!En0QKd~fBE(QJYV2=w`nUUJr`t4a9X4)8DGiFXs*?Q>ue#~=+*sx8P zCq#TxM@P?GE!Rf5u?UnNm)gyjbTJs0G8jXrZu}I8`u)3r70nrLSr_XZUVX^?r^1rY zbk1k|pY4C(ojkhZAXl4>9O=;?Vm_dPuxBKB2O}(#V!N_0p|i+C?*1b#2Rh z*w{|s6YIFt-h|nx&UnC#Ve+kB2`WVr^EH>#12O6Ol+q;;Po!E=#U}0-fn{w!-NmEE zgJ#?Gbpvav9(=}W%q|l!xT+>a%=rk06|5=i5GalI#a3>!yv?=v8Xkl<(^XCXt|2lI zReoMknr~-Y){wm;nxg-y@BOb}k4K(__p~mzXN2CK`O@4|m-KrZNCJ*3>D=*q7g*O^ zdh^ml^sY+zxT0_Nd~0BT?USS`cWppo6rsXI%f`3e0fo>OgNqXS{ww4UVuI^+l;uGj zZj}8TcB35S#U!}j1)VXrUhUoDEnFb1n>^j=91O-88Puk^_oy#L;owjn(=%iz>^0SX zTAsqe7ah=k#ROW7r^~4X&wkQcYKFo=(HRBz=gldbXwd%go-KpOLU7OS)K!mH%*Ei! 
z;Qgo{C3}1mOJ8~va1JYHo1QNewyW>DL8>)))9lKthmZ`ujziBYP$4d&`LfRaid#AY zTs4U9Mk>#Wiv<*Ybwqj)|L@mgS}`%IqX4a&Pbs{@AKis;S z`#4qya98`NpYK}IuEfsv>tz{;9)31!nRL%`S?C966+3y&_FL_>j3`hK#;n(ToyGHd zWU(N;V;K}XP40FlQFTvobv|alnE%{S1=1FWF%r znB~|dv|(-X3iX?`$@P!p?x1w{$+aXCTzIO^!>2|0Ywef6@pmc#BWx90KgCFf%&9nl_LPJ4Y9+0U+fMJ|~GtIs9Y z)#v!;bLp%{ElV#X#e_UV#%>y;s_xz~K^q=-9@hntsy=UEtlZLzzfD_1jePKPph@zp z_4nJfcnQBcF6y76DaK43^>)u|t?j`TCmZA1r=!|h+D4Q9!B!Hqq7k1j2`A^Z&fv?EVk-&SVagxR2uiF(&3l&zOEv2QVzG>Y-xdE^>7`E+&neS&ol)p>&lPP=aVtqqpGnOn1u|?POf7 z=p##gT_MqDwKtRHQL5A=@kTHFv({i!bz}A1VFhyCxoS=~i|R9dmIK%-zKR~OoP8$@ zL5eiC(#w^ne4L6GA9?rVZejE#vt;{^I9QlyD(e$Z7d$4s^mOwsX16!w_q7)-rlGuc zBXxH|L7Bv$;mW`)(D|-cw(1rmY$tCF&-?W0Ycz4zYdJS^@#8$yVct6!@%p??A)b13`-?L8mgg^A^%SxL&=#9k|;smm{KmP+?cX9q7oNn zcR>}8#LQ6qo0@|sj<~4ZqA*M?bP>UOt@{|yd(EctUFJmt(4@&Ya2AQ34SH&eAReD) ztN=BJx5A(_!i$cR1=7DS3x(|^jqrXQw}h!hU}u56NBcKYR%3EN6c!sPsN~-6@c(8s z12e5-vq2&qWZopyZ?iz)qL&*f!7ScP(h-@P30UHzqC7CQ63dMglt;f6;*cVKS^SM8 zUpg`~Bo=5^4rnPF=n&9+I5$kK{NY9lz?naZ4`Um%9^oyG2OM}H4ZXHd{jFdIaEqY- zR+xq6fC$VtQYMs#dFPl2U<2KN1Cs#DHp51WvEnfAR~n#&uHj!S;C|VEFvOF|{*nVN z{@rGPlZ{9mF86(x19E;K3tbfn&AcF!hkn!u%~a075|emJ;D-?oVFgREX*Jg$J_;6LPhr`NmU-M999d= z40%NWQ!{8At}ILeY6lrM^v2+7bMR`7z)bnws=rBo8IpUGxE07lbKLb|Y+D+UzpMFd z*}X|l3Z$WJg^U|15CS1@l6%nlIjg;~eEbsgb z#Sz|3vFOYYB3_u9D3FM7>-^*#5M4JAVHexqb^r~g7BFOL&M>tGKNVZfU z9zQKdDrwb2GkXV=M|cDU%mEdP z16TIG7MvpsB%KT-+qn5iC6@^=j7@uBxH1Xo)HURhX-xx>p`vsoq*MB>-HJXonB*S& z?~g9#m)tHguseL~2Z-+fbCu0OC5zQzWzr%on5h%>fLAgr^q?}yO!hnlLLGX*E21X& z%#SDtV%@O_k}x~qy=d%&@|hh}R(j~!03y)-Ll?#=*JQX$tc}kvt8Axv_Iti+5>n{(uk=EI`~zl z;6B$Y99jf9zXzHS0GzfXksO`I0_WocV@G zS_bK7bV7a6Za?iSu!Gk)U!G5?zFh*{(g#8=_*0fZ;5XIhDYMn<;1w_b*8feNzAQ*B{ zW_F%ZKD!KZ)(wSNivrGw5(Gk&7kIA+1TD}cokhMeQ)UhOwA1ELGl0Ic_hg!TNxK=B+ z_{||1O9p0o&huM9dJj-1LOxGXlSe++78&ykUUl=1LR-mt_N^K=&scwjk=)M>R~|I$N*{c8PB)_s!f}8$h&|ESgL)5 zmvn&=TMp0+ha7N9-UO8VYXH;yI>hUNGk}~a*BL=FUQ0Lta*=j>cTjnR1=b3!ja&g0SOUr7lzsd+wM}g35~u)MEDX|% zM+JnylNufZz-g07pve-z4S2_a6&uK~(S9H~fL4Ij7tGX(c&)bm@;qh!h6kKq+wcye 
z`~{FWc#})UUjx|?ge5sj+YaS0@}k^RJ^&VkYqc<7sM?T$Vdf;pl8nde1eP`+oq-Yd zRs=Gl8j#U;{!&nTDs}kF;%_8;z_p6Ua}I0-!1_W?q8|d>;R+(ITG@DitZ(l zNrVzW{`W$Fam0y+T&yqtieSF991%s03};_ECp2(Q&Z)j64OsOP0KI~ z-x}z{dw}Ty+}rnnx75IlqGh57Ocbo$(54(=U{+Z@*E_%hlHl4AknWLRt5LKc@`gKK ztL0Eh#}qGvF~Wa2ef)m9b_( zbk~950jx)fo?Wx8CNF`UqJb;{?BFzXeFwY{PVfGLPZ zEC|v|(0a(bX%G#uhVovk!B3v^1DX9{0EogNKsWlh9U5p4L|X))$Ptf#gr5RlPXqbm zO5F{W^8xxx4dlpcz^PP=2uKka$b04dbG;{M?NFHyOQ38_U|l2N2ZH;B;#v))0jx3m zKqp86XL{QJ-;K@zy01VGJ|6(_@d0Rnm0c2(!1P=%`3A@$67^6>h&(WZeraL@lY9iG z(-h%H;I+mWAVIH$!1-w!KxogLuGJdhrhb4aFI6kFt+?lacNZ}Na?g$Xnr)9J5)xvE z2{?4ebUP^@A07XgQDGH)DIySu&Shf?7 zR??0TMaf+sF496#inqn51=e2#VEskukrb5;0M=hT+PecR#3Nx!nE+p1K&)$zPg~C_ z8&LIt1Tiizo>uDn8BmQ)E)&3@ih_6~n2+|vDiUzVo(zfMIS-0bu?1Re7a9`7m3t^c zc#1h%ES69x0`PAOVw?y$(yE&ZKJC;qUKFLV4m{e0_e8&>3~qp37mIYjN0m^7qObI5 zv7Q2f2+9e7&11kuz7>xazY_t8;n@fR5>XUP6)hGL7K%rkvWg0HNCzmH%-|udy6^`h zc-i65NmcMuNd@>4!sWJTh)1%fXo%mOKO}D#Q*AG47E)0^*S$K;Byj@LFi&&`RCrBcly(@0(Cl zXEL8b&m0!6-R`d1Pj5HR$`_hjE?1Un*CFMz6I)y3qmv6QJuQ-ww7EkFRgyo#*b};n~V)3wU;Y_YU!2f4Df#_zuw+6#?Nr_ka7t zMIQJM7rz?JI4=v~`7M+?UYOdfnFcamG^0OTBT+(9Myqf3`eyk0?cnr~#oB)D8HrIi?N0 zo_ZYB*1IMP?Wf?!YCNUlF!Qr%)x`bsUacU~X-uVy6RMT&5cP3{XTbDw%J)kj?DEh8 z5jt^bWz8o(g1mfxU9EgPMzCz3WW+|NVPfL_wNe~pJ8gbdPHXqS zG6gq&(!MHr(B|hw zt4$W&u0yTO$QmQ8{W_k+Jmb-Cx@t00GJD`_GpwJ> z4eeZphG%4E~GkjA?B(i>rJUENaEFZ#~^_Ioi zo}%jLgL%20Umy4PoHQr12g1=dTCW;1@29k9PaBL0uJmxdHBxYXwn-#4!;_+}ZbsN{ z$`K~Y?iej=D1$xuIe8bqi&(#Qb;=!$8#~4;aa}6Tk;&&?99}n{_>6!$-bswU0fKta zbeZ+kz!$@{BP~c0o6c7kJ>M_)oV~^r74L6s{G4I&JW|6AMDC-CN%qAy*>xl;bbH$= zhKt5TWiu&CR*9X)(;f1>QoT*crTuBJ&&@p1u}kUqmR{?5t1#Xa8?Ty8d=;a1ZlTu> zS|ocIoUtZ}yPh+cVXVzp4~85c!3ZO#aXvouTnfOwPvI+9a2yr+*F{gU#Qa&*DypF!ysHUifJaKVnSnL|cdn9Hnnf?oDqa*#F{l zFk2j=Y}m~+f9u&iB#|;Jtock{iku@03;#)U0&0oaPv;gruPw*OF|!}vtEaL9^$^Z5 zU%k1K<5JC(#8=ZXPVxgplK2C0oi~BuaTC9yz$p5J%cJflus+C-J~;RaT}KXg5>cvp1|#fYwPiW7_% z3VUch@gnpPsy4~gQZVx{yGDgnxuNGy%FgSc^&|ymNj||j9mJVyT7GxW7JPGg#&&m~ zWc`M2z|I_U%p0B9Z)}?WAu4SVBl@d#GBe&LPhZ7|*gI96Nmqm9ND7K0kQ!e=cqj{D 
zVhGPjY+JBpiDeGD8lQX$7kQuDGYyU8*C|$rgzQ>Owt+k&ac#ls<2)m%Ow(;5Z*&z6 zFbLJ0+*|{~qaEEA)m&G#!_1g8-h@9h?R0&@r16s_za%l7gKLC%0^N|i(b-kzi`i*F zc;;t~Md6Y7T*3&KWvpbOt8X3i)x~4`Y*B4uD)q{3RC((7mrago%o`J$8c79R(qcUZ ztX9WuMwvMg90k3j?F2H^_`^R#(!}IzMV-CH@*=_mm^f!w&{Of9^~JO@@P}7I48)`< z)6#J?)jH**B1!iSSa6*$8(&jowP4nIMAUc)&t)_KR;RaL8fhBNIz5HrD;J)rwF{p> z?-rQ4pEmZ^XWfQ;4#-CLp;P>(&1?&~^fN`2_3<$oYBqFppn2j?n~qP0i!6+hpB$>X zWfJi{V4^dRh(bH5llY`On37`pNBQcNTa8?Zqb>SVYA(2(BXRK_{v;&l;=#C{Fn*}?@ zIlBF|wP((4yrV$Y^Z|n+eV$GG<8=8Ot_$qbcmwZqZ8lFq2x;45dUj=t-|imv z9KNR--0V|t6TRbiB9hRc<2BHT1H_6JPV@_Jcfb$*N^G{{|jRF+EY02_I^WuX}TK1K=AfdPLpZKGq1#NsXe*gab_ zsVD0Nwb+v+Nyn*}KQfW&iceB&z&p9$l%8y7l6ic?NcS%EK9^0_kaRUfiR~nh?o;jY z2qGJ+BD7yoq@25HP*PE%ek?nlDC^)akC?L(t(t#{6hG#0%9q2?UP!YiI%K#KE|zIr zN4j{`MeLcn?!*;SHXC*m#T+MFhdZ6i=r(qz{Zdy#dU<7_t|ggRHL%<5Mqk*~KF1bX@hvVNdp58a~C?_WfM{q_!O( zrm?-={b=%`%UGki*YqAv!DXV}rOtBq*^BUYHa+jzd0Q3H-oY}g@=h<=LQa_m4wtTc zepUWEVbGF(?cHO&AC)k!LCq;=Bb}2Vk1*kD(ShuX%V^<1(EUWBJ14C<5xrSsto$(tML`Y-gRR`Qt}+YV$R9n0G-vY8ACJwiRUy zYSs2F#cZ4)Piju6 z2JzOE03Xa?-WnU=(+_-DLwIW{87x0{_8=a`TWZuO>6Qs{=42k z4?iy7-CR~&JtQ+||6*d|4iIdMoo@H#);rF0fUB@Iptzq^gIRF0lXQRj`wuWJ_S(uc!qc^R% zusEKDw5GeORj8CpkJ^B9j|&aYT$te*-Hcz42b+(?Y7?&BzU(|{yLq@?9#ieuj^67w7#6kDvCI#{l1g%fx+t@0dsj$5uSwXwbrvniLO zu34nI$dn~=S=Evz_0k@erzUoNA9e;DpZE0SVvu`zKIb|5T+4G*C>Jhf>`d;}w8eAe zxRlV-Uad=yYv35|yGZeOGsSyGt4rdpzuGL6QJ^+Xcqg_~`cCYz7JW>Tp#pVz!#goU zPsSt1UBH|q{o%rK0^)z(?&|+WyYjUEdArO1QM+eWw;#I$U&XVD6K~JKl(t8|yeR9( z<`05Jk(|W&Kl4)@%{*3oSo#2m%z8JVOUdRwzkqbocyW=LEuS6^={38c9899QE4cq$vC4Dn?OIB-mDU(O zdw@tzr-mZmovU2r4|2U&h+Q*FPb?<}&uU~K)_44enGN>0WhRA)b7J}!lbklO z?Re?^IIiGY&3jSKJM_8B?PD6SIxsgIW)zQ;&Os#j25VUQt}taTxtkHX+E`z*H3SDN zs=j1vLg~s;eJksW{w^fx+lxB94mu2~E>-CRq1YZeT&hsrR#R8v>mc$b44n3oMJ_}9 zTPIvj^n$}wM+y#%JF;-ykWo)Pp(u`z7oHfLAG4Q2`1l%P*UUNRg@mKdG&ELrTnz)&jWoGy6p%`JOzB=C?h`i0=a&1^3#%R*S zyf<4+xL`itFpG$JZ~k)#(*6|PVM;y6Sw}`kwhJcud=tnNFGRzm7dQ>UM4#;p@=Ww} zDY#nG2s}H|2&_G3P?GUqb7!8jrb_iLc%!-*rYwYMR^Ve()EfDS-!X{!*pZsKW7FQ? 
z^??8C5u2>Yz+bYjq&{Gbh`amnmi2H-}ZN|UbdFMHg;dNjUvamfPJ=Za6pX4jI^%q zG>1ML+T*t9MVsk-J6ak><*?0LZ-+Q=)DF*L5NGe;VEy`hI)k8J9a{8|HYM=2alGt1 z{DkV)$vZsP+uF?+0q?)a2?Vn$^NMK&(H!X(eIYD}5ziw*4B{i-D}5lL@V?0B@mt}- zH!44@6G%GRo_f3|=VOd=k`7et7HW<}F(p{HF8Rn`sFPboE-@4>#;>`dCiEd;kww#{ zGeLo5BO7P`z1(mB3mPcl)zB0{>p9V^;`#ycpit|_??dDvSf-zH?%wxnq6U(!Plnx7 z%*5%?FSbO-jI%TUgsA4J#KLc}m_p^{?{AA%R@#bt;`61<{L|#x>SzP)vqkj7!Cx z<8_ydr{EsQo@XtMZdljK^A-A)oL?=p&TL2cDd~~yM{-K6V$@vALi(bfG%*sx zpsezXP1tt5J-gO(0|&4C3$Q6WZz3*xhx&#J2XEJIH3={lQeAKF|7)g^T`FjQ2D0oa zER=M(^sW$IQr)H6ODoG}lHGiVl{gmI_RL4B4$InX4-74j2DY zK<@J?rPtm?b{8KVT?>zt!JZ25Nl`-G2b0PtDDqn-izkY4hp#FIYa$V+b`YfdrzWF^>{OLE}y1xm$a{sDhGtHeJ9zsBcJUNq# ze}Yb7!B$#IeoP%yF4t?@4q6_DGHwwG3y_IU#iaLf5N@DV*4%SO0*le&eh48;#IUa_ z)xZ6+Q(AJG{%{x3Jv8OGAMwXo z>r)W=_NjpesOuRraZ-O%BY*jt&LPEJ7XdhTbkmcP*w_4xN76y5{^(qp?!g|?KKu5< z)Si19aZ~L#!K8y5R!a5S2&goEZm%bLMyCsPim*k;6!ycWdj}J2h$Jiq{bhj?S|!fx zU1#+h$h5c|I(*WvQ{(VY5hptjpFBxN|CWYg;}OAb!eM7w5evJg{(R<=rSl0sW>3kw zWRv$T(O2)=)zziS_%KO`n78ZE0W8SPQdX!9FJdshrnOJJG8nS0n6$Hfv zzgjZOS;66w;OX7V#ISRSlVN&B*l#cQ`1Ky|TLBzp8KSHlCz}+Ygg1^YA}H%Pu>Awa>BT_{ywBHo91e1#Li$4ZWo9P$ZLuiju#J zd-T2Fmk2c#YK;&Rr#fxC%~-BzZ^ZZC{%L=hi-%ds9Yo((X0+MD ztv>X+>z#}sc;OxBz|NB|MP=C)(>UacMqvJdnDQz5>g84QRr7vxq@Txg_VO7!QtXL| z_2qPvCTAsTtuCDL5AV}>90mzK;9P!A;dIWqP|H+s)Qc!tlzDS(!%%mbT)e^6_UNsX zGkP|gUKl~1VVm+d{a#T?ji+1^3&*cOQc~vC-9cuJ*%0h?+oR_!ZqiZjS{vFZbmxI5 zpb;iNta-k&o=^F1JDBza8x3^dzd(;uPPXf%Lg2vuy&%089Aa`jZHSM|SIMxQC~O)^ zPrS>mzY9<}FxzF&ZZ6LfFE?SneQNhe<E$2JgA#P~NeiVht*XiS--pfPUHK7noQ}_}KkIYhd zx~A?4ei+9Gg|U51GAd`iEYiS=9&H&mpM~^Im1q*bl0_}PdakGU`Q!p+3ZA{EwJ=F= zZw^g)HVG-`Rj`Dll(3F2dniStQCf`84D8fC;oD})d+0p7nKhNaMG<;)cd)urmh3x3 zjY|?Zdptm2HUXpvGxq4lM)Gp4E~( z$;|TuiJtj4LEnQP={2c)BPb5;?QkM5nsX}E-lG2N1n>0<1Y!Vg+C2fb6ZpVZ!otD) ziL-;F3#Yk*vnBjKQAS)p`~@Mv>uWG_zp_hOaksf=vFq_au~Jd>kV$38nLt)9jc+O6 z`JsVi>?YjcF*HlAT^d~x1#T6|ejZi9AdkuwvutwPo#@E2v4KAp5+Feg}&VRfym&d>{v*Y5_D zRuT+^lpA>Y3f@15)YQ@OS45xn+s|;}HJ!1d%$q+}&>}+Zq;=F^9S+kH-Sko|Wuqrj 
zb`&h0E9Ycvk4UG&&@PyoYIUqLEJ~AIkDw-;T4Z(l$|C<}K6v`lkCECPQZ5k@Nal;p z;rXcpUwL-J<8{1DM+Zq!4^HOe5uWaZ4;K160VMUiE(PdC$U~OX{lk0{i$+|~2i}SO z71y+f-!^PMl`mVcXhfG}-c45Fyy{VmX*3@I>(6y0b1jPx?4r z6Z?5clkaP{T;Ua!SiN+Mee3=g37dI=&|Ocl8^lZRdX{5Bj?R>4N0w73CQnomk%*B0 z`}a5j!}jYh;0^rnZ{F(kC&8cZAo^{NfDrH|1W1K{dLz-FhJU`Q=P$z~;FA3BU)=L2 z&!4Zd`HQCo$U}hV-@e%9PnJKwSN@B|lkMNK{Q8#pPnJKw3HXZzi}xQafB#=64" : 100.000000 + }, + "iodepth_submit" : { + "0" : 0.000000, + "4" : 100.000000, + "8" : 0.000000, + "16" : 0.000000, + "32" : 0.000000, + "64" : 0.000000, + ">=64" : 0.000000 + }, + "iodepth_complete" : { + "0" : 0.000000, + "4" : 99.999922, + "8" : 0.000000, + "16" : 0.000000, + "32" : 0.000000, + "64" : 0.100000, + ">=64" : 0.000000 + }, + "latency_ns" : { + "2" : 0.000000, + "4" : 0.000000, + "10" : 0.000000, + "20" : 0.000000, + "50" : 0.000000, + "100" : 0.000000, + "250" : 0.000000, + "500" : 0.000000, + "750" : 0.000000, + "1000" : 0.000000 + }, + "latency_us" : { + "2" : 0.000000, + "4" : 0.000000, + "10" : 0.000000, + "20" : 0.000000, + "50" : 0.000000, + "100" : 0.000000, + "250" : 0.000000, + "500" : 0.010000, + "750" : 0.070126, + "1000" : 1.756079 + }, + "latency_ms" : { + "2" : 95.414131, + "4" : 2.722457, + "10" : 0.040830, + "20" : 0.010000, + "50" : 0.000000, + "100" : 0.000000, + "250" : 0.000000, + "500" : 0.000000, + "750" : 0.000000, + "1000" : 0.000000, + "2000" : 0.000000, + ">=2000" : 0.000000 + }, + "latency_depth" : 64, + "latency_target" : 0, + "latency_percentile" : 100.000000, + "latency_window" : 0 + } + ], + "disk_util" : [ + { + "name" : "nvme0n1", + "read_ios" : 3004914, + "write_ios" : 3003760, + "read_merges" : 0, + "write_merges" : 0, + "read_ticks" : 4269143, + "write_ticks" : 4598453, + "in_queue" : 11104, + "util" : 99.840351 + } + ] +} diff --git a/tests/data/gpcnet_network_load.log b/tests/data/gpcnet_network_load.log new file mode 100644 index 00000000..6b2f313e --- /dev/null +++ b/tests/data/gpcnet_network_load.log @@ -0,0 +1,97 @@ +NetworkLoad 
Tests v1.3 + Test with 10 MPI ranks (10 nodes) + 2 nodes running Network Tests + 8 nodes running Congestion Tests (min 100 nodes per congestor) + + Legend + RR = random ring communication pattern + Lat = latency + BW = bandwidth + BW+Sync = bandwidth with barrier ++------------------------------------------------------------------------------------------------------------------------------------------+ +| Isolated Network Tests | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| RR Two-sided Lat (8 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | usec | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| RR Two-sided BW+Sync (131072 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Multiple Allreduce (8 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | usec | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ + ++------------------------------------------------------------------------------------------------------------------------------------------+ +| Isolated Congestion Tests | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units | 
++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Alltoall (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Two-sided Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Put Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Get Bcast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ + ++------------------------------------------------------------------------------------------------------------------------------------------+ +| Network Tests running with Congestion Tests ( RR Two-sided Lat Network Test) | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| RR Two-sided Lat (8 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | usec | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Alltoall (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 
MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Two-sided Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Put Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Get Bcast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ + ++------------------------------------------------------------------------------------------------------------------------------------------+ +| Network Tests running with Congestion Tests (RR Two-sided BW+Sync Network Test) | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| RR Two-sided BW+Sync (131072 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Alltoall (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Two-sided Incast (4096 B) | 10000.0 | 10000.0 | 
10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Put Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Get Bcast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ + ++------------------------------------------------------------------------------------------------------------------------------------------+ +| Network Tests running with Congestion Tests ( Multiple Allreduce Network Test) | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Multiple Allreduce (8 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | usec | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Alltoall (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Two-sided Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Put Incast (4096 B) | 
10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ +| Get Bcast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+ + ++------------------------------------------------------------------------------+ +| Network Tests running with Congestion Tests - Key Results | ++---------------------------------+--------------------------------------------+ +| Name | Congestion Impact Factor | ++---------------------------------+----------------------+---------------------+ +| | Avg | 99% | ++---------------------------------+----------------------+---------------------+ +| RR Two-sided Lat (8 B) | 0.0X | 0.0X | ++---------------------------------+----------------------+---------------------+ +| RR Two-sided BW+Sync (131072 B) | 0.0X | 0.0X | ++---------------------------------+----------------------+---------------------+ +| Multiple Allreduce (8 B) | 0.0X | 0.0X | ++---------------------------------+----------------------+---------------------+ diff --git a/tests/data/gpcnet_network_load_error.log b/tests/data/gpcnet_network_load_error.log new file mode 100644 index 00000000..b98b0627 --- /dev/null +++ b/tests/data/gpcnet_network_load_error.log @@ -0,0 +1,12 @@ +ERROR: this application must be run on at least 10 nodes +-------------------------------------------------------------------------- +Primary job terminated normally, but 1 process returned +a non-zero exit code. Per user-direction, the job has been aborted. 
+-------------------------------------------------------------------------- +-------------------------------------------------------------------------- +mpirun detected that one or more processes exited with non-zero status, thus causing +the job to be terminated. The first process to do so was: + + Process name: [[63697,1],0] + Exit code: 1 +-------------------------------------------------------------------------- diff --git a/tests/data/gpcnet_network_test.log b/tests/data/gpcnet_network_test.log new file mode 100644 index 00000000..44372adf --- /dev/null +++ b/tests/data/gpcnet_network_test.log @@ -0,0 +1,30 @@ +Network Tests v1.3 + Test with 2 MPI ranks (2 nodes) + + Legend + RR = random ring communication pattern + Nat = natural ring communication pattern + Lat = latency + BW = bandwidth + BW+Sync = bandwidth with barrier ++------------------------------------------------------------------------------+ +| Isolated Network Tests | ++---------------------------------+--------------+--------------+--------------+ +| Name | Avg | 99% | Units | ++---------------------------------+--------------+--------------+--------------+ +| RR Two-sided Lat (8 B) | 10000.0 | 10000.0 | usec | ++---------------------------------+--------------+--------------+--------------+ +| RR Get Lat (8 B) | 10000.0 | 10000.0 | usec | ++---------------------------------+--------------+--------------+--------------+ +| RR Two-sided BW (131072 B) | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+ +| RR Put BW (131072 B) | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+ +| RR Two-sided BW+Sync (131072 B) | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+ +| Nat Two-sided BW (131072 B) | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+ 
+| Multiple Allreduce (8 B) | 10000.0 | 10000.0 | usec | ++---------------------------------+--------------+--------------+--------------+ +| Multiple Alltoall (4096 B) | 10000.0 | 10000.0 | MiB/s/rank | ++---------------------------------+--------------+--------------+--------------+ diff --git a/tests/data/gpcnet_network_test_error.log b/tests/data/gpcnet_network_test_error.log new file mode 100644 index 00000000..d46ec549 --- /dev/null +++ b/tests/data/gpcnet_network_test_error.log @@ -0,0 +1,12 @@ +ERROR: this application must be run on at least 2 nodes +-------------------------------------------------------------------------- +Primary job terminated normally, but 1 process returned +a non-zero exit code. Per user-direction, the job has been aborted. +-------------------------------------------------------------------------- +-------------------------------------------------------------------------- +mpirun detected that one or more processes exited with non-zero status, thus causing +the job to be terminated. The first process to do so was: + + Process name: [[63697,1],0] + Exit code: 1 +-------------------------------------------------------------------------- diff --git a/tests/data/ib_loopback_8M_size.log b/tests/data/ib_loopback_8M_size.log new file mode 100644 index 00000000..de766404 --- /dev/null +++ b/tests/data/ib_loopback_8M_size.log @@ -0,0 +1,43 @@ + RDMA_Write BW Test + Dual-port : OFF Device : ib0 + Number of qps : 1 Transport type : IB + Connection type : RC Using SRQ : OFF + PCIe relax order: ON + TX depth : 128 + CQ Moderation : 1 + Mtu : 4096[B] + Link type : IB + Max inline data : 0[B] + rdma_cm QPs : OFF + Data ex. 
method : Ethernet +--------------------------------------------------------------------------------------- + local address: LID 0xd06 QPN 0x095f PSN 0x3c9e82 RKey 0x080359 VAddr 0x007f9fc479c000 + remote address: LID 0xd06 QPN 0x095e PSN 0xbd024b RKey 0x080258 VAddr 0x007fe62504b000 +--------------------------------------------------------------------------------------- + #bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps] + 8388608 20000 24056.74 24056.72 0.003007 +************************************ +* Waiting for client to connect... * +************************************ +--------------------------------------------------------------------------------------- + RDMA_Write BW Test + Dual-port : OFF Device : ib0 + Number of qps : 1 Transport type : IB + Connection type : RC Using SRQ : OFF + PCIe relax order: ON + CQ Moderation : 1 + Mtu : 4096[B] + Link type : IB + Max inline data : 0[B] + rdma_cm QPs : OFF + Data ex. method : Ethernet +--------------------------------------------------------------------------------------- + local address: LID 0xd06 QPN 0x095e PSN 0xbd024b RKey 0x080258 VAddr 0x007fe62504b000 + remote address: LID 0xd06 QPN 0x095f PSN 0x3c9e82 RKey 0x080359 VAddr 0x007f9fc479c000 +--------------------------------------------------------------------------------------- + #bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps] + 8388608 20000 24056.74 24056.72 0.003007 +--------------------------------------------------------------------------------------- + +--------------------------------------------------------------------------------------- +--------------------------------------------------------------------------------------- diff --git a/tests/data/ib_loopback_all_sizes.log b/tests/data/ib_loopback_all_sizes.log new file mode 100644 index 00000000..d01780ee --- /dev/null +++ b/tests/data/ib_loopback_all_sizes.log @@ -0,0 +1,66 @@ +************************************ +* Waiting for client to connect... 
* +************************************ +--------------------------------------------------------------------------------------- + RDMA_Write BW Test +Dual-port : OFF Device : ib0 +Number of qps : 1 Transport type : IB +Connection type : RC Using SRQ : OFF +PCIe relax order: ON +--------------------------------------------------------------------------------------- + RDMA_Write BW Test +Dual-port : OFF Device : ib0 +Number of qps : 1 Transport type : IB +Connection type : RC Using SRQ : OFF +PCIe relax order: ON +ibv_wr* API : ON +TX depth : 128 +CQ Moderation : 100 +Mtu : 4096[B] +Link type : IB +Max inline data : 0[B] +rdma_cm QPs : OFF +Data ex. method : Ethernet +--------------------------------------------------------------------------------------- +ibv_wr* API : ON +CQ Moderation : 100 +Mtu : 4096[B] +Link type : IB +Max inline data : 0[B] +rdma_cm QPs : OFF +Data ex. method : Ethernet +--------------------------------------------------------------------------------------- +local address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97ff50000 +local address: LID 0xd06 QPN 0x092e PSN 0x3eb82d RKey 0x080228 VAddr 0x007f19adcbf000 +remote address: LID 0xd06 QPN 0x092e PSN 0x3eb82d RKey 0x080228 VAddr 0x007f19adcbf000 +remote address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97ff50000 +--------------------------------------------------------------------------------------- +--------------------------------------------------------------------------------------- +#bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps] +#bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps] +2 2000 5.32 5.30 2.778732 +4 2000 10.65 10.64 2.788833 +8 2000 21.30 21.27 2.787609 +16 2000 42.60 42.55 2.788268 +32 2000 84.90 82.82 2.713896 +64 2000 173.55 171.66 2.812504 +128 2000 362.27 353.83 2.898535 +256 2000 687.82 679.37 2.782698 +512 2000 1337.12 1311.59 2.686135 +1024 2000 2674.25 2649.39 2.712980 +2048 2000 5248.56 
5118.18 2.620509 +4096 2000 10034.02 9948.41 2.546793 +8192 2000 18620.51 12782.56 1.636168 +16384 2000 23115.27 16782.50 1.074080 +32768 2000 22927.94 18586.03 0.594753 +65536 2000 23330.56 21167.79 0.338685 +131072 2000 22750.35 21443.14 0.171545 +262144 2000 22673.63 22411.35 0.089645 +524288 2000 22679.02 22678.86 0.045358 +1048576 2000 22817.06 22816.86 0.022817 +2097152 2000 22919.37 22919.27 0.011460 +4194304 2000 23277.93 23277.91 0.005819 +8388608 2000 23240.68 23240.68 0.002905 +--------------------------------------------------------------------------------------- +8388608 2000 23240.68 23240.68 0.002905 +--------------------------------------------------------------------------------------- diff --git a/tests/data/nccl_allgather.log b/tests/data/nccl_allgather.log new file mode 100644 index 00000000..433694c7 --- /dev/null +++ b/tests/data/nccl_allgather.log @@ -0,0 +1,53 @@ +# nThread 1 nGpus 8 minBytes 1 maxBytes 8589934592 step: 2(factor) warmup iters: 5 iters: 20 validation: 0 +# +# Using devices +# Rank 0 Pid 112372 on localhost device 0 [0x00] A100-SXM4-40GB +# Rank 1 Pid 112372 on localhost device 1 [0x00] A100-SXM4-40GB +# Rank 2 Pid 112372 on localhost device 2 [0x00] A100-SXM4-40GB +# Rank 3 Pid 112372 on localhost device 3 [0x00] A100-SXM4-40GB +# Rank 4 Pid 112372 on localhost device 4 [0x00] A100-SXM4-40GB +# Rank 5 Pid 112372 on localhost device 5 [0x00] A100-SXM4-40GB +# Rank 6 Pid 112372 on localhost device 6 [0x00] A100-SXM4-40GB +# Rank 7 Pid 112372 on localhost device 7 [0x00] A100-SXM4-40GB +# +# out-of-place in-place +# size count type time algbw busbw error time algbw busbw error +# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) +hostname:3442:3442 [0] NCCL INFO Launch mode Parallel + 0 0 float 34.27 0.00 0.00 N/A 33.57 0.00 0.00 N/A + 0 0 float 33.41 0.00 0.00 N/A 33.62 0.00 0.00 N/A + 0 0 float 33.94 0.00 0.00 N/A 33.48 0.00 0.00 N/A + 0 0 float 33.83 0.00 0.00 N/A 33.62 0.00 0.00 N/A + 0 0 float 33.82 0.00 0.00 N/A 33.57 
0.00 0.00 N/A + 32 1 float 35.03 0.00 0.00 N/A 34.15 0.00 0.00 N/A + 64 2 float 34.36 0.00 0.00 N/A 33.83 0.00 0.00 N/A + 128 4 float 33.94 0.00 0.00 N/A 35.22 0.00 0.00 N/A + 256 8 float 34.44 0.01 0.01 N/A 34.82 0.01 0.01 N/A + 512 16 float 34.84 0.01 0.01 N/A 34.76 0.01 0.01 N/A + 1024 32 float 35.38 0.03 0.03 N/A 34.53 0.03 0.03 N/A + 2048 64 float 34.67 0.06 0.05 N/A 34.91 0.06 0.05 N/A + 4096 128 float 34.62 0.12 0.10 N/A 34.81 0.12 0.10 N/A + 8192 256 float 34.76 0.24 0.21 N/A 35.03 0.23 0.20 N/A + 16384 512 float 34.80 0.47 0.41 N/A 34.90 0.47 0.41 N/A + 32768 1024 float 34.54 0.95 0.83 N/A 35.23 0.93 0.81 N/A + 65536 2048 float 36.34 1.80 1.58 N/A 36.01 1.82 1.59 N/A + 131072 4096 float 40.18 3.26 2.85 N/A 39.43 3.32 2.91 N/A + 262144 8192 float 46.45 5.64 4.94 N/A 46.27 5.67 4.96 N/A + 524288 16384 float 58.48 8.96 7.84 N/A 60.40 8.68 7.60 N/A + 1048576 32768 float 72.95 14.37 12.58 N/A 73.07 14.35 12.56 N/A + 2097152 65536 float 77.28 27.14 23.75 N/A 75.84 27.65 24.20 N/A + 4194304 131072 float 100.7 41.64 36.43 N/A 99.56 42.13 36.86 N/A + 8388608 262144 float 123.5 67.94 59.44 N/A 120.7 69.51 60.82 N/A + 16777216 524288 float 167.7 100.03 87.52 N/A 164.6 101.94 89.20 N/A + 33554432 1048576 float 265.8 126.24 110.46 N/A 257.5 130.33 114.04 N/A + 67108864 2097152 float 379.7 176.74 154.65 N/A 367.6 182.57 159.75 N/A + 134217728 4194304 float 698.6 192.13 168.12 N/A 657.3 204.20 178.67 N/A + 268435456 8388608 float 1192.2 225.16 197.01 N/A 1136.0 236.29 206.76 N/A + 536870912 16777216 float 2304.1 233.01 203.88 N/A 2227.9 240.98 210.85 N/A + 1073741824 33554432 float 4413.4 243.29 212.88 N/A 4258.8 252.12 220.61 N/A + 2147483648 67108864 float 8658.8 248.01 217.01 N/A 8389.4 255.98 223.98 N/A + 4294967296 134217728 float 17016 252.40 220.85 N/A 16474 260.71 228.12 N/A + 8589934592 268435456 float 33646 255.31 223.39 N/A 32669 262.94 230.07 N/A +# Out of bounds values : 0 OK +# Avg bus bandwidth : 58.2651 +# diff --git a/tests/data/nccl_allreduce.log 
b/tests/data/nccl_allreduce.log new file mode 100644 index 00000000..e6a4e052 --- /dev/null +++ b/tests/data/nccl_allreduce.log @@ -0,0 +1,53 @@ +# nThread 1 nGpus 8 minBytes 1 maxBytes 8589934592 step: 2(factor) warmup iters: 5 iters: 20 validation: 0 +# +# Using devices +# Rank 0 Pid 112424 on localhost device 0 [0x00] A100-SXM4-40GB +# Rank 1 Pid 112424 on localhost device 1 [0x00] A100-SXM4-40GB +# Rank 2 Pid 112424 on localhost device 2 [0x00] A100-SXM4-40GB +# Rank 3 Pid 112424 on localhost device 3 [0x00] A100-SXM4-40GB +# Rank 4 Pid 112424 on localhost device 4 [0x00] A100-SXM4-40GB +# Rank 5 Pid 112424 on localhost device 5 [0x00] A100-SXM4-40GB +# Rank 6 Pid 112424 on localhost device 6 [0x00] A100-SXM4-40GB +# Rank 7 Pid 112424 on localhost device 7 [0x00] A100-SXM4-40GB +# +# out-of-place in-place +# size count type redop time algbw busbw error time algbw busbw error +# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) +hostname:3442:3442 [0] NCCL INFO Launch mode Parallel + 0 0 float sum 35.20 0.00 0.00 N/A 34.05 0.00 0.00 N/A + 0 0 float sum 34.18 0.00 0.00 N/A 33.50 0.00 0.00 N/A + 4 1 float sum 34.73 0.00 0.00 N/A 35.30 0.00 0.00 N/A + 8 2 float sum 34.66 0.00 0.00 N/A 34.84 0.00 0.00 N/A + 16 4 float sum 35.00 0.00 0.00 N/A 35.61 0.00 0.00 N/A + 32 8 float sum 35.60 0.00 0.00 N/A 35.27 0.00 0.00 N/A + 64 16 float sum 34.83 0.00 0.00 N/A 34.61 0.00 0.00 N/A + 128 32 float sum 34.53 0.00 0.01 N/A 43.78 0.00 0.01 N/A + 256 64 float sum 34.56 0.01 0.01 N/A 34.95 0.01 0.01 N/A + 512 128 float sum 34.94 0.01 0.03 N/A 35.20 0.01 0.03 N/A + 1024 256 float sum 36.07 0.03 0.05 N/A 35.77 0.03 0.05 N/A + 2048 512 float sum 35.42 0.06 0.10 N/A 35.89 0.06 0.10 N/A + 4096 1024 float sum 35.92 0.11 0.20 N/A 36.11 0.11 0.20 N/A + 8192 2048 float sum 35.91 0.23 0.40 N/A 36.07 0.23 0.40 N/A + 16384 4096 float sum 36.18 0.45 0.79 N/A 35.87 0.46 0.80 N/A + 32768 8192 float sum 36.65 0.89 1.56 N/A 35.73 0.92 1.60 N/A + 65536 16384 float sum 37.82 1.73 3.03 N/A 37.25 
1.76 3.08 N/A + 131072 32768 float sum 41.19 3.18 5.57 N/A 41.11 3.19 5.58 N/A + 262144 65536 float sum 47.53 5.52 9.65 N/A 47.94 5.47 9.57 N/A + 524288 131072 float sum 60.32 8.69 15.21 N/A 60.52 8.66 15.16 N/A + 1048576 262144 float sum 74.78 14.02 24.54 N/A 76.17 13.77 24.09 N/A + 2097152 524288 float sum 93.48 22.43 39.26 N/A 96.10 21.82 38.19 N/A + 4194304 1048576 float sum 112.0 37.44 65.52 N/A 110.2 38.06 66.60 N/A + 8388608 2097152 float sum 162.0 51.79 90.63 N/A 160.0 52.44 91.77 N/A + 16777216 4194304 float sum 226.0 74.23 129.90 N/A 225.0 74.57 130.49 N/A + 33554432 8388608 float sum 374.3 89.65 156.89 N/A 372.8 90.00 157.50 N/A + 67108864 16777216 float sum 584.5 114.81 200.91 N/A 581.9 115.33 201.82 N/A + 134217728 33554432 float sum 1162.2 115.49 202.11 N/A 1162.5 115.46 202.05 N/A + 268435456 67108864 float sum 2112.2 127.09 222.40 N/A 2111.8 127.11 222.45 N/A + 536870912 134217728 float sum 4200.3 127.82 223.68 N/A 4184.0 128.32 224.55 N/A + 1073741824 268435456 float sum 8159.5 131.59 230.29 N/A 8176.5 131.32 229.81 N/A + 2147483648 536870912 float sum 16215 132.44 231.76 N/A 16203 132.53 231.93 N/A + 4294967296 1073741824 float sum 32070 133.92 234.37 N/A 32052 134.00 234.50 N/A + 8589934592 2147483648 float sum 63896 134.44 235.26 N/A 63959 134.30 235.03 N/A +# Out of bounds values : 0 OK +# Avg bus bandwidth : 68.4048 +# diff --git a/tests/data/nccl_alltoall.log b/tests/data/nccl_alltoall.log new file mode 100644 index 00000000..29388b0b --- /dev/null +++ b/tests/data/nccl_alltoall.log @@ -0,0 +1,52 @@ +# nThread 1 nGpus 8 minBytes 1 maxBytes 8589934592 step: 2(factor) warmup iters: 5 iters: 20 validation: 0 +# +# Using devices +# Rank 0 Pid 167261 on localhost device 0 [0x00] A100-SXM4-40GB +# Rank 1 Pid 167261 on localhost device 1 [0x00] A100-SXM4-40GB +# Rank 2 Pid 167261 on localhost device 2 [0x00] A100-SXM4-40GB +# Rank 3 Pid 167261 on localhost device 3 [0x00] A100-SXM4-40GB +# Rank 4 Pid 167261 on localhost device 4 [0x00] 
A100-SXM4-40GB +# Rank 5 Pid 167261 on localhost device 5 [0x00] A100-SXM4-40GB +# Rank 6 Pid 167261 on localhost device 6 [0x00] A100-SXM4-40GB +# Rank 7 Pid 167261 on localhost device 7 [0x00] A100-SXM4-40GB +# +# out-of-place in-place +# size count type redop time algbw busbw error time algbw busbw error +# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) + 0 0 float 1.63 0.00 0.00 N/A 1.38 0.00 0.00 N/A + 0 0 float 1.35 0.00 0.00 N/A 1.34 0.00 0.00 N/A + 0 0 float 1.35 0.00 0.00 N/A 1.77 0.00 0.00 N/A + 0 0 float 1.37 0.00 0.00 N/A 1.39 0.00 0.00 N/A + 0 0 float 1.34 0.00 0.00 N/A 1.33 0.00 0.00 N/A + 32 1 float 89.00 0.00 0.00 N/A 85.13 0.00 0.00 N/A + 64 2 float 86.83 0.00 0.00 N/A 85.77 0.00 0.00 N/A + 128 4 float 86.02 0.00 0.00 N/A 85.30 0.00 0.00 N/A + 256 8 float 87.20 0.00 0.00 N/A 86.21 0.00 0.00 N/A + 512 16 float 87.33 0.01 0.01 N/A 88.47 0.01 0.01 N/A + 1024 32 float 88.17 0.01 0.01 N/A 88.98 0.01 0.01 N/A + 2048 64 float 86.44 0.02 0.02 N/A 86.65 0.02 0.02 N/A + 4096 128 float 86.75 0.05 0.04 N/A 86.68 0.05 0.04 N/A + 8192 256 float 88.78 0.09 0.08 N/A 87.05 0.09 0.08 N/A + 16384 512 float 87.71 0.19 0.16 N/A 86.76 0.19 0.17 N/A + 32768 1024 float 86.26 0.38 0.33 N/A 88.92 0.37 0.32 N/A + 65536 2048 float 87.67 0.75 0.65 N/A 89.16 0.74 0.64 N/A + 131072 4096 float 87.35 1.50 1.31 N/A 86.76 1.51 1.32 N/A + 262144 8192 float 87.02 3.01 2.64 N/A 87.98 2.98 2.61 N/A + 524288 16384 float 86.58 6.06 5.30 N/A 89.33 5.87 5.14 N/A + 1048576 32768 float 87.42 11.99 10.50 N/A 88.90 11.79 10.32 N/A + 2097152 65536 float 89.61 23.40 20.48 N/A 90.10 23.27 20.37 N/A + 4194304 131072 float 96.44 43.49 38.05 N/A 99.62 42.10 36.84 N/A + 8388608 262144 float 121.1 69.28 60.62 N/A 120.6 69.56 60.87 N/A + 16777216 524288 float 160.4 104.62 91.55 N/A 158.8 105.64 92.43 N/A + 33554432 1048576 float 237.5 141.30 123.64 N/A 234.5 143.11 125.22 N/A + 67108864 2097152 float 396.8 169.13 147.99 N/A 387.0 173.41 151.73 N/A + 134217728 4194304 float 633.6 211.83 185.35 N/A 
620.9 216.17 189.15 N/A + 268435456 8388608 float 1189.1 225.75 197.53 N/A 1167.8 229.86 201.13 N/A + 536870912 16777216 float 2236.6 240.04 210.03 N/A 2197.4 244.32 213.78 N/A + 1073741824 33554432 float 4335.5 247.66 216.71 N/A 4274.2 251.22 219.81 N/A + 2147483648 67108864 float 8510.4 252.34 220.79 N/A 8405.3 255.49 223.56 N/A + 4294967296 134217728 float 16860 254.74 222.90 N/A 16678 257.53 225.34 N/A + 8589934592 268435456 float 33508 256.36 224.31 N/A 33234 258.47 226.16 N/A +# Out of bounds values : 0 OK +# Avg bus bandwidth : 58.6481 +# diff --git a/tests/data/nccl_broadcast.log b/tests/data/nccl_broadcast.log new file mode 100644 index 00000000..2c92850b --- /dev/null +++ b/tests/data/nccl_broadcast.log @@ -0,0 +1,53 @@ +# nThread 1 nGpus 8 minBytes 1 maxBytes 8589934592 step: 2(factor) warmup iters: 5 iters: 20 validation: 0 +# +# Using devices +# Rank 0 Pid 112528 on localhost device 0 [0x00] A100-SXM4-40GB +# Rank 1 Pid 112528 on localhost device 1 [0x00] A100-SXM4-40GB +# Rank 2 Pid 112528 on localhost device 2 [0x00] A100-SXM4-40GB +# Rank 3 Pid 112528 on localhost device 3 [0x00] A100-SXM4-40GB +# Rank 4 Pid 112528 on localhost device 4 [0x00] A100-SXM4-40GB +# Rank 5 Pid 112528 on localhost device 5 [0x00] A100-SXM4-40GB +# Rank 6 Pid 112528 on localhost device 6 [0x00] A100-SXM4-40GB +# Rank 7 Pid 112528 on localhost device 7 [0x00] A100-SXM4-40GB +# +# out-of-place in-place +# size count type root time algbw busbw error time algbw busbw error +# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) +hostname:3442:3442 [0] NCCL INFO Launch mode Parallel + 0 0 float 0 34.61 0.00 0.00 N/A 34.33 0.00 0.00 N/A + 0 0 float 0 34.43 0.00 0.00 N/A 35.06 0.00 0.00 N/A + 4 1 float 0 33.96 0.00 0.00 N/A 33.80 0.00 0.00 N/A + 8 2 float 0 34.16 0.00 0.00 N/A 34.32 0.00 0.00 N/A + 16 4 float 0 34.47 0.00 0.00 N/A 34.85 0.00 0.00 N/A + 32 8 float 0 35.24 0.00 0.00 N/A 34.75 0.00 0.00 N/A + 64 16 float 0 35.12 0.00 0.00 N/A 34.89 0.00 0.00 N/A + 128 32 float 0 
34.67 0.00 0.00 N/A 34.36 0.00 0.00 N/A + 256 64 float 0 34.23 0.01 0.01 N/A 34.42 0.01 0.01 N/A + 512 128 float 0 34.26 0.01 0.01 N/A 35.20 0.01 0.01 N/A + 1024 256 float 0 34.87 0.03 0.03 N/A 34.80 0.03 0.03 N/A + 2048 512 float 0 34.90 0.06 0.06 N/A 35.27 0.06 0.06 N/A + 4096 1024 float 0 35.37 0.12 0.12 N/A 34.59 0.12 0.12 N/A + 8192 2048 float 0 34.95 0.23 0.23 N/A 34.79 0.24 0.24 N/A + 16384 4096 float 0 34.94 0.47 0.47 N/A 34.94 0.47 0.47 N/A + 32768 8192 float 0 35.03 0.94 0.94 N/A 34.71 0.94 0.94 N/A + 65536 16384 float 0 36.04 1.82 1.82 N/A 36.48 1.80 1.80 N/A + 131072 32768 float 0 40.09 3.27 3.27 N/A 39.92 3.28 3.28 N/A + 262144 65536 float 0 46.58 5.63 5.63 N/A 45.89 5.71 5.71 N/A + 524288 131072 float 0 58.37 8.98 8.98 N/A 59.67 8.79 8.79 N/A + 1048576 262144 float 0 76.02 13.79 13.79 N/A 78.43 13.37 13.37 N/A + 2097152 524288 float 0 78.12 26.85 26.85 N/A 78.84 26.60 26.60 N/A + 4194304 1048576 float 0 81.06 51.74 51.74 N/A 80.39 52.17 52.17 N/A + 8388608 2097152 float 0 97.20 86.30 86.30 N/A 96.09 87.30 87.30 N/A + 16777216 4194304 float 0 143.1 117.22 117.22 N/A 142.1 118.06 118.06 N/A + 33554432 8388608 float 0 223.4 150.21 150.21 N/A 221.3 151.61 151.61 N/A + 67108864 16777216 float 0 374.8 179.05 179.05 N/A 374.4 179.23 179.23 N/A + 134217728 33554432 float 0 672.2 199.67 199.67 N/A 670.0 200.34 200.34 N/A + 268435456 67108864 float 0 1271.5 211.11 211.11 N/A 1264.5 212.28 212.28 N/A + 536870912 134217728 float 0 2436.3 220.37 220.37 N/A 2434.5 220.53 220.53 N/A + 1073741824 268435456 float 0 4769.2 225.14 225.14 N/A 4697.5 228.58 228.58 N/A + 2147483648 536870912 float 0 9314.2 230.56 230.56 N/A 9248.3 232.20 232.20 N/A + 4294967296 1073741824 float 0 18487 232.33 232.33 N/A 18381 233.66 233.66 N/A + 8589934592 2147483648 float 0 36896 232.81 232.81 N/A 36599 234.70 234.70 N/A +# Out of bounds values : 0 OK +# Avg bus bandwidth : 64.8653 +# diff --git a/tests/data/nccl_reduce.log b/tests/data/nccl_reduce.log new file mode 100644 index 
00000000..902bc241 --- /dev/null +++ b/tests/data/nccl_reduce.log @@ -0,0 +1,53 @@ +# nThread 1 nGpus 8 minBytes 1 maxBytes 8589934592 step: 2(factor) warmup iters: 5 iters: 20 validation: 0 +# +# Using devices +# Rank 0 Pid 112476 on localhost device 0 [0x00] A100-SXM4-40GB +# Rank 1 Pid 112476 on localhost device 1 [0x00] A100-SXM4-40GB +# Rank 2 Pid 112476 on localhost device 2 [0x00] A100-SXM4-40GB +# Rank 3 Pid 112476 on localhost device 3 [0x00] A100-SXM4-40GB +# Rank 4 Pid 112476 on localhost device 4 [0x00] A100-SXM4-40GB +# Rank 5 Pid 112476 on localhost device 5 [0x00] A100-SXM4-40GB +# Rank 6 Pid 112476 on localhost device 6 [0x00] A100-SXM4-40GB +# Rank 7 Pid 112476 on localhost device 7 [0x00] A100-SXM4-40GB +# +# out-of-place in-place +# size count type redop root time algbw busbw error time algbw busbw error +# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) +hostname:3442:3442 [0] NCCL INFO Launch mode Parallel + 0 0 float sum 0 36.90 0.00 0.00 N/A 36.47 0.00 0.00 N/A + 0 0 float sum 0 34.18 0.00 0.00 N/A 35.70 0.00 0.00 N/A + 4 1 float sum 0 35.40 0.00 0.00 N/A 35.59 0.00 0.00 N/A + 8 2 float sum 0 36.35 0.00 0.00 N/A 35.74 0.00 0.00 N/A + 16 4 float sum 0 35.47 0.00 0.00 N/A 34.27 0.00 0.00 N/A + 32 8 float sum 0 36.16 0.00 0.00 N/A 36.19 0.00 0.00 N/A + 64 16 float sum 0 35.61 0.00 0.00 N/A 35.45 0.00 0.00 N/A + 128 32 float sum 0 34.78 0.00 0.00 N/A 35.80 0.00 0.00 N/A + 256 64 float sum 0 35.37 0.01 0.01 N/A 35.89 0.01 0.01 N/A + 512 128 float sum 0 35.49 0.01 0.01 N/A 35.53 0.01 0.01 N/A + 1024 256 float sum 0 35.38 0.03 0.03 N/A 35.52 0.03 0.03 N/A + 2048 512 float sum 0 35.97 0.06 0.06 N/A 35.13 0.06 0.06 N/A + 4096 1024 float sum 0 36.03 0.11 0.11 N/A 35.82 0.11 0.11 N/A + 8192 2048 float sum 0 36.80 0.22 0.22 N/A 36.71 0.22 0.22 N/A + 16384 4096 float sum 0 35.37 0.46 0.46 N/A 36.79 0.45 0.45 N/A + 32768 8192 float sum 0 35.16 0.93 0.93 N/A 35.72 0.92 0.92 N/A + 65536 16384 float sum 0 38.08 1.72 1.72 N/A 37.74 1.74 1.74 N/A + 131072 
32768 float sum 0 43.07 3.04 3.04 N/A 41.59 3.15 3.15 N/A + 262144 65536 float sum 0 52.16 5.03 5.03 N/A 50.49 5.19 5.19 N/A + 524288 131072 float sum 0 67.58 7.76 7.76 N/A 66.57 7.88 7.88 N/A + 1048576 262144 float sum 0 76.74 13.66 13.66 N/A 80.47 13.03 13.03 N/A + 2097152 524288 float sum 0 78.51 26.71 26.71 N/A 78.76 26.63 26.63 N/A + 4194304 1048576 float sum 0 81.47 51.48 51.48 N/A 80.30 52.23 52.23 N/A + 8388608 2097152 float sum 0 94.72 88.57 88.57 N/A 94.06 89.19 89.19 N/A + 16777216 4194304 float sum 0 137.7 121.83 121.83 N/A 139.6 120.17 120.17 N/A + 33554432 8388608 float sum 0 218.3 153.70 153.70 N/A 218.1 153.83 153.83 N/A + 67108864 16777216 float sum 0 370.8 180.96 180.96 N/A 369.8 181.49 181.49 N/A + 134217728 33554432 float sum 0 661.0 203.06 203.06 N/A 659.9 203.39 203.39 N/A + 268435456 67108864 float sum 0 1251.4 214.52 214.52 N/A 1268.1 211.68 211.68 N/A + 536870912 134217728 float sum 0 2421.6 221.70 221.70 N/A 2413.4 222.45 222.45 N/A + 1073741824 268435456 float sum 0 4736.0 226.72 226.72 N/A 4757.9 225.68 225.68 N/A + 2147483648 536870912 float sum 0 9323.5 230.33 230.33 N/A 9354.0 229.58 229.58 N/A + 4294967296 1073741824 float sum 0 18594 230.99 230.99 N/A 18570 231.28 231.28 N/A + 8589934592 2147483648 float sum 0 37613 228.38 228.38 N/A 37539 228.83 228.83 N/A +# Out of bounds values : 0 OK +# Avg bus bandwidth : 65.018 +# diff --git a/tests/data/nccl_reducescatter.log b/tests/data/nccl_reducescatter.log new file mode 100644 index 00000000..fafd87bb --- /dev/null +++ b/tests/data/nccl_reducescatter.log @@ -0,0 +1,53 @@ +# nThread 1 nGpus 8 minBytes 1 maxBytes 8589934592 step: 2(factor) warmup iters: 5 iters: 20 validation: 0 +# +# Using devices +# Rank 0 Pid 112580 on localhost device 0 [0x00] A100-SXM4-40GB +# Rank 1 Pid 112580 on localhost device 1 [0x00] A100-SXM4-40GB +# Rank 2 Pid 112580 on localhost device 2 [0x00] A100-SXM4-40GB +# Rank 3 Pid 112580 on localhost device 3 [0x00] A100-SXM4-40GB +# Rank 4 Pid 112580 on localhost 
device 4 [0x00] A100-SXM4-40GB +# Rank 5 Pid 112580 on localhost device 5 [0x00] A100-SXM4-40GB +# Rank 6 Pid 112580 on localhost device 6 [0x00] A100-SXM4-40GB +# Rank 7 Pid 112580 on localhost device 7 [0x00] A100-SXM4-40GB +# +# out-of-place in-place +# size count type redop time algbw busbw error time algbw busbw error +# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) +hostname:3442:3442 [0] NCCL INFO Launch mode Parallel + 0 0 float sum 34.88 0.00 0.00 N/A 33.65 0.00 0.00 N/A + 0 0 float sum 33.54 0.00 0.00 N/A 33.72 0.00 0.00 N/A + 0 0 float sum 33.45 0.00 0.00 N/A 33.44 0.00 0.00 N/A + 0 0 float sum 34.07 0.00 0.00 N/A 33.44 0.00 0.00 N/A + 0 0 float sum 33.55 0.00 0.00 N/A 33.43 0.00 0.00 N/A + 32 1 float sum 35.06 0.00 0.00 N/A 35.14 0.00 0.00 N/A + 64 2 float sum 34.82 0.00 0.00 N/A 34.76 0.00 0.00 N/A + 128 4 float sum 34.38 0.00 0.00 N/A 34.52 0.00 0.00 N/A + 256 8 float sum 34.75 0.01 0.01 N/A 34.32 0.01 0.01 N/A + 512 16 float sum 34.71 0.01 0.01 N/A 35.43 0.01 0.01 N/A + 1024 32 float sum 35.16 0.03 0.03 N/A 34.75 0.03 0.03 N/A + 2048 64 float sum 35.43 0.06 0.05 N/A 35.29 0.06 0.05 N/A + 4096 128 float sum 35.49 0.12 0.10 N/A 35.17 0.12 0.10 N/A + 8192 256 float sum 35.18 0.23 0.20 N/A 35.77 0.23 0.20 N/A + 16384 512 float sum 35.27 0.46 0.41 N/A 35.49 0.46 0.40 N/A + 32768 1024 float sum 35.00 0.94 0.82 N/A 35.09 0.93 0.82 N/A + 65536 2048 float sum 36.78 1.78 1.56 N/A 36.92 1.77 1.55 N/A + 131072 4096 float sum 40.71 3.22 2.82 N/A 39.78 3.29 2.88 N/A + 262144 8192 float sum 48.12 5.45 4.77 N/A 46.65 5.62 4.92 N/A + 524288 16384 float sum 59.81 8.77 7.67 N/A 58.88 8.90 7.79 N/A + 1048576 32768 float sum 72.37 14.49 12.68 N/A 74.95 13.99 12.24 N/A + 2097152 65536 float sum 80.64 26.01 22.76 N/A 79.62 26.34 23.05 N/A + 4194304 131072 float sum 108.9 38.53 33.72 N/A 109.3 38.37 33.57 N/A + 8388608 262144 float sum 147.3 56.96 49.84 N/A 166.8 50.28 44.00 N/A + 16777216 524288 float sum 152.4 110.11 96.34 N/A 152.8 109.82 96.09 N/A + 33554432 
1048576 float sum 240.5 139.50 122.06 N/A 240.8 139.33 121.91 N/A + 67108864 2097152 float sum 356.1 188.45 164.89 N/A 352.1 190.57 166.75 N/A + 134217728 4194304 float sum 618.1 217.15 190.01 N/A 615.2 218.18 190.90 N/A + 268435456 8388608 float sum 1108.7 242.11 211.84 N/A 1112.6 241.27 211.11 N/A + 536870912 16777216 float sum 2169.0 247.52 216.58 N/A 2181.8 246.07 215.31 N/A + 1073741824 33554432 float sum 4203.0 255.47 223.54 N/A 4206.3 255.27 223.36 N/A + 2147483648 67108864 float sum 8356.9 256.97 224.85 N/A 8323.5 258.00 225.75 N/A + 4294967296 134217728 float sum 16400 261.89 229.15 N/A 16402 261.86 229.13 N/A + 8589934592 268435456 float sum 32464 264.60 231.52 N/A 32502 264.29 231.25 N/A +# Out of bounds values : 0 OK +# Avg bus bandwidth : 60.168 +# diff --git a/tests/data/rocm_memory_d2h_bw.log b/tests/data/rocm_memory_d2h_bw.log new file mode 100644 index 00000000..4f1a22fb --- /dev/null +++ b/tests/data/rocm_memory_d2h_bw.log @@ -0,0 +1,51 @@ +Device:Device 738c Mem=32.0GB #CUs=120 Freq=1502Mhz MallocMode=pinned + test atts units median mean stddev min max + D2H_Bandwidth_pinned +064By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000 + D2H_Bandwidth_pinned +256By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000 + D2H_Bandwidth_pinned +512By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000 + D2H_Bandwidth_pinned 1kB GB/sec 0.0428 0.0426 0.0019 0.0114 0.0446 + D2H_Bandwidth_pinned 2kB GB/sec 0.0850 0.0844 0.0034 0.0415 0.0893 + D2H_Bandwidth_pinned 4kB GB/sec 0.1701 0.1687 0.0084 0.0504 0.1773 + D2H_Bandwidth_pinned 8kB GB/sec 0.3378 0.3348 0.0168 0.1085 0.3546 + D2H_Bandwidth_pinned 16kB GB/sec 0.6667 0.6606 0.0218 0.5618 0.6897 + D2H_Bandwidth_pinned 32kB GB/sec 1.3072 1.2954 0.0663 0.5682 1.3605 + D2H_Bandwidth_pinned 64kB GB/sec 2.5550 2.5339 0.0955 2.1382 2.6904 + D2H_Bandwidth_pinned 128kB GB/sec 4.8162 4.7807 0.2331 2.0940 4.9621 + D2H_Bandwidth_pinned 256kB GB/sec 8.2286 8.2192 0.1671 7.2456 8.5286 + D2H_Bandwidth_pinned 512kB GB/sec 12.7930 12.7062 0.4407 7.1196 
13.0478 + D2H_Bandwidth_pinned 1024kB GB/sec 17.5603 17.4938 0.3921 12.7184 17.7989 + D2H_Bandwidth_pinned 2048kB GB/sec 21.6275 21.5591 0.2233 20.6073 21.8076 + D2H_Bandwidth_pinned 4096kB GB/sec 24.2708 24.2556 0.0942 23.5724 24.4292 + D2H_Bandwidth_pinned 8192kB GB/sec 24.9287 24.9093 0.0733 24.7171 25.0359 + D2H_Bandwidth_pinned 16384kB GB/sec 26.4588 26.1976 2.4387 1.9387 26.5191 + D2H_Bandwidth_pinned 32768kB GB/sec 27.2939 27.1202 0.7941 23.2086 27.3277 + D2H_Bandwidth_pinned 65536kB GB/sec 26.8278 26.7238 0.3894 24.7946 26.9000 + D2H_Bandwidth_pinned 131072kB GB/sec 27.4751 27.3457 0.3968 25.4168 27.5098 + D2H_Bandwidth_pinned 262144kB GB/sec 27.8236 27.7173 0.3072 26.7977 27.8525 + D2H_Bandwidth_pinned 524288kB GB/sec 28.0193 27.9348 0.1912 27.4707 28.0314 + D2H_Time_pinned +064By ms 0.0229 0.0246 0.0457 0.0216 1.4690 + D2H_Time_pinned +256By ms 0.0232 0.0234 0.0013 0.0221 0.0378 + D2H_Time_pinned +512By ms 0.0234 0.0238 0.0063 0.0224 0.2091 + D2H_Time_pinned 1kB ms 0.0234 0.0236 0.0028 0.0224 0.0875 + D2H_Time_pinned 2kB ms 0.0235 0.0237 0.0014 0.0224 0.0482 + D2H_Time_pinned 4kB ms 0.0235 0.0239 0.0031 0.0226 0.0794 + D2H_Time_pinned 8kB ms 0.0237 0.0240 0.0027 0.0226 0.0738 + D2H_Time_pinned 16kB ms 0.0240 0.0242 0.0009 0.0232 0.0285 + D2H_Time_pinned 32kB ms 0.0245 0.0248 0.0021 0.0235 0.0563 + D2H_Time_pinned 64kB ms 0.0254 0.0257 0.0011 0.0242 0.0304 + D2H_Time_pinned 128kB ms 0.0272 0.0275 0.0026 0.0264 0.0626 + D2H_Time_pinned 256kB ms 0.0318 0.0319 0.0007 0.0307 0.0362 + D2H_Time_pinned 512kB ms 0.0410 0.0413 0.0024 0.0402 0.0736 + D2H_Time_pinned 1024kB ms 0.0597 0.0599 0.0017 0.0589 0.0824 + D2H_Time_pinned 2048kB ms 0.0970 0.0973 0.0010 0.0962 0.1018 + D2H_Time_pinned 4096kB ms 0.1728 0.1729 0.0007 0.1717 0.1779 + D2H_Time_pinned 8192kB ms 0.3365 0.3367 0.0010 0.3350 0.3394 + D2H_Time_pinned 16384kB ms 0.6341 0.7147 0.7979 0.6326 8.6538 + D2H_Time_pinned 32768kB ms 1.2294 1.2385 0.0420 1.2278 1.4458 + D2H_Time_pinned 65536kB ms 2.5014 2.5117 
0.0391 2.4947 2.7066 + D2H_Time_pinned 131072kB ms 4.8850 4.9092 0.0748 4.8789 5.2806 + D2H_Time_pinned 262144kB ms 9.6478 9.6860 0.1106 9.6377 10.0171 + D2H_Time_pinned 524288kB ms 19.1607 19.2196 0.1333 19.1525 19.5434 + +Note: results marked with (*) had missing values such as +might occur with a mixture of architectural capabilities. diff --git a/tests/data/rocm_memory_h2d_bw.log b/tests/data/rocm_memory_h2d_bw.log new file mode 100644 index 00000000..2e0f00e4 --- /dev/null +++ b/tests/data/rocm_memory_h2d_bw.log @@ -0,0 +1,51 @@ +Device:Device 738c Mem=32.0GB #CUs=120 Freq=1502Mhz MallocMode=pinned + test atts units median mean stddev min max + H2D_Bandwidth_pinned +064By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000 + H2D_Bandwidth_pinned +256By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000 + H2D_Bandwidth_pinned +512By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000 + H2D_Bandwidth_pinned 1kB GB/sec 0.0414 0.0411 0.0017 0.0189 0.0434 + H2D_Bandwidth_pinned 2kB GB/sec 0.0828 0.0824 0.0018 0.0683 0.0862 + H2D_Bandwidth_pinned 4kB GB/sec 0.1656 0.1652 0.0032 0.1374 0.1724 + H2D_Bandwidth_pinned 8kB GB/sec 0.3268 0.3251 0.0117 0.1880 0.3425 + H2D_Bandwidth_pinned 16kB GB/sec 0.6410 0.6365 0.0259 0.3597 0.6757 + H2D_Bandwidth_pinned 32kB GB/sec 1.2422 1.2432 0.0278 0.9346 1.2987 + H2D_Bandwidth_pinned 64kB GB/sec 2.3968 2.4161 0.1486 0.7242 2.6042 + H2D_Bandwidth_pinned 128kB GB/sec 4.6786 4.6339 0.1310 4.1143 4.8162 + H2D_Bandwidth_pinned 256kB GB/sec 7.8349 7.8369 0.1150 6.9093 8.0270 + H2D_Bandwidth_pinned 512kB GB/sec 11.9963 11.9828 0.1287 11.2158 12.2201 + H2D_Bandwidth_pinned 1024kB GB/sec 16.3342 16.3315 0.0956 16.0147 16.5823 + H2D_Bandwidth_pinned 2048kB GB/sec 19.9790 19.9770 0.0853 19.7681 20.1635 + H2D_Bandwidth_pinned 4096kB GB/sec 22.2706 22.2642 0.0552 22.0644 22.3847 + H2D_Bandwidth_pinned 8192kB GB/sec 22.8232 22.7881 0.1669 21.3196 22.8930 + H2D_Bandwidth_pinned 16384kB GB/sec 24.1521 24.1411 0.0429 24.0165 24.2162 + H2D_Bandwidth_pinned 32768kB GB/sec 
24.8695 24.7086 0.7491 20.6288 24.9035 + H2D_Bandwidth_pinned 65536kB GB/sec 24.4840 24.0101 2.5769 6.1754 24.5292 + H2D_Bandwidth_pinned 131072kB GB/sec 25.0487 24.9593 0.2601 24.1286 25.0711 + H2D_Bandwidth_pinned 262144kB GB/sec 25.3280 25.2351 0.1788 24.8746 25.3498 + H2D_Bandwidth_pinned 524288kB GB/sec 24.7523 24.6708 0.1586 24.3154 24.7880 + H2D_Timepinned +064By ms 0.0245 0.0253 0.0240 0.0232 0.7821 + H2D_Timepinned +256By ms 0.0243 0.0244 0.0013 0.0232 0.0546 + H2D_Timepinned +512By ms 0.0243 0.0244 0.0014 0.0230 0.0566 + H2D_Timepinned 1kB ms 0.0242 0.0244 0.0016 0.0230 0.0530 + H2D_Timepinned 2kB ms 0.0242 0.0243 0.0005 0.0232 0.0293 + H2D_Timepinned 4kB ms 0.0242 0.0242 0.0005 0.0232 0.0291 + H2D_Timepinned 8kB ms 0.0245 0.0247 0.0013 0.0234 0.0426 + H2D_Timepinned 16kB ms 0.0250 0.0252 0.0015 0.0237 0.0445 + H2D_Timepinned 32kB ms 0.0258 0.0258 0.0006 0.0246 0.0342 + H2D_Timepinned 64kB ms 0.0271 0.0272 0.0045 0.0250 0.0898 + H2D_Timepinned 128kB ms 0.0280 0.0283 0.0008 0.0272 0.0318 + H2D_Timepinned 256kB ms 0.0334 0.0334 0.0005 0.0326 0.0379 + H2D_Timepinned 512kB ms 0.0437 0.0437 0.0005 0.0429 0.0467 + H2D_Timepinned 1024kB ms 0.0642 0.0642 0.0004 0.0632 0.0654 + H2D_Timepinned 2048kB ms 0.1050 0.1050 0.0004 0.1040 0.1061 + H2D_Timepinned 4096kB ms 0.1883 0.1884 0.0005 0.1874 0.1901 + H2D_Timepinned 8192kB ms 0.3675 0.3681 0.0028 0.3664 0.3934 + H2D_Timepinned 16384kB ms 0.6946 0.6950 0.0012 0.6928 0.6986 + H2D_Timepinned 32768kB ms 1.3492 1.3595 0.0482 1.3474 1.6266 + H2D_Timepinned 65536kB ms 2.7409 2.9163 1.1368 2.7358 10.8670 + H2D_Timepinned 131072kB ms 5.3582 5.3780 0.0576 5.3534 5.5626 + H2D_Timepinned 262144kB ms 10.5983 10.6379 0.0761 10.5892 10.7915 + H2D_Timepinned 524288kB ms 21.6897 21.7622 0.1411 21.6585 22.0794 + +Note: results marked with (*) had missing values such as +might occur with a mixture of architectural capabilities. 
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Unittest TestCase helpers."""

import os
import shutil
import tempfile
from pathlib import Path


class BenchmarkTestCase(object):
    """Base class for benchmark test case.

    Provides a class-wide temp directory plus helpers to mock environment
    variables and create placeholder files inside that directory.

    Examples:
        Inherit from both BenchmarkTestCase and unittest.TestCase.
        ```
        class FooBenchmarkTestCase(BenchmarkTestCase, unittest.TestCase):
            def setUp(self):
                super().setUp()
                ...
        ```
    """
    def setUp(self):
        """Hook method for setting up the test fixture before exercising it."""
        pass

    def tearDown(self):
        """Hook method for deconstructing the test fixture after testing it."""
        pass

    @classmethod
    def setUpClass(cls):
        """Hook method for setting up class fixture before running tests in the class.

        Will create a temp directory and an empty record of mocked envs for all tests.
        Run once for the whole class.
        """
        cls._tmp_dir = tempfile.mkdtemp(prefix='sbtest')
        # Maps mocked variable name -> value it had before the first mock (None if unset).
        cls._curr_mock_envs = {}

    @classmethod
    def tearDownClass(cls):
        """Hook method for deconstructing the class fixture after running all tests in the class.

        Will restore original envs and cleanup temp directory.
        Run once for the whole class.
        """
        # cleanupMockEnvs is an instance method that only touches class-level state,
        # so the class itself is passed as the receiver.
        cls.cleanupMockEnvs(cls)
        shutil.rmtree(cls._tmp_dir)

    def createMockEnvs(self, envs=None):
        """Create mock envs for tests.

        The value a variable had before it was first mocked is remembered, so mocking
        the same variable several times still restores the real original on cleanup.

        Args:
            envs (dict, optional): Environment variables to be mocked.
                Defaults to None and will mock SB_MICRO_PATH to temp directory.
        """
        if not envs:
            envs = {'SB_MICRO_PATH': self._tmp_dir}
        for name, value in envs.items():
            # setdefault keys on presence, so a remembered None (variable was unset)
            # is not overwritten by a later mock of the same variable.
            self._curr_mock_envs.setdefault(name, os.environ.get(name))
            os.environ[name] = value

    def cleanupMockEnvs(self):
        """Cleanup mock envs and restore original envs.

        Variables that did not exist before mocking are removed, the others get their
        original value back. Safe to call more than once.
        """
        for name, original in self._curr_mock_envs.items():
            if original is None:
                # The code under test may already have removed the variable.
                os.environ.pop(name, None)
            else:
                os.environ[name] = original
        # Forget restored variables so a repeated cleanup does not replay stale values.
        self._curr_mock_envs.clear()

    def createMockFiles(self, files, mode=0o755):
        """Create mock files for tests.

        Args:
            files (List[str]): List of file names, relative path will be created under temp directory.
            mode (int, optional): Octal integer for file mode. Defaults to 0o755.
        """
        for filename in files:
            filepath = Path(self._tmp_dir) / filename
            filepath.parent.mkdir(parents=True, exist_ok=True)
            filepath.touch(mode=mode, exist_ok=True)
open(self.test_mpi_host_file, 'w') as fd: + fd.write('all:\n hosts:\n 10.0.0.12:\n 10.0.0.11:\n 10.0.0.10:\n 10.0.0.13:\n 10.0.0.14:\n') + mess_hosts = AnsibleClient( + OmegaConf.create( + { + 'host_file': self.test_mpi_host_file, + 'host_username': 'user', + 'host_password': 'pass', + } + ) + ) + self.assertDictEqual( + mess_hosts.update_mpi_config(mess_hosts._config), { + **mess_hosts._config, + 'host_pattern': '10.0.0.10', + } + ) + # Test for localhost + with open(self.test_mpi_host_file, 'w') as fd: + fd.write('all:\n hosts:\n localhost:\n') + localhost = AnsibleClient( + OmegaConf.create( + { + 'host_file': self.test_mpi_host_file, + 'host_username': 'user', + 'host_password': 'pass', + } + ) + ) + self.assertDictEqual( + localhost.update_mpi_config(localhost._config), { + **localhost._config, + 'host_pattern': 'localhost', + } + ) + # Test for no host + with open(self.test_mpi_host_file, 'w') as fd: + fd.write('all:\n hosts:\n') + no_hosts = AnsibleClient( + OmegaConf.create( + { + 'host_file': self.test_mpi_host_file, + 'host_username': 'user', + 'host_password': 'pass', + } + ) + ) + self.assertDictEqual( + no_hosts.update_mpi_config(no_hosts._config), { + **no_hosts._config, 'host_pattern': 'all[0]', } ) @@ -71,7 +129,6 @@ class AnsibleClientTestCase(unittest.TestCase): cmd = 'ls -la' self.assertDictEqual( self.ansible_client.get_shell_config(cmd), { - 'private_data_dir': None, 'host_pattern': 'all', 'cmdline': f'--forks 5 --inventory {self.host_file} --user user --ask-pass --ask-become-pass', 'passwords': { @@ -87,7 +144,6 @@ class AnsibleClientTestCase(unittest.TestCase): """Test get_playbook_config of client.""" self.assertDictEqual( self.ansible_client.get_playbook_config('play', {'foo': 'bar'}), { - 'private_data_dir': None, 'host_pattern': 'all', 'cmdline': f'--forks 5 --inventory {self.host_file} --user user --ask-pass --ask-become-pass', 'passwords': { diff --git a/tests/runner/test_runner.py b/tests/runner/test_runner.py index 006bca25..2370d1e8 
100644 --- a/tests/runner/test_runner.py +++ b/tests/runner/test_runner.py @@ -244,37 +244,37 @@ class RunnerTestCase(unittest.TestCase): """Test __merge_monitor_metrics.""" path = Path('tests/data/monitor/') expected = { - 'gpu_temperature:0': 50, - 'gpu_temperature:1': 27, - 'gpu_temperature:2': 24, - 'gpu_temperature:3': 26, - 'gpu_temperature:4': 25, - 'gpu_temperature:5': 25, - 'gpu_temperature:6': 23, - 'gpu_temperature:7': 26, - 'gpu_power_limit:0': 250, - 'gpu_power_limit:1': 200, - 'gpu_power_limit:2': 250, - 'gpu_power_limit:3': 250, - 'gpu_power_limit:4': 250, - 'gpu_power_limit:5': 250, - 'gpu_power_limit:6': 250, - 'gpu_power_limit:7': 250, - 'gpu_corrected_ecc:0': 12, - 'gpu_corrected_ecc:1': 0, - 'gpu_corrected_ecc:2': 0, - 'gpu_corrected_ecc:3': 0, - 'gpu_corrected_ecc:4': 0, - 'gpu_corrected_ecc:5': 0, - 'gpu_corrected_ecc:6': 0, - 'gpu_corrected_ecc:7': 0, - 'gpu_uncorrected_ecc:0': 0, - 'gpu_uncorrected_ecc:1': 0, - 'gpu_uncorrected_ecc:2': 0, - 'gpu_uncorrected_ecc:3': 0, - 'gpu_uncorrected_ecc:4': 0, - 'gpu_uncorrected_ecc:5': 0, - 'gpu_uncorrected_ecc:6': 0, - 'gpu_uncorrected_ecc:7': 0 + 'monitor/gpu_temperature:0': 50, + 'monitor/gpu_temperature:1': 27, + 'monitor/gpu_temperature:2': 24, + 'monitor/gpu_temperature:3': 26, + 'monitor/gpu_temperature:4': 25, + 'monitor/gpu_temperature:5': 25, + 'monitor/gpu_temperature:6': 23, + 'monitor/gpu_temperature:7': 26, + 'monitor/gpu_power_limit:0': 250, + 'monitor/gpu_power_limit:1': 200, + 'monitor/gpu_power_limit:2': 250, + 'monitor/gpu_power_limit:3': 250, + 'monitor/gpu_power_limit:4': 250, + 'monitor/gpu_power_limit:5': 250, + 'monitor/gpu_power_limit:6': 250, + 'monitor/gpu_power_limit:7': 250, + 'monitor/gpu_corrected_ecc:0': 12, + 'monitor/gpu_corrected_ecc:1': 0, + 'monitor/gpu_corrected_ecc:2': 0, + 'monitor/gpu_corrected_ecc:3': 0, + 'monitor/gpu_corrected_ecc:4': 0, + 'monitor/gpu_corrected_ecc:5': 0, + 'monitor/gpu_corrected_ecc:6': 0, + 'monitor/gpu_corrected_ecc:7': 0, + 
'monitor/gpu_uncorrected_ecc:0': 0, + 'monitor/gpu_uncorrected_ecc:1': 0, + 'monitor/gpu_uncorrected_ecc:2': 0, + 'monitor/gpu_uncorrected_ecc:3': 0, + 'monitor/gpu_uncorrected_ecc:4': 0, + 'monitor/gpu_uncorrected_ecc:5': 0, + 'monitor/gpu_uncorrected_ecc:6': 0, + 'monitor/gpu_uncorrected_ecc:7': 0 } self.assertEqual(self.runner._SuperBenchRunner__merge_monitor_metrics(path), expected) diff --git a/third_party/Makefile b/third_party/Makefile index d6dbad75..7fb17491 100755 --- a/third_party/Makefile +++ b/third_party/Makefile @@ -63,7 +63,7 @@ endif # Build FIO from commit d83ac9 (fio-3.28 tag). fio: ifneq (,$(wildcard fio/Makefile)) - cd ./fio && ./configure --prefix=$(SB_MICRO_PATH) && make -j && make install + cd ./fio && ./configure --prefix=$(SB_MICRO_PATH) --disable-native && make -j && make install endif # Build rccl-tests from commit dc1ad48 of develop branch (default branch). diff --git a/website/blog/2021-12-24-release-0-4.md b/website/blog/2021-12-24-release-0-4.md new file mode 100644 index 00000000..f13fba88 --- /dev/null +++ b/website/blog/2021-12-24-release-0-4.md @@ -0,0 +1,58 @@ +--- +slug: release-sb-v0.4 +title: Releasing SuperBench v0.4 +author: Peng Cheng +author_title: SuperBench Team +author_url: https://github.com/cp5555 +author_image_url: https://github.com/cp5555.png +tags: [superbench, announcement, release] +--- + +We are very happy to announce that **SuperBench 0.4.0 version** is officially released today! + +You can install and try superbench by following [Getting Started Tutorial](https://microsoft.github.io/superbenchmark/docs/getting-started/installation). + +## SuperBench 0.4.0 Release Notes + +### SuperBench Framework + +#### Monitor + +- Add monitor framework for NVIDIA GPU, CPU, memory and disk. + +#### Data Diagnosis and Analysis + +- Support baseline-based data diagnosis. +- Support basic analysis feature (boxplot figure, outlier detection, etc.). 
+ +### Single-node Validation + +#### Micro Benchmarks + +- CPU Memory Validation (tool: Intel Memory Latency Checker). +- GPU Copy Bandwidth (tool: built by MSRA). +- Add ORT Model on AMD GPU platform. +- Add inference backend TensorRT. +- Add inference backend ORT. + +### Multi-node Validation + +#### Micro Benchmarks + +- IB Networking validation. +- TCP validation (tool: TCPing). +- GPCNet Validation (tool: GPCNet). + +### Other Improvement + +1. Enhancement + - Add pipeline for AMD docker. + - Integrate system config info script with SuperBench. + - Support FP32 mode without TF32. + - Refine unit test for microbenchmark. + - Unify metric names for all benchmarks. + +2. Document + - Add benchmark list + - Add monitor document + - Add data diagnosis document diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js index 51982f06..7563a1ca 100644 --- a/website/docusaurus.config.js +++ b/website/docusaurus.config.js @@ -101,7 +101,7 @@ module.exports = { announcementBar: { id: 'supportus', content: - '📢 v0.3.0 has been released! ' + + '📢 v0.4.0 has been released! ' + '⭐️ If you like SuperBench, give it a star on GitHub! ⭐️', }, algolia: { diff --git a/website/package-lock.json b/website/package-lock.json index 731bffe4..98d00466 100644 --- a/website/package-lock.json +++ b/website/package-lock.json @@ -1,6 +1,6 @@ { "name": "superbench-website", - "version": "0.3.0", + "version": "0.4.0", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/website/package.json b/website/package.json index d2ed79ae..e24dde82 100644 --- a/website/package.json +++ b/website/package.json @@ -1,6 +1,6 @@ { "name": "superbench-website", - "version": "0.3.0", + "version": "0.4.0", "private": true, "scripts": { "docusaurus": "docusaurus",