Benchmarks: Add Benchmark - Add mlc benchmark to superbench (#216)
**Description** Add mlc memory bandwidth and latency micro benchmark to Superbench. **Major Revision** - Add mlc benchmark with test and example files
This commit is contained in:
Parent
c403b1ca76
Commit
b590409e0f
|
@ -12,6 +12,8 @@ FROM nvcr.io/nvidia/pytorch:20.12-py3
|
|||
# - OFED: 5.2-2.2.3.0
|
||||
# - HPC-X: v2.8.3
|
||||
# - NCCL RDMA SHARP plugins: 7cccbc1
|
||||
# Intel:
|
||||
# - mlc: v3.9a
|
||||
|
||||
LABEL maintainer="SuperBench"
|
||||
|
||||
|
@ -97,6 +99,16 @@ RUN cd /tmp && \
|
|||
cd /tmp && \
|
||||
rm -rf nccl
|
||||
|
||||
# Install Intel MLC (Memory Latency Checker) v3.9a into /usr/local/bin.
# NOTE(review): the browser-like user-agent is presumably needed because the
# intel.com download endpoint rejects wget's default user-agent — verify.
RUN cd /tmp && \
    mkdir -p mlc && \
    cd mlc && \
    wget --user-agent="Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0" https://www.intel.com/content/dam/develop/external/us/en/documents/mlc_v3.9a.tgz && \
    tar xvf mlc_v3.9a.tgz && \
    cp ./Linux/mlc /usr/local/bin/ && \
    cd /tmp && \
    rm -rf mlc
|
||||
|
||||
ENV PATH="${PATH}" \
|
||||
LD_LIBRARY_PATH="/usr/local/lib:${LD_LIBRARY_PATH}" \
|
||||
SB_HOME="/opt/superbench" \
|
||||
|
|
|
@ -11,6 +11,8 @@ FROM rocm/pytorch:rocm4.0_ubuntu18.04_py3.6_pytorch_1.7.0
|
|||
# - RCCL: 2.7.8
|
||||
# Mellanox:
|
||||
# - OFED: 5.2-2.2.3.0
|
||||
# Intel:
|
||||
# - mlc: v3.9a
|
||||
|
||||
LABEL maintainer="SuperBench"
|
||||
|
||||
|
@ -88,6 +90,16 @@ RUN cd /opt && \
|
|||
ln -s hpcx-v2.8.3-gcc-MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu18.04-x86_64 hpcx && \
|
||||
rm hpcx-v2.8.3-gcc-MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu18.04-x86_64.tbz
|
||||
|
||||
# Install Intel MLC (Memory Latency Checker) v3.9a into /usr/local/bin.
# NOTE(review): the browser-like user-agent is presumably needed because the
# intel.com download endpoint rejects wget's default user-agent — verify.
RUN cd /tmp && \
    mkdir -p mlc && \
    cd mlc && \
    wget --user-agent="Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0" https://www.intel.com/content/dam/develop/external/us/en/documents/mlc_v3.9a.tgz && \
    tar xvf mlc_v3.9a.tgz && \
    cp ./Linux/mlc /usr/local/bin/ && \
    cd /tmp && \
    rm -rf mlc
|
||||
|
||||
ENV PATH="${PATH}" \
|
||||
LD_LIBRARY_PATH="/usr/local/lib:${LD_LIBRARY_PATH}" \
|
||||
SB_HOME="/opt/superbench" \
|
||||
|
|
|
@ -11,6 +11,8 @@ FROM rocm/pytorch:rocm4.2_ubuntu18.04_py3.6_pytorch_1.7.0
|
|||
# - RCCL: 2.8.4
|
||||
# Mellanox:
|
||||
# - OFED: 5.2-2.2.3.0
|
||||
# Intel:
|
||||
# - mlc: v3.9a
|
||||
|
||||
LABEL maintainer="SuperBench"
|
||||
|
||||
|
@ -88,6 +90,16 @@ RUN cd /opt && \
|
|||
ln -s hpcx-v2.8.3-gcc-MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu18.04-x86_64 hpcx && \
|
||||
rm hpcx-v2.8.3-gcc-MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu18.04-x86_64.tbz
|
||||
|
||||
# Install Intel MLC (Memory Latency Checker) v3.9a into /usr/local/bin.
# NOTE(review): the browser-like user-agent is presumably needed because the
# intel.com download endpoint rejects wget's default user-agent — verify.
RUN cd /tmp && \
    mkdir -p mlc && \
    cd mlc && \
    wget --user-agent="Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0" https://www.intel.com/content/dam/develop/external/us/en/documents/mlc_v3.9a.tgz && \
    tar xvf mlc_v3.9a.tgz && \
    cp ./Linux/mlc /usr/local/bin/ && \
    cd /tmp && \
    rm -rf mlc
|
||||
|
||||
ENV PATH="${PATH}" \
|
||||
LD_LIBRARY_PATH="/usr/local/lib:${LD_LIBRARY_PATH}" \
|
||||
SB_HOME="/opt/superbench" \
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""Micro benchmark example for CPU memory bandwidth and latency performance.

Commands to run:
  python3 examples/benchmarks/memory_bw_latency_performance.py
"""

from superbench.benchmarks import BenchmarkRegistry, Platform
from superbench.common.utils import logger

if __name__ == '__main__':
    # Build a context that runs the cpu-memory-bw-latency benchmark with all
    # three supported MLC test modes.
    context = BenchmarkRegistry.create_benchmark_context(
        'cpu-memory-bw-latency',
        platform=Platform.CPU,
        parameters='--tests bandwidth_matrix latency_matrix max_bandwidth'
    )

    # launch_benchmark may return a falsy value when the run fails; only log
    # results on success.
    benchmark = BenchmarkRegistry.launch_benchmark(context)
    if benchmark:
        logger.info(
            'benchmark: {}, return code: {}, result: {}'.format(
                benchmark.name, benchmark.return_code, benchmark.result
            )
        )
|
|
@ -14,6 +14,7 @@ from superbench.benchmarks.micro_benchmarks.cuda_memory_bw_performance import Cu
|
|||
from superbench.benchmarks.micro_benchmarks.cuda_nccl_bw_performance import CudaNcclBwBenchmark
|
||||
from superbench.benchmarks.micro_benchmarks.cudnn_function import CudnnBenchmark
|
||||
from superbench.benchmarks.micro_benchmarks.disk_performance import DiskBenchmark
|
||||
from superbench.benchmarks.micro_benchmarks.cpu_memory_bw_latency_performance import CpuMemBwLatencyBenchmark
|
||||
from superbench.benchmarks.micro_benchmarks.gpcnet_performance import GPCNetBenchmark
|
||||
from superbench.benchmarks.micro_benchmarks.gpu_copy_bw_performance import GpuCopyBwBenchmark
|
||||
from superbench.benchmarks.micro_benchmarks.ib_loopback_performance import IBLoopbackBenchmark
|
||||
|
@ -34,6 +35,7 @@ __all__ = [
|
|||
'CudaNcclBwBenchmark',
|
||||
'CudnnBenchmark',
|
||||
'DiskBenchmark',
|
||||
'CpuMemBwLatencyBenchmark',
|
||||
'GPCNetBenchmark',
|
||||
'GemmFlopsBenchmark',
|
||||
'GpuCopyBwBenchmark',
|
||||
|
|
|
@ -0,0 +1,150 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
"""Module for running the Intel MLC tool to measure memory bandwidth and latency."""
|
||||
|
||||
import os
|
||||
|
||||
from superbench.common.utils import logger
|
||||
from superbench.benchmarks import BenchmarkRegistry, ReturnCode
|
||||
from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke
|
||||
|
||||
|
||||
class CpuMemBwLatencyBenchmark(MicroBenchmarkWithInvoke):
    """The Memory bandwidth and latency benchmark class.

    Invokes the Intel MLC (Memory Latency Checker) tool and parses its tabular
    output into per-NUMA-node bandwidth/latency metrics.
    """
    def __init__(self, name, parameters=''):
        """Constructor.

        Args:
            name (str): benchmark name.
            parameters (str): benchmark parameters.
        """
        super().__init__(name, parameters)

        # Executable name resolved under self._args.bin_dir in _preprocess().
        self._bin_name = 'mlc'
        # The MLC sub-commands this benchmark supports (valid --tests values).
        self.__support_mlc_commands = ['bandwidth_matrix', 'latency_matrix', 'max_bandwidth']

    def add_parser_arguments(self):
        """Add the specified arguments."""
        super().add_parser_arguments()

        self._parser.add_argument(
            '--tests',
            type=str,
            nargs='+',
            default=['bandwidth_matrix'],
            required=False,
            help='The modes to run mlc with. Possible values are {}.'.format(' '.join(self.__support_mlc_commands))
        )

    def _preprocess(self):
        """Preprocess/preparation operations before the benchmarking.

        Return:
            True if _preprocess() succeed.
        """
        if not super()._preprocess():
            return False

        mlc_path = os.path.join(self._args.bin_dir, self._bin_name)
        ret_val = os.access(mlc_path, os.X_OK | os.F_OK)
        if not ret_val:
            logger.error(
                'Executable {} not found in {} or it is not executable'.format(self._bin_name, self._args.bin_dir)
            )
            return False

        # the mlc command requires hugepages to be enabled; the shell wrapper
        # saves the current nr_hugepages value, raises it to 4000 for the run,
        # restores the original value afterwards, and preserves mlc's exit
        # status via the trailing `(exit $err)`. The bare '%s;' placeholder is
        # filled with the actual mlc command in the loop below.
        mlc_wrapper = ' '.join(
            [
                'nr_hugepages=`cat /proc/sys/vm/nr_hugepages`;', 'echo 4000 > /proc/sys/vm/nr_hugepages;', '%s;',
                'err=$?;', 'echo ${nr_hugepages} > /proc/sys/vm/nr_hugepages;', '(exit $err)'
            ]
        )
        for test in self._args.tests:
            command = mlc_path + ' --%s' % test
            self._commands.append(mlc_wrapper % command)
        return True

    def _process_raw_result(self, cmd_idx, raw_output):
        """Function to parse raw results and save the summarized results.

        self._result.add_raw_data() and self._result.add_result() need to be called to save the results.

        Args:
            cmd_idx (int): the index of command corresponding with the raw_output.
            raw_output (str): raw output string of the micro-benchmark.

        Return:
            True if the raw output string is valid and result can be extracted.
        """
        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)

        # parse the command to see which command this output belongs to
        # the command is formed as ...; mlc --option; ...
        # option needs to be extracted
        if '--' in self._commands[cmd_idx]:
            mlc_test = self._commands[cmd_idx].split('--')[1]
        else:
            logger.error('The command {} is not well formed and missing --'.format(self._commands[cmd_idx]))
            return False
        mlc_test = mlc_test.split(';')[0]
        if 'max_bandwidth' in mlc_test:
            measure = 'BW'
            out_table = self._parse_max_bw(raw_output)
        elif 'bandwidth_matrix' in mlc_test:
            measure = 'BW'
            out_table = self._parse_bw_latency(raw_output)
        elif 'latency_matrix' in mlc_test:
            measure = 'Latency'
            out_table = self._parse_bw_latency(raw_output)
        else:
            logger.error('Invalid option {} to run the {} command'.format(mlc_test, self._commands[cmd_idx]))
            return False
        if len(out_table) == 0:
            self._result.set_return_code(ReturnCode.MICROBENCHMARK_RESULT_PARSING_FAILURE)
            logger.error(
                'The result format is invalid - round: {}, benchmark: {}, raw output: {}.'.format(
                    self._curr_run_index, self._name, raw_output
                )
            )
            return False
        # Flatten the parsed table into metrics such as
        # 'Mem_bandwidth_matrix_numa_0_1_BW' or 'Mem_max_bandwidth_ALL_Reads_BW'.
        for key in out_table.keys():
            for index in range(len(out_table[key])):
                if 'max_bandwidth' in mlc_test:
                    # max_bandwidth rows hold a single value, so omit the index.
                    metric = 'Mem_{}_{}_{}'.format(mlc_test, key, measure)
                else:
                    metric = 'Mem_{}_{}_{}_{}'.format(mlc_test, key, str(index), measure)
                self._result.add_result(metric, float(out_table[key][index]))
        return True

    def _parse_bw_latency(self, raw_output):
        """Parse the NUMA matrix output of `mlc --bandwidth_matrix` / `--latency_matrix`.

        Args:
            raw_output (str): raw output of the mlc command.

        Return:
            dict: maps 'numa_<i>' to the list of per-target-node values (strings).
        """
        out_table = dict()
        for line in raw_output.splitlines():
            if line.strip() == '':
                continue
            # only lines starting with a digit is of interest
            if line.lstrip()[0].isdigit():
                vals = line.split()
                if len(vals) < 2:
                    continue
                numa_index = 'numa_%s' % vals[0]
                out_table[numa_index] = vals[1:]
        return out_table

    def _parse_max_bw(self, raw_output):
        """Parse the output of `mlc --max_bandwidth`.

        Args:
            raw_output (str): raw output of the mlc command.

        Return:
            dict: maps read/write-ratio labels (e.g. 'ALL_Reads') to a
                single-element list with the measured bandwidth (string).
        """
        out_table = dict()
        # the very last line is empty and only the last 5 lines of the output are of interest
        for line in raw_output.splitlines()[-6:]:
            if line.strip() == '':
                continue
            vals = line.split()
            if len(vals) < 2:
                continue
            # Build a metric-safe key from the first two tokens, e.g.
            # 'ALL Reads :' -> 'ALL_Reads', '3:1 Reads-Writes' -> '3_1_Reads-Writes'.
            key = '_'.join(vals[0:2]).rstrip(':').replace(':', '_')
            # making a list to be consistent with the _parse_bw_latency output
            out_table[key] = [vals[-1]]
        return out_table


# Register under the same name used in configs and the example script.
BenchmarkRegistry.register_benchmark('cpu-memory-bw-latency', CpuMemBwLatencyBenchmark)
|
|
@ -47,6 +47,17 @@ superbench:
|
|||
maxbytes: 8G
|
||||
ngpus: 8
|
||||
operation: allreduce
|
||||
cpu-memory-bw-latency:
|
||||
enable: false
|
||||
modes:
|
||||
- name: local
|
||||
proc_num: 1
|
||||
parallel: no
|
||||
parameters:
|
||||
tests:
|
||||
- bandwidth_matrix
|
||||
- latency_matrix
|
||||
- max_bandwidth
|
||||
mem-bw:
|
||||
enable: true
|
||||
modes:
|
||||
|
|
|
@ -48,6 +48,17 @@ superbench:
|
|||
maxbytes: 8G
|
||||
ngpus: 8
|
||||
operation: allreduce
|
||||
cpu-memory-bw-latency:
|
||||
enable: false
|
||||
modes:
|
||||
- name: local
|
||||
proc_num: 1
|
||||
parallel: no
|
||||
parameters:
|
||||
tests:
|
||||
- bandwidth_matrix
|
||||
- latency_matrix
|
||||
- max_bandwidth
|
||||
mem-bw:
|
||||
enable: true
|
||||
modes:
|
||||
|
|
|
@ -58,6 +58,17 @@ superbench:
|
|||
proc_num: 4
|
||||
prefix: PROC_RANK={proc_rank} IB_DEVICES=1,3,5,7 NUMA_NODES=1,0,3,2
|
||||
parallel: yes
|
||||
cpu-memory-bw-latency:
|
||||
enable: false
|
||||
modes:
|
||||
- name: local
|
||||
proc_num: 1
|
||||
parallel: no
|
||||
parameters:
|
||||
tests:
|
||||
- bandwidth_matrix
|
||||
- latency_matrix
|
||||
- max_bandwidth
|
||||
mem-bw:
|
||||
enable: true
|
||||
modes:
|
||||
|
|
|
@ -58,6 +58,17 @@ superbench:
|
|||
proc_num: 4
|
||||
prefix: PROC_RANK={proc_rank} IB_DEVICES=1,3,5,7 NUMA_NODES=1,0,3,2
|
||||
parallel: yes
|
||||
cpu-memory-bw-latency:
|
||||
enable: false
|
||||
modes:
|
||||
- name: local
|
||||
proc_num: 1
|
||||
parallel: no
|
||||
parameters:
|
||||
tests:
|
||||
- bandwidth_matrix
|
||||
- latency_matrix
|
||||
- max_bandwidth
|
||||
mem-bw:
|
||||
enable: true
|
||||
modes:
|
||||
|
|
|
@ -60,6 +60,17 @@ superbench:
|
|||
parameters:
|
||||
block_devices:
|
||||
- /dev/nvme0n1
|
||||
cpu-memory-bw-latency:
|
||||
enable: false
|
||||
modes:
|
||||
- name: local
|
||||
proc_num: 1
|
||||
parallel: no
|
||||
parameters:
|
||||
tests:
|
||||
- bandwidth_matrix
|
||||
- latency_matrix
|
||||
- max_bandwidth
|
||||
mem-bw:
|
||||
enable: true
|
||||
modes:
|
||||
|
|
|
@ -0,0 +1,159 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
"""Tests for cpu-memory-bw-latency benchmark."""
|
||||
|
||||
from pathlib import Path
|
||||
import os
|
||||
import unittest
|
||||
|
||||
from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform
|
||||
|
||||
|
||||
class CpuMemBwLatencyBenchmarkTest(unittest.TestCase):
    """Test class for cpu-memory-bw-latency benchmark."""
    def setUp(self):
        """Method called to prepare the test fixture."""
        # Create fake binary file just for testing.
        self.__curr_micro_path = os.environ.get('SB_MICRO_PATH', '')
        os.environ['SB_MICRO_PATH'] = '/tmp/superbench/'
        binary_path = Path(os.getenv('SB_MICRO_PATH'), 'bin')
        binary_path.mkdir(parents=True, exist_ok=True)
        self.__binary_file = binary_path / 'mlc'
        # Executable mode bit so the benchmark's os.access(X_OK) check passes.
        self.__binary_file.touch(mode=0o755, exist_ok=True)

    def tearDown(self):
        """Method called after the test method has been called and the result recorded."""
        self.__binary_file.unlink()
        os.environ['SB_MICRO_PATH'] = self.__curr_micro_path

    def test_cpu_mem_bw_latency_benchmark_empty_param(self):
        """Test cpu-memory-bw-latency benchmark command generation with empty parameter."""
        benchmark_name = 'cpu-memory-bw-latency'
        (benchmark_class,
         predefine_params) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(benchmark_name, Platform.CPU)
        assert (benchmark_class)

        # With no --tests argument the benchmark defaults to bandwidth_matrix.
        default_mlc_test = 'bandwidth_matrix'
        benchmark = benchmark_class(benchmark_name, parameters='')

        # Check basic information
        assert (benchmark)
        ret = benchmark._preprocess()
        assert (ret is True)
        assert (benchmark.return_code == ReturnCode.SUCCESS)
        assert (benchmark.name == 'cpu-memory-bw-latency')
        assert (benchmark.type == BenchmarkType.MICRO)

        # Check commands
        assert (1 == len(benchmark._commands))
        assert ('mlc --%s;' % default_mlc_test in benchmark._commands[0])

    def test_cpu_mem_bw_latency_benchmark_result_parsing(self):
        """Test cpu-memory-bw-latency benchmark result parsing."""
        benchmark_name = 'cpu-memory-bw-latency'
        (benchmark_class,
         predefine_params) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(benchmark_name, Platform.CPU)
        assert (benchmark_class)

        all_mlc_tests = ['bandwidth_matrix', 'latency_matrix', 'max_bandwidth']
        param_str = '--tests %s' % ' '.join(all_mlc_tests)
        benchmark = benchmark_class(benchmark_name, parameters=param_str)

        # Check basic information
        assert (benchmark)
        ret = benchmark._preprocess()
        assert (ret is True)
        assert (benchmark.return_code == ReturnCode.SUCCESS)
        assert (benchmark.name == 'cpu-memory-bw-latency')
        assert (benchmark.type == BenchmarkType.MICRO)

        # Check commands - one command generated per requested test mode.
        assert (len(all_mlc_tests) == len(benchmark._commands))
        for mlc_test, command in zip(all_mlc_tests, benchmark._commands):
            assert ('mlc --%s;' % mlc_test in command)

        # Positive case - valid bandwidth matrix output.
        test_raw_output = """
Intel(R) Memory Latency Checker - v3.9a
Command line parameters: --bandwidth_matrix

Using buffer size of 100.000MiB/thread for reads and an additional 100.000MiB/thread for writes
*** Unable to modify prefetchers (try executing 'modprobe msr')
*** So, enabling random access for latency measurements
Measuring Memory Bandwidths between nodes within system
Bandwidths are in MB/sec (1 MB/sec = 1,000,000 Bytes/sec)
Using all the threads from each core if Hyper-threading is enabled
Using Read-only traffic type
Numa node
Numa node 0 1
0 82542.2 76679.9
1 76536.0 82986.5
"""
        assert (benchmark._process_raw_result(0, test_raw_output))
        assert (benchmark.return_code == ReturnCode.SUCCESS)
        assert ('raw_output_0' in benchmark.raw_data)
        assert ([test_raw_output] == benchmark.raw_data['raw_output_0'])
        assert ([82542.2] == benchmark.result['Mem_bandwidth_matrix_numa_0_0_BW'])
        assert ([76679.9] == benchmark.result['Mem_bandwidth_matrix_numa_0_1_BW'])
        assert ([76536.0] == benchmark.result['Mem_bandwidth_matrix_numa_1_0_BW'])
        assert ([82986.5] == benchmark.result['Mem_bandwidth_matrix_numa_1_1_BW'])

        # Positive case - valid latency matrix output.
        test_raw_output = """
Intel(R) Memory Latency Checker - v3.9a
Command line parameters: --latency_matrix

Using buffer size of 600.000MiB
*** Unable to modify prefetchers (try executing 'modprobe msr')
*** So, enabling random access for latency measurements
Measuring idle latencies (in ns)...
Numa node
Numa node 0 1
0 87.0 101.0
1 101.9 86.9
"""
        assert (benchmark._process_raw_result(1, test_raw_output))
        assert (benchmark.return_code == ReturnCode.SUCCESS)
        assert ('raw_output_1' in benchmark.raw_data)
        assert ([test_raw_output] == benchmark.raw_data['raw_output_1'])

        assert ([87.0] == benchmark.result['Mem_latency_matrix_numa_0_0_Latency'])
        assert ([101.0] == benchmark.result['Mem_latency_matrix_numa_0_1_Latency'])
        assert ([101.9] == benchmark.result['Mem_latency_matrix_numa_1_0_Latency'])
        assert ([86.9] == benchmark.result['Mem_latency_matrix_numa_1_1_Latency'])

        # Positive case - valid max bandwidth output.
        # NOTE: the trailing blank line is significant - the parser keys off
        # the last 6 lines of the output.
        test_raw_output = """
Intel(R) Memory Latency Checker - v3.9a
Command line parameters: --max_bandwidth

Using buffer size of 100.000MiB/thread for reads and an additional 100.000MiB/thread for writes
*** Unable to modify prefetchers (try executing 'modprobe msr')
*** So, enabling random access for latency measurements

Measuring Maximum Memory Bandwidths for the system
Will take several minutes to complete as multiple injection rates will be tried to get the best bandwidth
Bandwidths are in MB/sec (1 MB/sec = 1,000,000 Bytes/sec)
Using all the threads from each core if Hyper-threading is enabled
Using traffic with the following read-write ratios
ALL Reads : 165400.60
3:1 Reads-Writes : 154975.19
2:1 Reads-Writes : 158433.32
1:1 Reads-Writes : 157352.05
Stream-triad like: 157878.32

"""
        assert (benchmark._process_raw_result(2, test_raw_output))
        assert (benchmark.return_code == ReturnCode.SUCCESS)
        assert ('raw_output_2' in benchmark.raw_data)
        assert ([test_raw_output] == benchmark.raw_data['raw_output_2'])
        assert ([165400.60] == benchmark.result['Mem_max_bandwidth_ALL_Reads_BW'])
        assert ([154975.19] == benchmark.result['Mem_max_bandwidth_3_1_Reads-Writes_BW'])
        assert ([158433.32] == benchmark.result['Mem_max_bandwidth_2_1_Reads-Writes_BW'])
        assert ([157352.05] == benchmark.result['Mem_max_bandwidth_1_1_Reads-Writes_BW'])
        assert ([157878.32] == benchmark.result['Mem_max_bandwidth_Stream-triad_like_BW'])

        # Negative case - invalid raw output.
        assert (benchmark._process_raw_result(0, 'Invalid raw output') is False)
        assert (benchmark.return_code == ReturnCode.MICROBENCHMARK_RESULT_PARSING_FAILURE)
|
Loading…
Reference in a new issue