Remove command output from being saved into result.json for microbenchmarks

This commit is contained in:
Yuting Jiang 2022-12-28 10:00:24 +00:00
Родитель 6186146d59
Коммит 4801e6b064
24 изменённых файлов: 6 добавлений и 71 удалений

Просмотреть файл

@ -51,8 +51,6 @@ class FAMBenchBenchmark(CudaDockerBenchmark):
Return:
True if the raw output string is valid and result can be extracted.
"""
self._result.add_raw_data('raw_output', raw_output, self._args.log_raw_data)
content = raw_output.splitlines(False)
try:
result_header = 'benchmark implementation mode config score'

Просмотреть файл

@ -59,8 +59,6 @@ class RocmOnnxRuntimeModelBenchmark(RocmDockerBenchmark):
Return:
True if the raw output string is valid and result can be extracted.
"""
self._result.add_raw_data('raw_output', raw_output, self._args.log_raw_data)
content = raw_output.splitlines(False)
try:
name_prefix = '__superbench__ begin '

Просмотреть файл

@ -78,8 +78,6 @@ class CpuMemBwLatencyBenchmark(MicroBenchmarkWithInvoke):
Return:
True if the raw output string is valid and result can be extracted.
"""
self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
# parse the command to see which command this output belongs to
# the command is formed as ...; mlc --option; ...
# option needs to be extracted

Просмотреть файл

@ -272,8 +272,6 @@ class CublasBenchmark(MicroBenchmarkWithInvoke):
Return:
True if the raw output string is valid and result can be extracted.
"""
self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
try:
lines = raw_output.splitlines()
metric = ''

Просмотреть файл

@ -110,7 +110,6 @@ class CudaGemmFlopsBenchmark(GemmFlopsBenchmark):
True if the raw output string is valid and result can be extracted.
"""
precision = self._precision_need_to_run[cmd_idx]
self._result.add_raw_data('raw_output_' + precision, raw_output, self._args.log_raw_data)
valid = True
flops = list()

Просмотреть файл

@ -77,8 +77,6 @@ class CudaMemBwBenchmark(MemBwBenchmark):
Return:
True if the raw output string is valid and result can be extracted.
"""
self._result.add_raw_data('raw_output_' + self._args.mem_type[cmd_idx], raw_output, self._args.log_raw_data)
mem_bw = -1
valid = True
content = raw_output.splitlines()

Просмотреть файл

@ -143,8 +143,6 @@ class CudaNcclBwBenchmark(MicroBenchmarkWithInvoke):
if rank > 0:
return True
self._result.add_raw_data('raw_output_' + self._args.operation, raw_output, self._args.log_raw_data)
content = raw_output.splitlines()
size = -1
busbw_out = -1

Просмотреть файл

@ -406,8 +406,6 @@ class CudnnBenchmark(MicroBenchmarkWithInvoke):
Return:
True if the raw output string is valid and result can be extracted.
"""
self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
try:
lines = raw_output.splitlines()
metric = ''

Просмотреть файл

@ -184,8 +184,6 @@ class DiskBenchmark(MicroBenchmarkWithInvoke):
Return:
True if the raw output string is valid and result can be extracted.
"""
self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
try:
fio_output = json.loads(raw_output)

Просмотреть файл

@ -74,8 +74,6 @@ class GPCNetBenchmark(MicroBenchmarkWithInvoke):
Return:
True if the raw output string is valid and result can be extracted.
"""
self._result.add_raw_data('raw_output_' + str(idx), raw_output, self._args.log_raw_data)
try:
# Parse and add result
if 'ERROR' not in raw_output:

Просмотреть файл

@ -5,7 +5,7 @@
import os
from superbench.common.utils import logger
from superbench.common.utils import logger, stdout_logger
from superbench.benchmarks import BenchmarkRegistry, Platform
from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke
@ -123,9 +123,9 @@ class GpuBurnBenchmark(MicroBenchmarkWithInvoke):
self._result.add_result(res.split(':')[0].replace(' ', '_').lower() + '_pass', 1)
else:
self._result.add_result(res.split(':')[0].replace(' ', '_').lower() + '_pass', 0)
self._result.add_raw_data('GPU-Burn_result', res, self._args.log_raw_data)
stdout_logger.log('GPU-Burn_result' + res)
else:
self._result.add_raw_data('GPU Burn Failure: ', failure_msg, self._args.log_raw_data)
stdout_logger.log('GPU Burn Failure: ' + failure_msg)
self._result.add_result('abort', 1)
return False
except BaseException as e:

Просмотреть файл

@ -122,8 +122,6 @@ class GpuCopyBwBenchmark(MicroBenchmarkWithInvoke):
Return:
True if the raw output string is valid and result can be extracted.
"""
self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
try:
output_lines = [x.strip() for x in raw_output.strip().splitlines()]
for output_line in output_lines:

Просмотреть файл

@ -197,11 +197,6 @@ class IBLoopbackBenchmark(MicroBenchmarkWithInvoke):
Return:
True if the raw output string is valid and result can be extracted.
"""
self._result.add_raw_data(
'raw_output_' + self._args.commands[cmd_idx] + '_IB' + str(self._args.ib_index), raw_output,
self._args.log_raw_data
)
valid = False
content = raw_output.splitlines()

Просмотреть файл

@ -367,8 +367,6 @@ class IBBenchmark(MicroBenchmarkWithInvoke):
Return:
True if the raw output string is valid and result can be extracted.
"""
self._result.add_raw_data('raw_output_' + self._args.command, raw_output, self._args.log_raw_data)
# If it's invoked by MPI and rank is not 0, no result is expected
if os.getenv('OMPI_COMM_WORLD_RANK'):
rank = int(os.getenv('OMPI_COMM_WORLD_RANK'))

Просмотреть файл

@ -79,8 +79,6 @@ class KernelLaunch(MicroBenchmarkWithInvoke):
Return:
True if the raw output string is valid and result can be extracted.
"""
self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
pattern = r'\d+\.\d+'
result = re.findall(pattern, raw_output)
if len(result) != 2:

Просмотреть файл

@ -127,7 +127,6 @@ class RocmGemmFlopsBenchmark(GemmFlopsBenchmark):
True if the raw output string is valid and result can be extracted.
"""
precision = self._precision_need_to_run[cmd_idx]
self._result.add_raw_data('raw_output_' + precision, raw_output, self._args.log_raw_data)
content = raw_output.splitlines()
gflops_index = None

Просмотреть файл

@ -60,8 +60,6 @@ class RocmMemBwBenchmark(MemBwBenchmark):
Return:
True if the raw output string is valid and result can be extracted.
"""
self._result.add_raw_data('raw_output_' + self._args.mem_type[cmd_idx], raw_output, self._args.log_raw_data)
mem_bw = -1
value_index = -1
valid = True

Просмотреть файл

@ -6,7 +6,7 @@
import tcping
from joblib import Parallel, delayed
from superbench.common.utils import logger
from superbench.common.utils import logger, stdout_logger
from superbench.benchmarks import BenchmarkRegistry, ReturnCode
from superbench.benchmarks.micro_benchmarks import MicroBenchmark
@ -135,6 +135,8 @@ class TCPConnectivityBenchmark(MicroBenchmark):
# Parse the output and get the results
for host_index, out in enumerate(outputs):
stdout_logger.log(f'Host {self.__hosts[host_index]}\n')
stdout_logger.log(out)
if not self._process_raw_result(host_index, out):
self._result.set_return_code(ReturnCode.MICROBENCHMARK_RESULT_PARSING_FAILURE)
return False
@ -154,7 +156,6 @@ class TCPConnectivityBenchmark(MicroBenchmark):
True if the raw output string is valid and result can be extracted.
"""
host = self.__hosts[idx]
self._result.add_raw_data('raw_output_' + host, raw_output, self._args.log_raw_data)
try:
# If socket error or exception happens on TCPing, add result values as failed

Просмотреть файл

@ -127,10 +127,6 @@ class TensorRTInferenceBenchmark(MicroBenchmarkWithInvoke):
Return:
True if the raw output string is valid and result can be extracted.
"""
self._result.add_raw_data(
f'raw_output_{self._args.pytorch_models[cmd_idx]}', raw_output, self._args.log_raw_data
)
success = False
try:
model = self._args.pytorch_models[cmd_idx]

Просмотреть файл

@ -83,8 +83,6 @@ Numa node 0 1
"""
assert (benchmark._process_raw_result(0, test_raw_output))
assert (benchmark.return_code == ReturnCode.SUCCESS)
assert ('raw_output_0' in benchmark.raw_data)
assert ([test_raw_output] == benchmark.raw_data['raw_output_0'])
assert ([82542.2] == benchmark.result['mem_bandwidth_matrix_numa_0_0_bw'])
assert ([76679.9] == benchmark.result['mem_bandwidth_matrix_numa_0_1_bw'])
assert ([76536.0] == benchmark.result['mem_bandwidth_matrix_numa_1_0_bw'])
@ -106,9 +104,6 @@ Numa node 0 1
"""
assert (benchmark._process_raw_result(1, test_raw_output))
assert (benchmark.return_code == ReturnCode.SUCCESS)
assert ('raw_output_1' in benchmark.raw_data)
assert ([test_raw_output] == benchmark.raw_data['raw_output_1'])
assert ([87.0] == benchmark.result['mem_latency_matrix_numa_0_0_lat'])
assert ([101.0] == benchmark.result['mem_latency_matrix_numa_0_1_lat'])
assert ([101.9] == benchmark.result['mem_latency_matrix_numa_1_0_lat'])
@ -137,8 +132,6 @@ Stream-triad like: 157878.32
"""
assert (benchmark._process_raw_result(2, test_raw_output))
assert (benchmark.return_code == ReturnCode.SUCCESS)
assert ('raw_output_2' in benchmark.raw_data)
assert ([test_raw_output] == benchmark.raw_data['raw_output_2'])
assert ([165400.60] == benchmark.result['mem_max_bandwidth_all_reads_bw'])
assert ([154975.19] == benchmark.result['mem_max_bandwidth_3_1_reads-writes_bw'])
assert ([158433.32] == benchmark.result['mem_max_bandwidth_2_1_reads-writes_bw'])

Просмотреть файл

@ -34,9 +34,6 @@ def test_cublas_functions():
# Check results and metrics.
assert (benchmark.run_count == 1)
assert (benchmark.return_code == ReturnCode.SUCCESS)
assert ('raw_output_0' in benchmark.raw_data)
assert (len(benchmark.raw_data['raw_output_0']) == 1)
assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))
assert (19 <= len(benchmark.result))
for metric in list(benchmark.result.keys()):
@ -70,9 +67,6 @@ def test_cublas_functions():
# Check results and metrics.
assert (benchmark.run_count == 1)
assert (benchmark.return_code == ReturnCode.SUCCESS)
assert ('raw_output_0' in benchmark.raw_data)
assert (len(benchmark.raw_data['raw_output_0']) == 1)
assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))
assert (1 + benchmark.default_metric_count == len(benchmark.result))
for metric in list(benchmark.result.keys()):
@ -108,9 +102,6 @@ def test_cublas_functions():
# Check results and metrics.
assert (benchmark.run_count == 1)
assert (benchmark.return_code == ReturnCode.SUCCESS)
assert ('raw_output_0' in benchmark.raw_data)
assert (len(benchmark.raw_data['raw_output_0']) == 1)
assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))
assert (2 + benchmark.default_metric_count == len(benchmark.result))
for metric in list(benchmark.result.keys()):

Просмотреть файл

@ -34,9 +34,6 @@ def test_cudnn_functions():
# Check results and metrics.
assert (benchmark.run_count == 1)
assert (benchmark.return_code == ReturnCode.SUCCESS)
assert ('raw_output_0' in benchmark.raw_data)
assert (len(benchmark.raw_data['raw_output_0']) == 1)
assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))
assert (18 <= len(benchmark.result))
for metric in list(benchmark.result.keys()):
@ -74,9 +71,6 @@ def test_cudnn_functions():
# Check results and metrics.
assert (benchmark.run_count == 1)
assert (benchmark.return_code == ReturnCode.SUCCESS)
assert ('raw_output_0' in benchmark.raw_data)
assert (len(benchmark.raw_data['raw_output_0']) == 1)
assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))
assert (1 + benchmark.default_metric_count == len(benchmark.result))
for metric in list(benchmark.result.keys()):
@ -116,9 +110,6 @@ def test_cudnn_functions():
# Check results and metrics.
assert (benchmark.run_count == 1)
assert (benchmark.return_code == ReturnCode.SUCCESS)
assert ('raw_output_0' in benchmark.raw_data)
assert (len(benchmark.raw_data['raw_output_0']) == 1)
assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))
assert (2 + benchmark.default_metric_count == len(benchmark.result))
for metric in list(benchmark.result.keys()):

Просмотреть файл

@ -97,7 +97,6 @@ class GpuCopyBwBenchmarkTest(BenchmarkTestCase, unittest.TestCase):
assert (benchmark._process_raw_result(0, test_raw_output))
assert (benchmark.return_code == ReturnCode.SUCCESS)
assert (1 == len(benchmark.raw_data))
print(test_raw_output.splitlines())
test_raw_output_dict = {x.split()[0]: float(x.split()[1]) for x in test_raw_output.strip().splitlines()}
assert (len(test_raw_output_dict) + benchmark.default_metric_count == len(benchmark.result))

Просмотреть файл

@ -33,9 +33,6 @@ def test_kernel_launch_overhead():
# Check results and metrics.
assert (benchmark.run_count == 1)
assert (benchmark.return_code == ReturnCode.SUCCESS)
assert ('raw_output_0' in benchmark.raw_data)
assert (len(benchmark.raw_data['raw_output_0']) == 1)
assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))
for metric in ['event_time', 'wall_time']:
assert (metric in benchmark.result)
assert (len(benchmark.result[metric]) == 1)