diff --git a/superbench/analyzer/__init__.py b/superbench/analyzer/__init__.py
index e9e74917..f4e27944 100644
--- a/superbench/analyzer/__init__.py
+++ b/superbench/analyzer/__init__.py
@@ -6,5 +6,7 @@
from superbench.analyzer.rule_base import RuleBase
from superbench.analyzer.data_diagnosis import DataDiagnosis
from superbench.analyzer.diagnosis_rule_op import RuleOp, DiagnosisRuleType
+from superbench.analyzer.summary_op import SummaryOp, SummaryType
+from superbench.analyzer.result_summary import ResultSummary
-__all__ = ['DataDiagnosis', 'DiagnosisRuleType', 'RuleOp', 'RuleBase']
+__all__ = ['DataDiagnosis', 'DiagnosisRuleType', 'RuleOp', 'RuleBase', 'SummaryOp', 'SummaryType', 'ResultSummary']
diff --git a/superbench/analyzer/data_analysis.py b/superbench/analyzer/data_analysis.py
index 7c4113bd..d7ac40f1 100644
--- a/superbench/analyzer/data_analysis.py
+++ b/superbench/analyzer/data_analysis.py
@@ -7,6 +7,7 @@ import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
+import re
from superbench.common.utils import logger
@@ -210,3 +211,42 @@ def round_significant_decimal_places(df, digit, cols):
lambda x: float(format_significant_str % x) if abs(x) < 1 else round(x, digit), na_action='ignore'
)
return df
+
+
+def aggregate(raw_data_df, pattern=None):
+ r"""Aggregate data of multiple ranks or multiple devices.
+
+ By default, aggregate results of multiple ranks like 'metric:\\d+' for most metrics.
+ For example, aggregate the results of kernel-launch overhead
+ from 8 GPU devices into one collection.
+ If pattern is given, use pattern to match metric and replace matched part in metric to *
+ to generate a aggregated metric name and then aggpregate these metrics' data.
+
+ Args:
+ raw_data_df (DataFrame): raw data
+
+ Returns:
+ DataFrame: the dataframe of aggregated data
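+
+    Example (illustrative; mirrors the unit test in tests/analyzer/test_data_analysis.py):
+        >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['a:0', 'a:1'])
+        >>> aggregate(df)['a'].tolist()
+        [1, 3, 2, 4]
+        >>> df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=['ib_1_a', 'ib_2_a', 'ib_1_b', 'ib_2_b'])
+        >>> sorted(aggregate(df, pattern='ib_(.)_.').columns)
+        ['ib_*_a', 'ib_*_b']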
+ """
+ try:
+ metric_store = {}
+ metrics = list(raw_data_df.columns)
+ for metric in metrics:
+            # by default, strip a trailing rank suffix like ':0' to get the aggregated metric name
+            short = re.sub(r':\d+$', '', metric)
+ if pattern:
+ match = re.search(pattern, metric)
+ if match:
+ metric_in_list = list(metric)
+                    # replace each matched group with '*', from the last group to the first
+                    # so that earlier group positions stay valid after the slice assignment
+                    for i in range(len(match.groups()), 0, -1):
+                        metric_in_list[match.start(i):match.end(i)] = '*'
+ short = ''.join(metric_in_list)
+ if short not in metric_store:
+ metric_store[short] = []
+ metric_store[short].extend(raw_data_df[metric].tolist())
+ df = pd.DataFrame()
+ for short in metric_store:
+ df = pd.concat([df, pd.DataFrame(metric_store[short], columns=[short])], axis=1)
+ return df
+ except Exception as e:
+ logger.error('DataAnalyzer: aggregate failed, msg: {}'.format(str(e)))
+ return None
diff --git a/superbench/analyzer/data_diagnosis.py b/superbench/analyzer/data_diagnosis.py
index 870bcfee..66836c15 100644
--- a/superbench/analyzer/data_diagnosis.py
+++ b/superbench/analyzer/data_diagnosis.py
@@ -256,7 +256,7 @@ class DataDiagnosis(RuleBase):
except Exception as e:
logger.error('DataDiagnosis: output json data failed, msg: {}'.format(str(e)))
- def gen_md_lines(self, data_not_accept_df, rules, round):
+ def generate_md_lines(self, data_not_accept_df, rules, round):
"""Convert DataFrame into markdown lines.
Args:
@@ -290,7 +290,7 @@ class DataDiagnosis(RuleBase):
data_not_accept_df = data_analysis.round_significant_decimal_places(
data_not_accept_df, round, [metric]
)
- lines = file_handler.gen_md_table(data_not_accept_df, header)
+ lines = file_handler.generate_md_table(data_not_accept_df, header)
return lines
def run(self, raw_data_file, rule_file, baseline_file, output_dir, output_format='excel', round=2):
@@ -319,7 +319,7 @@ class DataDiagnosis(RuleBase):
output_path = str(Path(output_dir) / 'diagnosis_summary.jsonl')
self.output_diagnosis_in_json(data_not_accept_df, output_path)
elif output_format == 'md' or output_format == 'html':
- lines = self.gen_md_lines(data_not_accept_df, self._sb_rules, round)
+ lines = self.generate_md_lines(data_not_accept_df, self._sb_rules, round)
if output_format == 'md':
output_path = str(Path(output_dir) / 'diagnosis_summary.md')
file_handler.output_lines_in_md(lines, output_path)
diff --git a/superbench/analyzer/file_handler.py b/superbench/analyzer/file_handler.py
index aa6d260e..cc26ffc2 100644
--- a/superbench/analyzer/file_handler.py
+++ b/superbench/analyzer/file_handler.py
@@ -10,6 +10,7 @@ import json
import jsonlines
import pandas as pd
import yaml
+from openpyxl.styles import Alignment
import markdown
from superbench.common.utils import logger
@@ -158,7 +159,7 @@ def output_excel_data_not_accept(writer, data_not_accept_df, rules):
logger.warning('FileHandler: excel_data_output - data_not_accept_df is not DataFrame.')
-def gen_md_table(data_df, header):
+def generate_md_table(data_df, header):
"""Generate table text in markdown format.
| header[0] | header[1] |
@@ -221,3 +222,29 @@ def output_lines_in_html(lines, output_path):
f.writelines(html_str)
except Exception as e:
logger.error('FileHandler: html_data_output - {}'.format(str(e)))
+
+
+def merge_column_in_excel(ws, row, column):
+ """Merge cells in the selected index of column with continuous same contents.
+
+ Args:
+ ws (worksheet): the worksheet of the excel to process
+ row (int): the max row index to merge
+ column (int): the index of the column to merge
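+
+    For example (illustrative): if rows 2-4 of the selected column all contain 'KernelLaunch',
+    they are merged into a single center-aligned cell spanning rows 2-4.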
+ """
+ dict_from = {}
+ aligncenter = Alignment(horizontal='center', vertical='center')
+ # record continuous row index (start, end) with the same content
+ for row_index in range(1, row + 1):
+ value = str(ws.cell(row_index, column).value)
+ if value not in dict_from:
+ dict_from[value] = [row_index, row_index]
+ else:
+ dict_from[value][1] = dict_from[value][1] + 1
+ # merge the cells
+ for value in dict_from.values():
+ if value[0] != value[1]:
+ ws.merge_cells(start_row=value[0], start_column=column, end_row=value[1], end_column=column)
+ # align center for merged cells
+ for i in range(1, row + 1):
+ ws.cell(row=i, column=column).alignment = aligncenter
diff --git a/superbench/analyzer/result_summary.py b/superbench/analyzer/result_summary.py
new file mode 100644
index 00000000..be1b5ab4
--- /dev/null
+++ b/superbench/analyzer/result_summary.py
@@ -0,0 +1,251 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""A module for Result Summary."""
+
+import re
+from pathlib import Path
+
+import pandas as pd
+
+from superbench.common.utils import logger
+from superbench.analyzer import file_handler
+from superbench.analyzer.summary_op import SummaryOp, SummaryType
+from superbench.analyzer import RuleBase
+from superbench.analyzer import data_analysis
+
+
+class ResultSummary(RuleBase):
+ """Result summary class."""
+ def _check_rules(self, rule, name):
+ """Check whether the formart of the rule is valid.
+
+ Args:
+ rule (dict): the rule
+ name (str): the rule name
+
+ Returns:
+ dict: the rule for the metric
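+
+        Example of a valid rule (illustrative; cf. tests/analyzer/test_summary_rules.yaml):
+            categories: KernelLaunch
+            statistics: [mean, p90]
+            metrics:
+              - kernel-launch/event_overhead
+            aggregate: True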
+ """
+ # check if rule is supported
+ super()._check_and_format_rules(rule, name)
+ if 'metrics' not in rule:
+            logger.log_and_raise(exception=Exception, msg='{} lacks metrics'.format(name))
+ if 'statistics' not in rule:
+            logger.log_and_raise(exception=Exception, msg='{} lacks statistics'.format(name))
+ # convert single statistic str to list
+ if not isinstance(rule['statistics'], list):
+ rule['statistics'] = [rule['statistics']]
+ # check statistics format, should be SummaryType or p\d\d?
+ for function in rule['statistics']:
+ try:
+ if not (re.fullmatch(r'p\d\d?', function) or isinstance(SummaryType(function), SummaryType)):
+ logger.log_and_raise(
+ exception=Exception, msg='{} has invalid statistics name {}'.format(name, function)
+ )
+ except Exception:
+ logger.log_and_raise(
+ exception=Exception, msg='{} has invalid statistics name {}'.format(name, function)
+ )
+ # check aggregate format, should be None or bool or pattern in regex with () group
+ if 'aggregate' in rule and not isinstance(rule['aggregate'],
+ bool) and not re.search(r'\(.*\)', rule['aggregate']):
+            logger.log_and_raise(
+                exception=Exception, msg='{} aggregate must be a bool or a regex pattern with group(s)'.format(name)
+            )
+ return rule
+
+ def _parse_rules(self, rules):
+ """Parse the rules for result summary.
+
+ Args:
+ rules (dict): rules from rule yaml file
+
+ Returns:
+ bool: return True if successfully get all rules, otherwise False.
+ """
+ try:
+ if not rules:
+ logger.error('ResultSummary: get rules failed')
+ return False
+ self._sb_rules = {}
+ self._enable_metrics = set()
+ benchmark_rules = rules['superbench']['rules']
+ for rule in benchmark_rules:
+ benchmark_rules[rule] = self._check_rules(benchmark_rules[rule], rule)
+ self._sb_rules[rule] = {}
+ self._sb_rules[rule]['name'] = rule
+ self._sb_rules[rule]['categories'] = benchmark_rules[rule]['categories']
+ self._sb_rules[rule]['metrics'] = {}
+ self._sb_rules[rule]['statistics'] = benchmark_rules[rule]['statistics']
+ self._sb_rules[rule][
+ 'aggregate'] = benchmark_rules[rule]['aggregate'] if 'aggregate' in benchmark_rules[rule] else False
+ super()._get_metrics(rule, benchmark_rules)
+ return True
+ except Exception as e:
+ logger.error('ResultSummary: parse rules failed - {}'.format(str(e)))
+ return False
+
+ def _format_summary_of_rule(self, category, summary_df_of_rule):
+ """Format summary_df of a rule info list of lines.
+
+ Args:
+ category (str): category in the rule
+            summary_df_of_rule (DataFrame): summary of a rule whose columns are metrics and index is statistics
+
+ Returns:
+ list: list of summary lines like [category, metric, statistic, value]
+ """
+ summary = []
+ metrics = summary_df_of_rule.columns
+ for metric in metrics:
+ for statistic in summary_df_of_rule.index:
+ summary.append([category, metric, statistic, summary_df_of_rule.loc[statistic, metric]])
+ return summary
+
+ def _merge_summary(self, summary):
+ """Merge summary of multiple rules into DataFrame.
+
+ Args:
+ summary (dict): summary dict, the keys are categories, the values are summary lines for the category
+
+ Returns:
+ DataFrame: summary of all rules
+ """
+ summary_df = pd.DataFrame()
+        # concatenate the summary lines of every category into one DataFrame
+        for category in summary:
+            summary_df = pd.concat([summary_df, pd.DataFrame(summary[category])], ignore_index=True)
+ return summary_df
+
+ def _generate_summary(self, round):
+ r"""Generate summay dict of all rules.
+
+ For each rule, aggregate the data by user-defined pattern or ranks (:\\d+), calculate
+ the list of statistics of aggregated metrics, then format the summary in {category, lines}.
+
+ Args:
+ round (int): the number of decimal digits
+
+ Returns:
+ dict: summary dict, the keys are categories, the values are summary lines for the category
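+                e.g. (illustrative, values from the unit test data):
+                {'KernelLaunch': [['KernelLaunch', 'kernel-launch/event_overhead', 'mean', 0.0097], ...]}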
+ """
+ summary = {}
+ for rule in self._sb_rules:
+ metrics = list(self._sb_rules[rule]['metrics'].keys())
+ category = self._sb_rules[rule]['categories']
+ data_df_of_rule = self._raw_data_df[metrics]
+ if self._sb_rules[rule]['aggregate']:
+ # if aggregate is True, aggregate in ranks
+ if self._sb_rules[rule]['aggregate'] is True:
+ data_df_of_rule = data_analysis.aggregate(data_df_of_rule)
+ # if aggregate is not empty and is a pattern in regex, aggregate according to pattern
+ else:
+ data_df_of_rule = data_analysis.aggregate(data_df_of_rule, self._sb_rules[rule]['aggregate'])
+ statistics = self._sb_rules[rule]['statistics']
+ summary_df_of_rule = pd.DataFrame(columns=sorted(data_df_of_rule.columns))
+ for statistic_name in statistics:
+ # get SummaryOp and calculate statistics
+                # if statistic_name matches 'p\d\d?', use the percentile SummaryOp
+                if statistic_name.startswith('p'):
+ rule_op = SummaryOp.get_summary_func(SummaryType('percentile'))
+ val = int(statistic_name.strip('p'))
+ summary_df_of_rule.loc[statistic_name] = rule_op(data_df_of_rule, val)
+ else:
+ rule_op = SummaryOp.get_summary_func(SummaryType(statistic_name))
+ summary_df_of_rule.loc[statistic_name] = rule_op(data_df_of_rule)
+ # format values to n significant decimal digits
+ if round and isinstance(round, int):
+ summary_df_of_rule = data_analysis.round_significant_decimal_places(
+ summary_df_of_rule, round, list(summary_df_of_rule.columns)
+ )
+ # format summary_df of a rule to list of lines
+ summary_lines_of_rule = self._format_summary_of_rule(category, summary_df_of_rule)
+ summary[category] = summary_lines_of_rule
+
+ return summary
+
+ def generate_md_lines(self, summary):
+ """Generate text in markdown foramt.
+
+ Use category to be the 2nd-header, use tables to show the data
+
+ Args:
+ summary (dict): summary dict, the keys are categories, the values are summary lines for the category
+
+ Returns:
+ list: lines in markdown format
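+
+        Example of the generated lines (illustrative; cf. tests/data/results_summary.md):
+            ## KernelLaunch
+            | metric | statistics | values |
+            | --- | --- | --- |
+            | kernel-launch/event_overhead | mean | 0.0097 |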
+ """
+ lines = []
+ for category in summary:
+ lines.append('## {}\n'.format(category))
+ summary_df = pd.DataFrame(summary[category])
+ summary_df = summary_df.drop(columns=0, axis=1)
+ header = ['metric', 'statistics', 'values']
+ table_lines = file_handler.generate_md_table(summary_df, header)
+ lines.extend(table_lines)
+ lines.append('\n')
+ return lines
+
+ def output_summary_in_excel(self, raw_data_df, summary, output_path):
+ """Output result summary in excel foramt.
+
+ Args:
+            raw_data_df (DataFrame): the DataFrame of raw data
+ summary (DataFrame): the DataFrame of summary
+ output_path (str): the path of output file
+ """
+ try:
+ writer = pd.ExcelWriter(output_path, engine='openpyxl')
+            # check whether writer is valid
+ if not isinstance(writer, pd.ExcelWriter):
+ logger.error('ResultSummary: excel_data_output - invalid file path.')
+ return
+ # output the raw data in 'Raw Data' sheet
+ file_handler.output_excel_raw_data(writer, raw_data_df, 'Raw Data')
+ # output the result summary in 'Summary' sheet
+ if isinstance(summary, pd.DataFrame) and not summary.empty:
+ summary.to_excel(writer, 'Summary', index=False, header=False)
+ worksheet = writer.sheets['Summary']
+ row = worksheet.max_row
+ # merge cells in 'category' column with the same category
+ file_handler.merge_column_in_excel(worksheet, row, 1)
+ else:
+ logger.error('ResultSummary: excel_data_output - summary is empty.')
+ writer.save()
+ except Exception as e:
+ logger.error('ResultSummary: excel_data_output - {}'.format(str(e)))
+
+ def run(self, raw_data_file, rule_file, output_dir, output_format, round=2):
+ """Run the main process of result summary.
+
+ Args:
+ raw_data_file (str): the path of raw data jsonl file.
+            rule_file (str): the path of rule yaml file
+ output_dir (str): the directory of output file
+ output_format (str): the format of the output, 'excel' or 'md' or 'html'
+ round (int): the number of decimal digits
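+
+        Example (illustrative; the file paths are hypothetical):
+            ResultSummary().run('outputs/results-summary.jsonl', 'summary-rules.yaml', 'outputs', 'md', round=2)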
+ """
+ try:
+ rules = self._preprocess(raw_data_file, rule_file)
+ # parse rules for result summary
+ if not self._parse_rules(rules):
+ return
+ # generate result summary for each category
+ summary = self._generate_summary(round)
+ # output result summary to file
+ output_path = ''
+ if output_format == 'excel':
+ output_path = str(Path(output_dir) / 'results_summary.xlsx')
+ summary_df = self._merge_summary(summary)
+ self.output_summary_in_excel(self._raw_data_df, summary_df, output_path)
+ elif output_format == 'md':
+ output_path = str(Path(output_dir) / 'results_summary.md')
+ lines = self.generate_md_lines(summary)
+ file_handler.output_lines_in_md(lines, output_path)
+ elif output_format == 'html':
+ output_path = str(Path(output_dir) / 'results_summary.html')
+ lines = self.generate_md_lines(summary)
+ file_handler.output_lines_in_html(lines, output_path)
+ else:
+ logger.error('ResultSummary: output failed - unsupported output format')
+ logger.info('ResultSummary: Output results to {}'.format(output_path))
+ except Exception as e:
+ logger.error('ResultSummary: run failed - {}'.format(str(e)))
diff --git a/superbench/analyzer/summary_op.py b/superbench/analyzer/summary_op.py
new file mode 100644
index 00000000..b4981fe6
--- /dev/null
+++ b/superbench/analyzer/summary_op.py
@@ -0,0 +1,157 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""A module for result summary ops."""
+
+from typing import Dict, Callable
+import numbers
+
+from superbench.benchmarks.context import Enum
+from superbench.common.utils import logger
+
+
+class SummaryType(Enum):
+ """The Enum class representing different summary ops."""
+
+ MEAN = 'mean'
+    PERCENTILE = 'percentile'
+ MIN = 'min'
+ MAX = 'max'
+ STD = 'std'
+ COUNT = 'count'
+
+
+class SummaryOp:
+ """SummaryOp class to maintain all summary functions."""
+
+ functions: Dict[SummaryType, Callable] = dict()
+
+ @classmethod
+ def add_summary_func(cls, summary_type):
+ """Add summary fuction.
+
+ Args:
+ summary_type (SummaryType): The type of summary function.
+
+ Return:
+ decorator (Callable): return the decorator to add the summary function.
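+
+        Example (as used at the bottom of this module):
+            SummaryOp.add_summary_func(SummaryType.MEAN)(SummaryOp.mean)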
+ """
+ def decorator(func):
+ cls.functions[summary_type] = func
+ return func
+
+ return decorator
+
+ @classmethod
+ def get_summary_func(cls, summary_type):
+ """Get summary fuction by summary_type.
+
+ Args:
+ summary_type (SummaryType): The type of summary function.
+
+ Return:
+ func (Callable): summary function, None means invalid summary type.
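+
+        Example (illustrative):
+            SummaryOp.get_summary_func(SummaryType.MEAN) returns SummaryOp.mean, while a percentile
+            statistic such as 'p90' is resolved by the caller to SummaryType('percentile') and the
+            returned function is invoked with the percentile value, e.g. func(raw_data_df, 90).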
+ """
+ if summary_type in cls.functions:
+ return cls.functions[summary_type]
+
+ return None
+
+ @staticmethod
+ def _check_raw_data_df(raw_data_df):
+ """Check whether raw_data_df is empty or None.
+
+ Args:
+ raw_data_df (DataFrame): raw data df
+ """
+ if raw_data_df is None or raw_data_df.empty:
+ logger.log_and_raise(exception=Exception, msg='empty data in summary op')
+
+ @staticmethod
+ def mean(raw_data_df):
+ """Mean of raw_data_df.
+
+ Args:
+ raw_data_df (DataFrame): raw data df
+
+ Returns:
+ Series: mean of raw_data_df
+ """
+ SummaryOp._check_raw_data_df(raw_data_df)
+ return raw_data_df.mean()
+
+ @staticmethod
+ def percentile(raw_data_df, val):
+ """Pencentile$(val) of raw_data_df.
+
+ Args:
+ raw_data_df (DataFrame): raw data df
+            val (numbers.Number): the percentile value, should be in the range 1-99
+
+ Returns:
+            Series: the val-th percentile of raw_data_df
+ """
+ SummaryOp._check_raw_data_df(raw_data_df)
+ if not isinstance(val, numbers.Number) or val < 1 or val > 99:
+            logger.log_and_raise(exception=Exception, msg='val in percentile should be 1-99')
+ return raw_data_df.quantile(val / 100)
+
+ @staticmethod
+ def min(raw_data_df):
+ """The min of values for each column in raw_data_df.
+
+ Args:
+ raw_data_df (DataFrame): raw data df
+
+ Returns:
+ Series: min of raw_data_df
+ """
+ SummaryOp._check_raw_data_df(raw_data_df)
+ return raw_data_df.min()
+
+ @staticmethod
+ def max(raw_data_df):
+ """The max of values for each column in raw_data_df.
+
+ Args:
+ raw_data_df (DataFrame): raw data df
+
+ Returns:
+ Series: max of raw_data_df
+ """
+ SummaryOp._check_raw_data_df(raw_data_df)
+ return raw_data_df.max()
+
+ @staticmethod
+ def std(raw_data_df):
+ """The std of values for each column in raw_data_df.
+
+ Args:
+ raw_data_df (DataFrame): raw data df
+
+ Returns:
+ Series: std of raw_data_df
+ """
+ SummaryOp._check_raw_data_df(raw_data_df)
+ return raw_data_df.std(axis=0, skipna=True)
+
+ @staticmethod
+ def count(raw_data_df):
+ """The number of values for each column in raw_data_df.
+
+ Args:
+ raw_data_df (DataFrame): raw data df
+
+ Returns:
+ Series: count of raw_data_df
+ """
+ SummaryOp._check_raw_data_df(raw_data_df)
+ return raw_data_df.count()
+
+
+SummaryOp.add_summary_func(SummaryType.MEAN)(SummaryOp.mean)
+SummaryOp.add_summary_func(SummaryType.PERCENTILE)(SummaryOp.percentile)
+SummaryOp.add_summary_func(SummaryType.MIN)(SummaryOp.min)
+SummaryOp.add_summary_func(SummaryType.MAX)(SummaryOp.max)
+SummaryOp.add_summary_func(SummaryType.STD)(SummaryOp.std)
+SummaryOp.add_summary_func(SummaryType.COUNT)(SummaryOp.count)
diff --git a/tests/analyzer/test_data_analysis.py b/tests/analyzer/test_data_analysis.py
index 43b68335..b6e0b66f 100644
--- a/tests/analyzer/test_data_analysis.py
+++ b/tests/analyzer/test_data_analysis.py
@@ -73,3 +73,10 @@ class TestDataAnalysis(unittest.TestCase):
pd.testing.assert_frame_equal(df, pd.DataFrame([[0.0046, 500.6789], [1.53, 100.7424]], columns=['a', 'b']))
df = data_analysis.round_significant_decimal_places(df, 2, 'b')
pd.testing.assert_frame_equal(df, pd.DataFrame([[0.0046, 500.68], [1.53, 100.74]], columns=['a', 'b']))
+ # Test aggregate
+ df = pd.DataFrame([[1, 2], [3, 4]], columns=['a:0', 'a:1'])
+ df = data_analysis.aggregate(df)
+ pd.testing.assert_frame_equal(df, pd.DataFrame({'a': [1, 3, 2, 4]}))
+ df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=['ib_1_a', 'ib_2_a', 'ib_1_b', 'ib_2_b'])
+ df = data_analysis.aggregate(df, pattern='ib_(.)_.')
+ pd.testing.assert_frame_equal(df, pd.DataFrame({'ib_*_a': [1, 5, 2, 6], 'ib_*_b': [3, 7, 4, 8]}))
diff --git a/tests/analyzer/test_data_diagnosis.py b/tests/analyzer/test_data_diagnosis.py
index 151faee1..686978e6 100644
--- a/tests/analyzer/test_data_diagnosis.py
+++ b/tests/analyzer/test_data_diagnosis.py
@@ -196,8 +196,8 @@ class TestDataDiagnosis(unittest.TestCase):
assert ('Category' in line)
assert ('Defective Details' in line)
assert ('Index' in line)
- # Test - gen_md_lines
- lines = diag1.gen_md_lines(data_not_accept_df, diag1._sb_rules, 2)
+ # Test - generate_md_lines
+ lines = diag1.generate_md_lines(data_not_accept_df, diag1._sb_rules, 2)
assert (lines)
expected_md_file = str(self.parent_path / '../data/diagnosis_summary.md')
with open(expected_md_file, 'r') as f:
diff --git a/tests/analyzer/test_file_handler.py b/tests/analyzer/test_file_handler.py
index ea43da05..51cec89a 100644
--- a/tests/analyzer/test_file_handler.py
+++ b/tests/analyzer/test_file_handler.py
@@ -48,8 +48,8 @@ class TestFileHandler(unittest.TestCase):
assert (not baseline)
baseline = file_handler.read_baseline(test_baseline_file)
assert (baseline)
- # Test - gen_md_table
+ # Test - generate_md_table
data_df = pd.DataFrame([[1, 2], [3, 4]])
- lines = file_handler.gen_md_table(data_df, header=['A', 'B'])
+ lines = file_handler.generate_md_table(data_df, header=['A', 'B'])
expected_lines = ['| A | B |\n', '| --- | --- |\n', '| 1 | 2 |\n', '| 3 | 4 |\n']
assert (lines == expected_lines)
diff --git a/tests/analyzer/test_result_summary.py b/tests/analyzer/test_result_summary.py
new file mode 100644
index 00000000..c38ee1e5
--- /dev/null
+++ b/tests/analyzer/test_result_summary.py
@@ -0,0 +1,149 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""Tests for ResultSummary module."""
+
+import unittest
+import yaml
+from pathlib import Path
+
+import pandas as pd
+
+from superbench.analyzer import ResultSummary
+import superbench.analyzer.file_handler as file_handler
+
+
+class TestResultSummary(unittest.TestCase):
+ """Test for ResultSummary class."""
+ def setUp(self):
+ """Method called to prepare the test fixture."""
+ self.parent_path = Path(__file__).parent
+ self.output_excel_file = str(self.parent_path / 'results_summary.xlsx')
+ self.output_md_file = str(self.parent_path / 'results_summary.md')
+ self.output_html_file = str(self.parent_path / 'results_summary.html')
+ self.test_rule_file_fake = str(self.parent_path / 'test_rules_fake.yaml')
+ self.test_raw_data = str(self.parent_path / 'test_results.jsonl')
+ self.test_rule_file = str(self.parent_path / 'test_summary_rules.yaml')
+
+ def tearDown(self):
+ """Method called after the test method has been called and the result recorded."""
+ for file in [self.output_excel_file, self.test_rule_file_fake, self.output_md_file, self.output_html_file]:
+ p = Path(file)
+ if p.is_file():
+ p.unlink()
+
+ def test_result_summary(self):
+ """Test result summary class."""
+ rs1 = ResultSummary()
+ rs1._raw_data_df = file_handler.read_raw_data(self.test_raw_data)
+ rs1._benchmark_metrics_dict = rs1._get_metrics_by_benchmarks(list(rs1._raw_data_df))
+ # Test - _check_rules
+ # Negative case
+ false_rules = [
+ {
+ 'categories': 'KernelLaunch',
+ 'metrics': ['kernel-launch/event_overhead:\\d+']
+ }, {
+ 'categories': 'KernelLaunch',
+ 'statistics': 'abb',
+ 'metrics': ['kernel-launch/event_overhead:\\d+']
+ }, {
+ 'categories': 'KernelLaunch',
+ 'statistics': 'mean',
+ 'metrics': ['kernel-launch/event_overhead:\\d+'],
+ 'aggregate': 'abb'
+ }
+ ]
+ metric = 'kernel-launch/event_overhead:0'
+ for rules in false_rules:
+ self.assertRaises(Exception, rs1._check_rules, rules, metric)
+ # Positive case
+ true_rules = [
+ {
+ 'categories': 'KernelLaunch',
+ 'statistics': 'mean',
+ 'metrics': ['kernel-launch/event_overhead:\\d+'],
+ 'aggregate': True
+ },
+ {
+ 'categories': 'KernelLaunch',
+ 'statistics': ['mean', 'p50'],
+ 'metrics': ['kernel-launch/event_overhead:\\d+']
+ },
+ {
+ 'categories': 'KernelLaunch',
+ 'statistics': 'mean',
+ 'metrics': ['kernel-launch/event_overhead:\\d+'],
+ 'aggregate': 'kernel-launch/event_overhead(:\\d+)'
+ },
+ ]
+ for rules in true_rules:
+ assert (rs1._check_rules(rules, metric))
+
+ # Test - _parse_rules
+ # Negative case
+ rs2 = ResultSummary()
+ fake_rules = file_handler.read_rules(self.test_rule_file_fake)
+ assert (rs2._parse_rules(fake_rules) is False)
+ rs2._raw_data_df = file_handler.read_raw_data(self.test_raw_data)
+ rs2._benchmark_metrics_dict = rs2._get_metrics_by_benchmarks(list(rs2._raw_data_df))
+ p = Path(self.test_rule_file)
+ with p.open() as f:
+ rules = yaml.load(f, Loader=yaml.SafeLoader)
+ rules['superbench']['rules']['fake'] = false_rules[0]
+ with open(self.test_rule_file_fake, 'w') as f:
+ yaml.dump(rules, f)
+        fake_rules = file_handler.read_rules(self.test_rule_file_fake)
+        assert (rs1._parse_rules(fake_rules) is False)
+ # Positive case
+ rules = file_handler.read_rules(self.test_rule_file)
+ assert (rs1._parse_rules(rules))
+
+ # Test - _generate_summary
+ summary = rs1._generate_summary(round=2)
+ assert (len(summary) == 3)
+
+ # Test - _merge_summary
+ expected_summary_merge = [
+ ['KernelLaunch', 'kernel-launch/event_overhead', 'mean', 0.0097],
+ ['KernelLaunch', 'kernel-launch/event_overhead', 'p90', 0.006],
+ ['KernelLaunch', 'kernel-launch/event_overhead', 'min', 0.0055],
+ ['KernelLaunch', 'kernel-launch/event_overhead', 'max', 0.1],
+ ['KernelLaunch', 'kernel-launch/wall_overhead', 'mean', 0.01],
+ ['KernelLaunch', 'kernel-launch/wall_overhead', 'p90', 0.011],
+ ['KernelLaunch', 'kernel-launch/wall_overhead', 'min', 0.01],
+ ['KernelLaunch', 'kernel-launch/wall_overhead', 'max', 0.011],
+ ['NCCL', 'nccl-bw/allreduce_8388608_busbw:0', 'mean', 89.51],
+ ['RDMA', 'ib-loopback/IB_write_8388608_Avg_*:0', 'mean', 23925.84]
+ ]
+ expected_summary_merge_df = pd.DataFrame(expected_summary_merge)
+ summary_merge_df = rs1._merge_summary(summary)
+ pd.testing.assert_frame_equal(expected_summary_merge_df, summary_merge_df)
+
+ def test_result_summary_run(self):
+ """Test for the run process of result summary."""
+ # Test - output in excel
+ ResultSummary().run(self.test_raw_data, self.test_rule_file, str(self.parent_path), 'excel', round=2)
+ excel_file = pd.ExcelFile(self.output_excel_file, engine='openpyxl')
+ data_sheet_name = 'Summary'
+ summary = excel_file.parse(data_sheet_name, header=None)
+ expect_result_file = pd.ExcelFile(str(self.parent_path / '../data/results_summary.xlsx'), engine='openpyxl')
+ expect_result = expect_result_file.parse(data_sheet_name, header=None)
+ pd.testing.assert_frame_equal(summary, expect_result)
+
+ # Test - output in md
+ ResultSummary().run(self.test_raw_data, self.test_rule_file, str(self.parent_path), 'md', round=2)
+ expected_md_file = str(self.parent_path / '../data/results_summary.md')
+ with open(expected_md_file, 'r') as f:
+ expect_result = f.read()
+ with open(self.output_md_file, 'r') as f:
+ summary = f.read()
+ assert (summary == expect_result)
+
+ # Test - output in html
+ ResultSummary().run(self.test_raw_data, self.test_rule_file, str(self.parent_path), 'html', round=2)
+ expected_html_file = str(self.parent_path / '../data/results_summary.html')
+ with open(expected_html_file, 'r') as f:
+ expect_result = f.read()
+ with open(self.output_html_file, 'r') as f:
+ summary = f.read()
+ assert (summary == expect_result)
diff --git a/tests/analyzer/test_summary_rules.yaml b/tests/analyzer/test_summary_rules.yaml
new file mode 100644
index 00000000..34a6510e
--- /dev/null
+++ b/tests/analyzer/test_summary_rules.yaml
@@ -0,0 +1,26 @@
+# SuperBench rules
+version: v0.4
+superbench:
+ rules:
+ kernel_launch:
+ statistics:
+ - mean
+ - p90
+ - min
+ - max
+ aggregate: True
+ categories: KernelLaunch
+ metrics:
+ - kernel-launch/event_overhead
+ - kernel-launch/wall_overhead
+ nccl:
+ statistics: mean
+ categories: NCCL
+ metrics:
+ - nccl-bw/allreduce_8388608_busbw
+ ib-loopback:
+ statistics: mean
+ categories: RDMA
+ metrics:
+ - ib-loopback/IB_write_8388608_Avg_\d+
+ aggregate: ib-loopback/IB_write_.*_Avg_(\d+)
diff --git a/tests/analyzer/test_summaryop.py b/tests/analyzer/test_summaryop.py
new file mode 100644
index 00000000..3b105444
--- /dev/null
+++ b/tests/analyzer/test_summaryop.py
@@ -0,0 +1,70 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""Tests for SummaryOp module."""
+
+import unittest
+from numpy import NaN, float64
+
+import pandas as pd
+
+from superbench.analyzer import SummaryOp, SummaryType
+
+
+class TestSummaryOp(unittest.TestCase):
+ """Test for Summary Ops."""
+ def test_rule_op(self):
+ """Test for defined rule operators."""
+ # Test - get_rule_func
+ # Negative case
+ assert (not SummaryOp.get_summary_func('fake'))
+ # Positive case
+ summary_op = SummaryOp.get_summary_func(SummaryType.MEAN)
+ assert (summary_op == SummaryOp.mean)
+        summary_op = SummaryOp.get_summary_func(SummaryType.PERCENTILE)
+ assert (summary_op == SummaryOp.percentile)
+ summary_op = SummaryOp.get_summary_func(SummaryType.MIN)
+ assert (summary_op == SummaryOp.min)
+ summary_op = SummaryOp.get_summary_func(SummaryType.MAX)
+ assert (summary_op == SummaryOp.max)
+ summary_op = SummaryOp.get_summary_func(SummaryType.STD)
+ assert (summary_op == SummaryOp.std)
+ summary_op = SummaryOp.get_summary_func(SummaryType.COUNT)
+ assert (summary_op == SummaryOp.count)
+
+        # Test - _check_raw_data_df
+ # Negative case
+ empty_data_df = pd.DataFrame()
+ self.assertRaises(Exception, SummaryOp._check_raw_data_df, empty_data_df)
+ self.assertRaises(Exception, SummaryOp._check_raw_data_df, None)
+
+ data1 = [[1, 2, 3, 4], [4, 5, 6], [7, 8]]
+ raw_data_df = pd.DataFrame(data1, columns=['a', 'b', 'c', 'd'])
+ # Test - mean
+ result = SummaryOp.mean(raw_data_df)
+ expectedResult = pd.Series([4.0, 5.0, 4.5, 4.0], index=['a', 'b', 'c', 'd'])
+ pd.testing.assert_series_equal(result, expectedResult)
+ # Test - min
+ result = SummaryOp.min(raw_data_df)
+ expectedResult = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'], dtype=float64)
+ pd.testing.assert_series_equal(result, expectedResult)
+ # Test - max
+ result = SummaryOp.max(raw_data_df)
+ expectedResult = pd.Series([7, 8, 6, 4], index=['a', 'b', 'c', 'd'], dtype=float64)
+ pd.testing.assert_series_equal(result, expectedResult)
+ # Test - std
+ result = SummaryOp.std(raw_data_df)
+ print(result)
+ expectedResult = pd.Series([3.0, 3.0, 2.1213203435596424, NaN], index=['a', 'b', 'c', 'd'], dtype=float64)
+ pd.testing.assert_series_equal(result, expectedResult)
+ # Test - count
+ result = SummaryOp.count(raw_data_df)
+ print(result)
+ expectedResult = pd.Series([3, 3, 2, 1], index=['a', 'b', 'c', 'd'])
+ pd.testing.assert_series_equal(result, expectedResult)
+        # Test - percentile
+ result = SummaryOp.percentile(raw_data_df, 50)
+ print(result)
+ expectedResult = pd.Series([4.0, 5.0, 4.5, 4.0], index=['a', 'b', 'c', 'd'], dtype=float64)
+ pd.testing.assert_series_equal(result, expectedResult, check_names=False)
+        self.assertRaises(Exception, SummaryOp.percentile, raw_data_df, 200)
diff --git a/tests/data/results_summary.html b/tests/data/results_summary.html
new file mode 100644
index 00000000..ace0266a
--- /dev/null
+++ b/tests/data/results_summary.html
@@ -0,0 +1,86 @@
+<h2>KernelLaunch</h2>
+<table>
+<thead>
+<tr>
+<th>metric</th>
+<th>statistics</th>
+<th>values</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>kernel-launch/event_overhead</td>
+<td>mean</td>
+<td>0.0097</td>
+</tr>
+<tr>
+<td>kernel-launch/event_overhead</td>
+<td>p90</td>
+<td>0.006</td>
+</tr>
+<tr>
+<td>kernel-launch/event_overhead</td>
+<td>min</td>
+<td>0.0055</td>
+</tr>
+<tr>
+<td>kernel-launch/event_overhead</td>
+<td>max</td>
+<td>0.1</td>
+</tr>
+<tr>
+<td>kernel-launch/wall_overhead</td>
+<td>mean</td>
+<td>0.01</td>
+</tr>
+<tr>
+<td>kernel-launch/wall_overhead</td>
+<td>p90</td>
+<td>0.011</td>
+</tr>
+<tr>
+<td>kernel-launch/wall_overhead</td>
+<td>min</td>
+<td>0.01</td>
+</tr>
+<tr>
+<td>kernel-launch/wall_overhead</td>
+<td>max</td>
+<td>0.011</td>
+</tr>
+</tbody>
+</table>
+<h2>NCCL</h2>
+<table>
+<thead>
+<tr>
+<th>metric</th>
+<th>statistics</th>
+<th>values</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>nccl-bw/allreduce_8388608_busbw:0</td>
+<td>mean</td>
+<td>89.51</td>
+</tr>
+</tbody>
+</table>
+<h2>RDMA</h2>
+<table>
+<thead>
+<tr>
+<th>metric</th>
+<th>statistics</th>
+<th>values</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>ib-loopback/IB_write_8388608_Avg_*:0</td>
+<td>mean</td>
+<td>23925.84</td>
+</tr>
+</tbody>
+</table>
\ No newline at end of file
diff --git a/tests/data/results_summary.md b/tests/data/results_summary.md
new file mode 100644
index 00000000..2341e9e6
--- /dev/null
+++ b/tests/data/results_summary.md
@@ -0,0 +1,22 @@
+## KernelLaunch
+| metric | statistics | values |
+| --- | --- | --- |
+| kernel-launch/event_overhead | mean | 0.0097 |
+| kernel-launch/event_overhead | p90 | 0.006 |
+| kernel-launch/event_overhead | min | 0.0055 |
+| kernel-launch/event_overhead | max | 0.1 |
+| kernel-launch/wall_overhead | mean | 0.01 |
+| kernel-launch/wall_overhead | p90 | 0.011 |
+| kernel-launch/wall_overhead | min | 0.01 |
+| kernel-launch/wall_overhead | max | 0.011 |
+
+## NCCL
+| metric | statistics | values |
+| --- | --- | --- |
+| nccl-bw/allreduce_8388608_busbw:0 | mean | 89.51 |
+
+## RDMA
+| metric | statistics | values |
+| --- | --- | --- |
+| ib-loopback/IB_write_8388608_Avg_*:0 | mean | 23925.84 |
+
diff --git a/tests/data/results_summary.xlsx b/tests/data/results_summary.xlsx
new file mode 100644
index 00000000..08ad4945
Binary files /dev/null and b/tests/data/results_summary.xlsx differ