Analyzer: Add feature - Add result summary in excel, md, and html formats (#320)

**Description**
Add a result summary in excel, md, and html formats; a usage sketch follows the revision list below.

**Major Revision**
- Add a ResultSummary class to support result summaries in excel, md, and html formats.
- Abstract a RuleBase class for the functions shared by DataDiagnosis and ResultSummary.
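
A minimal usage sketch of the new class (not part of the commit itself); the file paths below are placeholders, and the rule file follows the schema of the test_summary_rules.yaml added in the tests:

```python
from superbench.analyzer import ResultSummary

# Paths are placeholders; rule_file uses the same schema as the test_summary_rules.yaml added below.
ResultSummary().run(
    raw_data_file='outputs/results-summary.jsonl',  # raw benchmark results in jsonl format
    rule_file='summary-rules.yaml',                 # summary rules (categories, statistics, metrics, aggregate)
    output_dir='outputs',                           # results_summary.xlsx / .md / .html is written here
    output_format='md',                             # one of 'excel', 'md', 'html'
    round=2,                                        # number of significant decimal digits
)
```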
Yuting Jiang 2022-03-24 15:32:01 +08:00 committed by GitHub
Parent c5aa4f4e38
Commit 84fed1ce18
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
15 changed files: 846 additions and 9 deletions


@@ -6,5 +6,7 @@
from superbench.analyzer.rule_base import RuleBase
from superbench.analyzer.data_diagnosis import DataDiagnosis
from superbench.analyzer.diagnosis_rule_op import RuleOp, DiagnosisRuleType
from superbench.analyzer.summary_op import SummaryOp, SummaryType
from superbench.analyzer.result_summary import ResultSummary
__all__ = ['DataDiagnosis', 'DiagnosisRuleType', 'RuleOp', 'RuleBase']
__all__ = ['DataDiagnosis', 'DiagnosisRuleType', 'RuleOp', 'RuleBase', 'SummaryOp', 'SummaryType', 'ResultSummary']


@@ -7,6 +7,7 @@ import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import re
from superbench.common.utils import logger
@@ -210,3 +211,42 @@ def round_significant_decimal_places(df, digit, cols):
lambda x: float(format_significant_str % x) if abs(x) < 1 else round(x, digit), na_action='ignore'
)
return df
def aggregate(raw_data_df, pattern=None):
r"""Aggregate data of multiple ranks or multiple devices.
By default, aggregate results of multiple ranks like 'metric:\\d+' for most metrics.
For example, aggregate the results of kernel-launch overhead
from 8 GPU devices into one collection.
If pattern is given, use the pattern to match each metric, replace the matched part with '*'
to generate an aggregated metric name, and then aggregate these metrics' data.
Args:
raw_data_df (DataFrame): raw data
pattern (str): regex pattern with groups used to aggregate metric names. Defaults to None.
Returns:
DataFrame: the dataframe of aggregated data
"""
try:
metric_store = {}
metrics = list(raw_data_df.columns)
for metric in metrics:
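# by default, derive the aggregated metric name by stripping the per-rank suffix, e.g. 'metric:0' -> 'metric'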
short = metric.strip(metric.split(':')[-1]).strip(':')
if pattern:
match = re.search(pattern, metric)
if match:
metric_in_list = list(metric)
for i in range(1, len(match.groups()) + 1):
metric_in_list[match.start(i):match.end(i)] = '*'
short = ''.join(metric_in_list)
if short not in metric_store:
metric_store[short] = []
metric_store[short].extend(raw_data_df[metric].tolist())
df = pd.DataFrame()
for short in metric_store:
df = pd.concat([df, pd.DataFrame(metric_store[short], columns=[short])], axis=1)
return df
except Exception as e:
logger.error('DataAnalyzer: aggregate failed, msg: {}'.format(str(e)))
return None
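# Illustration only (not part of this diff), mirroring the unit test added below:
# with no pattern, per-rank columns 'a:0'/'a:1' collapse into a single 'a' column;
# with a grouped pattern, the matched group in the metric name is replaced by '*'.
example_df = pd.DataFrame([[1, 2], [3, 4]], columns=['a:0', 'a:1'])
print(aggregate(example_df))                      # one column 'a': [1, 3, 2, 4]
example_df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=['ib_1_a', 'ib_2_a', 'ib_1_b', 'ib_2_b'])
print(aggregate(example_df, pattern='ib_(.)_.'))  # columns 'ib_*_a': [1, 5, 2, 6], 'ib_*_b': [3, 7, 4, 8]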


@@ -256,7 +256,7 @@ class DataDiagnosis(RuleBase):
except Exception as e:
logger.error('DataDiagnosis: output json data failed, msg: {}'.format(str(e)))
def gen_md_lines(self, data_not_accept_df, rules, round):
def generate_md_lines(self, data_not_accept_df, rules, round):
"""Convert DataFrame into markdown lines.
Args:
@@ -290,7 +290,7 @@ class DataDiagnosis(RuleBase):
data_not_accept_df = data_analysis.round_significant_decimal_places(
data_not_accept_df, round, [metric]
)
lines = file_handler.gen_md_table(data_not_accept_df, header)
lines = file_handler.generate_md_table(data_not_accept_df, header)
return lines
def run(self, raw_data_file, rule_file, baseline_file, output_dir, output_format='excel', round=2):
@@ -319,7 +319,7 @@ class DataDiagnosis(RuleBase):
output_path = str(Path(output_dir) / 'diagnosis_summary.jsonl')
self.output_diagnosis_in_json(data_not_accept_df, output_path)
elif output_format == 'md' or output_format == 'html':
lines = self.gen_md_lines(data_not_accept_df, self._sb_rules, round)
lines = self.generate_md_lines(data_not_accept_df, self._sb_rules, round)
if output_format == 'md':
output_path = str(Path(output_dir) / 'diagnosis_summary.md')
file_handler.output_lines_in_md(lines, output_path)


@@ -10,6 +10,7 @@ import json
import jsonlines
import pandas as pd
import yaml
from openpyxl.styles import Alignment
import markdown
from superbench.common.utils import logger
@@ -158,7 +159,7 @@ def output_excel_data_not_accept(writer, data_not_accept_df, rules):
logger.warning('FileHandler: excel_data_output - data_not_accept_df is not DataFrame.')
def gen_md_table(data_df, header):
def generate_md_table(data_df, header):
"""Generate table text in markdown format.
| header[0] | header[1] |
@@ -221,3 +222,29 @@ def output_lines_in_html(lines, output_path):
f.writelines(html_str)
except Exception as e:
logger.error('FileHandler: html_data_output - {}'.format(str(e)))
def merge_column_in_excel(ws, row, column):
"""Merge cells in the selected index of column with continuous same contents.
Args:
ws (worksheet): the worksheet of the excel to process
row (int): the max row index to merge
column (int): the index of the column to merge
"""
dict_from = {}
aligncenter = Alignment(horizontal='center', vertical='center')
# record continuous row index (start, end) with the same content
for row_index in range(1, row + 1):
value = str(ws.cell(row_index, column).value)
if value not in dict_from:
dict_from[value] = [row_index, row_index]
else:
dict_from[value][1] = dict_from[value][1] + 1
# merge the cells
for value in dict_from.values():
if value[0] != value[1]:
ws.merge_cells(start_row=value[0], start_column=column, end_row=value[1], end_column=column)
# align center for merged cells
for i in range(1, row + 1):
ws.cell(row=i, column=column).alignment = aligncenter


@@ -0,0 +1,251 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""A module for Result Summary."""
import re
from pathlib import Path
import pandas as pd
from superbench.common.utils import logger
from superbench.analyzer import file_handler
from superbench.analyzer.summary_op import SummaryOp, SummaryType
from superbench.analyzer import RuleBase
from superbench.analyzer import data_analysis
class ResultSummary(RuleBase):
"""Result summary class."""
def _check_rules(self, rule, name):
"""Check whether the formart of the rule is valid.
Args:
rule (dict): the rule
name (str): the rule name
Returns:
dict: the rule for the metric
"""
# check if rule is supported
super()._check_and_format_rules(rule, name)
if 'metrics' not in rule:
logger.log_and_raise(exception=Exception, msg='{} lack of metrics'.format(name))
if 'statistics' not in rule:
logger.log_and_raise(exception=Exception, msg='{} lack of function'.format(name))
# convert single statistic str to list
if not isinstance(rule['statistics'], list):
rule['statistics'] = [rule['statistics']]
# check statistics format, should be SummaryType or p\d\d?
for function in rule['statistics']:
try:
if not (re.fullmatch(r'p\d\d?', function) or isinstance(SummaryType(function), SummaryType)):
logger.log_and_raise(
exception=Exception, msg='{} has invalid statistics name {}'.format(name, function)
)
except Exception:
logger.log_and_raise(
exception=Exception, msg='{} has invalid statistics name {}'.format(name, function)
)
# check aggregate format, should be None or bool or pattern in regex with () group
if 'aggregate' in rule and not isinstance(rule['aggregate'],
bool) and not re.search(r'\(.*\)', rule['aggregate']):
logger.log_and_raise(exception=Exception, msg='{} aggregate must be bool type'.format(name))
return rule
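# Illustration only (not part of this diff): a rule dict that passes _check_rules,
# shaped like the rules exercised in the tests added below:
#   {'categories': 'KernelLaunch',
#    'statistics': ['mean', 'p90', 'min', 'max'],      # SummaryType names or 'p<1-99>'
#    'metrics': ['kernel-launch/event_overhead:\\d+'],
#    'aggregate': True}                                 # True, or a regex containing a () group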
def _parse_rules(self, rules):
"""Parse the rules for result summary.
Args:
rules (dict): rules from rule yaml file
Returns:
bool: return True if successfully get all rules, otherwise False.
"""
try:
if not rules:
logger.error('ResultSummary: get rules failed')
return False
self._sb_rules = {}
self._enable_metrics = set()
benchmark_rules = rules['superbench']['rules']
for rule in benchmark_rules:
benchmark_rules[rule] = self._check_rules(benchmark_rules[rule], rule)
self._sb_rules[rule] = {}
self._sb_rules[rule]['name'] = rule
self._sb_rules[rule]['categories'] = benchmark_rules[rule]['categories']
self._sb_rules[rule]['metrics'] = {}
self._sb_rules[rule]['statistics'] = benchmark_rules[rule]['statistics']
self._sb_rules[rule][
'aggregate'] = benchmark_rules[rule]['aggregate'] if 'aggregate' in benchmark_rules[rule] else False
super()._get_metrics(rule, benchmark_rules)
return True
except Exception as e:
logger.error('ResultSummary: parse rules failed - {}'.format(str(e)))
return False
def _format_summary_of_rule(self, category, summary_df_of_rule):
"""Format summary_df of a rule info list of lines.
Args:
category (str): category in the rule
summary_df_of_rule (DataFrame): summary df of a rule; the columns are metrics, the index is statistics
Returns:
list: list of summary lines like [category, metric, statistic, value]
"""
summary = []
metrics = summary_df_of_rule.columns
for metric in metrics:
for statistic in summary_df_of_rule.index:
summary.append([category, metric, statistic, summary_df_of_rule.loc[statistic, metric]])
return summary
def _merge_summary(self, summary):
"""Merge summary of multiple rules into DataFrame.
Args:
summary (dict): summary dict, the keys are categories, the values are summary lines for the category
Returns:
DataFrame: summary of all rules
"""
summary_df = pd.DataFrame()
for category in summary:
for i in range(len(summary[category])):
summary_df = summary_df.append([summary[category][i]], ignore_index=True)
return summary_df
def _generate_summary(self, round):
r"""Generate summay dict of all rules.
For each rule, aggregate the data by user-defined pattern or ranks (:\\d+), calculate
the list of statistics of aggregated metrics, then format the summary in {category, lines}.
Args:
round (int): the number of decimal digits
Returns:
dict: summary dict, the keys are categories, the values are summary lines for the category
"""
summary = {}
for rule in self._sb_rules:
metrics = list(self._sb_rules[rule]['metrics'].keys())
category = self._sb_rules[rule]['categories']
data_df_of_rule = self._raw_data_df[metrics]
if self._sb_rules[rule]['aggregate']:
# if aggregate is True, aggregate in ranks
if self._sb_rules[rule]['aggregate'] is True:
data_df_of_rule = data_analysis.aggregate(data_df_of_rule)
# if aggregate is not empty and is a pattern in regex, aggregate according to pattern
else:
data_df_of_rule = data_analysis.aggregate(data_df_of_rule, self._sb_rules[rule]['aggregate'])
statistics = self._sb_rules[rule]['statistics']
summary_df_of_rule = pd.DataFrame(columns=sorted(data_df_of_rule.columns))
for statistic_name in statistics:
# get SummaryOp and calculate statistics
# if statistic_name is 'p\d\d?', SummaryOp should be percentile
if str.startswith(statistic_name, 'p'):
rule_op = SummaryOp.get_summary_func(SummaryType('percentile'))
val = int(statistic_name.strip('p'))
summary_df_of_rule.loc[statistic_name] = rule_op(data_df_of_rule, val)
else:
rule_op = SummaryOp.get_summary_func(SummaryType(statistic_name))
summary_df_of_rule.loc[statistic_name] = rule_op(data_df_of_rule)
# format values to n significant decimal digits
if round and isinstance(round, int):
summary_df_of_rule = data_analysis.round_significant_decimal_places(
summary_df_of_rule, round, list(summary_df_of_rule.columns)
)
# format summary_df of a rule to list of lines
summary_lines_of_rule = self._format_summary_of_rule(category, summary_df_of_rule)
summary[category] = summary_lines_of_rule
return summary
def generate_md_lines(self, summary):
"""Generate text in markdown foramt.
Use category to be the 2nd-header, use tables to show the data
Args:
summary (dict): summary dict, the keys are categories, the values are summary lines for the category
Returns:
list: lines in markdown format
"""
lines = []
for category in summary:
lines.append('## {}\n'.format(category))
summary_df = pd.DataFrame(summary[category])
summary_df = summary_df.drop(columns=0, axis=1)
header = ['metric', 'statistics', 'values']
table_lines = file_handler.generate_md_table(summary_df, header)
lines.extend(table_lines)
lines.append('\n')
return lines
def output_summary_in_excel(self, raw_data_df, summary, output_path):
"""Output result summary in excel foramt.
Args:
raw_data_df (DataFrame): the DataFrame of raw data df
summary (DataFrame): the DataFrame of summary
output_path (str): the path of output file
"""
try:
writer = pd.ExcelWriter(output_path, engine='openpyxl')
# check whether writer is valid
if not isinstance(writer, pd.ExcelWriter):
logger.error('ResultSummary: excel_data_output - invalid file path.')
return
# output the raw data in 'Raw Data' sheet
file_handler.output_excel_raw_data(writer, raw_data_df, 'Raw Data')
# output the result summary in 'Summary' sheet
if isinstance(summary, pd.DataFrame) and not summary.empty:
summary.to_excel(writer, 'Summary', index=False, header=False)
worksheet = writer.sheets['Summary']
row = worksheet.max_row
# merge cells in 'category' column with the same category
file_handler.merge_column_in_excel(worksheet, row, 1)
else:
logger.error('ResultSummary: excel_data_output - summary is empty.')
writer.save()
except Exception as e:
logger.error('ResultSummary: excel_data_output - {}'.format(str(e)))
def run(self, raw_data_file, rule_file, output_dir, output_format, round=2):
"""Run the main process of result summary.
Args:
raw_data_file (str): the path of raw data jsonl file.
rule_file (str): the path of the rule yaml file
output_dir (str): the directory of output file
output_format (str): the format of the output, 'excel' or 'md' or 'html'
round (int): the number of decimal digits
"""
try:
rules = self._preprocess(raw_data_file, rule_file)
# parse rules for result summary
if not self._parse_rules(rules):
return
# generate result summary for each category
summary = self._generate_summary(round)
# output result summary to file
output_path = ''
if output_format == 'excel':
output_path = str(Path(output_dir) / 'results_summary.xlsx')
summary_df = self._merge_summary(summary)
self.output_summary_in_excel(self._raw_data_df, summary_df, output_path)
elif output_format == 'md':
output_path = str(Path(output_dir) / 'results_summary.md')
lines = self.generate_md_lines(summary)
file_handler.output_lines_in_md(lines, output_path)
elif output_format == 'html':
output_path = str(Path(output_dir) / 'results_summary.html')
lines = self.generate_md_lines(summary)
file_handler.output_lines_in_html(lines, output_path)
else:
logger.error('ResultSummary: output failed - unsupported output format')
logger.info('ResultSummary: Output results to {}'.format(output_path))
except Exception as e:
logger.error('ResultSummary: run failed - {}'.format(str(e)))


@@ -0,0 +1,157 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""A module for result summary ops."""
from typing import Dict, Callable
import numbers
from superbench.benchmarks.context import Enum
from superbench.common.utils import logger
class SummaryType(Enum):
"""The Enum class representing different summary ops."""
MEAN = 'mean'
PENCENTILE = 'percentile'
MIN = 'min'
MAX = 'max'
STD = 'std'
COUNT = 'count'
class SummaryOp:
"""SummaryOp class to maintain all summary functions."""
functions: Dict[SummaryType, Callable] = dict()
@classmethod
def add_summary_func(cls, summary_type):
"""Add summary fuction.
Args:
summary_type (SummaryType): The type of summary function.
Return:
decorator (Callable): return the decorator to add the summary function.
"""
def decorator(func):
cls.functions[summary_type] = func
return func
return decorator
@classmethod
def get_summary_func(cls, summary_type):
"""Get summary fuction by summary_type.
Args:
summary_type (SummaryType): The type of summary function.
Return:
func (Callable): summary function, None means invalid summary type.
"""
if summary_type in cls.functions:
return cls.functions[summary_type]
return None
@staticmethod
def _check_raw_data_df(raw_data_df):
"""Check whether raw_data_df is empty or None.
Args:
raw_data_df (DataFrame): raw data df
"""
if raw_data_df is None or raw_data_df.empty:
logger.log_and_raise(exception=Exception, msg='empty data in summary op')
@staticmethod
def mean(raw_data_df):
"""Mean of raw_data_df.
Args:
raw_data_df (DataFrame): raw data df
Returns:
Series: mean of raw_data_df
"""
SummaryOp._check_raw_data_df(raw_data_df)
return raw_data_df.mean()
@staticmethod
def percentile(raw_data_df, val):
"""Pencentile$(val) of raw_data_df.
Args:
raw_data_df (DataFrame): raw data df
val (numbers.Number): the pencentile value, 1-99
Returns:
Series: mean of raw_data_df
"""
SummaryOp._check_raw_data_df(raw_data_df)
if not isinstance(val, numbers.Number) or val < 1 or val > 99:
logger.log_and_raise(exception=Exception, msg='val in pencentile should be 1-99')
return raw_data_df.quantile(val / 100)
@staticmethod
def min(raw_data_df):
"""The min of values for each column in raw_data_df.
Args:
raw_data_df (DataFrame): raw data df
Returns:
Series: min of raw_data_df
"""
SummaryOp._check_raw_data_df(raw_data_df)
return raw_data_df.min()
@staticmethod
def max(raw_data_df):
"""The max of values for each column in raw_data_df.
Args:
raw_data_df (DataFrame): raw data df
Returns:
Series: max of raw_data_df
"""
SummaryOp._check_raw_data_df(raw_data_df)
return raw_data_df.max()
@staticmethod
def std(raw_data_df):
"""The std of values for each column in raw_data_df.
Args:
raw_data_df (DataFrame): raw data df
Returns:
Series: std of raw_data_df
"""
SummaryOp._check_raw_data_df(raw_data_df)
return raw_data_df.std(axis=0, skipna=True)
@staticmethod
def count(raw_data_df):
"""The number of values for each column in raw_data_df.
Args:
raw_data_df (DataFrame): raw data df
Returns:
Series: count of raw_data_df
"""
SummaryOp._check_raw_data_df(raw_data_df)
return raw_data_df.count()
SummaryOp.add_summary_func(SummaryType.MEAN)(SummaryOp.mean)
SummaryOp.add_summary_func(SummaryType.PENCENTILE)(SummaryOp.percentile)
SummaryOp.add_summary_func(SummaryType.MIN)(SummaryOp.min)
SummaryOp.add_summary_func(SummaryType.MAX)(SummaryOp.max)
SummaryOp.add_summary_func(SummaryType.STD)(SummaryOp.std)
SummaryOp.add_summary_func(SummaryType.COUNT)(SummaryOp.count)


@@ -73,3 +73,10 @@ class TestDataAnalysis(unittest.TestCase):
pd.testing.assert_frame_equal(df, pd.DataFrame([[0.0046, 500.6789], [1.53, 100.7424]], columns=['a', 'b']))
df = data_analysis.round_significant_decimal_places(df, 2, 'b')
pd.testing.assert_frame_equal(df, pd.DataFrame([[0.0046, 500.68], [1.53, 100.74]], columns=['a', 'b']))
# Test aggregate
df = pd.DataFrame([[1, 2], [3, 4]], columns=['a:0', 'a:1'])
df = data_analysis.aggregate(df)
pd.testing.assert_frame_equal(df, pd.DataFrame({'a': [1, 3, 2, 4]}))
df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=['ib_1_a', 'ib_2_a', 'ib_1_b', 'ib_2_b'])
df = data_analysis.aggregate(df, pattern='ib_(.)_.')
pd.testing.assert_frame_equal(df, pd.DataFrame({'ib_*_a': [1, 5, 2, 6], 'ib_*_b': [3, 7, 4, 8]}))


@@ -196,8 +196,8 @@ class TestDataDiagnosis(unittest.TestCase):
assert ('Category' in line)
assert ('Defective Details' in line)
assert ('Index' in line)
# Test - gen_md_lines
lines = diag1.gen_md_lines(data_not_accept_df, diag1._sb_rules, 2)
# Test - generate_md_lines
lines = diag1.generate_md_lines(data_not_accept_df, diag1._sb_rules, 2)
assert (lines)
expected_md_file = str(self.parent_path / '../data/diagnosis_summary.md')
with open(expected_md_file, 'r') as f:


@@ -48,8 +48,8 @@ class TestFileHandler(unittest.TestCase):
assert (not baseline)
baseline = file_handler.read_baseline(test_baseline_file)
assert (baseline)
# Test - gen_md_table
# Test - generate_md_table
data_df = pd.DataFrame([[1, 2], [3, 4]])
lines = file_handler.gen_md_table(data_df, header=['A', 'B'])
lines = file_handler.generate_md_table(data_df, header=['A', 'B'])
expected_lines = ['| A | B |\n', '| --- | --- |\n', '| 1 | 2 |\n', '| 3 | 4 |\n']
assert (lines == expected_lines)


@@ -0,0 +1,149 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Tests for ResultSummary module."""
import unittest
import yaml
from pathlib import Path
import pandas as pd
from superbench.analyzer import ResultSummary
import superbench.analyzer.file_handler as file_handler
class TestResultSummary(unittest.TestCase):
"""Test for ResultSummary class."""
def setUp(self):
"""Method called to prepare the test fixture."""
self.parent_path = Path(__file__).parent
self.output_excel_file = str(self.parent_path / 'results_summary.xlsx')
self.output_md_file = str(self.parent_path / 'results_summary.md')
self.output_html_file = str(self.parent_path / 'results_summary.html')
self.test_rule_file_fake = str(self.parent_path / 'test_rules_fake.yaml')
self.test_raw_data = str(self.parent_path / 'test_results.jsonl')
self.test_rule_file = str(self.parent_path / 'test_summary_rules.yaml')
def tearDown(self):
"""Method called after the test method has been called and the result recorded."""
for file in [self.output_excel_file, self.test_rule_file_fake, self.output_md_file, self.output_html_file]:
p = Path(file)
if p.is_file():
p.unlink()
def test_result_summary(self):
"""Test result summary class."""
rs1 = ResultSummary()
rs1._raw_data_df = file_handler.read_raw_data(self.test_raw_data)
rs1._benchmark_metrics_dict = rs1._get_metrics_by_benchmarks(list(rs1._raw_data_df))
# Test - _check_rules
# Negative case
false_rules = [
{
'categories': 'KernelLaunch',
'metrics': ['kernel-launch/event_overhead:\\d+']
}, {
'categories': 'KernelLaunch',
'statistics': 'abb',
'metrics': ['kernel-launch/event_overhead:\\d+']
}, {
'categories': 'KernelLaunch',
'statistics': 'mean',
'metrics': ['kernel-launch/event_overhead:\\d+'],
'aggregate': 'abb'
}
]
metric = 'kernel-launch/event_overhead:0'
for rules in false_rules:
self.assertRaises(Exception, rs1._check_rules, rules, metric)
# Positive case
true_rules = [
{
'categories': 'KernelLaunch',
'statistics': 'mean',
'metrics': ['kernel-launch/event_overhead:\\d+'],
'aggregate': True
},
{
'categories': 'KernelLaunch',
'statistics': ['mean', 'p50'],
'metrics': ['kernel-launch/event_overhead:\\d+']
},
{
'categories': 'KernelLaunch',
'statistics': 'mean',
'metrics': ['kernel-launch/event_overhead:\\d+'],
'aggregate': 'kernel-launch/event_overhead(:\\d+)'
},
]
for rules in true_rules:
assert (rs1._check_rules(rules, metric))
# Test - _parse_rules
# Negative case
rs2 = ResultSummary()
fake_rules = file_handler.read_rules(self.test_rule_file_fake)
assert (rs2._parse_rules(fake_rules) is False)
rs2._raw_data_df = file_handler.read_raw_data(self.test_raw_data)
rs2._benchmark_metrics_dict = rs2._get_metrics_by_benchmarks(list(rs2._raw_data_df))
p = Path(self.test_rule_file)
with p.open() as f:
rules = yaml.load(f, Loader=yaml.SafeLoader)
rules['superbench']['rules']['fake'] = false_rules[0]
with open(self.test_rule_file_fake, 'w') as f:
yaml.dump(rules, f)
assert (rs1._parse_rules(fake_rules) is False)
# Positive case
rules = file_handler.read_rules(self.test_rule_file)
assert (rs1._parse_rules(rules))
# Test - _generate_summary
summary = rs1._generate_summary(round=2)
assert (len(summary) == 3)
# Test - _merge_summary
expected_summary_merge = [
['KernelLaunch', 'kernel-launch/event_overhead', 'mean', 0.0097],
['KernelLaunch', 'kernel-launch/event_overhead', 'p90', 0.006],
['KernelLaunch', 'kernel-launch/event_overhead', 'min', 0.0055],
['KernelLaunch', 'kernel-launch/event_overhead', 'max', 0.1],
['KernelLaunch', 'kernel-launch/wall_overhead', 'mean', 0.01],
['KernelLaunch', 'kernel-launch/wall_overhead', 'p90', 0.011],
['KernelLaunch', 'kernel-launch/wall_overhead', 'min', 0.01],
['KernelLaunch', 'kernel-launch/wall_overhead', 'max', 0.011],
['NCCL', 'nccl-bw/allreduce_8388608_busbw:0', 'mean', 89.51],
['RDMA', 'ib-loopback/IB_write_8388608_Avg_*:0', 'mean', 23925.84]
]
expected_summary_merge_df = pd.DataFrame(expected_summary_merge)
summary_merge_df = rs1._merge_summary(summary)
pd.testing.assert_frame_equal(expected_summary_merge_df, summary_merge_df)
def test_result_summary_run(self):
"""Test for the run process of result summary."""
# Test - output in excel
ResultSummary().run(self.test_raw_data, self.test_rule_file, str(self.parent_path), 'excel', round=2)
excel_file = pd.ExcelFile(self.output_excel_file, engine='openpyxl')
data_sheet_name = 'Summary'
summary = excel_file.parse(data_sheet_name, header=None)
expect_result_file = pd.ExcelFile(str(self.parent_path / '../data/results_summary.xlsx'), engine='openpyxl')
expect_result = expect_result_file.parse(data_sheet_name, header=None)
pd.testing.assert_frame_equal(summary, expect_result)
# Test - output in md
ResultSummary().run(self.test_raw_data, self.test_rule_file, str(self.parent_path), 'md', round=2)
expected_md_file = str(self.parent_path / '../data/results_summary.md')
with open(expected_md_file, 'r') as f:
expect_result = f.read()
with open(self.output_md_file, 'r') as f:
summary = f.read()
assert (summary == expect_result)
# Test - output in html
ResultSummary().run(self.test_raw_data, self.test_rule_file, str(self.parent_path), 'html', round=2)
expected_html_file = str(self.parent_path / '../data/results_summary.html')
with open(expected_html_file, 'r') as f:
expect_result = f.read()
with open(self.output_html_file, 'r') as f:
summary = f.read()
assert (summary == expect_result)


@@ -0,0 +1,26 @@
# SuperBench rules
version: v0.4
superbench:
rules:
kernel_launch:
statistics:
- mean
- p90
- min
- max
aggregate: True
categories: KernelLaunch
metrics:
- kernel-launch/event_overhead
- kernel-launch/wall_overhead
nccl:
statistics: mean
categories: NCCL
metrics:
- nccl-bw/allreduce_8388608_busbw
ib-loopback:
statistics: mean
categories: RDMA
metrics:
- ib-loopback/IB_write_8388608_Avg_\d+
aggregate: ib-loopback/IB_write_.*_Avg_(\d+)


@@ -0,0 +1,70 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Tests for SummaryOp module."""
import unittest
from numpy import NaN, float64
import pandas as pd
from superbench.analyzer import SummaryOp, SummaryType
class TestSummaryOp(unittest.TestCase):
"""Test for Summary Ops."""
def test_rule_op(self):
"""Test for defined rule operators."""
# Test - get_summary_func
# Negative case
assert (not SummaryOp.get_summary_func('fake'))
# Positive case
summary_op = SummaryOp.get_summary_func(SummaryType.MEAN)
assert (summary_op == SummaryOp.mean)
summary_op = SummaryOp.get_summary_func(SummaryType.PENCENTILE)
assert (summary_op == SummaryOp.percentile)
summary_op = SummaryOp.get_summary_func(SummaryType.MIN)
assert (summary_op == SummaryOp.min)
summary_op = SummaryOp.get_summary_func(SummaryType.MAX)
assert (summary_op == SummaryOp.max)
summary_op = SummaryOp.get_summary_func(SummaryType.STD)
assert (summary_op == SummaryOp.std)
summary_op = SummaryOp.get_summary_func(SummaryType.COUNT)
assert (summary_op == SummaryOp.count)
# Test - _check_raw_data_df
# Negative case
empty_data_df = pd.DataFrame()
self.assertRaises(Exception, SummaryOp._check_raw_data_df, empty_data_df)
self.assertRaises(Exception, SummaryOp._check_raw_data_df, None)
data1 = [[1, 2, 3, 4], [4, 5, 6], [7, 8]]
raw_data_df = pd.DataFrame(data1, columns=['a', 'b', 'c', 'd'])
# Test - mean
result = SummaryOp.mean(raw_data_df)
expectedResult = pd.Series([4.0, 5.0, 4.5, 4.0], index=['a', 'b', 'c', 'd'])
pd.testing.assert_series_equal(result, expectedResult)
# Test - min
result = SummaryOp.min(raw_data_df)
expectedResult = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'], dtype=float64)
pd.testing.assert_series_equal(result, expectedResult)
# Test - max
result = SummaryOp.max(raw_data_df)
expectedResult = pd.Series([7, 8, 6, 4], index=['a', 'b', 'c', 'd'], dtype=float64)
pd.testing.assert_series_equal(result, expectedResult)
# Test - std
result = SummaryOp.std(raw_data_df)
print(result)
expectedResult = pd.Series([3.0, 3.0, 2.1213203435596424, NaN], index=['a', 'b', 'c', 'd'], dtype=float64)
pd.testing.assert_series_equal(result, expectedResult)
# Test - count
result = SummaryOp.count(raw_data_df)
print(result)
expectedResult = pd.Series([3, 3, 2, 1], index=['a', 'b', 'c', 'd'])
pd.testing.assert_series_equal(result, expectedResult)
# Test - percentile
result = SummaryOp.percentile(raw_data_df, 50)
print(result)
expectedResult = pd.Series([4.0, 5.0, 4.5, 4.0], index=['a', 'b', 'c', 'd'], dtype=float64)
pd.testing.assert_series_equal(result, expectedResult, check_names=False)
self.assertRaises(Exception, SummaryOp.percentile, 200)


@@ -0,0 +1,86 @@
<h2>KernelLaunch</h2>
<table>
<thead>
<tr>
<th>metric</th>
<th>statistics</th>
<th>values</th>
</tr>
</thead>
<tbody>
<tr>
<td>kernel-launch/event_overhead</td>
<td>mean</td>
<td>0.0097</td>
</tr>
<tr>
<td>kernel-launch/event_overhead</td>
<td>p90</td>
<td>0.006</td>
</tr>
<tr>
<td>kernel-launch/event_overhead</td>
<td>min</td>
<td>0.0055</td>
</tr>
<tr>
<td>kernel-launch/event_overhead</td>
<td>max</td>
<td>0.1</td>
</tr>
<tr>
<td>kernel-launch/wall_overhead</td>
<td>mean</td>
<td>0.01</td>
</tr>
<tr>
<td>kernel-launch/wall_overhead</td>
<td>p90</td>
<td>0.011</td>
</tr>
<tr>
<td>kernel-launch/wall_overhead</td>
<td>min</td>
<td>0.01</td>
</tr>
<tr>
<td>kernel-launch/wall_overhead</td>
<td>max</td>
<td>0.011</td>
</tr>
</tbody>
</table>
<h2>NCCL</h2>
<table>
<thead>
<tr>
<th>metric</th>
<th>statistics</th>
<th>values</th>
</tr>
</thead>
<tbody>
<tr>
<td>nccl-bw/allreduce_8388608_busbw:0</td>
<td>mean</td>
<td>89.51</td>
</tr>
</tbody>
</table>
<h2>RDMA</h2>
<table>
<thead>
<tr>
<th>metric</th>
<th>statistics</th>
<th>values</th>
</tr>
</thead>
<tbody>
<tr>
<td>ib-loopback/IB_write_8388608_Avg_*:0</td>
<td>mean</td>
<td>23925.84</td>
</tr>
</tbody>
</table>


@@ -0,0 +1,22 @@
## KernelLaunch
| metric | statistics | values |
| --- | --- | --- |
| kernel-launch/event_overhead | mean | 0.0097 |
| kernel-launch/event_overhead | p90 | 0.006 |
| kernel-launch/event_overhead | min | 0.0055 |
| kernel-launch/event_overhead | max | 0.1 |
| kernel-launch/wall_overhead | mean | 0.01 |
| kernel-launch/wall_overhead | p90 | 0.011 |
| kernel-launch/wall_overhead | min | 0.01 |
| kernel-launch/wall_overhead | max | 0.011 |
## NCCL
| metric | statistics | values |
| --- | --- | --- |
| nccl-bw/allreduce_8388608_busbw:0 | mean | 89.51 |
## RDMA
| metric | statistics | values |
| --- | --- | --- |
| ib-loopback/IB_write_8388608_Avg_*:0 | mean | 23925.84 |

Binary data
tests/data/results_summary.xlsx Normal file

Binary file not shown.