diff --git a/superbench/analyzer/__init__.py b/superbench/analyzer/__init__.py index e9e74917..f4e27944 100644 --- a/superbench/analyzer/__init__.py +++ b/superbench/analyzer/__init__.py @@ -6,5 +6,7 @@ from superbench.analyzer.rule_base import RuleBase from superbench.analyzer.data_diagnosis import DataDiagnosis from superbench.analyzer.diagnosis_rule_op import RuleOp, DiagnosisRuleType +from superbench.analyzer.summary_op import SummaryOp, SummaryType +from superbench.analyzer.result_summary import ResultSummary -__all__ = ['DataDiagnosis', 'DiagnosisRuleType', 'RuleOp', 'RuleBase'] +__all__ = ['DataDiagnosis', 'DiagnosisRuleType', 'RuleOp', 'RuleBase', 'SummaryOp', 'SummaryType', 'ResultSummary'] diff --git a/superbench/analyzer/data_analysis.py b/superbench/analyzer/data_analysis.py index 7c4113bd..d7ac40f1 100644 --- a/superbench/analyzer/data_analysis.py +++ b/superbench/analyzer/data_analysis.py @@ -7,6 +7,7 @@ import numpy as np import pandas as pd import seaborn as sns import matplotlib.pyplot as plt +import re from superbench.common.utils import logger @@ -210,3 +211,42 @@ def round_significant_decimal_places(df, digit, cols): lambda x: float(format_significant_str % x) if abs(x) < 1 else round(x, digit), na_action='ignore' ) return df + + +def aggregate(raw_data_df, pattern=None): + r"""Aggregate data of multiple ranks or multiple devices. + + By default, aggregate results of multiple ranks like 'metric:\\d+' for most metrics. + For example, aggregate the results of kernel-launch overhead + from 8 GPU devices into one collection. + If pattern is given, use pattern to match metric and replace matched part in metric to * + to generate a aggregated metric name and then aggpregate these metrics' data. + + Args: + raw_data_df (DataFrame): raw data + + Returns: + DataFrame: the dataframe of aggregated data + """ + try: + metric_store = {} + metrics = list(raw_data_df.columns) + for metric in metrics: + short = metric.strip(metric.split(':')[-1]).strip(':') + if pattern: + match = re.search(pattern, metric) + if match: + metric_in_list = list(metric) + for i in range(1, len(match.groups()) + 1): + metric_in_list[match.start(i):match.end(i)] = '*' + short = ''.join(metric_in_list) + if short not in metric_store: + metric_store[short] = [] + metric_store[short].extend(raw_data_df[metric].tolist()) + df = pd.DataFrame() + for short in metric_store: + df = pd.concat([df, pd.DataFrame(metric_store[short], columns=[short])], axis=1) + return df + except Exception as e: + logger.error('DataAnalyzer: aggregate failed, msg: {}'.format(str(e))) + return None diff --git a/superbench/analyzer/data_diagnosis.py b/superbench/analyzer/data_diagnosis.py index 870bcfee..66836c15 100644 --- a/superbench/analyzer/data_diagnosis.py +++ b/superbench/analyzer/data_diagnosis.py @@ -256,7 +256,7 @@ class DataDiagnosis(RuleBase): except Exception as e: logger.error('DataDiagnosis: output json data failed, msg: {}'.format(str(e))) - def gen_md_lines(self, data_not_accept_df, rules, round): + def generate_md_lines(self, data_not_accept_df, rules, round): """Convert DataFrame into markdown lines. 
Args: @@ -290,7 +290,7 @@ class DataDiagnosis(RuleBase): data_not_accept_df = data_analysis.round_significant_decimal_places( data_not_accept_df, round, [metric] ) - lines = file_handler.gen_md_table(data_not_accept_df, header) + lines = file_handler.generate_md_table(data_not_accept_df, header) return lines def run(self, raw_data_file, rule_file, baseline_file, output_dir, output_format='excel', round=2): @@ -319,7 +319,7 @@ class DataDiagnosis(RuleBase): output_path = str(Path(output_dir) / 'diagnosis_summary.jsonl') self.output_diagnosis_in_json(data_not_accept_df, output_path) elif output_format == 'md' or output_format == 'html': - lines = self.gen_md_lines(data_not_accept_df, self._sb_rules, round) + lines = self.generate_md_lines(data_not_accept_df, self._sb_rules, round) if output_format == 'md': output_path = str(Path(output_dir) / 'diagnosis_summary.md') file_handler.output_lines_in_md(lines, output_path) diff --git a/superbench/analyzer/file_handler.py b/superbench/analyzer/file_handler.py index aa6d260e..cc26ffc2 100644 --- a/superbench/analyzer/file_handler.py +++ b/superbench/analyzer/file_handler.py @@ -10,6 +10,7 @@ import json import jsonlines import pandas as pd import yaml +from openpyxl.styles import Alignment import markdown from superbench.common.utils import logger @@ -158,7 +159,7 @@ def output_excel_data_not_accept(writer, data_not_accept_df, rules): logger.warning('FileHandler: excel_data_output - data_not_accept_df is not DataFrame.') -def gen_md_table(data_df, header): +def generate_md_table(data_df, header): """Generate table text in markdown format. | header[0] | header[1] | @@ -221,3 +222,29 @@ def output_lines_in_html(lines, output_path): f.writelines(html_str) except Exception as e: logger.error('FileHandler: html_data_output - {}'.format(str(e))) + + +def merge_column_in_excel(ws, row, column): + """Merge cells in the selected index of column with continuous same contents. + + Args: + ws (worksheet): the worksheet of the excel to process + row (int): the max row index to merge + column (int): the index of the column to merge + """ + dict_from = {} + aligncenter = Alignment(horizontal='center', vertical='center') + # record continuous row index (start, end) with the same content + for row_index in range(1, row + 1): + value = str(ws.cell(row_index, column).value) + if value not in dict_from: + dict_from[value] = [row_index, row_index] + else: + dict_from[value][1] = dict_from[value][1] + 1 + # merge the cells + for value in dict_from.values(): + if value[0] != value[1]: + ws.merge_cells(start_row=value[0], start_column=column, end_row=value[1], end_column=column) + # align center for merged cells + for i in range(1, row + 1): + ws.cell(row=i, column=column).alignment = aligncenter diff --git a/superbench/analyzer/result_summary.py b/superbench/analyzer/result_summary.py new file mode 100644 index 00000000..be1b5ab4 --- /dev/null +++ b/superbench/analyzer/result_summary.py @@ -0,0 +1,251 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
+ +"""A module for Result Summary.""" + +import re +from pathlib import Path + +import pandas as pd + +from superbench.common.utils import logger +from superbench.analyzer import file_handler +from superbench.analyzer.summary_op import SummaryOp, SummaryType +from superbench.analyzer import RuleBase +from superbench.analyzer import data_analysis + + +class ResultSummary(RuleBase): + """Result summary class.""" + def _check_rules(self, rule, name): + """Check whether the formart of the rule is valid. + + Args: + rule (dict): the rule + name (str): the rule name + + Returns: + dict: the rule for the metric + """ + # check if rule is supported + super()._check_and_format_rules(rule, name) + if 'metrics' not in rule: + logger.log_and_raise(exception=Exception, msg='{} lack of metrics'.format(name)) + if 'statistics' not in rule: + logger.log_and_raise(exception=Exception, msg='{} lack of function'.format(name)) + # convert single statistic str to list + if not isinstance(rule['statistics'], list): + rule['statistics'] = [rule['statistics']] + # check statistics format, should be SummaryType or p\d\d? + for function in rule['statistics']: + try: + if not (re.fullmatch(r'p\d\d?', function) or isinstance(SummaryType(function), SummaryType)): + logger.log_and_raise( + exception=Exception, msg='{} has invalid statistics name {}'.format(name, function) + ) + except Exception: + logger.log_and_raise( + exception=Exception, msg='{} has invalid statistics name {}'.format(name, function) + ) + # check aggregate format, should be None or bool or pattern in regex with () group + if 'aggregate' in rule and not isinstance(rule['aggregate'], + bool) and not re.search(r'\(.*\)', rule['aggregate']): + logger.log_and_raise(exception=Exception, msg='{} aggregate must be bool type'.format(name)) + return rule + + def _parse_rules(self, rules): + """Parse the rules for result summary. + + Args: + rules (dict): rules from rule yaml file + + Returns: + bool: return True if successfully get all rules, otherwise False. + """ + try: + if not rules: + logger.error('ResultSummary: get rules failed') + return False + self._sb_rules = {} + self._enable_metrics = set() + benchmark_rules = rules['superbench']['rules'] + for rule in benchmark_rules: + benchmark_rules[rule] = self._check_rules(benchmark_rules[rule], rule) + self._sb_rules[rule] = {} + self._sb_rules[rule]['name'] = rule + self._sb_rules[rule]['categories'] = benchmark_rules[rule]['categories'] + self._sb_rules[rule]['metrics'] = {} + self._sb_rules[rule]['statistics'] = benchmark_rules[rule]['statistics'] + self._sb_rules[rule][ + 'aggregate'] = benchmark_rules[rule]['aggregate'] if 'aggregate' in benchmark_rules[rule] else False + super()._get_metrics(rule, benchmark_rules) + return True + except Exception as e: + logger.error('ResultSummary: parse rules failed - {}'.format(str(e))) + return False + + def _format_summary_of_rule(self, category, summary_df_of_rule): + """Format summary_df of a rule info list of lines. 
+
+        Args:
+            category (str): category in the rule
+            summary_df_of_rule (DataFrame): summary df of a rule, the columns are metrics, the index is statistics
+        Returns:
+            list: list of summary lines like [category, metric, statistic, value]
+        """
+        summary = []
+        metrics = summary_df_of_rule.columns
+        for metric in metrics:
+            for statistic in summary_df_of_rule.index:
+                summary.append([category, metric, statistic, summary_df_of_rule.loc[statistic, metric]])
+        return summary
+
+    def _merge_summary(self, summary):
+        """Merge summary of multiple rules into DataFrame.
+
+        Args:
+            summary (dict): summary dict, the keys are categories, the values are summary lines for the category
+
+        Returns:
+            DataFrame: summary of all rules
+        """
+        summary_df = pd.DataFrame()
+        for category in summary:
+            for i in range(len(summary[category])):
+                summary_df = summary_df.append([summary[category][i]], ignore_index=True)
+        return summary_df
+
+    def _generate_summary(self, round):
+        r"""Generate summary dict of all rules.
+
+        For each rule, aggregate the data by user-defined pattern or ranks (:\\d+), calculate
+        the list of statistics of aggregated metrics, then format the summary in {category, lines}.
+
+        Args:
+            round (int): the number of decimal digits
+
+        Returns:
+            dict: summary dict, the keys are categories, the values are summary lines for the category
+        """
+        summary = {}
+        for rule in self._sb_rules:
+            metrics = list(self._sb_rules[rule]['metrics'].keys())
+            category = self._sb_rules[rule]['categories']
+            data_df_of_rule = self._raw_data_df[metrics]
+            if self._sb_rules[rule]['aggregate']:
+                # if aggregate is True, aggregate over ranks
+                if self._sb_rules[rule]['aggregate'] is True:
+                    data_df_of_rule = data_analysis.aggregate(data_df_of_rule)
+                # if aggregate is a regex pattern, aggregate according to the pattern
+                else:
+                    data_df_of_rule = data_analysis.aggregate(data_df_of_rule, self._sb_rules[rule]['aggregate'])
+            statistics = self._sb_rules[rule]['statistics']
+            summary_df_of_rule = pd.DataFrame(columns=sorted(data_df_of_rule.columns))
+            for statistic_name in statistics:
+                # get SummaryOp and calculate statistics
+                # if statistic_name is 'p\d\d?', SummaryOp should be percentile
+                if str.startswith(statistic_name, 'p'):
+                    rule_op = SummaryOp.get_summary_func(SummaryType('percentile'))
+                    val = int(statistic_name.strip('p'))
+                    summary_df_of_rule.loc[statistic_name] = rule_op(data_df_of_rule, val)
+                else:
+                    rule_op = SummaryOp.get_summary_func(SummaryType(statistic_name))
+                    summary_df_of_rule.loc[statistic_name] = rule_op(data_df_of_rule)
+            # format values to n significant decimal digits
+            if round and isinstance(round, int):
+                summary_df_of_rule = data_analysis.round_significant_decimal_places(
+                    summary_df_of_rule, round, list(summary_df_of_rule.columns)
+                )
+            # format summary_df of a rule to list of lines
+            summary_lines_of_rule = self._format_summary_of_rule(category, summary_df_of_rule)
+            summary[category] = summary_lines_of_rule
+
+        return summary
+
+    def generate_md_lines(self, summary):
+        """Generate text in markdown format.
+ + Use category to be the 2nd-header, use tables to show the data + + Args: + summary (dict): summary dict, the keys are categories, the values are summary lines for the category + + Returns: + list: lines in markdown format + """ + lines = [] + for category in summary: + lines.append('## {}\n'.format(category)) + summary_df = pd.DataFrame(summary[category]) + summary_df = summary_df.drop(columns=0, axis=1) + header = ['metric', 'statistics', 'values'] + table_lines = file_handler.generate_md_table(summary_df, header) + lines.extend(table_lines) + lines.append('\n') + return lines + + def output_summary_in_excel(self, raw_data_df, summary, output_path): + """Output result summary in excel foramt. + + Args: + raw_data_df (DataFrame): the DataFrame of raw data df + summary (DataFrame): the DataFrame of summary + output_path (str): the path of output file + """ + try: + writer = pd.ExcelWriter(output_path, engine='openpyxl') + # check whether writer is valiad + if not isinstance(writer, pd.ExcelWriter): + logger.error('ResultSummary: excel_data_output - invalid file path.') + return + # output the raw data in 'Raw Data' sheet + file_handler.output_excel_raw_data(writer, raw_data_df, 'Raw Data') + # output the result summary in 'Summary' sheet + if isinstance(summary, pd.DataFrame) and not summary.empty: + summary.to_excel(writer, 'Summary', index=False, header=False) + worksheet = writer.sheets['Summary'] + row = worksheet.max_row + # merge cells in 'category' column with the same category + file_handler.merge_column_in_excel(worksheet, row, 1) + else: + logger.error('ResultSummary: excel_data_output - summary is empty.') + writer.save() + except Exception as e: + logger.error('ResultSummary: excel_data_output - {}'.format(str(e))) + + def run(self, raw_data_file, rule_file, output_dir, output_format, round=2): + """Run the main process of result summary. + + Args: + raw_data_file (str): the path of raw data jsonl file. + rule_file (str): The path of baseline yaml file + output_dir (str): the directory of output file + output_format (str): the format of the output, 'excel' or 'md' or 'html' + round (int): the number of decimal digits + """ + try: + rules = self._preprocess(raw_data_file, rule_file) + # parse rules for result summary + if not self._parse_rules(rules): + return + # generate result summary for each category + summary = self._generate_summary(round) + # output result summary to file + output_path = '' + if output_format == 'excel': + output_path = str(Path(output_dir) / 'results_summary.xlsx') + summary_df = self._merge_summary(summary) + self.output_summary_in_excel(self._raw_data_df, summary_df, output_path) + elif output_format == 'md': + output_path = str(Path(output_dir) / 'results_summary.md') + lines = self.generate_md_lines(summary) + file_handler.output_lines_in_md(lines, output_path) + elif output_format == 'html': + output_path = str(Path(output_dir) / 'results_summary.html') + lines = self.generate_md_lines(summary) + file_handler.output_lines_in_html(lines, output_path) + else: + logger.error('ResultSummary: output failed - unsupported output format') + logger.info('ResultSummary: Output results to {}'.format(output_path)) + except Exception as e: + logger.error('ResultSummary: run failed - {}'.format(str(e))) diff --git a/superbench/analyzer/summary_op.py b/superbench/analyzer/summary_op.py new file mode 100644 index 00000000..b4981fe6 --- /dev/null +++ b/superbench/analyzer/summary_op.py @@ -0,0 +1,157 @@ +# Copyright (c) Microsoft Corporation. 
+# Licensed under the MIT license.
+
+"""A module for result summary ops."""
+
+from typing import Dict, Callable
+import numbers
+
+from superbench.benchmarks.context import Enum
+from superbench.common.utils import logger
+
+
+class SummaryType(Enum):
+    """The Enum class representing different summary ops."""
+
+    MEAN = 'mean'
+    PENCENTILE = 'percentile'
+    MIN = 'min'
+    MAX = 'max'
+    STD = 'std'
+    COUNT = 'count'
+
+
+class SummaryOp:
+    """SummaryOp class to maintain all summary functions."""
+
+    functions: Dict[SummaryType, Callable] = dict()
+
+    @classmethod
+    def add_summary_func(cls, summary_type):
+        """Add summary function.
+
+        Args:
+            summary_type (SummaryType): The type of summary function.
+
+        Returns:
+            decorator (Callable): return the decorator to add the summary function.
+        """
+        def decorator(func):
+            cls.functions[summary_type] = func
+            return func
+
+        return decorator
+
+    @classmethod
+    def get_summary_func(cls, summary_type):
+        """Get summary function by summary_type.
+
+        Args:
+            summary_type (SummaryType): The type of summary function.
+
+        Returns:
+            func (Callable): summary function, None means invalid summary type.
+        """
+        if summary_type in cls.functions:
+            return cls.functions[summary_type]
+
+        return None
+
+    @staticmethod
+    def _check_raw_data_df(raw_data_df):
+        """Check whether raw_data_df is empty or None.
+
+        Args:
+            raw_data_df (DataFrame): raw data df
+        """
+        if raw_data_df is None or raw_data_df.empty:
+            logger.log_and_raise(exception=Exception, msg='empty data in summary op')
+
+    @staticmethod
+    def mean(raw_data_df):
+        """Mean of raw_data_df.
+
+        Args:
+            raw_data_df (DataFrame): raw data df
+
+        Returns:
+            Series: mean of raw_data_df
+        """
+        SummaryOp._check_raw_data_df(raw_data_df)
+        return raw_data_df.mean()
+
+    @staticmethod
+    def percentile(raw_data_df, val):
+        """Percentile(val) of raw_data_df.
+
+        Args:
+            raw_data_df (DataFrame): raw data df
+            val (numbers.Number): the percentile value, 1-99
+
+        Returns:
+            Series: the val-th percentile of raw_data_df
+        """
+        SummaryOp._check_raw_data_df(raw_data_df)
+        if not isinstance(val, numbers.Number) or val < 1 or val > 99:
+            logger.log_and_raise(exception=Exception, msg='val in percentile should be 1-99')
+        return raw_data_df.quantile(val / 100)
+
+    @staticmethod
+    def min(raw_data_df):
+        """The min of values for each column in raw_data_df.
+
+        Args:
+            raw_data_df (DataFrame): raw data df
+
+        Returns:
+            Series: min of raw_data_df
+        """
+        SummaryOp._check_raw_data_df(raw_data_df)
+        return raw_data_df.min()
+
+    @staticmethod
+    def max(raw_data_df):
+        """The max of values for each column in raw_data_df.
+
+        Args:
+            raw_data_df (DataFrame): raw data df
+
+        Returns:
+            Series: max of raw_data_df
+        """
+        SummaryOp._check_raw_data_df(raw_data_df)
+        return raw_data_df.max()
+
+    @staticmethod
+    def std(raw_data_df):
+        """The std of values for each column in raw_data_df.
+
+        Args:
+            raw_data_df (DataFrame): raw data df
+
+        Returns:
+            Series: std of raw_data_df
+        """
+        SummaryOp._check_raw_data_df(raw_data_df)
+        return raw_data_df.std(axis=0, skipna=True)
+
+    @staticmethod
+    def count(raw_data_df):
+        """The number of values for each column in raw_data_df.
+ + Args: + raw_data_df (DataFrame): raw data df + + Returns: + Series: count of raw_data_df + """ + SummaryOp._check_raw_data_df(raw_data_df) + return raw_data_df.count() + + +SummaryOp.add_summary_func(SummaryType.MEAN)(SummaryOp.mean) +SummaryOp.add_summary_func(SummaryType.PENCENTILE)(SummaryOp.percentile) +SummaryOp.add_summary_func(SummaryType.MIN)(SummaryOp.min) +SummaryOp.add_summary_func(SummaryType.MAX)(SummaryOp.max) +SummaryOp.add_summary_func(SummaryType.STD)(SummaryOp.std) +SummaryOp.add_summary_func(SummaryType.COUNT)(SummaryOp.count) diff --git a/tests/analyzer/test_data_analysis.py b/tests/analyzer/test_data_analysis.py index 43b68335..b6e0b66f 100644 --- a/tests/analyzer/test_data_analysis.py +++ b/tests/analyzer/test_data_analysis.py @@ -73,3 +73,10 @@ class TestDataAnalysis(unittest.TestCase): pd.testing.assert_frame_equal(df, pd.DataFrame([[0.0046, 500.6789], [1.53, 100.7424]], columns=['a', 'b'])) df = data_analysis.round_significant_decimal_places(df, 2, 'b') pd.testing.assert_frame_equal(df, pd.DataFrame([[0.0046, 500.68], [1.53, 100.74]], columns=['a', 'b'])) + # Test aggregate + df = pd.DataFrame([[1, 2], [3, 4]], columns=['a:0', 'a:1']) + df = data_analysis.aggregate(df) + pd.testing.assert_frame_equal(df, pd.DataFrame({'a': [1, 3, 2, 4]})) + df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=['ib_1_a', 'ib_2_a', 'ib_1_b', 'ib_2_b']) + df = data_analysis.aggregate(df, pattern='ib_(.)_.') + pd.testing.assert_frame_equal(df, pd.DataFrame({'ib_*_a': [1, 5, 2, 6], 'ib_*_b': [3, 7, 4, 8]})) diff --git a/tests/analyzer/test_data_diagnosis.py b/tests/analyzer/test_data_diagnosis.py index 151faee1..686978e6 100644 --- a/tests/analyzer/test_data_diagnosis.py +++ b/tests/analyzer/test_data_diagnosis.py @@ -196,8 +196,8 @@ class TestDataDiagnosis(unittest.TestCase): assert ('Category' in line) assert ('Defective Details' in line) assert ('Index' in line) - # Test - gen_md_lines - lines = diag1.gen_md_lines(data_not_accept_df, diag1._sb_rules, 2) + # Test - generate_md_lines + lines = diag1.generate_md_lines(data_not_accept_df, diag1._sb_rules, 2) assert (lines) expected_md_file = str(self.parent_path / '../data/diagnosis_summary.md') with open(expected_md_file, 'r') as f: diff --git a/tests/analyzer/test_file_handler.py b/tests/analyzer/test_file_handler.py index ea43da05..51cec89a 100644 --- a/tests/analyzer/test_file_handler.py +++ b/tests/analyzer/test_file_handler.py @@ -48,8 +48,8 @@ class TestFileHandler(unittest.TestCase): assert (not baseline) baseline = file_handler.read_baseline(test_baseline_file) assert (baseline) - # Test - gen_md_table + # Test - generate_md_table data_df = pd.DataFrame([[1, 2], [3, 4]]) - lines = file_handler.gen_md_table(data_df, header=['A', 'B']) + lines = file_handler.generate_md_table(data_df, header=['A', 'B']) expected_lines = ['| A | B |\n', '| --- | --- |\n', '| 1 | 2 |\n', '| 3 | 4 |\n'] assert (lines == expected_lines) diff --git a/tests/analyzer/test_result_summary.py b/tests/analyzer/test_result_summary.py new file mode 100644 index 00000000..c38ee1e5 --- /dev/null +++ b/tests/analyzer/test_result_summary.py @@ -0,0 +1,149 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +"""Tests for ResultSummary module.""" + +import unittest +import yaml +from pathlib import Path + +import pandas as pd + +from superbench.analyzer import ResultSummary +import superbench.analyzer.file_handler as file_handler + + +class TestResultSummary(unittest.TestCase): + """Test for ResultSummary class.""" + def setUp(self): + """Method called to prepare the test fixture.""" + self.parent_path = Path(__file__).parent + self.output_excel_file = str(self.parent_path / 'results_summary.xlsx') + self.output_md_file = str(self.parent_path / 'results_summary.md') + self.output_html_file = str(self.parent_path / 'results_summary.html') + self.test_rule_file_fake = str(self.parent_path / 'test_rules_fake.yaml') + self.test_raw_data = str(self.parent_path / 'test_results.jsonl') + self.test_rule_file = str(self.parent_path / 'test_summary_rules.yaml') + + def tearDown(self): + """Method called after the test method has been called and the result recorded.""" + for file in [self.output_excel_file, self.test_rule_file_fake, self.output_md_file, self.output_html_file]: + p = Path(file) + if p.is_file(): + p.unlink() + + def test_result_summary(self): + """Test result summary class.""" + rs1 = ResultSummary() + rs1._raw_data_df = file_handler.read_raw_data(self.test_raw_data) + rs1._benchmark_metrics_dict = rs1._get_metrics_by_benchmarks(list(rs1._raw_data_df)) + # Test - _check_rules + # Negative case + false_rules = [ + { + 'categories': 'KernelLaunch', + 'metrics': ['kernel-launch/event_overhead:\\d+'] + }, { + 'categories': 'KernelLaunch', + 'statistics': 'abb', + 'metrics': ['kernel-launch/event_overhead:\\d+'] + }, { + 'categories': 'KernelLaunch', + 'statistics': 'mean', + 'metrics': ['kernel-launch/event_overhead:\\d+'], + 'aggregate': 'abb' + } + ] + metric = 'kernel-launch/event_overhead:0' + for rules in false_rules: + self.assertRaises(Exception, rs1._check_rules, rules, metric) + # Positive case + true_rules = [ + { + 'categories': 'KernelLaunch', + 'statistics': 'mean', + 'metrics': ['kernel-launch/event_overhead:\\d+'], + 'aggregate': True + }, + { + 'categories': 'KernelLaunch', + 'statistics': ['mean', 'p50'], + 'metrics': ['kernel-launch/event_overhead:\\d+'] + }, + { + 'categories': 'KernelLaunch', + 'statistics': 'mean', + 'metrics': ['kernel-launch/event_overhead:\\d+'], + 'aggregate': 'kernel-launch/event_overhead(:\\d+)' + }, + ] + for rules in true_rules: + assert (rs1._check_rules(rules, metric)) + + # Test - _parse_rules + # Negative case + rs2 = ResultSummary() + fake_rules = file_handler.read_rules(self.test_rule_file_fake) + assert (rs2._parse_rules(fake_rules) is False) + rs2._raw_data_df = file_handler.read_raw_data(self.test_raw_data) + rs2._benchmark_metrics_dict = rs2._get_metrics_by_benchmarks(list(rs2._raw_data_df)) + p = Path(self.test_rule_file) + with p.open() as f: + rules = yaml.load(f, Loader=yaml.SafeLoader) + rules['superbench']['rules']['fake'] = false_rules[0] + with open(self.test_rule_file_fake, 'w') as f: + yaml.dump(rules, f) + assert (rs1._parse_rules(fake_rules) is False) + # Positive case + rules = file_handler.read_rules(self.test_rule_file) + assert (rs1._parse_rules(rules)) + + # Test - _generate_summary + summary = rs1._generate_summary(round=2) + assert (len(summary) == 3) + + # Test - _merge_summary + expected_summary_merge = [ + ['KernelLaunch', 'kernel-launch/event_overhead', 'mean', 0.0097], + ['KernelLaunch', 'kernel-launch/event_overhead', 'p90', 0.006], + ['KernelLaunch', 'kernel-launch/event_overhead', 'min', 0.0055], + 
['KernelLaunch', 'kernel-launch/event_overhead', 'max', 0.1], + ['KernelLaunch', 'kernel-launch/wall_overhead', 'mean', 0.01], + ['KernelLaunch', 'kernel-launch/wall_overhead', 'p90', 0.011], + ['KernelLaunch', 'kernel-launch/wall_overhead', 'min', 0.01], + ['KernelLaunch', 'kernel-launch/wall_overhead', 'max', 0.011], + ['NCCL', 'nccl-bw/allreduce_8388608_busbw:0', 'mean', 89.51], + ['RDMA', 'ib-loopback/IB_write_8388608_Avg_*:0', 'mean', 23925.84] + ] + expected_summary_merge_df = pd.DataFrame(expected_summary_merge) + summary_merge_df = rs1._merge_summary(summary) + pd.testing.assert_frame_equal(expected_summary_merge_df, summary_merge_df) + + def test_result_summary_run(self): + """Test for the run process of result summary.""" + # Test - output in excel + ResultSummary().run(self.test_raw_data, self.test_rule_file, str(self.parent_path), 'excel', round=2) + excel_file = pd.ExcelFile(self.output_excel_file, engine='openpyxl') + data_sheet_name = 'Summary' + summary = excel_file.parse(data_sheet_name, header=None) + expect_result_file = pd.ExcelFile(str(self.parent_path / '../data/results_summary.xlsx'), engine='openpyxl') + expect_result = expect_result_file.parse(data_sheet_name, header=None) + pd.testing.assert_frame_equal(summary, expect_result) + + # Test - output in md + ResultSummary().run(self.test_raw_data, self.test_rule_file, str(self.parent_path), 'md', round=2) + expected_md_file = str(self.parent_path / '../data/results_summary.md') + with open(expected_md_file, 'r') as f: + expect_result = f.read() + with open(self.output_md_file, 'r') as f: + summary = f.read() + assert (summary == expect_result) + + # Test - output in html + ResultSummary().run(self.test_raw_data, self.test_rule_file, str(self.parent_path), 'html', round=2) + expected_html_file = str(self.parent_path / '../data/results_summary.html') + with open(expected_html_file, 'r') as f: + expect_result = f.read() + with open(self.output_html_file, 'r') as f: + summary = f.read() + assert (summary == expect_result) diff --git a/tests/analyzer/test_summary_rules.yaml b/tests/analyzer/test_summary_rules.yaml new file mode 100644 index 00000000..34a6510e --- /dev/null +++ b/tests/analyzer/test_summary_rules.yaml @@ -0,0 +1,26 @@ +# SuperBench rules +version: v0.4 +superbench: + rules: + kernel_launch: + statistics: + - mean + - p90 + - min + - max + aggregate: True + categories: KernelLaunch + metrics: + - kernel-launch/event_overhead + - kernel-launch/wall_overhead + nccl: + statistics: mean + categories: NCCL + metrics: + - nccl-bw/allreduce_8388608_busbw + ib-loopback: + statistics: mean + categories: RDMA + metrics: + - ib-loopback/IB_write_8388608_Avg_\d+ + aggregate: ib-loopback/IB_write_.*_Avg_(\d+) diff --git a/tests/analyzer/test_summaryop.py b/tests/analyzer/test_summaryop.py new file mode 100644 index 00000000..3b105444 --- /dev/null +++ b/tests/analyzer/test_summaryop.py @@ -0,0 +1,70 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +"""Tests for SummaryOp module.""" + +import unittest +from numpy import NaN, float64 + +import pandas as pd + +from superbench.analyzer import SummaryOp, SummaryType + + +class TestSummaryOp(unittest.TestCase): + """Test for Summary Ops.""" + def test_rule_op(self): + """Test for defined rule operators.""" + # Test - get_rule_func + # Negative case + assert (not SummaryOp.get_summary_func('fake')) + # Positive case + summary_op = SummaryOp.get_summary_func(SummaryType.MEAN) + assert (summary_op == SummaryOp.mean) + summary_op = SummaryOp.get_summary_func(SummaryType.PENCENTILE) + assert (summary_op == SummaryOp.percentile) + summary_op = SummaryOp.get_summary_func(SummaryType.MIN) + assert (summary_op == SummaryOp.min) + summary_op = SummaryOp.get_summary_func(SummaryType.MAX) + assert (summary_op == SummaryOp.max) + summary_op = SummaryOp.get_summary_func(SummaryType.STD) + assert (summary_op == SummaryOp.std) + summary_op = SummaryOp.get_summary_func(SummaryType.COUNT) + assert (summary_op == SummaryOp.count) + + # Test - _check_raw_data_Df + # Negative case + empty_data_df = pd.DataFrame() + self.assertRaises(Exception, SummaryOp._check_raw_data_df, empty_data_df) + self.assertRaises(Exception, SummaryOp._check_raw_data_df, None) + + data1 = [[1, 2, 3, 4], [4, 5, 6], [7, 8]] + raw_data_df = pd.DataFrame(data1, columns=['a', 'b', 'c', 'd']) + # Test - mean + result = SummaryOp.mean(raw_data_df) + expectedResult = pd.Series([4.0, 5.0, 4.5, 4.0], index=['a', 'b', 'c', 'd']) + pd.testing.assert_series_equal(result, expectedResult) + # Test - min + result = SummaryOp.min(raw_data_df) + expectedResult = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'], dtype=float64) + pd.testing.assert_series_equal(result, expectedResult) + # Test - max + result = SummaryOp.max(raw_data_df) + expectedResult = pd.Series([7, 8, 6, 4], index=['a', 'b', 'c', 'd'], dtype=float64) + pd.testing.assert_series_equal(result, expectedResult) + # Test - std + result = SummaryOp.std(raw_data_df) + print(result) + expectedResult = pd.Series([3.0, 3.0, 2.1213203435596424, NaN], index=['a', 'b', 'c', 'd'], dtype=float64) + pd.testing.assert_series_equal(result, expectedResult) + # Test - count + result = SummaryOp.count(raw_data_df) + print(result) + expectedResult = pd.Series([3, 3, 2, 1], index=['a', 'b', 'c', 'd']) + pd.testing.assert_series_equal(result, expectedResult) + # Test - pencentile + result = SummaryOp.percentile(raw_data_df, 50) + print(result) + expectedResult = pd.Series([4.0, 5.0, 4.5, 4.0], index=['a', 'b', 'c', 'd'], dtype=float64) + pd.testing.assert_series_equal(result, expectedResult, check_names=False) + self.assertRaises(Exception, SummaryOp.percentile, 200) diff --git a/tests/data/results_summary.html b/tests/data/results_summary.html new file mode 100644 index 00000000..ace0266a --- /dev/null +++ b/tests/data/results_summary.html @@ -0,0 +1,86 @@ +

[HTML body of tests/data/results_summary.html garbled in extraction: the file contains the rendered summary tables for KernelLaunch, NCCL, and RDMA, with the same rows as tests/data/results_summary.md below.]
\ No newline at end of file diff --git a/tests/data/results_summary.md b/tests/data/results_summary.md new file mode 100644 index 00000000..2341e9e6 --- /dev/null +++ b/tests/data/results_summary.md @@ -0,0 +1,22 @@ +## KernelLaunch +| metric | statistics | values | +| --- | --- | --- | +| kernel-launch/event_overhead | mean | 0.0097 | +| kernel-launch/event_overhead | p90 | 0.006 | +| kernel-launch/event_overhead | min | 0.0055 | +| kernel-launch/event_overhead | max | 0.1 | +| kernel-launch/wall_overhead | mean | 0.01 | +| kernel-launch/wall_overhead | p90 | 0.011 | +| kernel-launch/wall_overhead | min | 0.01 | +| kernel-launch/wall_overhead | max | 0.011 | + +## NCCL +| metric | statistics | values | +| --- | --- | --- | +| nccl-bw/allreduce_8388608_busbw:0 | mean | 89.51 | + +## RDMA +| metric | statistics | values | +| --- | --- | --- | +| ib-loopback/IB_write_8388608_Avg_*:0 | mean | 23925.84 | + diff --git a/tests/data/results_summary.xlsx b/tests/data/results_summary.xlsx new file mode 100644 index 00000000..08ad4945 Binary files /dev/null and b/tests/data/results_summary.xlsx differ
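For reference, a minimal usage sketch of the pieces added in this PR, assuming the package is installed. The aggregate() calls mirror the cases in tests/analyzer/test_data_analysis.py; the file paths passed to ResultSummary.run() ('outputs/results-summary.jsonl', 'rules/summary-rules.yaml', 'outputs') are hypothetical placeholders, not files in the repo.

import pandas as pd

from superbench.analyzer import ResultSummary
from superbench.analyzer import data_analysis

# aggregate(): merge per-rank columns such as 'a:0' and 'a:1' into a single 'a' column.
df = pd.DataFrame([[1, 2], [3, 4]], columns=['a:0', 'a:1'])
print(data_analysis.aggregate(df))  # one column 'a' with values [1, 3, 2, 4]

# aggregate() with a pattern: the regex group is replaced by '*' to build the merged metric name.
df = pd.DataFrame([[1, 2], [5, 6]], columns=['ib_1_a', 'ib_2_a'])
print(data_analysis.aggregate(df, pattern='ib_(.)_.'))  # one column 'ib_*_a' with [1, 5, 2, 6]

# ResultSummary.run(): read raw data and a summary rule file, then write
# results_summary.md into the output directory (paths below are hypothetical).
ResultSummary().run(
    'outputs/results-summary.jsonl',  # raw data jsonl (hypothetical path)
    'rules/summary-rules.yaml',       # rule yaml like tests/analyzer/test_summary_rules.yaml (hypothetical path)
    'outputs',                        # output directory
    'md',                             # output format: 'excel', 'md' or 'html'
    round=2,
)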