Analyzer: Add Feature - Output results of all nodes in data diagnosis (#336)

**Description**
Output results of all nodes in data diagnosis.
This commit is contained in:
Yuting Jiang 2022-04-10 18:57:15 +08:00 коммит произвёл GitHub
Родитель 56c9a711a8
Коммит 55b0f9d239
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 239 добавлений и 9 удалений

Просмотреть файл

@ -7,6 +7,7 @@ from pathlib import Path
import json
import pandas as pd
import numpy as np
from superbench.common.utils import logger
from superbench.analyzer.diagnosis_rule_op import RuleOp, DiagnosisRuleType
@ -209,6 +210,48 @@ class DataDiagnosis(RuleBase):
logger.error('DataDiagnosis: run diagnosis rules failed, message: {}'.format(str(e)))
return data_not_accept_df, label_df
def output_all_nodes_results(self, raw_data_df, data_not_accept_df):
    """Output diagnosis results of all nodes.

    The enabled metrics of every node are taken from raw_data_df and extended with the
    diagnosis columns. Nodes that do not appear in data_not_accept_df are marked as accepted
    with zero issues. The caller's data_not_accept_df is not modified.

    Args:
        raw_data_df (DataFrame): raw data
        data_not_accept_df (DataFrame): defective nodes' detailed information

    Returns:
        DataFrame: all nodes' detailed information including ['Accept','#Issues','Category','Issue_Details']
    """
    append_columns = ['Accept', '#Issues', 'Category', 'Issue_Details']
    all_data_df = (raw_data_df[self._enable_metrics]).astype('float64')

    if data_not_accept_df.shape[0] == 0:
        # No defective node at all: every node is accepted.
        all_data_df['Accept'] = True
        all_data_df['#Issues'] = 0
        all_data_df['Category'] = None
        all_data_df['Issue_Details'] = None
    else:
        # Work on a copy so the helper columns added below do not leak into the caller's DataFrame.
        defective_df = data_not_accept_df.copy()
        defective_df['Accept'] = False
        # 'Defective Details' is a comma-separated list, one entry per issue.
        defective_df['#Issues'] = defective_df['Defective Details'].map(lambda details: len(details.split(',')))
        defective_df = defective_df.rename(columns={'Defective Details': 'Issue_Details'})
        for column in append_columns:
            if column not in defective_df:
                logger.warning(
                    'DataDiagnosis: output_all_nodes_results - column {} not found in data_not_accept_df.'.format(
                        column
                    )
                )
                all_data_df[column] = None
            else:
                # Left merge on the node index keeps every node; accepted nodes get NaN here.
                all_data_df = all_data_df.merge(
                    defective_df[[column]], left_index=True, right_index=True, how='left'
                )
        # Nodes absent from the defective set are accepted and have no issues.
        all_data_df['Accept'] = all_data_df['Accept'].replace(np.nan, True)
        all_data_df['#Issues'] = all_data_df['#Issues'].replace(np.nan, 0)

    # Remaining NaN cells (missing metrics, empty category/details) are emitted as empty strings.
    all_data_df = all_data_df.replace(np.nan, '')
    return all_data_df
def output_diagnosis_in_excel(self, raw_data_df, data_not_accept_df, output_path, rules):
"""Output the raw_data_df and data_not_accept_df results into excel file.
@ -230,7 +273,7 @@ class DataDiagnosis(RuleBase):
except Exception as e:
logger.error('DataDiagnosis: excel_data_output - {}'.format(str(e)))
def output_diagnosis_in_json(self, data_not_accept_df, output_path):
def output_diagnosis_in_jsonl(self, data_not_accept_df, output_path):
"""Output data_not_accept_df into jsonl file.
Args:
@ -256,6 +299,20 @@ class DataDiagnosis(RuleBase):
except Exception as e:
logger.error('DataDiagnosis: output json data failed, msg: {}'.format(str(e)))
def output_diagnosis_in_json(self, data_not_accept_df, output_path):
    """Output data_not_accept_df into json file.

    Each row becomes one JSON object and the row label is stored under the 'Index' key.
    The caller's DataFrame is left untouched.

    Args:
        data_not_accept_df (DataFrame): the DataFrame to output
        output_path (str): the path of output json file
    """
    # Add the 'Index' column on a copy so the input DataFrame is not modified as a side effect.
    output_df = data_not_accept_df.copy()
    output_df['Index'] = output_df.index
    # Round-trip through pandas' JSON encoder so cell values become plain JSON types,
    # then pretty-print the records with the standard json module.
    records = json.loads(output_df.to_json(orient='records'))
    with Path(output_path).open('w') as f:
        json.dump(records, f, indent=4)
def generate_md_lines(self, data_not_accept_df, rules, round):
"""Convert DataFrame into markdown lines.
@ -293,7 +350,9 @@ class DataDiagnosis(RuleBase):
lines = file_handler.generate_md_table(data_not_accept_df, header)
return lines
def run(self, raw_data_file, rule_file, baseline_file, output_dir, output_format='excel', round=2):
def run(
self, raw_data_file, rule_file, baseline_file, output_dir, output_format='excel', output_all=False, round=2
):
"""Run the data diagnosis and output the results.
Args:
@ -301,6 +360,7 @@ class DataDiagnosis(RuleBase):
rule_file (str): The path of baseline yaml file
baseline_file (str): The path of baseline json file
output_dir (str): the directory of output file
output_all (bool): output diagnosis results for all nodes
output_format (str): the format of the output, 'excel', 'json', 'md' or 'html'
round (int): the number of decimal digits
"""
@ -312,12 +372,21 @@ class DataDiagnosis(RuleBase):
data_not_accept_df, label_df = self.run_diagnosis_rules(rules, baseline)
logger.info('DataDiagnosis: Processed finished')
output_path = ''
# generate all nodes' info
if output_all:
output_path = str(Path(output_dir) / 'diagnosis_summary.json')
data_not_accept_df = self.output_all_nodes_results(self._raw_data_df, data_not_accept_df)
# output according format
if output_format == 'excel':
output_path = str(Path(output_dir) / 'diagnosis_summary.xlsx')
self.output_diagnosis_in_excel(self._raw_data_df, data_not_accept_df, output_path, self._sb_rules)
elif output_format == 'json':
output_path = str(Path(output_dir) / 'diagnosis_summary.jsonl')
self.output_diagnosis_in_json(data_not_accept_df, output_path)
if output_all:
output_path = str(Path(output_dir) / 'diagnosis_summary.json')
self.output_diagnosis_in_json(data_not_accept_df, output_path)
else:
output_path = str(Path(output_dir) / 'diagnosis_summary.jsonl')
self.output_diagnosis_in_jsonl(data_not_accept_df, output_path)
elif output_format == 'md' or output_format == 'html':
lines = self.generate_md_lines(data_not_accept_df, self._sb_rules, round)
if output_format == 'md':

Просмотреть файл

@ -24,12 +24,13 @@ class TestDataDiagnosis(unittest.TestCase):
self.output_json_file = str(self.parent_path / 'diagnosis_summary.jsonl')
self.output_md_file = str(self.parent_path / 'diagnosis_summary.md')
self.output_html_file = str(self.parent_path / 'diagnosis_summary.html')
self.output_all_json_file = str(self.parent_path / 'diagnosis_summary.json')
def tearDown(self):
"""Method called after the test method has been called and the result recorded."""
for file in [
self.output_excel_file, self.output_json_file, self.test_rule_file_fake, self.output_md_file,
self.output_html_file
self.output_html_file, self.output_all_json_file
]:
p = Path(file)
if p.is_file():
@ -185,8 +186,8 @@ class TestDataDiagnosis(unittest.TestCase):
assert (len(data_not_accept_read_from_excel) == 2)
assert ('Category' in data_not_accept_read_from_excel)
assert ('Defective Details' in data_not_accept_read_from_excel)
# Test - output in json
diag1.output_diagnosis_in_json(data_not_accept_df, self.output_json_file)
# Test - output in jsonl
diag1.output_diagnosis_in_jsonl(data_not_accept_df, self.output_json_file)
assert (Path(self.output_json_file).is_file())
with Path(self.output_json_file).open() as f:
data_not_accept_read_from_json = f.readlines()
@ -203,6 +204,30 @@ class TestDataDiagnosis(unittest.TestCase):
with open(expected_md_file, 'r') as f:
expect_result = f.readlines()
assert (lines == expect_result)
# Test - output_all_nodes_results
# case 1: 1 accept, 2 not accept
data_df = diag1.output_all_nodes_results(diag1._raw_data_df, data_not_accept_df)
assert (len(data_df) == 3)
assert (not data_df.loc['sb-validation-01']['Accept'])
assert (data_df.loc['sb-validation-02']['Accept'])
assert (not data_df.loc['sb-validation-03']['Accept'])
assert ('Category' in data_df)
assert ('Issue_Details' in data_df)
# case 2: 3 accept, 0 not accept
data_df_all_accept = diag1.output_all_nodes_results(diag1._raw_data_df, pd.DataFrame())
assert (len(data_df_all_accept) == 3)
assert (data_df_all_accept.loc['sb-validation-01']['Accept'])
assert (data_df_all_accept.loc['sb-validation-02']['Accept'])
assert (data_df_all_accept.loc['sb-validation-03']['Accept'])
# Test - output in json
diag1.output_diagnosis_in_json(data_df, self.output_all_json_file)
assert (Path(self.output_all_json_file).is_file())
expected_result_file = str(self.parent_path / '../data/diagnosis_summary.json')
with Path(self.output_all_json_file).open() as f:
data_not_accept_read_from_json = f.read()
with Path(expected_result_file).open() as f:
expect_result = f.read()
assert (data_not_accept_read_from_json == expect_result)
def test_data_diagnosis_run(self):
"""Test for the run process of rule-based data diagnosis."""
@ -228,7 +253,7 @@ class TestDataDiagnosis(unittest.TestCase):
expect_result = f.read()
assert (data_not_accept_read_from_json == expect_result)
# Test - output in md
DataDiagnosis().run(test_raw_data, test_rule_file, test_baseline_file, str(self.parent_path), 'md', 2)
DataDiagnosis().run(test_raw_data, test_rule_file, test_baseline_file, str(self.parent_path), 'md', round=2)
assert (Path(self.output_md_file).is_file())
expected_md_file = str(self.parent_path / '../data/diagnosis_summary.md')
with open(expected_md_file, 'r') as f:
@ -237,7 +262,7 @@ class TestDataDiagnosis(unittest.TestCase):
summary = f.read()
assert (summary == expect_result)
# Test - output in html
DataDiagnosis().run(test_raw_data, test_rule_file, test_baseline_file, str(self.parent_path), 'html', 2)
DataDiagnosis().run(test_raw_data, test_rule_file, test_baseline_file, str(self.parent_path), 'html', round=2)
assert (Path(self.output_html_file).is_file())
expected_html_file = str(self.parent_path / '../data/diagnosis_summary.html')
with open(expected_html_file, 'r') as f:
@ -245,6 +270,17 @@ class TestDataDiagnosis(unittest.TestCase):
with open(self.output_html_file, 'r') as f:
summary = f.read()
assert (summary == expect_result)
# Test - output all nodes results
DataDiagnosis().run(
test_raw_data, test_rule_file, test_baseline_file, str(self.parent_path), 'json', output_all=True
)
assert (Path(self.output_all_json_file).is_file())
expected_result_file = str(self.parent_path / '../data/diagnosis_summary.json')
with Path(self.output_all_json_file).open() as f:
data_not_accept_read_from_json = f.read()
with Path(expected_result_file).open() as f:
expect_result = f.read()
assert (data_not_accept_read_from_json == expect_result)
def test_mutli_rules(self):
"""Test multi rules check feature."""

Просмотреть файл

@ -0,0 +1,125 @@
[
{
"kernel-launch/event_overhead:0": 0.1,
"kernel-launch/event_overhead:1": 0.00595,
"kernel-launch/event_overhead:2": 0.00557,
"kernel-launch/event_overhead:3": 0.0055,
"kernel-launch/event_overhead:4": 0.00592,
"kernel-launch/event_overhead:5": 0.00589,
"kernel-launch/event_overhead:6": 0.00572,
"kernel-launch/event_overhead:7": 0.0059,
"kernel-launch/return_code": 0.0,
"kernel-launch/wall_overhead:0": 0.01026,
"kernel-launch/wall_overhead:1": 0.01026,
"kernel-launch/wall_overhead:2": 0.01046,
"kernel-launch/wall_overhead:3": 0.01049,
"kernel-launch/wall_overhead:4": 0.01063,
"kernel-launch/wall_overhead:5": 0.01006,
"kernel-launch/wall_overhead:6": 0.01045,
"kernel-launch/wall_overhead:7": 0.01071,
"mem-bw/D2H_Mem_BW:0": 24.3,
"mem-bw/D2H_Mem_BW:1": 24.6,
"mem-bw/D2H_Mem_BW:2": 24.5,
"mem-bw/D2H_Mem_BW:3": 24.6,
"mem-bw/D2H_Mem_BW:4": 24.3,
"mem-bw/D2H_Mem_BW:5": 24.3,
"mem-bw/D2H_Mem_BW:6": 23.9,
"mem-bw/D2H_Mem_BW:7": 24.6,
"mem-bw/H2D_Mem_BW:0": 25.6,
"mem-bw/H2D_Mem_BW:1": 25.8,
"mem-bw/H2D_Mem_BW:2": 26.0,
"mem-bw/H2D_Mem_BW:3": 26.1,
"mem-bw/H2D_Mem_BW:4": 26.2,
"mem-bw/H2D_Mem_BW:5": 25.8,
"mem-bw/H2D_Mem_BW:6": 25.3,
"mem-bw/H2D_Mem_BW:7": 26.1,
"mem-bw/return_code": 0.0,
"Accept": false,
"#Issues": 1.0,
"Category": "KernelLaunch",
"Issue_Details": "kernel-launch/event_overhead:0(B/L: 0.0060 VAL: 0.1000 VAR: 1577.85% Rule:lambda x:x>0.05)",
"Index": "sb-validation-01"
},
{
"kernel-launch/event_overhead:0": 0.00595,
"kernel-launch/event_overhead:1": 0.00595,
"kernel-launch/event_overhead:2": 0.00557,
"kernel-launch/event_overhead:3": 0.0055,
"kernel-launch/event_overhead:4": 0.00592,
"kernel-launch/event_overhead:5": 0.00589,
"kernel-launch/event_overhead:6": 0.00572,
"kernel-launch/event_overhead:7": 0.0059,
"kernel-launch/return_code": 0.0,
"kernel-launch/wall_overhead:0": 0.01026,
"kernel-launch/wall_overhead:1": 0.01026,
"kernel-launch/wall_overhead:2": 0.01046,
"kernel-launch/wall_overhead:3": 0.01049,
"kernel-launch/wall_overhead:4": 0.01063,
"kernel-launch/wall_overhead:5": 0.01006,
"kernel-launch/wall_overhead:6": 0.01045,
"kernel-launch/wall_overhead:7": 0.01071,
"mem-bw/D2H_Mem_BW:0": 24.3,
"mem-bw/D2H_Mem_BW:1": 24.6,
"mem-bw/D2H_Mem_BW:2": 24.5,
"mem-bw/D2H_Mem_BW:3": 24.6,
"mem-bw/D2H_Mem_BW:4": 24.3,
"mem-bw/D2H_Mem_BW:5": 24.3,
"mem-bw/D2H_Mem_BW:6": 23.9,
"mem-bw/D2H_Mem_BW:7": 24.6,
"mem-bw/H2D_Mem_BW:0": 25.6,
"mem-bw/H2D_Mem_BW:1": 25.8,
"mem-bw/H2D_Mem_BW:2": 26.0,
"mem-bw/H2D_Mem_BW:3": 26.1,
"mem-bw/H2D_Mem_BW:4": 26.2,
"mem-bw/H2D_Mem_BW:5": 25.8,
"mem-bw/H2D_Mem_BW:6": 25.3,
"mem-bw/H2D_Mem_BW:7": 26.1,
"mem-bw/return_code": 0.0,
"Accept": true,
"#Issues": 0.0,
"Category": "",
"Issue_Details": "",
"Index": "sb-validation-02"
},
{
"kernel-launch/event_overhead:0": 0.00596,
"kernel-launch/event_overhead:1": 0.00595,
"kernel-launch/event_overhead:2": 0.00557,
"kernel-launch/event_overhead:3": 0.0055,
"kernel-launch/event_overhead:4": 0.00592,
"kernel-launch/event_overhead:5": 0.00589,
"kernel-launch/event_overhead:6": 0.00572,
"kernel-launch/event_overhead:7": 0.0059,
"kernel-launch/return_code": 0.0,
"kernel-launch/wall_overhead:0": 0.01026,
"kernel-launch/wall_overhead:1": 0.01026,
"kernel-launch/wall_overhead:2": 0.01046,
"kernel-launch/wall_overhead:3": 0.01049,
"kernel-launch/wall_overhead:4": 0.01063,
"kernel-launch/wall_overhead:5": 0.01006,
"kernel-launch/wall_overhead:6": 0.01045,
"kernel-launch/wall_overhead:7": 0.01071,
"mem-bw/D2H_Mem_BW:0": "",
"mem-bw/D2H_Mem_BW:1": "",
"mem-bw/D2H_Mem_BW:2": "",
"mem-bw/D2H_Mem_BW:3": "",
"mem-bw/D2H_Mem_BW:4": "",
"mem-bw/D2H_Mem_BW:5": "",
"mem-bw/D2H_Mem_BW:6": "",
"mem-bw/D2H_Mem_BW:7": "",
"mem-bw/H2D_Mem_BW:0": "",
"mem-bw/H2D_Mem_BW:1": "",
"mem-bw/H2D_Mem_BW:2": "",
"mem-bw/H2D_Mem_BW:3": "",
"mem-bw/H2D_Mem_BW:4": "",
"mem-bw/H2D_Mem_BW:5": "",
"mem-bw/H2D_Mem_BW:6": "",
"mem-bw/H2D_Mem_BW:7": "",
"mem-bw/return_code": 1.0,
"Accept": false,
"#Issues": 17.0,
"Category": "FailedTest,Mem",
"Issue_Details": "mem-bw/D2H_Mem_BW:0_miss,mem-bw/D2H_Mem_BW:1_miss,mem-bw/D2H_Mem_BW:2_miss,mem-bw/D2H_Mem_BW:3_miss,mem-bw/D2H_Mem_BW:4_miss,mem-bw/D2H_Mem_BW:5_miss,mem-bw/D2H_Mem_BW:6_miss,mem-bw/D2H_Mem_BW:7_miss,mem-bw/H2D_Mem_BW:0_miss,mem-bw/H2D_Mem_BW:1_miss,mem-bw/H2D_Mem_BW:2_miss,mem-bw/H2D_Mem_BW:3_miss,mem-bw/H2D_Mem_BW:4_miss,mem-bw/H2D_Mem_BW:5_miss,mem-bw/H2D_Mem_BW:6_miss,mem-bw/H2D_Mem_BW:7_miss,mem-bw/return_code(VAL: 1.0000 Rule:lambda x:x>0)",
"Index": "sb-validation-03"
}
]