CLI - Integrate data diagnosis (#260)

**Description**
Add a CLI command (`sb result diagnosis`) to integrate the data diagnosis module.
This commit is contained in:
Yuting Jiang 2021-12-10 14:11:00 +08:00 коммит произвёл GitHub
Родитель 9f56b2198f
Коммит ed2f3c3c82
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
4 изменённых файлов: 93 добавлений и 0 удалений

Просмотреть файл

@ -25,6 +25,8 @@ class SuperBenchCommandsLoader(CLICommandsLoader):
g.command('run', 'run_command_handler')
with CommandGroup(self, 'node', 'superbench.cli._node_handler#{}') as g:
g.command('info', 'info_command_handler')
with CommandGroup(self, 'result', 'superbench.cli._result_handler#{}') as g:
g.command('diagnosis', 'diagnosis_command_handler')
return super().load_command_table(args)
def load_arguments(self, command):
@ -59,4 +61,16 @@ class SuperBenchCommandsLoader(CLICommandsLoader):
nargs='+',
help='Extra arguments to override config_file.'
)
with ArgumentsContext(self, 'result') as ac:
ac.argument('raw_data_file', options_list=('--data-file', '-d'), type=str, help='Path to raw data file.')
ac.argument('rule_file', options_list=('--rule-file', '-r'), type=str, help='Path to rule file.')
ac.argument(
'baseline_file', options_list=('--baseline-file', '-b'), type=str, help='Path to baseline file.'
)
ac.argument(
'output_dir',
type=str,
help='Path to output directory, outputs/{datetime} will be used if not specified.'
)
ac.argument('output_file_format', type=str, help='Format of output file, excel or json.')
super().load_arguments(command)

Просмотреть файл

@ -61,6 +61,34 @@ helps['run'] = """
text: {cli_name} run --docker-image superbench/cuda:11.1 --host-file ./host.ini
""".format(cli_name=CLI_NAME)
# Help entry for the `node` command group.
helps['node'] = """
    type: Group
    short-summary: Get detailed information or configurations on the local node.
"""

# Help entry for `node info`; {cli_name} is substituted from CLI_NAME.
helps['node info'] = """
    type: command
    short-summary: Get system info.
    examples:
        - name: get system info of the local node
          text: {cli_name} node info
""".format(cli_name=CLI_NAME)

# Help entry for the `result` command group.
helps['result'] = """
    type: Group
    short-summary: Process or analyze the results of SuperBench benchmarks.
"""

# Help entry for `result diagnosis`. The option shown in the examples must be
# spelled `--output-file-format` to match the `output_file_format` argument.
helps['result diagnosis'] = """
    type: command
    short-summary: Filter the defective machines automatically from benchmarking results according to rules defined in rule file.
    examples:
        - name: run data diagnosis and output the results in excel format
          text: {cli_name} result diagnosis --data-file 'outputs/results-summary.jsonl' --rule-file 'rule.yaml' --baseline-file 'baseline.json' --output-file-format 'excel'
        - name: run data diagnosis and output the results in jsonl format
          text: {cli_name} result diagnosis --data-file 'outputs/results-summary.jsonl' --rule-file 'rule.yaml' --baseline-file 'baseline.json' --output-file-format 'json'
""".format(cli_name=CLI_NAME)    # noqa: E501
class SuperBenchCLIHelp(CLIHelp):
"""SuperBench CLI help loader."""

Просмотреть файл

@ -0,0 +1,35 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""SuperBench CLI result subgroup command handler."""
from knack.util import CLIError
from superbench.analyzer import DataDiagnosis
from superbench.common.utils import create_sb_output_dir
from superbench.cli._handler import check_argument_file
def diagnosis_command_handler(raw_data_file, rule_file, baseline_file, output_dir=None, output_file_format='excel'):
    """Run data diagnosis.

    Validates the arguments, creates the output directory, then runs
    DataDiagnosis on the given raw data, rule and baseline files.

    Args:
        raw_data_file (str): Path to raw data jsonl file.
        rule_file (str): Path to rule yaml file.
        baseline_file (str): Path to baseline json file.
        output_dir (str): Path to output directory. Defaults to None, which is
            passed through unchanged to create_sb_output_dir.
        output_file_format (str): Format of the output file, 'excel' or 'json'. Defaults to 'excel'.

    Raises:
        RuntimeError: If argument validation or the diagnosis run fails; the
            original exception is chained as the cause.
    """
    try:
        # Check arguments first so that an invalid invocation fails before
        # the output directory is created.
        if output_file_format not in ['excel', 'json']:
            raise CLIError('Output format must be excel or json.')
        check_argument_file('raw_data_file', raw_data_file)
        check_argument_file('rule_file', rule_file)
        check_argument_file('baseline_file', baseline_file)
        # Create output directory
        sb_output_dir = create_sb_output_dir(output_dir)
        # Run data diagnosis
        DataDiagnosis().run(raw_data_file, rule_file, baseline_file, sb_output_dir, output_file_format)
    except Exception as ex:
        # Preserve the single failure type while keeping the root cause attached.
        raise RuntimeError('Failed to run diagnosis command.') from ex

Просмотреть файл

@ -7,6 +7,7 @@ import io
import contextlib
from functools import wraps
from knack.testsdk import ScenarioTest, StringCheck, NoneCheck
from pathlib import Path
import superbench
from superbench.cli import SuperBenchCLI
@ -85,3 +86,18 @@ class SuperBenchCLIScenarioTest(ScenarioTest):
def test_sb_node_info(self):
    """Test sb node info, should succeed."""
    # expect_failure=False asserts that the command completes without error.
    self.cmd('sb node info', expect_failure=False)
def test_sb_result_diagnosis(self):
    """Exercise `sb result diagnosis` with a valid call and with an invalid output format."""
    # Fixture files are looked up in the sibling analyzer test directory.
    analyzer_dir = str(Path(__file__).parent.resolve() / '../analyzer/')
    command_template = (
        'sb result diagnosis -d {dir}/test_results.jsonl -r {dir}/test_rules.yaml -b {dir}/test_baseline.json'
    )
    # Both cases share the same input files and output directory.
    base_command = command_template.format(dir=analyzer_dir) + ' --output-dir outputs/test-diagnosis/'

    # Positive case: default output format.
    self.cmd(base_command)

    # Negative case: 'abb' is not a supported output format, so the command must fail.
    self.cmd(base_command + ' --output-file-format abb', expect_failure=True)