From ed2f3c3c827d07d1b8dce0490019f22c68fee29e Mon Sep 17 00:00:00 2001 From: Yuting Jiang Date: Fri, 10 Dec 2021 14:11:00 +0800 Subject: [PATCH] CLI - Integrate data diagnosis (#260) **Description** Add cli to integrate data diagnosis module. --- superbench/cli/_commands.py | 14 +++++++++++++ superbench/cli/_help.py | 28 +++++++++++++++++++++++++ superbench/cli/_result_handler.py | 35 +++++++++++++++++++++++++++++++ tests/cli/test_sb.py | 16 ++++++++++++++ 4 files changed, 93 insertions(+) create mode 100644 superbench/cli/_result_handler.py diff --git a/superbench/cli/_commands.py b/superbench/cli/_commands.py index b462235a..e2725331 100644 --- a/superbench/cli/_commands.py +++ b/superbench/cli/_commands.py @@ -25,6 +25,8 @@ class SuperBenchCommandsLoader(CLICommandsLoader): g.command('run', 'run_command_handler') with CommandGroup(self, 'node', 'superbench.cli._node_handler#{}') as g: g.command('info', 'info_command_handler') + with CommandGroup(self, 'result', 'superbench.cli._result_handler#{}') as g: + g.command('diagnosis', 'diagnosis_command_handler') return super().load_command_table(args) def load_arguments(self, command): @@ -59,4 +61,16 @@ class SuperBenchCommandsLoader(CLICommandsLoader): nargs='+', help='Extra arguments to override config_file.' ) + with ArgumentsContext(self, 'result') as ac: + ac.argument('raw_data_file', options_list=('--data-file', '-d'), type=str, help='Path to raw data file.') + ac.argument('rule_file', options_list=('--rule-file', '-r'), type=str, help='Path to rule file.') + ac.argument( + 'baseline_file', options_list=('--baseline-file', '-b'), type=str, help='Path to baseline file.' + ) + ac.argument( + 'output_dir', + type=str, + help='Path to output directory, outputs/{datetime} will be used if not specified.' 
+ ) + ac.argument('output_file_format', type=str, help='Format of output file, excel or json.') super().load_arguments(command) diff --git a/superbench/cli/_help.py b/superbench/cli/_help.py index 699b83cf..6ed26a20 100644 --- a/superbench/cli/_help.py +++ b/superbench/cli/_help.py @@ -61,6 +61,34 @@ helps['run'] = """ text: {cli_name} run --docker-image superbench/cuda:11.1 --host-file ./host.ini """.format(cli_name=CLI_NAME) +helps['node'] = """ + type: Group + short-summary: Get detailed information or configurations on the local node. +""" + +helps['node info'] = """ + type: command + short-summary: Get system info. + examples: + - name: get system info of the local node + text: {cli_name} node info +""".format(cli_name=CLI_NAME) + +helps['result'] = """ + type: Group + short-summary: Process or analyze the results of SuperBench benchmarks. +""" + +helps['result diagnosis'] = """ + type: command + short-summary: Filter the defective machines automatically from benchmarking results according to rules defined in rule file. + examples: + - name: run data diagnosis and output the results in excel format + text: {cli_name} result diagnosis --data-file 'outputs/results-summary.jsonl' --rule-file 'rule.yaml' --baseline-file 'baseline.json' --output-file-format 'excel' + - name: run data diagnosis and output the results in jsonl format + text: {cli_name} result diagnosis --data-file 'outputs/results-summary.jsonl' --rule-file 'rule.yaml' --baseline-file 'baseline.json' --output-file-format 'json' +""".format(cli_name=CLI_NAME) # noqa: E501 + class SuperBenchCLIHelp(CLIHelp): """SuperBench CLI help loader.""" diff --git a/superbench/cli/_result_handler.py b/superbench/cli/_result_handler.py new file mode 100644 index 00000000..548e1166 --- /dev/null +++ b/superbench/cli/_result_handler.py @@ -0,0 +1,35 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +"""SuperBench CLI result subgroup command handler.""" + +from knack.util import CLIError + +from superbench.analyzer import DataDiagnosis +from superbench.common.utils import create_sb_output_dir +from superbench.cli._handler import check_argument_file + + +def diagnosis_command_handler(raw_data_file, rule_file, baseline_file, output_dir=None, output_file_format='excel'): + """Run data diagnosis. + + Args: + raw_data_file (str): Path to raw data jsonl file. + rule_file (str): Path to rule yaml file. + baseline_file (str): Path to baseline json file. + output_dir (str): Path to output directory. + output_file_format (str): Format of the output file, 'excel' or 'json'. Defaults to 'excel'. + """ + try: + # Create output directory + sb_output_dir = create_sb_output_dir(output_dir) + # Check arguments + if output_file_format not in ['excel', 'json']: + raise CLIError('Output format must be excel or json.') + check_argument_file('raw_data_file', raw_data_file) + check_argument_file('rule_file', rule_file) + check_argument_file('baseline_file', baseline_file) + # Run data diagnosis + DataDiagnosis().run(raw_data_file, rule_file, baseline_file, sb_output_dir, output_file_format) + except Exception as ex: + raise RuntimeError('Failed to run diagnosis command.') from ex diff --git a/tests/cli/test_sb.py b/tests/cli/test_sb.py index 193e311b..b0fc14c1 100644 --- a/tests/cli/test_sb.py +++ b/tests/cli/test_sb.py @@ -7,6 +7,7 @@ import io import contextlib from functools import wraps from knack.testsdk import ScenarioTest, StringCheck, NoneCheck +from pathlib import Path import superbench from superbench.cli import SuperBenchCLI @@ -85,3 +86,18 @@ class SuperBenchCLIScenarioTest(ScenarioTest): def test_sb_node_info(self): """Test sb node info, should fail.""" self.cmd('sb node info', expect_failure=False) + + def test_sb_result_diagnosis(self): + """Test sb result diagnosis.""" + test_analyzer_dir = str(Path(__file__).parent.resolve() / '../analyzer/') + # test positive 
case + self.cmd( + 'sb result diagnosis -d {dir}/test_results.jsonl -r {dir}/test_rules.yaml -b {dir}/test_baseline.json'. + format(dir=test_analyzer_dir) + ' --output-dir outputs/test-diagnosis/' + ) + # test invalid output format + self.cmd( + 'sb result diagnosis -d {dir}/test_results.jsonl -r {dir}/test_rules.yaml -b {dir}/test_baseline.json'. + format(dir=test_analyzer_dir) + ' --output-dir outputs/test-diagnosis/ --output-file-format abb', + expect_failure=True + )