CLI - Integrate data diagnosis (#260)

**Description** Add a CLI command (`sb result diagnosis`) to integrate the data diagnosis module.
2021-12-10 14:11:00 +08:00 · 2021-12-10 14:11:00 +08:00 · ed2f3c3c82
--- a/superbench/cli/_commands.py
+++ b/superbench/cli/_commands.py
@ -25,6 +25,8 @@ class SuperBenchCommandsLoader(CLICommandsLoader):
            g.command('run', 'run_command_handler')
        with CommandGroup(self, 'node', 'superbench.cli._node_handler#{}') as g:
            g.command('info', 'info_command_handler')
+        with CommandGroup(self, 'result', 'superbench.cli._result_handler#{}') as g:
+            g.command('diagnosis', 'diagnosis_command_handler')
        return super().load_command_table(args)

    def load_arguments(self, command):
@ -59,4 +61,16 @@ class SuperBenchCommandsLoader(CLICommandsLoader):
                nargs='+',
                help='Extra arguments to override config_file.'
            )
+        with ArgumentsContext(self, 'result') as ac:
+            ac.argument('raw_data_file', options_list=('--data-file', '-d'), type=str, help='Path to raw data file.')
+            ac.argument('rule_file', options_list=('--rule-file', '-r'), type=str, help='Path to rule file.')
+            ac.argument(
+                'baseline_file', options_list=('--baseline-file', '-b'), type=str, help='Path to baseline file.'
+            )
+            ac.argument(
+                'output_dir',
+                type=str,
+                help='Path to output directory, outputs/{datetime} will be used if not specified.'
+            )
+            ac.argument('output_file_format', type=str, help='Format of output file, excel or json.')
        super().load_arguments(command)
--- a/superbench/cli/_help.py
+++ b/superbench/cli/_help.py
@ -61,6 +61,34 @@ helps['run'] = """
          text: {cli_name} run --docker-image superbench/cuda:11.1 --host-file ./host.ini
 """.format(cli_name=CLI_NAME)

+helps['node'] = """
+    type: Group
+    short-summary: Get detailed information or configurations on the local node.
+"""
+
+helps['node info'] = """
+    type: command
+    short-summary: Get system info.
+    examples:
+        - name: get system info of the local node
+          text: {cli_name} node info
+""".format(cli_name=CLI_NAME)
+
+helps['result'] = """
+    type: Group
+    short-summary: Process or analyze the results of SuperBench benchmarks.
+"""
+
+helps['result diagnosis'] = """
+    type: command
+    short-summary: Filter the defective machines automatically from benchmarking results according to rules defined in rule file.
+    examples:
+        - name: run data diagnosis and output the results in excel format
+          text: {cli_name} result diagnosis --data-file 'outputs/results-summary.jsonl' --rule-file 'rule.yaml' --baseline-file 'baseline.json' --output-file-format 'excel'
+        - name: run data diagnosis and output the results in jsonl format
+          text: {cli_name} result diagnosis --data-file 'outputs/results-summary.jsonl' --rule-file 'rule.yaml' --baseline-file 'baseline.json' --output-file-format 'json'
+""".format(cli_name=CLI_NAME)    # noqa: E501
+

 class SuperBenchCLIHelp(CLIHelp):
    """SuperBench CLI help loader."""
--- a/superbench/cli/_result_handler.py
+++ b/superbench/cli/_result_handler.py
@ -0,0 +1,35 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""SuperBench CLI result subgroup command handler."""
+
+from knack.util import CLIError
+
+from superbench.analyzer import DataDiagnosis
+from superbench.common.utils import create_sb_output_dir
+from superbench.cli._handler import check_argument_file
+
+
+def diagnosis_command_handler(raw_data_file, rule_file, baseline_file, output_dir=None, output_file_format='excel'):
+    """Run data diagnosis.
+
+    Args:
+        raw_data_file (str): Path to raw data jsonl file.
+        rule_file (str): Path to rule yaml file.
+        baseline_file (str): Path to baseline json file.
+        output_dir (str): Path to output directory.
+        output_file_format (str): Format of the output file, 'excel' or 'json'. Defaults to 'excel'.
+    """
+    try:
+        # Create output directory
+        sb_output_dir = create_sb_output_dir(output_dir)
+        # Check arguments
+        if output_file_format not in ['excel', 'json']:
+            raise CLIError('Output format must be excel or json.')
+        check_argument_file('raw_data_file', raw_data_file)
+        check_argument_file('rule_file', rule_file)
+        check_argument_file('baseline_file', baseline_file)
+        # Run data diagnosis
+        DataDiagnosis().run(raw_data_file, rule_file, baseline_file, sb_output_dir, output_file_format)
+    except Exception as ex:
+        raise RuntimeError('Failed to run diagnosis command.') from ex
--- a/tests/cli/test_sb.py
+++ b/tests/cli/test_sb.py
@ -7,6 +7,7 @@ import io
 import contextlib
 from functools import wraps
 from knack.testsdk import ScenarioTest, StringCheck, NoneCheck
+from pathlib import Path

 import superbench
 from superbench.cli import SuperBenchCLI
@ -85,3 +86,18 @@ class SuperBenchCLIScenarioTest(ScenarioTest):
    def test_sb_node_info(self):
        """Test sb node info, should fail."""
        self.cmd('sb node info', expect_failure=False)
+
+    def test_sb_result_diagnosis(self):
+        """Test sb result diagnosis."""
+        test_analyzer_dir = str(Path(__file__).parent.resolve() / '../analyzer/')
+        # test positive case
+        self.cmd(
+            'sb result diagnosis -d {dir}/test_results.jsonl -r {dir}/test_rules.yaml -b {dir}/test_baseline.json'.
+            format(dir=test_analyzer_dir) + ' --output-dir outputs/test-diagnosis/'
+        )
+        # test invalid output format
+        self.cmd(
+            'sb result diagnosis -d {dir}/test_results.jsonl -r {dir}/test_rules.yaml -b {dir}/test_baseline.json'.
+            format(dir=test_analyzer_dir) + ' --output-dir outputs/test-diagnosis/ --output-file-format abb',
+            expect_failure=True
+        )