From ed2f3c3c827d07d1b8dce0490019f22c68fee29e Mon Sep 17 00:00:00 2001
From: Yuting Jiang <v-yujiang@microsoft.com>
Date: Fri, 10 Dec 2021 14:11:00 +0800
Subject: [PATCH] CLI - Integrate data diagnosis (#260)

**Description**
Add the `sb result diagnosis` CLI command to integrate the data diagnosis module.
---
 superbench/cli/_commands.py       | 14 +++++++++++++
 superbench/cli/_help.py           | 28 +++++++++++++++++++++++++
 superbench/cli/_result_handler.py | 35 +++++++++++++++++++++++++++++++
 tests/cli/test_sb.py              | 16 ++++++++++++++
 4 files changed, 93 insertions(+)
 create mode 100644 superbench/cli/_result_handler.py

diff --git a/superbench/cli/_commands.py b/superbench/cli/_commands.py
index b462235a..e2725331 100644
--- a/superbench/cli/_commands.py
+++ b/superbench/cli/_commands.py
@@ -25,6 +25,8 @@ class SuperBenchCommandsLoader(CLICommandsLoader):
             g.command('run', 'run_command_handler')
         with CommandGroup(self, 'node', 'superbench.cli._node_handler#{}') as g:
             g.command('info', 'info_command_handler')
+        with CommandGroup(self, 'result', 'superbench.cli._result_handler#{}') as g:
+            g.command('diagnosis', 'diagnosis_command_handler')
         return super().load_command_table(args)
 
     def load_arguments(self, command):
@@ -59,4 +61,16 @@ class SuperBenchCommandsLoader(CLICommandsLoader):
                 nargs='+',
                 help='Extra arguments to override config_file.'
             )
+        with ArgumentsContext(self, 'result') as ac:
+            ac.argument('raw_data_file', options_list=('--data-file', '-d'), type=str, help='Path to raw data file.')
+            ac.argument('rule_file', options_list=('--rule-file', '-r'), type=str, help='Path to rule file.')
+            ac.argument(
+                'baseline_file', options_list=('--baseline-file', '-b'), type=str, help='Path to baseline file.'
+            )
+            ac.argument(
+                'output_dir',
+                type=str,
+                help='Path to output directory, outputs/{datetime} will be used if not specified.'
+            )
+            ac.argument('output_file_format', type=str, help='Format of output file, excel or json.')
         super().load_arguments(command)
diff --git a/superbench/cli/_help.py b/superbench/cli/_help.py
index 699b83cf..6ed26a20 100644
--- a/superbench/cli/_help.py
+++ b/superbench/cli/_help.py
@@ -61,6 +61,34 @@ helps['run'] = """
           text: {cli_name} run --docker-image superbench/cuda:11.1 --host-file ./host.ini
 """.format(cli_name=CLI_NAME)
 
+helps['node'] = """
+    type: Group
+    short-summary: Get detailed information or configurations on the local node.
+"""
+
+helps['node info'] = """
+    type: command
+    short-summary: Get system info.
+    examples:
+        - name: get system info of the local node
+          text: {cli_name} node info
+""".format(cli_name=CLI_NAME)
+
+helps['result'] = """
+    type: Group
+    short-summary: Process or analyze the results of SuperBench benchmarks.
+"""
+
+helps['result diagnosis'] = """
+    type: command
+    short-summary: Filter the defective machines automatically from benchmarking results according to rules defined in rule file.
+    examples:
+        - name: run data diagnosis and output the results in excel format
+          text: {cli_name} result diagnosis --data-file 'outputs/results-summary.jsonl' --rule-file 'rule.yaml' --baseline-file 'baseline.json' --output-file-format 'excel'
+        - name: run data diagnosis and output the results in jsonl format
+          text: {cli_name} result diagnosis --data-file 'outputs/results-summary.jsonl' --rule-file 'rule.yaml' --baseline-file 'baseline.json' --output-file-format 'json'
+""".format(cli_name=CLI_NAME)    # noqa: E501
+
 
 class SuperBenchCLIHelp(CLIHelp):
     """SuperBench CLI help loader."""
diff --git a/superbench/cli/_result_handler.py b/superbench/cli/_result_handler.py
new file mode 100644
index 00000000..548e1166
--- /dev/null
+++ b/superbench/cli/_result_handler.py
@@ -0,0 +1,35 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""SuperBench CLI result subgroup command handler."""
+
+from knack.util import CLIError
+
+from superbench.analyzer import DataDiagnosis
+from superbench.common.utils import create_sb_output_dir
+from superbench.cli._handler import check_argument_file
+
+
+def diagnosis_command_handler(raw_data_file, rule_file, baseline_file, output_dir=None, output_file_format='excel'):
+    """Run data diagnosis.
+
+    Args:
+        raw_data_file (str): Path to raw data jsonl file.
+        rule_file (str): Path to rule yaml file.
+        baseline_file (str): Path to baseline json file.
+        output_dir (str): Path to output directory.
+        output_file_format (str): Format of the output file, 'excel' or 'json'. Defaults to 'excel'.
+    """
+    try:
+        # Create output directory
+        sb_output_dir = create_sb_output_dir(output_dir)
+        # Check arguments
+        if output_file_format not in ['excel', 'json']:
+            raise CLIError('Output format must be excel or json.')
+        check_argument_file('raw_data_file', raw_data_file)
+        check_argument_file('rule_file', rule_file)
+        check_argument_file('baseline_file', baseline_file)
+        # Run data diagnosis
+        DataDiagnosis().run(raw_data_file, rule_file, baseline_file, sb_output_dir, output_file_format)
+    except Exception as ex:
+        raise RuntimeError('Failed to run diagnosis command.') from ex
diff --git a/tests/cli/test_sb.py b/tests/cli/test_sb.py
index 193e311b..b0fc14c1 100644
--- a/tests/cli/test_sb.py
+++ b/tests/cli/test_sb.py
@@ -7,6 +7,7 @@ import io
 import contextlib
 from functools import wraps
 from knack.testsdk import ScenarioTest, StringCheck, NoneCheck
+from pathlib import Path
 
 import superbench
 from superbench.cli import SuperBenchCLI
@@ -85,3 +86,18 @@ class SuperBenchCLIScenarioTest(ScenarioTest):
     def test_sb_node_info(self):
         """Test sb node info, should fail."""
         self.cmd('sb node info', expect_failure=False)
+
+    def test_sb_result_diagnosis(self):
+        """Test sb result diagnosis."""
+        test_analyzer_dir = str(Path(__file__).parent.resolve() / '../analyzer/')
+        # test positive case
+        self.cmd(
+            'sb result diagnosis -d {dir}/test_results.jsonl -r {dir}/test_rules.yaml -b {dir}/test_baseline.json'.
+            format(dir=test_analyzer_dir) + ' --output-dir outputs/test-diagnosis/'
+        )
+        # test invalid output format
+        self.cmd(
+            'sb result diagnosis -d {dir}/test_results.jsonl -r {dir}/test_rules.yaml -b {dir}/test_baseline.json'.
+            format(dir=test_analyzer_dir) + ' --output-dir outputs/test-diagnosis/ --output-file-format abb',
+            expect_failure=True
+        )