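Update error handling: raise UserError for input validation failures and move @error_handler from invoke() to a new main() entry point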

2020-04-30 15:54:46 +08:00 · 2020-04-30 15:54:46 +08:00 · 388a2f0d02
--- a/.gitignore
+++ b/.gitignore
@@ -133,9 +133,10 @@ venv.bak/

 # Mac DS_Store files
 .DS_Store
-# VS code 
+# VS code
 .vscode

 # Ev2 Generator binaries
 bin
-packages
+packages
+debug/
--- a/aml_module/invoker.py
+++ b/aml_module/invoker.py
@@ -5,6 +5,7 @@ import numpy as np
 from azureml.studio.core.logger import module_logger as logger
 from azureml.studio.core.io.data_frame_directory import load_data_frame_from_directory, save_data_frame_to_directory
 from azureml.studio.core.utils.column_selection import ColumnSelection
+from azureml.studio.core.error import UserError
 from azureml.studio.internal.error_handler import error_handler
 import sr_detector
 from error_messages import *
@@ -36,7 +37,6 @@ def is_timestamp_ascending(timestamps):
    return 0


-@error_handler
 def invoke(input_path, detect_mode, timestamp_column, value_column, batch_size, threshold, sensitivity,
            appendMode, compute_stats_in_visualization, output_path):
    data_frame_directory = load_data_frame_from_directory(input_path)
@@ -44,10 +44,10 @@ def invoke(input_path, detect_mode, timestamp_column, value_column, batch_size,
    logger.debug(f"Shape of loaded DataFrame: {data_frame_directory.data.shape}")

    if data_frame_directory.data.shape[0] < MIN_POINTS:
-        raise Exception(NotEnoughPoints.format(MIN_POINTS))
+        raise UserError(NotEnoughPoints.format(MIN_POINTS))

    if 0 < batch_size < MIN_POINTS:
-        raise Exception(InvalidBatchSize.format(MIN_POINTS))
+        raise UserError(InvalidBatchSize.format(MIN_POINTS))

    query_string = unquote(timestamp_column)
    timestamp_column_selector = ColumnSelection(query_string)
@@ -56,13 +56,13 @@ def invoke(input_path, detect_mode, timestamp_column, value_column, batch_size,
    timestamps = pd.to_datetime(timestamp.iloc[:, 0].values)

    if np.any(np.isnat(timestamps)):
-        raise Exception(InvalidTimestamps)
+        raise UserError(InvalidTimestamps)

    res = is_timestamp_ascending(timestamps)
    if res == -1:
-        raise Exception(InvalidSeriesOrder)
+        raise UserError(InvalidSeriesOrder)
    elif res == -2:
-        raise Exception(DuplicateSeriesTimestamp)
+        raise UserError(DuplicateSeriesTimestamp)


    query_string = unquote(value_column)
@@ -73,13 +73,13 @@ def invoke(input_path, detect_mode, timestamp_column, value_column, batch_size,
        try:
            float_data = data_columns[col].apply(float)
        except Exception as e:
-            raise Exception(InvalidValueFormat.format(col))
+            raise UserError(InvalidValueFormat.format(col))

        if not np.all(np.isfinite(float_data)):
-            raise Exception(InvalidSeriesValue.format(col))
+            raise UserError(InvalidSeriesValue.format(col))

        if np.any(np.less(float_data, VALUE_LOWER_BOUND)) or np.any(np.greater(float_data, VALUE_UPPER_BOUND)):
-            raise Exception(ValueOverflow.format(col))
+            raise UserError(ValueOverflow.format(col))

        data_columns[col] = float_data

@@ -92,7 +92,8 @@ def invoke(input_path, detect_mode, timestamp_column, value_column, batch_size,
    save_data_frame_to_directory(output_path, result, compute_stats_in_visualization=compute_stats_in_visualization)


-if __name__ == '__main__':
+@error_handler
+def main():
    parser = argparse.ArgumentParser()

    parser.add_argument(
@@ -165,3 +166,7 @@ if __name__ == '__main__':
    invoke(args.input_path, args.detect_mode, args.timestamp_column, args.value_column,
        args.batch_size, args.threshold, args.sensitivity, args.append_mode,
        args.compute_stats_in_visualization, args.output_path)
+
+
+if __name__ == '__main__':
+    main()
--- a/aml_module/tests/test_error_input.py
+++ b/aml_module/tests/test_error_input.py
@@ -6,7 +6,8 @@ import numpy as np
 import pandas as pd
 import shutil
 import os
-from azureml.studio.core.io.data_frame_directory import load_data_frame_from_directory, save_data_frame_to_directory
+from azureml.studio.core.io.data_frame_directory import save_data_frame_to_directory
+from azureml.studio.core.error import UserError
 import invoker


@@ -36,7 +37,7 @@ class TestErrorInput(unittest.TestCase):
    def test_empty_input(self):
        df = pd.DataFrame()
        save_data_frame_to_directory(self.__input_path, df)
-        self.assertRaisesRegexp(Exception, "The dataset should contain at leaslt 12 points to run this module.",
+        self.assertRaisesRegexp(UserError, "The dataset should contain at leaslt 12 points to run this module.",
                                invoker.invoke,
                                self.__input_path, self.__detect_mode, self.__timestamp_column, self.__value_column,
                                self.__batch_size, self.__threshold, self.__sensitivity, self.__append_mode,
@@ -47,7 +48,7 @@ class TestErrorInput(unittest.TestCase):
        df['timestamp'] = 'invalid'
        df['value'] = np.ones(20)
        save_data_frame_to_directory(self.__input_path, df)
-        self.assertRaisesRegexp(Exception, "The timestamp column specified is malformed.",
+        self.assertRaisesRegexp(UserError, "The timestamp column specified is malformed.",
                                invoker.invoke,
                                self.__input_path, self.__detect_mode, self.__timestamp_column, self.__value_column,
                                self.__batch_size, self.__threshold, self.__sensitivity, self.__append_mode,
@@ -59,7 +60,7 @@ class TestErrorInput(unittest.TestCase):
        df['timestamp'] = timestamps
        df['value'] = np.ones(20)
        save_data_frame_to_directory(self.__input_path, df)
-        self.assertRaisesRegexp(Exception, "The timestamp column specified is not in ascending order.",
+        self.assertRaisesRegexp(UserError, "The timestamp column specified is not in ascending order.",
                                invoker.invoke,
                                self.__input_path, self.__detect_mode, self.__timestamp_column, self.__value_column,
                                self.__batch_size, self.__threshold, self.__sensitivity, self.__append_mode,
@@ -70,7 +71,7 @@ class TestErrorInput(unittest.TestCase):
        df['value'] = np.ones(20)
        df['timestamp'] = '2020-01-01'
        save_data_frame_to_directory(self.__input_path, df)
-        self.assertRaisesRegexp(Exception, "The timestamp column specified has duplicated timestamps.",
+        self.assertRaisesRegexp(UserError, "The timestamp column specified has duplicated timestamps.",
                                invoker.invoke,
                                self.__input_path, self.__detect_mode, self.__timestamp_column, self.__value_column,
                                self.__batch_size, self.__threshold, self.__sensitivity, self.__append_mode,
@@ -82,7 +83,7 @@ class TestErrorInput(unittest.TestCase):
        df['timestamp'] = timestamps
        df['value'] = 'invalid'
        save_data_frame_to_directory(self.__input_path, df)
-        self.assertRaisesRegexp(Exception, 'The data in column "value" can not be parsed as float values.',
+        self.assertRaisesRegexp(UserError, 'The data in column "value" can not be parsed as float values.',
                                invoker.invoke,
                                self.__input_path, self.__detect_mode, self.__timestamp_column, self.__value_column,
                                self.__batch_size, self.__threshold, self.__sensitivity, self.__append_mode,
@@ -94,7 +95,7 @@ class TestErrorInput(unittest.TestCase):
        df['timestamp'] = timestamps
        df['value'] = np.nan
        save_data_frame_to_directory(self.__input_path, df)
-        self.assertRaisesRegexp(Exception, 'The data in column "value" contains nan values.',
+        self.assertRaisesRegexp(UserError, 'The data in column "value" contains nan values.',
                                invoker.invoke,
                                self.__input_path, self.__detect_mode, self.__timestamp_column, self.__value_column,
                                self.__batch_size, self.__threshold, self.__sensitivity, self.__append_mode,
@@ -106,7 +107,7 @@ class TestErrorInput(unittest.TestCase):
        df['timestamp'] = timestamps
        df['value'] = 1e200
        save_data_frame_to_directory(self.__input_path, df)
-        self.assertRaisesRegexp(Exception, 'The magnitude of data in column "value" exceeds limitation.',
+        self.assertRaisesRegexp(UserError, 'The magnitude of data in column "value" exceeds limitation.',
                                invoker.invoke,
                                self.__input_path, self.__detect_mode, self.__timestamp_column, self.__value_column,
                                self.__batch_size, self.__threshold, self.__sensitivity, self.__append_mode,
@@ -118,7 +119,7 @@ class TestErrorInput(unittest.TestCase):
        df['timestamp'] = timestamps
        df['value'] = np.sin(np.linspace(1, 10, 10))
        save_data_frame_to_directory(self.__input_path, df)
-        self.assertRaisesRegexp(Exception, "The dataset should contain at leaslt 12 points to run this module.",
+        self.assertRaisesRegexp(UserError, "The dataset should contain at leaslt 12 points to run this module.",
                                invoker.invoke,
                                self.__input_path, self.__detect_mode, self.__timestamp_column, self.__value_column,
                                self.__batch_size, self.__threshold, self.__sensitivity, self.__append_mode,
@@ -130,7 +131,7 @@ class TestErrorInput(unittest.TestCase):
        df['timestamp'] = timestamps
        df['value'] = np.sin(np.linspace(1, 10, 20))
        save_data_frame_to_directory(self.__input_path, df)
-        self.assertRaisesRegexp(Exception, 'The "batchSize" parameter should be at least 12 or 0 that indicates to run all data in a batch',
+        self.assertRaisesRegexp(UserError, 'The "batchSize" parameter should be at least 12 or 0 that indicates to run all data in a batch',
                                invoker.invoke,
                                self.__input_path, self.__detect_mode, self.__timestamp_column, self.__value_column,
                                5, self.__threshold, self.__sensitivity, self.__append_mode,