This commit is contained in:
yuyi@microsoft.com 2020-04-30 15:54:46 +08:00
Родитель 0c3c20a795
Коммит 388a2f0d02
3 изменённых файлов: 29 добавлений и 22 удалений

5
.gitignore поставляемый
Просмотреть файл

@@ -133,9 +133,10 @@ venv.bak/
# Mac DS_Store files
.DS_Store
# VS code
# VS code
.vscode
# Ev2 Generator binaries
bin
packages
packages
debug/

Просмотреть файл

@@ -5,6 +5,7 @@ import numpy as np
from azureml.studio.core.logger import module_logger as logger
from azureml.studio.core.io.data_frame_directory import load_data_frame_from_directory, save_data_frame_to_directory
from azureml.studio.core.utils.column_selection import ColumnSelection
from azureml.studio.core.error import UserError
from azureml.studio.internal.error_handler import error_handler
import sr_detector
from error_messages import *
@@ -36,7 +37,6 @@ def is_timestamp_ascending(timestamps):
return 0
@error_handler
def invoke(input_path, detect_mode, timestamp_column, value_column, batch_size, threshold, sensitivity,
appendMode, compute_stats_in_visualization, output_path):
data_frame_directory = load_data_frame_from_directory(input_path)
@@ -44,10 +44,10 @@ def invoke(input_path, detect_mode, timestamp_column, value_column, batch_size,
logger.debug(f"Shape of loaded DataFrame: {data_frame_directory.data.shape}")
if data_frame_directory.data.shape[0] < MIN_POINTS:
raise Exception(NotEnoughPoints.format(MIN_POINTS))
raise UserError(NotEnoughPoints.format(MIN_POINTS))
if 0 < batch_size < MIN_POINTS:
raise Exception(InvalidBatchSize.format(MIN_POINTS))
raise UserError(InvalidBatchSize.format(MIN_POINTS))
query_string = unquote(timestamp_column)
timestamp_column_selector = ColumnSelection(query_string)
@@ -56,13 +56,13 @@ def invoke(input_path, detect_mode, timestamp_column, value_column, batch_size,
timestamps = pd.to_datetime(timestamp.iloc[:, 0].values)
if np.any(np.isnat(timestamps)):
raise Exception(InvalidTimestamps)
raise UserError(InvalidTimestamps)
res = is_timestamp_ascending(timestamps)
if res == -1:
raise Exception(InvalidSeriesOrder)
raise UserError(InvalidSeriesOrder)
elif res == -2:
raise Exception(DuplicateSeriesTimestamp)
raise UserError(DuplicateSeriesTimestamp)
query_string = unquote(value_column)
@@ -73,13 +73,13 @@ def invoke(input_path, detect_mode, timestamp_column, value_column, batch_size,
try:
float_data = data_columns[col].apply(float)
except Exception as e:
raise Exception(InvalidValueFormat.format(col))
raise UserError(InvalidValueFormat.format(col))
if not np.all(np.isfinite(float_data)):
raise Exception(InvalidSeriesValue.format(col))
raise UserError(InvalidSeriesValue.format(col))
if np.any(np.less(float_data, VALUE_LOWER_BOUND)) or np.any(np.greater(float_data, VALUE_UPPER_BOUND)):
raise Exception(ValueOverflow.format(col))
raise UserError(ValueOverflow.format(col))
data_columns[col] = float_data
@@ -92,7 +92,8 @@ def invoke(input_path, detect_mode, timestamp_column, value_column, batch_size,
save_data_frame_to_directory(output_path, result, compute_stats_in_visualization=compute_stats_in_visualization)
if __name__ == '__main__':
@error_handler
def main():
parser = argparse.ArgumentParser()
parser.add_argument(
@@ -165,3 +166,7 @@ if __name__ == '__main__':
invoke(args.input_path, args.detect_mode, args.timestamp_column, args.value_column,
args.batch_size, args.threshold, args.sensitivity, args.append_mode,
args.compute_stats_in_visualization, args.output_path)
if __name__ == '__main__':
main()

Просмотреть файл

@@ -6,7 +6,8 @@ import numpy as np
import pandas as pd
import shutil
import os
from azureml.studio.core.io.data_frame_directory import load_data_frame_from_directory, save_data_frame_to_directory
from azureml.studio.core.io.data_frame_directory import save_data_frame_to_directory
from azureml.studio.core.error import UserError
import invoker
@@ -36,7 +37,7 @@ class TestErrorInput(unittest.TestCase):
def test_empty_input(self):
df = pd.DataFrame()
save_data_frame_to_directory(self.__input_path, df)
self.assertRaisesRegexp(Exception, "The dataset should contain at leaslt 12 points to run this module.",
self.assertRaisesRegexp(UserError, "The dataset should contain at leaslt 12 points to run this module.",
invoker.invoke,
self.__input_path, self.__detect_mode, self.__timestamp_column, self.__value_column,
self.__batch_size, self.__threshold, self.__sensitivity, self.__append_mode,
@@ -47,7 +48,7 @@ class TestErrorInput(unittest.TestCase):
df['timestamp'] = 'invalid'
df['value'] = np.ones(20)
save_data_frame_to_directory(self.__input_path, df)
self.assertRaisesRegexp(Exception, "The timestamp column specified is malformed.",
self.assertRaisesRegexp(UserError, "The timestamp column specified is malformed.",
invoker.invoke,
self.__input_path, self.__detect_mode, self.__timestamp_column, self.__value_column,
self.__batch_size, self.__threshold, self.__sensitivity, self.__append_mode,
@@ -59,7 +60,7 @@ class TestErrorInput(unittest.TestCase):
df['timestamp'] = timestamps
df['value'] = np.ones(20)
save_data_frame_to_directory(self.__input_path, df)
self.assertRaisesRegexp(Exception, "The timestamp column specified is not in ascending order.",
self.assertRaisesRegexp(UserError, "The timestamp column specified is not in ascending order.",
invoker.invoke,
self.__input_path, self.__detect_mode, self.__timestamp_column, self.__value_column,
self.__batch_size, self.__threshold, self.__sensitivity, self.__append_mode,
@@ -70,7 +71,7 @@ class TestErrorInput(unittest.TestCase):
df['value'] = np.ones(20)
df['timestamp'] = '2020-01-01'
save_data_frame_to_directory(self.__input_path, df)
self.assertRaisesRegexp(Exception, "The timestamp column specified has duplicated timestamps.",
self.assertRaisesRegexp(UserError, "The timestamp column specified has duplicated timestamps.",
invoker.invoke,
self.__input_path, self.__detect_mode, self.__timestamp_column, self.__value_column,
self.__batch_size, self.__threshold, self.__sensitivity, self.__append_mode,
@@ -82,7 +83,7 @@ class TestErrorInput(unittest.TestCase):
df['timestamp'] = timestamps
df['value'] = 'invalid'
save_data_frame_to_directory(self.__input_path, df)
self.assertRaisesRegexp(Exception, 'The data in column "value" can not be parsed as float values.',
self.assertRaisesRegexp(UserError, 'The data in column "value" can not be parsed as float values.',
invoker.invoke,
self.__input_path, self.__detect_mode, self.__timestamp_column, self.__value_column,
self.__batch_size, self.__threshold, self.__sensitivity, self.__append_mode,
@@ -94,7 +95,7 @@ class TestErrorInput(unittest.TestCase):
df['timestamp'] = timestamps
df['value'] = np.nan
save_data_frame_to_directory(self.__input_path, df)
self.assertRaisesRegexp(Exception, 'The data in column "value" contains nan values.',
self.assertRaisesRegexp(UserError, 'The data in column "value" contains nan values.',
invoker.invoke,
self.__input_path, self.__detect_mode, self.__timestamp_column, self.__value_column,
self.__batch_size, self.__threshold, self.__sensitivity, self.__append_mode,
@@ -106,7 +107,7 @@ class TestErrorInput(unittest.TestCase):
df['timestamp'] = timestamps
df['value'] = 1e200
save_data_frame_to_directory(self.__input_path, df)
self.assertRaisesRegexp(Exception, 'The magnitude of data in column "value" exceeds limitation.',
self.assertRaisesRegexp(UserError, 'The magnitude of data in column "value" exceeds limitation.',
invoker.invoke,
self.__input_path, self.__detect_mode, self.__timestamp_column, self.__value_column,
self.__batch_size, self.__threshold, self.__sensitivity, self.__append_mode,
@@ -118,7 +119,7 @@ class TestErrorInput(unittest.TestCase):
df['timestamp'] = timestamps
df['value'] = np.sin(np.linspace(1, 10, 10))
save_data_frame_to_directory(self.__input_path, df)
self.assertRaisesRegexp(Exception, "The dataset should contain at leaslt 12 points to run this module.",
self.assertRaisesRegexp(UserError, "The dataset should contain at leaslt 12 points to run this module.",
invoker.invoke,
self.__input_path, self.__detect_mode, self.__timestamp_column, self.__value_column,
self.__batch_size, self.__threshold, self.__sensitivity, self.__append_mode,
@@ -130,7 +131,7 @@ class TestErrorInput(unittest.TestCase):
df['timestamp'] = timestamps
df['value'] = np.sin(np.linspace(1, 10, 20))
save_data_frame_to_directory(self.__input_path, df)
self.assertRaisesRegexp(Exception, 'The "batchSize" parameter should be at least 12 or 0 that indicates to run all data in a batch',
self.assertRaisesRegexp(UserError, 'The "batchSize" parameter should be at least 12 or 0 that indicates to run all data in a batch',
invoker.invoke,
self.__input_path, self.__detect_mode, self.__timestamp_column, self.__value_column,
5, self.__threshold, self.__sensitivity, self.__append_mode,