add anomaly_and_margin mode
This commit is contained in:
Родитель
898c88bf84
Коммит
adf8c51d53
|
@ -1,4 +1,4 @@
|
|||
from msanomalydetector.spectral_residual import SpectralResidual
|
||||
from msanomalydetector.util import MAX_RATIO, THRESHOLD, MAG_WINDOW, SCORE_WINDOW
|
||||
from msanomalydetector.util import MAX_RATIO, THRESHOLD, MAG_WINDOW, SCORE_WINDOW, DetectMode
|
||||
|
||||
__all__ = ['SpectralResidual', 'MAX_RATIO', 'THRESHOLD', 'MAG_WINDOW', 'SCORE_WINDOW']
|
||||
__all__ = ['SpectralResidual', 'MAX_RATIO', 'THRESHOLD', 'MAG_WINDOW', 'SCORE_WINDOW', 'DetectMode']
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
import numpy as np
|
||||
cimport numpy as np
|
||||
import array
|
||||
import bisect
|
||||
|
||||
|
||||
cpdef float sorted_median(float[:] data, int i, int j):
|
||||
cdef int n = j - i
|
||||
cdef int mid
|
||||
if n == 0:
|
||||
raise Exception("no median for empty data")
|
||||
if n % 2 == 1:
|
||||
return data[i + n // 2]
|
||||
else:
|
||||
mid = i + n // 2
|
||||
return (data[mid - 1] + data[mid])/2
|
||||
|
||||
cpdef median_filter(np.ndarray data, int window, bint need_two_end=False):
|
||||
cdef int w_len = window // 2 * 2 + 1
|
||||
cdef int t_len = len(data)
|
||||
cdef float[:] val = array.array('f', [x for x in data])
|
||||
cdef float[:] ans = array.array('f', [x for x in data])
|
||||
cdef float[:] cur_windows = array.array('f', [0 for x in range(w_len)])
|
||||
cdef int delete_id
|
||||
cdef int add_id
|
||||
cdef int index
|
||||
if t_len < w_len:
|
||||
return ans
|
||||
for i in range(0, w_len):
|
||||
index = i
|
||||
add_id = bisect.bisect_right(cur_windows[:i], val[i])
|
||||
while index > add_id:
|
||||
cur_windows[index] = cur_windows[index - 1]
|
||||
index -= 1
|
||||
cur_windows[add_id] = data[i]
|
||||
if i >= w_len // 2 and need_two_end:
|
||||
ans[i - w_len // 2] = sorted_median(cur_windows, 0, i + 1)
|
||||
ans[window // 2] = sorted_median(cur_windows, 0, w_len)
|
||||
for i in range(window // 2 + 1, t_len - window // 2):
|
||||
delete_id = bisect.bisect_right(cur_windows, val[i - window // 2 - 1]) - 1
|
||||
index = delete_id
|
||||
while index < w_len - 1:
|
||||
cur_windows[index] = cur_windows[index + 1]
|
||||
index += 1
|
||||
|
||||
add_id = bisect.bisect_right(cur_windows[:w_len - 1], val[i + window // 2])
|
||||
index = w_len - 1
|
||||
while index > add_id:
|
||||
cur_windows[index] = cur_windows[index - 1]
|
||||
index -= 1
|
||||
cur_windows[add_id] = data[i + window // 2]
|
||||
|
||||
ans[i] = sorted_median(cur_windows, 0, w_len)
|
||||
|
||||
if need_two_end:
|
||||
for i in range(t_len - window // 2, t_len):
|
||||
delete_id = bisect.bisect_right(cur_windows[: w_len], data[i - window // 2 - 1]) - 1
|
||||
index = delete_id
|
||||
while index < w_len - 1:
|
||||
cur_windows[index] = cur_windows[index + 1]
|
||||
index += 1
|
||||
w_len -= 1
|
||||
ans[i] = sorted_median(cur_windows[: w_len], 0, w_len)
|
||||
|
||||
return ans
|
|
@ -0,0 +1,113 @@
|
|||
import bisect
|
||||
import numpy as np
|
||||
from msanomalydetector._anomaly_kernel_cython import median_filter
|
||||
|
||||
|
||||
# pseudo - code to generate the factors.
|
||||
# factors = [1]
|
||||
# for i in range(50):
|
||||
# if i < 40:
|
||||
# factors.append(factors[-1] / (1.15 + 0.001 * i))
|
||||
# else:
|
||||
# factors.append(factors[-1] / (1.25 + 0.01 * i))
|
||||
# for i in range(50):
|
||||
# factors.insert(0, factors[0] * (1.25 + 0.001 * i))
|
||||
|
||||
factors = [
|
||||
184331.62871148242, 141902.71648305038, 109324.12672037778, 84289.9974713784, 65038.57829581667, 50222.84038287002,
|
||||
38812.08684920403, 30017.081863266845, 23233.035497884553, 17996.15452973242, 13950.50738738947, 10822.736530170265,
|
||||
8402.745753237783, 6528.939979205737, 5076.93622022219, 3950.92312857758, 3077.042935029268, 2398.318733460069,
|
||||
1870.7634426365591, 1460.393007522685, 1140.9320371270976, 892.0500681212648, 698.0047481387048, 546.5972968979678,
|
||||
428.36778753759233, 335.97473532360186, 263.71643275007995, 207.16137686573444, 162.8627176617409, 128.13746472206208,
|
||||
100.8956415134347, 79.50799173635517, 62.70346351447568, 49.48971074544253, 39.09139869308257, 30.90229145698227,
|
||||
24.448015393182175, 19.35709849024717, 15.338429865489042, 12.163703303322, 9.653732780414286, 7.667778221139226,
|
||||
6.095213212352326, 4.8490160798347866, 3.8606815922251485, 3.076240312529999, 2.4531421949999994, 1.9578149999999996,
|
||||
1.5637499999999998, 1.25, 1.0, 0.8695652173913044, 0.7554867223208555, 0.655804446459076, 0.5687809596349316,
|
||||
0.4928777813127657, 0.4267340097946024, 0.36914706729636887, 0.3190553736355825, 0.27552277516026125, 0.23772456873189068,
|
||||
0.20493497304473338, 0.17651591132190647, 0.1519069804835684, 0.13061649224726435, 0.11221348131208278, 0.09632058481723846,
|
||||
0.08260770567516164, 0.0707863801843716, 0.06060477755511267, 0.051843265658779024, 0.0443104834690419, 0.03783986632710667,
|
||||
0.03228657536442549, 0.027524787181948417, 0.02344530424356765, 0.019953450420057577, 0.01696721974494692, 0.014415649740821513,
|
||||
0.012237393667929978, 0.010379468759906684, 0.008796159966022614, 0.0074480609365136455, 0.006301235986898177,
|
||||
0.00532648857725966, 0.004498723460523362, 0.0037963911059268884, 0.0032010043051660104, 0.002696718032995797,
|
||||
0.0022699646742388863, 0.0019091376570554135, 0.0011570531254881296, 0.000697019955113331, 0.00041737721863073713,
|
||||
0.000248438820613534, 0.00014700521929794912, 8.647365841055832e-05, 5.056939088336744e-05, 2.9400808653120604e-05,
|
||||
1.6994687082728674e-05, 9.767061541798089e-06
|
||||
]
|
||||
|
||||
|
||||
def calculate_boundary_unit_last(data):
|
||||
if len(data) == 0:
|
||||
return 0
|
||||
|
||||
calculation_size = len(data) - 1
|
||||
window = int(min(calculation_size // 3, 512))
|
||||
trends = np.abs(np.asarray(median_filter(data[:calculation_size], window, need_two_end=True), dtype=float))
|
||||
|
||||
unit = max(np.mean(trends), 1.0)
|
||||
|
||||
if not np.isfinite(unit):
|
||||
raise Exception('Not finite unit value')
|
||||
|
||||
return unit
|
||||
|
||||
|
||||
def calculate_bounary_unit_entire(data, is_anomaly):
|
||||
if len(data) == 0:
|
||||
return []
|
||||
|
||||
window = int(min(len(data)//3, 512))
|
||||
trend_fraction = 0.5
|
||||
trends = np.abs(np.asarray(median_filter(data, window, need_two_end=True), dtype=float))
|
||||
valid_trend = [t for a, t in zip(is_anomaly, trends) if not a]
|
||||
|
||||
if len(valid_trend) > 0:
|
||||
average_part = np.mean(valid_trend)
|
||||
units = trend_fraction * trends + average_part * (1 - trend_fraction)
|
||||
else:
|
||||
units = trends
|
||||
|
||||
if not np.all(np.isfinite(units)):
|
||||
raise Exception('Not finite unit values')
|
||||
|
||||
return units
|
||||
|
||||
|
||||
def calculate_margin(unit, sensitivity):
|
||||
|
||||
def calculate_margin_core(unit, sensitivity):
|
||||
lb = int(sensitivity)
|
||||
if lb == sensitivity:
|
||||
return unit * factors[lb]
|
||||
|
||||
return (factors[lb + 1] + (factors[lb] - factors[lb + 1]) * (1 - sensitivity + lb)) * unit
|
||||
|
||||
if 0 > sensitivity or sensitivity > 100:
|
||||
raise Exception('sensitivity should be integer in [0, 100]')
|
||||
|
||||
if unit <= 0:
|
||||
raise Exception('unit should be a positive number')
|
||||
|
||||
return calculate_margin_core(unit, sensitivity)
|
||||
|
||||
|
||||
def calculate_anomaly_score(value, expected_value, unit, is_anomaly):
|
||||
if not is_anomaly:
|
||||
return 0
|
||||
|
||||
distance = np.abs(expected_value - value)
|
||||
margins = [calculate_margin(unit, i) for i in range(101)][::-1]
|
||||
lb = bisect.bisect_left(margins, distance)
|
||||
|
||||
if lb == 0:
|
||||
score = 0
|
||||
else:
|
||||
a, b = margins[lb-1], margins[lb]
|
||||
score = lb - 1 + (distance - a) / (b - a)
|
||||
|
||||
return score
|
||||
|
||||
|
||||
def calculate_anomaly_scores(values, expected_values, units, is_anomaly):
|
||||
scores = [calculate_anomaly_score(value, exp, unit, anomaly)
|
||||
for value, exp, unit, anomaly in zip(values, expected_values, units, is_anomaly)]
|
||||
return scores
|
|
@ -24,36 +24,75 @@ ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
|
|||
POSSIBILITY OF SUCH DAMAGE.
|
||||
"""
|
||||
|
||||
from msanomalydetector.util import AnomalyId, AnomalyScore, IsAnomaly, Value, Timestamp, EPS, average_filter
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
from msanomalydetector.util import *
|
||||
import msanomalydetector.boundary_utils as boundary_helper
|
||||
from msanomalydetector._anomaly_kernel_cython import median_filter
|
||||
|
||||
|
||||
class SpectralResidual:
|
||||
def __init__(self, series, threshold, mag_window, score_window):
|
||||
def __init__(self, series, threshold, mag_window, score_window, sensitivity, detect_mode):
|
||||
self.__series__ = series
|
||||
self.__values__ = self.__series__['value'].tolist()
|
||||
self.__threshold__ = threshold
|
||||
self.__mag_window = mag_window
|
||||
self.__score_window = score_window
|
||||
self.__sensitivity = sensitivity
|
||||
self.__detect_mode = detect_mode
|
||||
self.__anomaly_frame = None
|
||||
|
||||
def detect(self):
|
||||
anomaly_scores = self.generate_spectral_score(series=self.__series__['value'].tolist())
|
||||
if self.__anomaly_frame is None:
|
||||
self.__anomaly_frame = self.__detect()
|
||||
|
||||
return self.__anomaly_frame
|
||||
|
||||
return anomaly_frame
|
||||
|
||||
def __detect(self):
|
||||
extended_series = SpectralResidual.extend_series(self.__values__)
|
||||
mags = self.spectral_residual_transform(extended_series)[:len(self.__series__)]
|
||||
anomaly_scores = self.generate_spectral_score(mags)
|
||||
anomaly_frame = pd.DataFrame({Timestamp: self.__series__['timestamp'],
|
||||
Value: self.__series__['value'],
|
||||
AnomalyId: list(range(0, len(anomaly_scores))),
|
||||
Mag: mags,
|
||||
AnomalyScore: anomaly_scores})
|
||||
anomaly_frame[IsAnomaly] = np.where(anomaly_frame[AnomalyScore] >= self.__threshold__, True, False)
|
||||
anomaly_frame.set_index(AnomalyId, inplace=True)
|
||||
|
||||
if self.__detect_mode == DetectMode.anomaly_and_margin:
|
||||
anomaly_frame[ExpectedValue] = self.calculate_expected_value(anomaly_frame[anomaly_frame[IsAnomaly]].index.tolist())
|
||||
boundary_units = boundary_helper.calculate_bounary_unit_entire(np.asarray(self.__values__),
|
||||
anomaly_frame[IsAnomaly].values)
|
||||
anomaly_frame[AnomalyScore] = boundary_helper.calculate_anomaly_scores(
|
||||
values=anomaly_frame[Value].values,
|
||||
expected_values=anomaly_frame[ExpectedValue].values,
|
||||
units=boundary_units,
|
||||
is_anomaly=anomaly_frame[IsAnomaly].values
|
||||
)
|
||||
|
||||
margins = [boundary_helper.calculate_margin(u, self.__sensitivity) for u in boundary_units]
|
||||
|
||||
anomaly_frame[LowerBoundary] = anomaly_frame[ExpectedValue].values - margins
|
||||
anomaly_frame[UpperBoundary] = anomaly_frame[ExpectedValue].values + margins
|
||||
anomaly_frame[IsAnomaly] = np.logical_and(anomaly_frame[IsAnomaly].values,
|
||||
anomaly_frame[LowerBoundary].values <= anomaly_frame[Value].values)
|
||||
anomaly_frame[IsAnomaly] = np.logical_and(anomaly_frame[IsAnomaly].values,
|
||||
anomaly_frame[Value].values <= anomaly_frame[UpperBoundary].values)
|
||||
|
||||
return anomaly_frame
|
||||
|
||||
def generate_spectral_score(self, series):
|
||||
extended_series = SpectralResidual.extend_series(series)
|
||||
mag = self.spectral_residual_transform(extended_series)[:len(series)]
|
||||
ave_mag = average_filter(mag, n=self.__score_window)
|
||||
def generate_spectral_score(self, mags):
|
||||
ave_mag = average_filter(mags, n=self.__score_window)
|
||||
ave_mag[np.where(ave_mag <= EPS)] = EPS
|
||||
|
||||
return abs(mag - ave_mag) / ave_mag
|
||||
raw_scores = abs(mags - ave_mag) / ave_mag
|
||||
scores = np.clip(raw_scores / 10.0, 0, 1.0)
|
||||
|
||||
return scores
|
||||
|
||||
def spectral_residual_transform(self, values):
|
||||
"""
|
||||
|
@ -124,3 +163,12 @@ class SpectralResidual:
|
|||
|
||||
extension = [SpectralResidual.predict_next(values[-look_ahead - 2:-1])] * extend_num
|
||||
return values + extension
|
||||
|
||||
def calculate_expected_value(self, anomaly_index):
|
||||
values = deanomaly_entire(self.__values__, anomaly_index)
|
||||
length = len(values)
|
||||
fft_coef = np.fft.fft(values)
|
||||
fft_coef.real = [v if length * 3 / 8 >= i or i >= length * 5 / 8 else 0 for i, v in enumerate(fft_coef.real)]
|
||||
fft_coef.imag = [v if length * 3 / 8 >= i or i >= length * 5 / 8 else 0 for i, v in enumerate(fft_coef.imag)]
|
||||
exps = np.fft.ifft(fft_coef)
|
||||
return exps
|
||||
|
|
|
@ -23,6 +23,7 @@ IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|||
ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
"""
|
||||
from enum import Enum
|
||||
import numpy as np
|
||||
|
||||
IsAnomaly = "isAnomaly"
|
||||
|
@ -30,14 +31,23 @@ AnomalyId = "id"
|
|||
AnomalyScore = "score"
|
||||
Value = "value"
|
||||
Timestamp = "timestamp"
|
||||
Mag = "mag"
|
||||
ExpectedValue = "expectedValue"
|
||||
UpperBoundary = "upperBoundary"
|
||||
LowerBoundary = "lowerBoundary"
|
||||
|
||||
MAX_RATIO = 0.25
|
||||
EPS = 1e-8
|
||||
THRESHOLD = 3
|
||||
THRESHOLD = 0.3
|
||||
MAG_WINDOW = 3
|
||||
SCORE_WINDOW = 100
|
||||
|
||||
|
||||
class DetectMode(Enum):
|
||||
anomaly_only = 'AnomalyOnly'
|
||||
anomaly_and_margin = 'AnomalyAndMargin'
|
||||
|
||||
|
||||
def average_filter(values, n=3):
|
||||
"""
|
||||
Calculate the sliding window average for the give time series.
|
||||
|
@ -61,3 +71,35 @@ def average_filter(values, n=3):
|
|||
res[i] /= (i + 1)
|
||||
|
||||
return res
|
||||
|
||||
|
||||
def leastsq(x, y):
|
||||
n = len(x)
|
||||
sum_x = np.sum(x)
|
||||
sum_y = np.sum(y)
|
||||
sum_xx = np.sum(np.multiply(x, x))
|
||||
sum_xy = np.sum(np.multiply(x, y))
|
||||
a = (n * sum_xy - sum_x * sum_y) / (n * sum_xx - sum_x * sum_x)
|
||||
b = (sum_xx * sum_y - sum_x * sum_xy) / (n * sum_xx - sum_x * sum_x)
|
||||
return a, b
|
||||
|
||||
|
||||
def deanomaly_entire(values, entire_anomalies):
|
||||
min_points_to_fit = 4
|
||||
for idx in entire_anomalies:
|
||||
step = 1
|
||||
start = max(idx - step, 0)
|
||||
end = min(len(values) - 1, idx + step)
|
||||
fit_values = [(i, values[i]) for i in range(start, end+1) if i not in entire_anomalies]
|
||||
while len(fit_values) < min_points_to_fit and (start > 0 or end < len(values)-1):
|
||||
step = step + 2
|
||||
start = max(idx - step, 0)
|
||||
end = min(len(values) - 1, idx + step)
|
||||
fit_values = [(i, values[i]) for i in range(start, end+1) if i not in entire_anomalies]
|
||||
|
||||
if len(fit_values) > 1:
|
||||
x, y = tuple(zip(*fit_values))
|
||||
a, b = leastsq(x, y)
|
||||
values[idx] = a * idx + b
|
||||
|
||||
return values
|
||||
|
|
|
@ -1,2 +1,3 @@
|
|||
Cython>=0.29.2
|
||||
numpy==1.18.1
|
||||
pandas==0.25.3
|
||||
|
|
18
setup.py
18
setup.py
|
@ -1,10 +1,21 @@
|
|||
from setuptools import setup, find_packages
|
||||
from setuptools import setup, find_packages, Extension
|
||||
from Cython.Build import cythonize
|
||||
from Cython.Distutils import build_ext
|
||||
import numpy as np
|
||||
|
||||
__version__ = "can't find version.py"
|
||||
exec(compile(open('version.py').read(),
|
||||
'version.py', 'exec'))
|
||||
|
||||
extensions = [
|
||||
Extension("msanomalydetector._anomaly_kernel_cython", ["msanomalydetector/_anomaly_kernel_cython.pyx"],
|
||||
define_macros=[('CYTHON_TRACE', '1')])
|
||||
]
|
||||
|
||||
cmdclass = {'build_ext': build_ext}
|
||||
|
||||
install_requires = [
|
||||
'Cython>=0.29.2',
|
||||
'numpy==1.18.1',
|
||||
'pandas==0.25.3'
|
||||
]
|
||||
|
@ -13,9 +24,12 @@ setup(
|
|||
name="msanomalydetector",
|
||||
description='Microsoft Anomaly Detector Package Based On Saliency Detection',
|
||||
packages=find_packages(),
|
||||
include_dirs=[np.get_include()],
|
||||
cmdclass=cmdclass,
|
||||
ext_modules=cythonize(extensions),
|
||||
version=__version__,
|
||||
install_requires=install_requires,
|
||||
requires=['numpy', 'pandas'],
|
||||
requires=['Cython', 'numpy', 'pandas'],
|
||||
python_requires='>=3.6.0',
|
||||
package_data={'': ['*.txt']}
|
||||
)
|
||||
|
|
Загрузка…
Ссылка в новой задаче