From d257b880e5534d34ecf6be4570e538be100dd80c Mon Sep 17 00:00:00 2001 From: Yuanxiang Ying Date: Wed, 2 Dec 2020 11:53:22 +0800 Subject: [PATCH] Fix SR anomaly score calculation at beginning (#5502) * adjust expected value * update boundary calculation * fix boundary * adjust default values * fix percent case * fix error in anomaly score calculation * adjust score calculation for first & second points * fix sr do not report anomaly at beginning * fix a issue in batch process * remove a unused parameter Co-authored-by: yuyi@microsoft.com --- .../SrCnnEntireAnomalyDetector.cs | 29 +++++++++- .../TimeSeriesDirectApi.cs | 58 +++++++++++++++++++ test/data/Timeseries/anomaly_at_beginning.csv | 39 +++++++++++++ 3 files changed, 123 insertions(+), 3 deletions(-) create mode 100644 test/data/Timeseries/anomaly_at_beginning.csv diff --git a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs index ddcbfd79d..45505d5ec 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs @@ -309,6 +309,15 @@ namespace Microsoft.ML.TimeSeries _previousBatch = _previousBatch.GetRange(_batch.Count, _bLen); _previousBatch.AddRange(_batch); _modeler.Train(_previousBatch.ToArray(), ref _results); + + // move the values to front + for (int i = 0; i < _batch.Count; ++i) + { + for (int j = 0; j < _outputLength; ++j) + { + _results[i][j] = _results[_bLen + i][j]; + } + } } else { @@ -334,7 +343,7 @@ namespace Microsoft.ML.TimeSeries double src = default; srcGetter(ref src); var result = VBufferEditor.Create(ref dst, _outputLength); - _results[input.Position % _batchSize + _bLen].CopyTo(result.Values); + _results[input.Position % _batchSize].CopyTo(result.Values); dst = result.Commit(); }; return getter; @@ -351,6 +360,15 @@ namespace Microsoft.ML.TimeSeries private static readonly double _deanomalyThreshold = 0.35; private static readonly double 
_boundSensitivity = 93.0; private static readonly double _unitForZero = 0.3; + private static readonly double _minimumScore = 0.0; + private static readonly double _maximumScore = 1.0; + // If the score window is smaller than this value, the anomaly score tends to be small. + // Proof: For each point, the SR anomaly score is calculated as (w is the average window size): + // (mag - avg_mag) / avg_mag + // = max (w * mag_{a} - sum_{i=0 to w-1} mag_{a - i}) / sum_{i=0 to w-1} mag_{a - i} + // = max ((w - 1) * mag_{a} - C) / (mag_{a} + C), where C = sum_{i=1 to w-1} mag_{a - i} >= 0 + // <= w - 1 + private static readonly int _minimumScoreWindowSize = (int)(_maximumScore * 10) + 1; // pseudo-code to generate the factors. // factors = [] @@ -577,15 +595,20 @@ namespace Microsoft.ML.TimeSeries { _ifftMagList[i] = Math.Sqrt(_ifftRe[i] * _ifftRe[i] + _ifftIm[i] * _ifftIm[i]); } + AverageFilter(_ifftMagList, Math.Min(_ifftMagList.Length, _judgementWindowSize)); + for (int i = 0; i <= Math.Min(length, _minimumScoreWindowSize); ++i) + { + _cumSumList[i] = _cumSumList[Math.Min(length, _minimumScoreWindowSize) - 1]; + } // Step 7: Calculate raw score and set result for (int i = 0; i < results.GetLength(0); ++i) { var score = CalculateScore(_ifftMagList[i], _cumSumList[i]); score /= 10.0f; - score = Math.Min(score, 1); - score = Math.Max(score, 0); + score = Math.Min(score, _maximumScore); + score = Math.Max(score, _minimumScore); var detres = score > threshold ? 
1 : 0; diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 3c8142602..2877c3150 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -776,6 +776,64 @@ namespace Microsoft.ML.Tests } } + [Theory, CombinatorialData] + public void TestSrCnnAnomalyDetectorWithAnomalyAtBeginning( + [CombinatorialValues(SrCnnDeseasonalityMode.Stl, SrCnnDeseasonalityMode.Mean, SrCnnDeseasonalityMode.Median)] SrCnnDeseasonalityMode mode + ) + { + var ml = new MLContext(1); + IDataView dataView; + List data; + + var dataPath = GetDataPath("Timeseries", "anomaly_at_beginning.csv"); + + // Load data from file into the dataView + dataView = ml.Data.LoadFromTextFile(dataPath, hasHeader: true); + data = ml.Data.CreateEnumerable(dataView, reuseRowObject: false).ToList(); + + // Setup the detection arguments + string outputColumnName = nameof(SrCnnAnomalyDetection.Prediction); + string inputColumnName = nameof(TimeSeriesDataDouble.Value); + + // Do batch anomaly detection + var options = new SrCnnEntireAnomalyDetectorOptions() + { + Threshold = 0.30, + BatchSize = -1, + Sensitivity = 80.0, + DetectMode = SrCnnDetectMode.AnomalyAndMargin, + Period = 0, + DeseasonalityMode = mode + }; + + var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, options); + + // Getting the data of the newly created column as an IEnumerable of SrCnnAnomalyDetection. 
+ var predictionColumn = ml.Data.CreateEnumerable( + outputDataView, reuseRowObject: false); + + var anomalyIndex = 1; + + int k = 0; + foreach (var prediction in predictionColumn) + { + Assert.Equal(7, prediction.Prediction.Length); + if (anomalyIndex == k) + { + Assert.Equal(1, prediction.Prediction[0]); + Assert.True(prediction.Prediction[6] > data[k].Value || data[k].Value > prediction.Prediction[5]); + } + else + { + Assert.Equal(0, prediction.Prediction[0]); + Assert.True(prediction.Prediction[6] <= data[k].Value); + Assert.True(data[k].Value <= prediction.Prediction[5]); + } + + ++k; + } + } + [Theory, CombinatorialData] public void TestSrcnnEntireDetectNonnegativeData( [CombinatorialValues(true, false)] bool isPositive) diff --git a/test/data/Timeseries/anomaly_at_beginning.csv b/test/data/Timeseries/anomaly_at_beginning.csv new file mode 100644 index 000000000..609c3a5f1 --- /dev/null +++ b/test/data/Timeseries/anomaly_at_beginning.csv @@ -0,0 +1,39 @@ +Value +181.944 +37.176 +57.14 +67.128 +72.12 +77.112 +82.104 +83.1 +87.09 +92.088 +92.01 +97.08 +102.072 +107.05 +107.06 +117.048 +122.04 +132.024 +147 +151.82 +151.992 +151.72 +151.94 +156.969 +156.984 +156.92 +161.976 +161.94 +161.97 +166.968 +176.952 +181.94 +186.936 +201.91 +201.912 +201.9 +206.904 +216.88