Fix SR anomaly score calculation at beginning (#5502)

* adjust expected value

* update boundary calculation

* fix boundary

* adjust default values

* fix percent case

* fix error in anomaly score calculation

* adjust score calculation for first & second points

* fix SR not reporting anomalies at the beginning

* fix an issue in batch processing

* remove an unused parameter

Co-authored-by: yuyi@microsoft.com <Yuanxiang.Ying@microsoft.com>
This commit is contained in:
Yuanxiang Ying 2020-12-02 11:53:22 +08:00 коммит произвёл GitHub
Родитель 652abaa052
Коммит d257b880e5
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 123 добавлений и 3 удалений

Просмотреть файл

@ -309,6 +309,15 @@ namespace Microsoft.ML.TimeSeries
_previousBatch = _previousBatch.GetRange(_batch.Count, _bLen);
_previousBatch.AddRange(_batch);
_modeler.Train(_previousBatch.ToArray(), ref _results);
// move the values to front
for (int i = 0; i < _batch.Count; ++i)
{
for (int j = 0; j < _outputLength; ++j)
{
_results[i][j] = _results[_bLen + i][j];
}
}
}
else
{
@ -334,7 +343,7 @@ namespace Microsoft.ML.TimeSeries
double src = default;
srcGetter(ref src);
var result = VBufferEditor.Create(ref dst, _outputLength);
_results[input.Position % _batchSize + _bLen].CopyTo(result.Values);
_results[input.Position % _batchSize].CopyTo(result.Values);
dst = result.Commit();
};
return getter;
@ -351,6 +360,15 @@ namespace Microsoft.ML.TimeSeries
private static readonly double _deanomalyThreshold = 0.35;
private static readonly double _boundSensitivity = 93.0;
private static readonly double _unitForZero = 0.3;
private static readonly double _minimumScore = 0.0;
private static readonly double _maximumScore = 1.0;
// If the score window is smaller than this value, the anomaly score tends to be small.
// Proof: For each point, the SR anomaly score is calculated as (w is average window size):
// (mag - avg_mag) / avg_mag
// = max (w * mag_{a} - sum_{i=0 to w-1} mag_{a - i}) / sum_{i=0 to w-1} mag_{a - i}
// = max ((w - 1) * mag_{a} + C) / (mag_{a} + C)
// <= w - 1
private static readonly int _minimumScoreWindowSize = (int)(_maximumScore * 10) + 1;
// pseudo-code to generate the factors.
// factors = []
@ -577,15 +595,20 @@ namespace Microsoft.ML.TimeSeries
{
_ifftMagList[i] = Math.Sqrt(_ifftRe[i] * _ifftRe[i] + _ifftIm[i] * _ifftIm[i]);
}
AverageFilter(_ifftMagList, Math.Min(_ifftMagList.Length, _judgementWindowSize));
for (int i = 0; i <= Math.Min(length, _minimumScoreWindowSize); ++i)
{
_cumSumList[i] = _cumSumList[Math.Min(length, _minimumScoreWindowSize) - 1];
}
// Step 7: Calculate raw score and set result
for (int i = 0; i < results.GetLength(0); ++i)
{
var score = CalculateScore(_ifftMagList[i], _cumSumList[i]);
score /= 10.0f;
score = Math.Min(score, 1);
score = Math.Max(score, 0);
score = Math.Min(score, _maximumScore);
score = Math.Max(score, _minimumScore);
var detres = score > threshold ? 1 : 0;

Просмотреть файл

@ -776,6 +776,64 @@ namespace Microsoft.ML.Tests
}
}
[Theory, CombinatorialData]
public void TestSrCnnAnomalyDetectorWithAnomalyAtBeginning(
    [CombinatorialValues(SrCnnDeseasonalityMode.Stl, SrCnnDeseasonalityMode.Mean, SrCnnDeseasonalityMode.Median)] SrCnnDeseasonalityMode mode
    )
{
    // Regression test: an anomaly located at the very start of the series must be
    // reported, and every other point must stay inside the reported bounds.
    // Runs once per deseasonality mode supplied by CombinatorialValues.
    var ml = new MLContext(1);

    // Load the series twice: as an IDataView (detector input) and as a list,
    // so each raw value can be compared against the bounds reported for its row.
    var dataPath = GetDataPath("Timeseries", "anomaly_at_beginning.csv");
    IDataView dataView = ml.Data.LoadFromTextFile<TimeSeriesDataDouble>(dataPath, hasHeader: true);
    List<TimeSeriesDataDouble> data = ml.Data.CreateEnumerable<TimeSeriesDataDouble>(dataView, reuseRowObject: false).ToList();

    // Setup the detection arguments
    string outputColumnName = nameof(SrCnnAnomalyDetection.Prediction);
    string inputColumnName = nameof(TimeSeriesDataDouble.Value);

    // Do batch anomaly detection.
    // NOTE(review): BatchSize = -1 presumably means "treat the whole series as one batch" - confirm against the options documentation.
    var options = new SrCnnEntireAnomalyDetectorOptions()
    {
        Threshold = 0.30,
        BatchSize = -1,
        Sensitivity = 80.0,
        DetectMode = SrCnnDetectMode.AnomalyAndMargin,
        Period = 0,
        DeseasonalityMode = mode
    };

    var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, options);

    // Materialize the newly created column so the row count can be checked up front.
    var predictions = ml.Data.CreateEnumerable<SrCnnAnomalyDetection>(
        outputDataView, reuseRowObject: false).ToList();

    // Without this check the test would pass vacuously if the detector produced
    // no rows (or fewer rows than the input), because the per-row assertions
    // below would simply never run for the missing rows.
    Assert.Equal(data.Count, predictions.Count);

    // Zero-based index of the single point expected to be flagged.
    const int anomalyIndex = 1;

    for (int k = 0; k < predictions.Count; ++k)
    {
        var prediction = predictions[k].Prediction;
        var value = data[k].Value;

        // Each output row is checked to hold 7 values; slot 0 is the anomaly flag,
        // slot 5 is used as the upper bound and slot 6 as the lower bound.
        Assert.Equal(7, prediction.Length);

        if (k == anomalyIndex)
        {
            // The anomalous point is flagged and lies outside [lower, upper].
            Assert.Equal(1, prediction[0]);
            Assert.True(prediction[6] > value || value > prediction[5]);
        }
        else
        {
            // Every other point is not flagged and lies inside [lower, upper].
            Assert.Equal(0, prediction[0]);
            Assert.True(prediction[6] <= value);
            Assert.True(value <= prediction[5]);
        }
    }
}
[Theory, CombinatorialData]
public void TestSrcnnEntireDetectNonnegativeData(
[CombinatorialValues(true, false)] bool isPositive)

Просмотреть файл

@ -0,0 +1,39 @@
Value
181.944
37.176
57.14
67.128
72.12
77.112
82.104
83.1
87.09
92.088
92.01
97.08
102.072
107.05
107.06
117.048
122.04
132.024
147
151.82
151.992
151.72
151.94
156.969
156.984
156.92
161.976
161.94
161.97
166.968
176.952
181.94
186.936
201.91
201.912
201.9
206.904
216.88
1 Value
2 181.944
3 37.176
4 57.14
5 67.128
6 72.12
7 77.112
8 82.104
9 83.1
10 87.09
11 92.088
12 92.01
13 97.08
14 102.072
15 107.05
16 107.06
17 117.048
18 122.04
19 132.024
20 147
21 151.82
22 151.992
23 151.72
24 151.94
25 156.969
26 156.984
27 156.92
28 161.976
29 161.94
30 161.97
31 166.968
32 176.952
33 181.94
34 186.936
35 201.91
36 201.912
37 201.9
38 206.904
39 216.88