Fix SR anomaly score calculation at beginning (#5502)
* adjust expected value * update boundary calculation * fix boundary * adjust default values * fix percent case * fix error in anomaly score calculation * adjust score calculation for first & second points * fix SR not reporting anomaly at beginning * fix an issue in batch process * remove an unused parameter Co-authored-by: yuyi@microsoft.com <Yuanxiang.Ying@microsoft.com>
This commit is contained in:
Родитель
652abaa052
Коммит
d257b880e5
|
@ -309,6 +309,15 @@ namespace Microsoft.ML.TimeSeries
|
|||
_previousBatch = _previousBatch.GetRange(_batch.Count, _bLen);
|
||||
_previousBatch.AddRange(_batch);
|
||||
_modeler.Train(_previousBatch.ToArray(), ref _results);
|
||||
|
||||
// Move the values to the front
|
||||
for (int i = 0; i < _batch.Count; ++i)
|
||||
{
|
||||
for (int j = 0; j < _outputLength; ++j)
|
||||
{
|
||||
_results[i][j] = _results[_bLen + i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -334,7 +343,7 @@ namespace Microsoft.ML.TimeSeries
|
|||
double src = default;
|
||||
srcGetter(ref src);
|
||||
var result = VBufferEditor.Create(ref dst, _outputLength);
|
||||
_results[input.Position % _batchSize + _bLen].CopyTo(result.Values);
|
||||
_results[input.Position % _batchSize].CopyTo(result.Values);
|
||||
dst = result.Commit();
|
||||
};
|
||||
return getter;
|
||||
|
@ -351,6 +360,15 @@ namespace Microsoft.ML.TimeSeries
|
|||
private static readonly double _deanomalyThreshold = 0.35;
|
||||
private static readonly double _boundSensitivity = 93.0;
|
||||
private static readonly double _unitForZero = 0.3;
|
||||
private static readonly double _minimumScore = 0.0;
|
||||
private static readonly double _maximumScore = 1.0;
|
||||
// If the score window is smaller than this value, the anomaly score tends to be small.
|
||||
// Proof: For each point, the SR anomaly score is calculated as (w is average window size):
|
||||
// (mag - avg_mag) / avg_mag
|
||||
// = max (w * mag_{a} - sum_{i=0 to w-1} mag_{a - i}) / sum_{i=0 to w-1} mag_{a - i}
|
||||
// = max ((w - 1) * mag_{a} + C) / (mag_{a} + C)
|
||||
// <= w - 1
|
||||
private static readonly int _minimumScoreWindowSize = (int)(_maximumScore * 10) + 1;
|
||||
|
||||
// pseudo-code to generate the factors.
|
||||
// factors = []
|
||||
|
@ -577,15 +595,20 @@ namespace Microsoft.ML.TimeSeries
|
|||
{
|
||||
_ifftMagList[i] = Math.Sqrt(_ifftRe[i] * _ifftRe[i] + _ifftIm[i] * _ifftIm[i]);
|
||||
}
|
||||
|
||||
AverageFilter(_ifftMagList, Math.Min(_ifftMagList.Length, _judgementWindowSize));
|
||||
for (int i = 0; i <= Math.Min(length, _minimumScoreWindowSize); ++i)
|
||||
{
|
||||
_cumSumList[i] = _cumSumList[Math.Min(length, _minimumScoreWindowSize) - 1];
|
||||
}
|
||||
|
||||
// Step 7: Calculate raw score and set result
|
||||
for (int i = 0; i < results.GetLength(0); ++i)
|
||||
{
|
||||
var score = CalculateScore(_ifftMagList[i], _cumSumList[i]);
|
||||
score /= 10.0f;
|
||||
score = Math.Min(score, 1);
|
||||
score = Math.Max(score, 0);
|
||||
score = Math.Min(score, _maximumScore);
|
||||
score = Math.Max(score, _minimumScore);
|
||||
|
||||
var detres = score > threshold ? 1 : 0;
|
||||
|
||||
|
|
|
@ -776,6 +776,64 @@ namespace Microsoft.ML.Tests
|
|||
}
|
||||
}
|
||||
|
||||
[Theory, CombinatorialData]
|
||||
public void TestSrCnnAnomalyDetectorWithAnomalyAtBeginning(
|
||||
[CombinatorialValues(SrCnnDeseasonalityMode.Stl, SrCnnDeseasonalityMode.Mean, SrCnnDeseasonalityMode.Median)] SrCnnDeseasonalityMode mode
|
||||
)
|
||||
{
|
||||
var ml = new MLContext(1);
|
||||
IDataView dataView;
|
||||
List<TimeSeriesDataDouble> data;
|
||||
|
||||
var dataPath = GetDataPath("Timeseries", "anomaly_at_beginning.csv");
|
||||
|
||||
// Load data from file into the dataView
|
||||
dataView = ml.Data.LoadFromTextFile<TimeSeriesDataDouble>(dataPath, hasHeader: true);
|
||||
data = ml.Data.CreateEnumerable<TimeSeriesDataDouble>(dataView, reuseRowObject: false).ToList();
|
||||
|
||||
// Setup the detection arguments
|
||||
string outputColumnName = nameof(SrCnnAnomalyDetection.Prediction);
|
||||
string inputColumnName = nameof(TimeSeriesDataDouble.Value);
|
||||
|
||||
// Do batch anomaly detection
|
||||
var options = new SrCnnEntireAnomalyDetectorOptions()
|
||||
{
|
||||
Threshold = 0.30,
|
||||
BatchSize = -1,
|
||||
Sensitivity = 80.0,
|
||||
DetectMode = SrCnnDetectMode.AnomalyAndMargin,
|
||||
Period = 0,
|
||||
DeseasonalityMode = mode
|
||||
};
|
||||
|
||||
var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, options);
|
||||
|
||||
// Getting the data of the newly created column as an IEnumerable of SrCnnAnomalyDetection.
|
||||
var predictionColumn = ml.Data.CreateEnumerable<SrCnnAnomalyDetection>(
|
||||
outputDataView, reuseRowObject: false);
|
||||
|
||||
var anomalyIndex = 1;
|
||||
|
||||
int k = 0;
|
||||
foreach (var prediction in predictionColumn)
|
||||
{
|
||||
Assert.Equal(7, prediction.Prediction.Length);
|
||||
if (anomalyIndex == k)
|
||||
{
|
||||
Assert.Equal(1, prediction.Prediction[0]);
|
||||
Assert.True(prediction.Prediction[6] > data[k].Value || data[k].Value > prediction.Prediction[5]);
|
||||
}
|
||||
else
|
||||
{
|
||||
Assert.Equal(0, prediction.Prediction[0]);
|
||||
Assert.True(prediction.Prediction[6] <= data[k].Value);
|
||||
Assert.True(data[k].Value <= prediction.Prediction[5]);
|
||||
}
|
||||
|
||||
++k;
|
||||
}
|
||||
}
|
||||
|
||||
[Theory, CombinatorialData]
|
||||
public void TestSrcnnEntireDetectNonnegativeData(
|
||||
[CombinatorialValues(true, false)] bool isPositive)
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
Value
|
||||
181.944
|
||||
37.176
|
||||
57.14
|
||||
67.128
|
||||
72.12
|
||||
77.112
|
||||
82.104
|
||||
83.1
|
||||
87.09
|
||||
92.088
|
||||
92.01
|
||||
97.08
|
||||
102.072
|
||||
107.05
|
||||
107.06
|
||||
117.048
|
||||
122.04
|
||||
132.024
|
||||
147
|
||||
151.82
|
||||
151.992
|
||||
151.72
|
||||
151.94
|
||||
156.969
|
||||
156.984
|
||||
156.92
|
||||
161.976
|
||||
161.94
|
||||
161.97
|
||||
166.968
|
||||
176.952
|
||||
181.94
|
||||
186.936
|
||||
201.91
|
||||
201.912
|
||||
201.9
|
||||
206.904
|
||||
216.88
|
|
Загрузка…
Ссылка в новой задаче