Time Series Anomaly Detection sample (#326)
This commit is contained in:
Родитель
056cb7e727
Коммит
8b5420ac3b
|
@ -0,0 +1,25 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 15
|
||||
VisualStudioVersion = 15.0.28307.438
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "PowerAnomalyDetection", "PowerAnomalyDetection\PowerAnomalyDetection.csproj", "{AD86BF81-1064-4060-850C-AFA168BE2F8D}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
Release|Any CPU = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{AD86BF81-1064-4060-850C-AFA168BE2F8D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{AD86BF81-1064-4060-850C-AFA168BE2F8D}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{AD86BF81-1064-4060-850C-AFA168BE2F8D}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{AD86BF81-1064-4060-850C-AFA168BE2F8D}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
SolutionGuid = {FE79C243-836C-4EE2-8A41-3B9ADB48DBF3}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -0,0 +1,91 @@
|
|||
name,time,ConsumptionDiffNormalized
|
||||
electricity,2018-10-16T15:04:24.716773225Z,973.1295253
|
||||
electricity,2018-10-17T11:00:29.512806957Z,745.2842809
|
||||
electricity,2018-10-18T11:00:27.036704805Z,836
|
||||
electricity,2018-10-19T11:03:24.020147654Z,574.8024948
|
||||
electricity,2018-10-20T11:03:20.863944395Z,666
|
||||
electricity,2018-10-21T11:02:19.250133768Z,1370.95205
|
||||
electricity,2018-10-22T11:00:14.404278998Z,1162.614743
|
||||
electricity,2018-10-23T11:00:12.651322022Z,1076
|
||||
electricity,2018-10-24T11:01:15.809933852Z,876.3913949
|
||||
electricity,2018-10-25T11:03:15.36995305Z,1083.495146
|
||||
electricity,2018-10-26T11:02:11.241108936Z,1465.017373
|
||||
electricity,2018-10-27T11:00:09.052992298Z,786.0917942
|
||||
electricity,2018-10-28T11:00:11.467872717Z,1149
|
||||
electricity,2018-10-29T11:01:09.535792743Z,1037.279667
|
||||
electricity,2018-10-30T11:03:10.797205871Z,1027.572816
|
||||
electricity,2018-10-31T11:02:15.999460931Z,1134.788047
|
||||
electricity,2018-11-01T11:00:07.879700663Z,1557.162726
|
||||
electricity,2018-11-02T11:00:14.638020219Z,1236
|
||||
electricity,2018-11-03T11:01:10.952799972Z,840.4163775
|
||||
electricity,2018-11-04T12:02:32.758670147Z,1348.860759
|
||||
electricity,2018-11-05T12:00:33.291043985Z,828.1502086
|
||||
electricity,2018-11-06T12:00:36.862550768Z,1089
|
||||
electricity,2018-11-07T12:01:43.817502228Z,870.3955586
|
||||
electricity,2018-11-08T12:03:46.038299786Z,1181.359223
|
||||
electricity,2018-11-09T12:02:39.682038073Z,1064.739402
|
||||
electricity,2018-11-10T12:00:32.385072518Z,1012.40612
|
||||
electricity,2018-11-11T12:00:32.977646396Z,1143
|
||||
electricity,2018-11-12T12:01:31.36645663Z,1443.997224
|
||||
electricity,2018-11-13T12:03:35.665076463Z,1399.056865
|
||||
electricity,2018-11-14T12:02:36.92724329Z,829.5760945
|
||||
electricity,2018-11-15T12:00:33.468022168Z,1031.432545
|
||||
electricity,2018-11-16T12:00:34.682478095Z,1131
|
||||
electricity,2018-11-17T12:01:38.251993836Z,899.3754337
|
||||
electricity,2018-11-18T12:03:44.002550241Z,1639.722607
|
||||
electricity,2018-11-19T12:02:39.321065353Z,961.6678249
|
||||
electricity,2018-11-20T12:00:44.409934095Z,1537.13491
|
||||
electricity,2018-11-21T12:00:40.846895801Z,1281
|
||||
electricity,2018-11-22T12:01:38.649709827Z,1597.890354
|
||||
electricity,2018-11-23T12:03:35.801712367Z,1681.664355
|
||||
electricity,2018-11-24T12:02:31.800458217Z,766.5323141
|
||||
electricity,2018-11-25T12:00:39.713883761Z,630.876217
|
||||
electricity,2018-11-26T12:00:38.465425661Z,1752
|
||||
electricity,2018-11-27T12:01:38.762402379Z,1151.200555
|
||||
electricity,2018-11-28T12:03:39.560825017Z,609.1539528
|
||||
electricity,2018-11-29T12:02:44.965302508Z,823.5719249
|
||||
electricity,2018-11-30T12:00:45.73245264Z,1102.531293
|
||||
electricity,2018-12-01T12:00:38.811779493Z,1209
|
||||
electricity,2018-12-02T12:01:43.463479318Z,1499.958362
|
||||
electricity,2018-12-03T12:03:33.901317358Z,1475.950069
|
||||
electricity,2018-12-04T12:02:26.266817519Z,1770.229326
|
||||
electricity,2018-12-05T12:17:32.947182856Z,980.7835051
|
||||
electricity,2018-12-08T12:03:15.542821375Z,1059.767766
|
||||
electricity,2018-12-09T12:02:22.203337669Z,928.6448923
|
||||
electricity,2018-12-10T12:00:21.318328208Z,1745.4242
|
||||
electricity,2018-12-11T12:00:24.84376091Z,1002
|
||||
electricity,2018-12-12T12:18:31.050946094Z,1070.617284
|
||||
electricity,2018-12-13T12:03:23.37065738Z,1527.915789
|
||||
electricity,2018-12-14T12:02:32.94616461Z,1020.708826
|
||||
electricity,2018-12-15T12:00:33.520589573Z,1022.420028
|
||||
electricity,2018-12-16T12:00:36.114413708Z,1474
|
||||
electricity,2018-12-17T18:38:56.449440894Z,1656.235038
|
||||
electricity,2018-12-18T12:03:34.464725337Z,1482.717703
|
||||
electricity,2018-12-19T12:02:38.95296873Z,1038.721334
|
||||
electricity,2018-12-20T12:00:38.178647496Z,1526.119611
|
||||
electricity,2018-12-21T12:00:42.590417397Z,1385
|
||||
electricity,2018-12-22T12:01:46.401083536Z,1784.760583
|
||||
electricity,2018-12-23T12:03:51.377391197Z,4324.993065
|
||||
electricity,2018-12-24T12:02:48.887016526Z,2374.649062
|
||||
electricity,2018-12-25T12:00:52.159535565Z,1902.642559
|
||||
electricity,2018-12-26T12:01:00.584614442Z,1529.937543
|
||||
electricity,2018-12-27T12:02:04.083896054Z,1527.938931
|
||||
electricity,2018-12-28T12:03:56.947398941Z,1951.644691
|
||||
electricity,2018-12-29T12:02:49.77098413Z,1654.148714
|
||||
electricity,2018-12-30T12:00:48.200366004Z,2038.831711
|
||||
electricity,2018-12-31T12:00:50.353929786Z,1877
|
||||
electricity,2019-01-01T12:01:50.983812005Z,2537.238029
|
||||
electricity,2019-01-02T12:03:53.328563148Z,2718.224688
|
||||
electricity,2019-01-03T12:02:54.740479207Z,2213.537179
|
||||
electricity,2019-01-04T12:00:54.964384689Z,1970.737135
|
||||
electricity,2019-01-05T12:00:52.734179324Z,1413
|
||||
electricity,2019-01-06T12:01:40.787551102Z,1036.280361
|
||||
electricity,2019-01-07T12:03:34.747711344Z,1497.919556
|
||||
electricity,2019-01-08T12:02:30.802117529Z,1283.891591
|
||||
electricity,2019-01-09T12:00:34.579232337Z,1205.674548
|
||||
electricity,2019-01-10T12:00:39.767674832Z,1625
|
||||
electricity,2019-01-11T12:01:38.043328492Z,1632.866065
|
||||
electricity,2019-01-12T12:03:48.412084146Z,1034.563107
|
||||
electricity,2019-01-13T12:02:50.376222299Z,936.6504517
|
||||
electricity,2019-01-14T12:00:51.371630151Z,1684.33936
|
||||
electricity,2019-01-15T12:00:56.847535944Z,1260
|
|
|
@ -0,0 +1,18 @@
|
|||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>netcoreapp2.2</TargetFramework>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Data.DataView" Version="0.11.0" />
|
||||
<PackageReference Include="Microsoft.ML" Version="0.11.0" />
|
||||
<PackageReference Include="Microsoft.ML.TimeSeries" Version="0.11.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<Folder Include="Data\" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
|
@ -0,0 +1,111 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using Microsoft.Data.DataView;
|
||||
using Microsoft.ML;
|
||||
using Microsoft.ML.Data;
|
||||
using Microsoft.ML.Transforms.TimeSeries;
|
||||
|
||||
namespace myApp
|
||||
{
|
||||
class Program
|
||||
{
|
||||
/// <summary>
/// One row of the input CSV file. Each property is bound to a column by its zero-based
/// position through <c>LoadColumn</c>; the file's header row names the columns
/// <c>name</c>, <c>time</c> and <c>ConsumptionDiffNormalized</c>.
/// </summary>
class MeterData
{
    /// <summary>Column 0 of the file.</summary>
    [LoadColumn(0)] public string name { get; set; }

    /// <summary>Column 1 of the file: the timestamp of the reading.</summary>
    [LoadColumn(1)] public DateTime time { get; set; }

    /// <summary>Column 2 of the file: the value the spike detector is run on.</summary>
    [LoadColumn(2)] public float ConsumptionDiffNormalized { get; set; }
}
|
||||
|
||||
/// <summary>
/// Output row produced by the spike detector: a three-element vector that the sample
/// prints as Alert, Score and P-Value (elements 0, 1 and 2 respectively).
/// </summary>
class SpikePrediction
{
    [VectorType(3)] public double[] Prediction { get; set; }
}
|
||||
|
||||
// Folder that holds the sample dataset. It is anchored to the application's base directory
// (the build output folder) instead of the current working directory, so the file is found
// no matter where the process is started from (for example `dotnet run` in the project folder).
// NOTE(review): assumes the default bin/<Configuration>/<TargetFramework> output layout.
private static readonly string DatasetsLocation =
    Path.GetFullPath(Path.Combine(AppContext.BaseDirectory, "..", "..", "..", "Data"));

// Full path of the CSV file with the meter readings.
private static readonly string TrainingData =
    Path.Combine(DatasetsLocation, "power-export_min.csv");

/// <summary>
/// Loads the meter readings CSV into an <see cref="IDataView"/> using the
/// <see cref="MeterData"/> column mapping.
/// </summary>
/// <param name="ml">ML.NET context used to create the text loader.</param>
/// <returns>A data view over the comma-separated file; the first row is treated as a header.</returns>
/// <exception cref="FileNotFoundException">The dataset file is missing at the resolved path.</exception>
public static IDataView LoadPowerDataMin(MLContext ml)
{
    // Fail early with the fully resolved path so a wrong location is easy to diagnose.
    if (!File.Exists(TrainingData))
    {
        throw new FileNotFoundException(
            $"Dataset not found. Expected it at '{TrainingData}'.", TrainingData);
    }

    return ml.Data.LoadFromTextFile<MeterData>(
        TrainingData,
        separatorChar: ',',
        hasHeader: true);
}
|
||||
|
||||
/// <summary>
/// Entry point: loads the meter readings, runs the spike detection sample on them and
/// waits for a key press before exiting.
/// </summary>
static void Main()
{
    var ml = new MLContext();

    // load data
    var dataView = LoadPowerDataMin(ml);

    // fit the SSA spike estimator and print its predictions
    BuildTrainEvaluateModel(ml, dataView);

    Console.WriteLine("\nPress any key to exit");

    // Console.Read() only returns once Enter is pressed, which contradicts the prompt.
    // ReadKey returns on the first key press, but throws when stdin is redirected,
    // so keep Read() as the fallback for that case.
    if (Console.IsInputRedirected)
    {
        Console.Read();
    }
    else
    {
        Console.ReadKey(intercept: true);
    }
}
|
||||
|
||||
|
||||
/// <summary>
/// Fits an SSA spike estimator on <paramref name="dataView"/>, applies the fitted model to
/// that same data and prints one console row per reading. Rows whose first prediction
/// element equals 1 are highlighted.
/// </summary>
/// <param name="ml">ML.NET context used to build the estimator and read the results.</param>
/// <param name="dataView">Input data; must expose the "time" and "ConsumptionDiffNormalized" columns.</param>
public static void BuildTrainEvaluateModel(MLContext ml, IDataView dataView)
{
    // Estimator settings
    const int ConfidenceInterval = 98;
    const int PValueSize = 30;
    const int TrainingSize = 90;
    const int SeasonalitySize = 30;

    string inputColumnName = nameof(MeterData.ConsumptionDiffNormalized);
    string outputColumnName = nameof(SpikePrediction.Prediction);

    var spikeEstimator = ml.Transforms.SsaSpikeEstimator(
        outputColumnName,
        inputColumnName,
        confidence: ConfidenceInterval,
        pvalueHistoryLength: PValueSize,
        trainingWindowSize: TrainingSize,
        seasonalityWindowSize: SeasonalitySize);

    // Fit on the data, then score the very same data.
    IDataView scored = spikeEstimator.Fit(dataView).Transform(dataView);

    // Read the newly created output column back as typed rows.
    IEnumerable<SpikePrediction> predictions =
        ml.Data.CreateEnumerable<SpikePrediction>(scored, false);

    // Pull the original input columns so each prediction can be printed next to its reading.
    float[] readings = dataView.GetColumn<float>(ml, "ConsumptionDiffNormalized").ToArray();
    DateTime[] timestamps = dataView.GetColumn<DateTime>(ml, "time").ToArray();

    Console.WriteLine($"{outputColumnName} column obtained post-transformation.");
    Console.WriteLine("Date \tReadingDiff\tAlert\tScore\tP-Value");

    int row = 0;
    foreach (SpikePrediction item in predictions)
    {
        double[] values = item.Prediction;

        // Element 0 is printed under "Alert"; a value of 1 marks the row for highlighting.
        bool isAlert = values[0] == 1;
        if (isAlert)
        {
            Console.BackgroundColor = ConsoleColor.DarkYellow;
            Console.ForegroundColor = ConsoleColor.Black;
        }

        Console.WriteLine(
            "{0}\t{1:0.0000}\t{2:0.00}\t{3:0.00}\t{4:0.00}",
            timestamps[row],
            readings[row],
            values[0],
            values[1],
            values[2]);

        // Always restore the default colours so only alert rows stay highlighted.
        Console.ResetColor();
        row++;
    }
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,140 @@
|
|||
# Power Consumption Anomaly Detection
|
||||
|
||||
| ML.NET version | API type | Status | App Type | Data type | Scenario | ML Task | Algorithms |
|
||||
|----------------|-------------------|-------------------------------|-------------|-----------|---------------------|---------------------------|-----------------------------|
|
||||
| v0.11 | Dynamic API | Up-to-date | Console app | .csv files | Time Series Anomaly Detection | Time Series | SsaSpikeDetection |
|
||||
|
||||
In this sample, you'll see how to use [ML.NET](https://www.microsoft.com/net/learn/apps/machine-learning-and-ai/ml-dotnet) to detect anomalies in time series data.
|
||||
|
||||
## Problem
|
||||
This problem is focused on finding spikes in power consumption based on daily readings from a smart electric meter.
|
||||
|
||||
To solve this problem, we will build an ML model that takes as inputs:
|
||||
* date and time
|
||||
* meter reading difference, normalized by the time span between readings (ConsumptionDiffNormalized)
|
||||
|
||||
and generate an alert if an anomaly is detected.
|
||||
|
||||
## ML task - Time Series
|
||||
The goal is the identification of rare items, events or observations which raise suspicions by differing significantly from the majority of the time series data.
|
||||
|
||||
## Solution
|
||||
To solve this problem, you typically build and train an ML model on existing training data, evaluate how good it is (analyzing the obtained metrics), and lastly consume/test the model to detect anomalies in the input data.
|
||||
|
||||
![Build -> Train -> Evaluate -> Consume](../shared_content/modelpipeline.png)
|
||||
|
||||
However, in this example we only build and train the model to demonstrate the Time Series anomaly detection library: it detects anomalies directly on the actual data and does not have an evaluate method. We will then review the detected anomalies in the Prediction output column.
|
||||
|
||||
### 1. Build model
|
||||
Building a model includes:
|
||||
|
||||
- Prepare and Load the data with LoadFromTextFile
|
||||
|
||||
- Choosing a time series Estimator and setting parameters
|
||||
|
||||
|
||||
The initial code is similar to the following:
|
||||
|
||||
`````csharp
|
||||
|
||||
// Create a common ML.NET context.
|
||||
var ml = new MLContext();
|
||||
|
||||
[...]
|
||||
|
||||
// Create a class for the dataset
|
||||
class MeterData
|
||||
{
|
||||
[LoadColumn(0)]
|
||||
public string name { get; set; }
|
||||
[LoadColumn(1)]
|
||||
public DateTime time { get; set; }
|
||||
[LoadColumn(2)]
|
||||
public float ConsumptionDiffNormalized { get; set; }
|
||||
}
|
||||
|
||||
[...]
|
||||
|
||||
// Load the data
|
||||
private static string DatasetsLocation = @"../../../Data";
|
||||
private static string TrainingData = $"{DatasetsLocation}/power-export_min.csv";
|
||||
|
||||
[...]
|
||||
|
||||
var dataView = ml.Data.LoadFromTextFile<MeterData>(
|
||||
TrainingData,
|
||||
separatorChar: ',',
|
||||
hasHeader: true);
|
||||
|
||||
[...]
|
||||
|
||||
// Prepare the Prediction output column for the model
|
||||
class SpikePrediction
|
||||
{
|
||||
[VectorType(3)]
|
||||
public double[] Prediction { get; set; }
|
||||
}
|
||||
|
||||
[...]
|
||||
|
||||
// Configure the Estimator
|
||||
const int PValueSize = 30;
|
||||
const int SeasonalitySize = 30;
|
||||
const int TrainingSize = 90;
|
||||
const int ConfidenceInterval = 98;
|
||||
|
||||
string outputColumnName = nameof(SpikePrediction.Prediction);
|
||||
string inputColumnName = nameof(MeterData.ConsumptionDiffNormalized);
|
||||
|
||||
var estimator = ml.Transforms.SsaSpikeEstimator(
|
||||
outputColumnName,
|
||||
inputColumnName,
|
||||
confidence: ConfidenceInterval,
|
||||
pvalueHistoryLength: PValueSize,
|
||||
trainingWindowSize: TrainingSize,
|
||||
seasonalityWindowSize: SeasonalitySize);
|
||||
|
||||
`````
|
||||
|
||||
### 2. Train model
|
||||
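Training the model is the process of running the chosen algorithm on the training data to tune the parameters of the model. The training data does not need labeled anomalies: the dataset contains only the name, time and ConsumptionDiffNormalized columns. Training is implemented in the `Fit()` method of the Estimator object.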
|
||||
|
||||
To perform training you need to call the `Fit()` method while providing the training dataset (`power-export_min.csv`) in a DataView object.
|
||||
|
||||
`````csharp
|
||||
var model = estimator.Fit(dataView);
|
||||
`````
|
||||
|
||||
### 3. View predictions
|
||||
You can view the detected anomalies from the Time Series model by accessing the output column.
|
||||
|
||||
`````csharp
|
||||
var transformedData = model.Transform(dataView);
|
||||
|
||||
// Getting the data of the newly created column as an IEnumerable
|
||||
IEnumerable<SpikePrediction> predictionColumn =
|
||||
ml.Data.CreateEnumerable<SpikePrediction>(transformedData, false);
|
||||
|
||||
var colCDN = dataView.GetColumn<float>(ml, "ConsumptionDiffNormalized").ToArray();
|
||||
var colTime = dataView.GetColumn<DateTime>(ml, "time").ToArray();
|
||||
|
||||
// Output the input data and predictions
|
||||
Console.WriteLine($"{outputColumnName} column obtained post-transformation.");
|
||||
Console.WriteLine("Date \tReadingDiff\tAlert\tScore\tP-Value");
|
||||
|
||||
int i = 0;
|
||||
foreach (var p in predictionColumn)
|
||||
{
|
||||
if (p.Prediction[0] == 1)
|
||||
{
|
||||
Console.BackgroundColor = ConsoleColor.DarkYellow;
|
||||
Console.ForegroundColor = ConsoleColor.Black;
|
||||
}
|
||||
Console.WriteLine("{0}\t{1:0.0000}\t{2:0.00}\t{3:0.00}\t{4:0.00}",
|
||||
colTime[i], colCDN[i],
|
||||
p.Prediction[0], p.Prediction[1], p.Prediction[2]);
|
||||
Console.ResetColor();
|
||||
i++;
|
||||
}
|
||||
|
||||
`````
|
Загрузка…
Ссылка в новой задаче