updates to readme, help and assets
This commit is contained in:
Родитель
41701d080c
Коммит
1515e42f61
33
README.md
33
README.md
|
@ -74,29 +74,34 @@ Which would result in the shiny server app running on port 3838.
|
|||
|
||||
## Instructions of use
|
||||
1. Import time series CSV file. Assumed structure:
|
||||
- date (`"%Y-%m-%d %H:%M:%S"`). TagAnomaly will attempt to infer the date from other patterns as well, using the *parsedate* package
|
||||
- category (optional)
|
||||
- date ("%Y-%m-%d %H:%M:%S")
|
||||
- category
|
||||
- value
|
||||
|
||||
2. (Optional) Import raw data time series CSV file.
|
||||
|
||||
If the original time series is an aggregation over time windows, this time series is the raw values themselves. This way we could dive deeper into an anomalous value and see what it is comprised of.
|
||||
2. (Optional) Import raw data time series CSV file. If the original time series is an aggregation over time windows, this time series is the raw values themselves. This way we could dive deeper into an anomalous value and see what it is comprised of.
|
||||
Assumed structure:
|
||||
- date (`"%Y-%m-%d %H:%M:%S"`). TagAnomaly will attempt to infer the date from other patterns as well, using the *parsedate* package
|
||||
- category (optional)
|
||||
- content
|
||||
- date ("%Y-%m-%d %H:%M:%S")
|
||||
- category
|
||||
- value
|
||||
|
||||
2. Select category (optional, if exists)
|
||||
2. Select category (if exists)
|
||||
|
||||
3. Select time range on slider
|
||||
|
||||
4. Select points on plot that look anomalous.
|
||||
Optional (1): click on one time range on the table below the plot to see raw data on this time range
|
||||
Optional (2): Open the `All Categories` tab to see how other time series behave on the same time range.
|
||||
5. Once you decide that these are actual anomalies, save the resulting table to csv by clicking on `Download labels set` and continue to the next category.
|
||||
4. Inspect your time series:
|
||||
(1): click on one time range on the table below the plot to see raw data on this time range
|
||||
(2): Open the "All Categories" tab to see how other time series behave on the same time range.
|
||||
|
||||
4. Select points on plot that look anomalous.
|
||||
|
||||
5. Click "Add selected points" to add the marked points to the candidate list.
|
||||
|
||||
7. Once you decide that these are actual anomalies, save the resulting table to csv by clicking on "Download labels set" and continue to the next category.
|
||||
|
||||
#### Current limitations/issues
|
||||
It is currently impossible to have multiple selections on one plot. A workaround is to select one area, download the csv and select the next area. Each downloaded CSV has a random string in its name so files won't overwrite each other. Once labeling is finished, one option is to run the provided [prep_labels.py](https://github.com/Microsoft/TagAnomaly/blob/master/prep_labels.py) file in order to concatenate all of TagAnomaly's output files into one CSV.
|
||||
Points that were added but not saved will be lost if the date slider or the category is changed, which makes it difficult to save multiple points from a complex time series. Once all segments are labeled, one can run the provided [prep_labels.py](https://github.com/Microsoft/TagAnomaly/blob/master/prep_labels.py) file in order to concatenate all of TagAnomaly's output files into one CSV.
|
||||
|
||||
|
||||
# Contributing
|
||||
|
||||
This project welcomes contributions and suggestions. Most contributions require you to agree to a
|
||||
|
|
Двоичные данные
assets/selected.png
Двоичные данные
assets/selected.png
Двоичный файл не отображается.
До Ширина: | Высота: | Размер: 66 KiB После Ширина: | Высота: | Размер: 32 KiB |
72
server.R
72
server.R
|
@ -12,7 +12,9 @@ options(shiny.maxRequestSize=150*1024^2)
|
|||
## Shiny server function
|
||||
server <- function(input,output, session) {
|
||||
|
||||
## Reactive values
|
||||
####---- Reactive values ---####
|
||||
|
||||
|
||||
# A boolean checking if the provided dataset contains multiple categories or not. This affects the UI
|
||||
hasCategories <- reactiveVal(value = T,label='hasCategories')
|
||||
|
||||
|
@ -27,20 +29,41 @@ server <- function(input,output, session) {
|
|||
brushedPoints(getTimeFilteredCategoryDataset(), input$user_brush)
|
||||
})
|
||||
|
||||
#### Event observers ####
|
||||
|
||||
# Update selected points when the user clicks 'Add'
|
||||
observeEvent(input$add, {
|
||||
selectedPoints(selectedPoints() %>% bind_rows(brushed()))
|
||||
})
|
||||
|
||||
# Update selected points when the user clicks 'Remove'
|
||||
observeEvent(input$delete, {
|
||||
if (dim(selectedPoints())[1] > 0) {
|
||||
selectedPoints(selectedPoints()%>% anti_join(brushed()))
|
||||
}
|
||||
})
|
||||
|
||||
####---- Time-Series data handling ----####
|
||||
####---- Time-Series data ingestion and handling ----####
|
||||
|
||||
## Read CSV input file
|
||||
|
||||
getDataset <- reactive({
|
||||
## Get time-series dataset from file upload
|
||||
|
||||
if(is.null(input$timeseriesfile)) return(NULL)
|
||||
dataset <- tryReadFile()
|
||||
|
||||
|
||||
validate(
|
||||
need(nrow(dataset) > 0, "Input file is empty"),
|
||||
need(('date' %in% names(dataset)),"date column not found. Consider renaming your timestamp column to date"),
|
||||
need(('value' %in% names(dataset)),"value column not found. Consider renaming your value column to value")
|
||||
|
||||
)
|
||||
dataset
|
||||
})
|
||||
|
||||
tryReadFile <- function() {
|
||||
## Read CSV input file from the user provided path
|
||||
out <- tryCatch(
|
||||
{
|
||||
read.csv(input$timeseriesfile$datapath,stringsAsFactors = F)
|
||||
|
@ -60,6 +83,7 @@ server <- function(input,output, session) {
|
|||
}
|
||||
|
||||
padMissingDates <- function(dataset,padValue = 0, timeSeriesGapValue){
|
||||
## Interpolate missing time/date values
|
||||
category <- dataset[1,'category'] %>% unlist()
|
||||
|
||||
pad <- data.frame(date = seq(from = min(dataset$date),to = max(dataset$date),by = timeSeriesGapValue))
|
||||
|
@ -73,25 +97,12 @@ server <- function(input,output, session) {
|
|||
|
||||
dataset
|
||||
}
|
||||
|
||||
## Get time-series dataset from file upload
|
||||
getDataset <- reactive({
|
||||
|
||||
if(is.null(input$timeseriesfile)) return(NULL)
|
||||
dataset <- tryReadFile()
|
||||
|
||||
|
||||
validate(
|
||||
need(nrow(dataset) > 0, "Input file is empty"),
|
||||
need(('date' %in% names(dataset)),"date column not found. Consider renaming your timestamp column to date"),
|
||||
need(('value' %in% names(dataset)),"value column not found. Consider renaming your value column to value")
|
||||
|
||||
)
|
||||
dataset
|
||||
})
|
||||
|
||||
|
||||
|
||||
getTimeSeriesDataset <- reactive({
|
||||
### Turn dataset into a time series by transforming the date column into POSIXct.
|
||||
### If dataset is numeric, turn numericTimestamp flag to TRUE.
|
||||
dataset <- getDataset()
|
||||
if(is.null(dataset)) return(NULL)
|
||||
|
||||
|
@ -137,8 +148,9 @@ server <- function(input,output, session) {
|
|||
dataset
|
||||
})
|
||||
|
||||
## Get a dataset for a specific category
|
||||
|
||||
getCategoryDataset <- reactive({
|
||||
## Get a dataset for a specific category
|
||||
ts <- getTimeSeriesDataset()
|
||||
if(is.null(ts)) return(NULL)
|
||||
|
||||
|
@ -158,8 +170,9 @@ server <- function(input,output, session) {
|
|||
dataset
|
||||
})
|
||||
|
||||
## Get the entire dataset, filtered by the slider range
|
||||
|
||||
getTimeFilteredDataset <- reactive({
|
||||
## Get the entire dataset, filtered by the slider range
|
||||
dataset <- getTimeSeriesDataset()
|
||||
if(is.null(dataset)) return(NULL)
|
||||
if(is.null(input$slider)) return(NULL)
|
||||
|
@ -167,8 +180,9 @@ server <- function(input,output, session) {
|
|||
dataset %>% filter(date >= input$slider[1], date <= input$slider[2])
|
||||
})
|
||||
|
||||
## Get category dataset, filtered by the slider range
|
||||
|
||||
getTimeFilteredCategoryDataset <- reactive({
|
||||
## Get category dataset, filtered by the slider range
|
||||
dataset <- getCategoryDataset()
|
||||
if(is.null(dataset)) return(NULL)
|
||||
if(is.null(input$slider)) return(NULL)
|
||||
|
@ -181,9 +195,11 @@ server <- function(input,output, session) {
|
|||
|
||||
####---- Raw data handling ----####
|
||||
|
||||
## Get raw data (an additional dataset for which the time-series dataset is an aggregation)
|
||||
## See R/create_sample_data.R for a script that creates demo time-series and raw datasets
|
||||
|
||||
getRawData <- reactive({
|
||||
## Get raw data (an additional dataset for which the time-series dataset is an aggregation)
|
||||
## See R/create_sample_data.R for a script that creates demo time-series and raw datasets
|
||||
|
||||
cate <- input$category
|
||||
|
||||
if(is.null(input$rawfile)) return(NULL)
|
||||
|
@ -210,8 +226,9 @@ server <- function(input,output, session) {
|
|||
raw
|
||||
})
|
||||
|
||||
## get raw data for a sample selected by the user
|
||||
|
||||
getRawDataForSample <- reactive({
|
||||
## get raw data for a sample selected by the user
|
||||
lastclicked <- input$summaryTable_rows_selected
|
||||
if(is.null(lastclicked)) return(NULL)
|
||||
|
||||
|
@ -468,6 +485,7 @@ server <- function(input,output, session) {
|
|||
####---- Data output ----####
|
||||
|
||||
## download selected points
|
||||
|
||||
output$mydownload <- downloadHandler(
|
||||
filename = function(){
|
||||
random_string <- paste0(paste0(sample(LETTERS,2 , TRUE),collapse=''),sample(999, 1, TRUE), paste0(sample(LETTERS,2 , TRUE),collapse=''),collapse = '')
|
||||
|
@ -477,9 +495,9 @@ server <- function(input,output, session) {
|
|||
} else{
|
||||
paste0(gsub(".csv",replacement = "",input$timeseriesfile$name),'-',random_string,'-labels.csv')
|
||||
}
|
||||
},
|
||||
content = function(file) {
|
||||
}, content = function(file) {
|
||||
write.csv(selectedPoints(),file)
|
||||
}
|
||||
)
|
||||
|
||||
}
|
11
ui.R
11
ui.R
|
@ -23,10 +23,11 @@ header <- dashboardHeader(title = 'Taganomaly - Anomaly detection labeling tool'
|
|||
- value</P><P></P><P>
|
||||
<B>2. Select category</B> (if exists)</P><P>
|
||||
<B>3. Select time range on slider</B></P><P>
|
||||
<B>4.Select points on plot that look anomalous</B>.
|
||||
<BR>Optional (1): click on one time range on the table below the plot to see raw data on this time range
|
||||
<BR>Optional (2): Open the "All Categories" tab to see how other time series behave on the same time range.
|
||||
<BR><B>5.</B> Once you decide that these are actual anomalies, save the resulting table to csv by clicking on "Download labels set" and continue to the next category.</P>'
|
||||
<B>4.Select points on plot</B> that look anomalous.</P><P>
|
||||
<B>5. Click "Add selected points"</B> to add the marked points to the candidate list.</P><P>
|
||||
b <BR>Optional (1): click on one time range on the table below the plot to see raw data on this time range
|
||||
<BR>Optional (2): Open the "All Categories" tab to see how other time series behave on the same time range.<P>
|
||||
<B>7.</B> Once you decide that these are actual anomalies, save the resulting table to csv by clicking on "Download labels set" and continue to the next category.</P>'
|
||||
)))
|
||||
)
|
||||
|
||||
|
@ -65,7 +66,7 @@ body <- dashboardBody(
|
|||
plotOutput("plot", brush = "user_brush"),
|
||||
actionButton("add", "Add selected points"),
|
||||
actionButton("delete", "Remove selected points"),
|
||||
h2('Selected points:'),
|
||||
h2('Currently marked points:'),
|
||||
dataTableOutput("summaryTable"),
|
||||
h2('Inspect raw data:'),
|
||||
h5('Select a point or more on the graph, then select a record on the \"Selected Points\" table to see raw data'),
|
||||
|
|
Загрузка…
Ссылка в новой задаче