Merge pull request #16 from microsoft/omri/new_brushing
Omri/new brushing
This commit is contained in:
Коммит
e6fd5a5e49
33
README.md
33
README.md
|
@ -74,29 +74,34 @@ Which would result in the shiny server app running on port 3838.
|
|||
|
||||
## Instructions of use
|
||||
1. Import time series CSV file. Assumed structure:
|
||||
- date (`"%Y-%m-%d %H:%M:%S"`). TagAnomaly will attempt to infer the date from other patterns as well, using the *parsedate* package
|
||||
- category (optional)
|
||||
- date ("%Y-%m-%d %H:%M:%S")
|
||||
- category
|
||||
- value
|
||||
|
||||
2. (Optional) Import raw data time series CSV file.
|
||||
|
||||
If the original time series is an aggreation over time windows, this time series is the raw values themselves. This way we could dive deeper into an anomalous value and see what it is comprised of.
|
||||
2. (Optional) Import raw data time series CSV file. If the original time series is an aggreation over time windows, this time series is the raw values themselves. This way we could dive deeper into an anomalous value and see what it is comprised of.
|
||||
Assumed structure:
|
||||
- date (`"%Y-%m-%d %H:%M:%S"`). TagAnomaly will attempt to infer the date from other patterns as well, using the *parsedate* package
|
||||
- category (optional)
|
||||
- content
|
||||
- date ("%Y-%m-%d %H:%M:%S")
|
||||
- category
|
||||
- value
|
||||
|
||||
2. Select category (optional, if exists)
|
||||
2. Select category (if exists)
|
||||
|
||||
3. Select time range on slider
|
||||
|
||||
4. Select points on plot that look anomalous.
|
||||
Optional (1): click on one time range on the table below the plot to see raw data on this time range
|
||||
Optional (2): Open the `All Categories` tab to see how other time series behave on the same time range.
|
||||
5. Once you decide that these are actual anomalies, save the resulting table to csv by clicking on `Download labels set` and continue to the next category.
|
||||
4. Inspect your time series:
|
||||
(1): click on one time range on the table below the plot to see raw data on this time range
|
||||
(2): Open the "All Categories" tab to see how other time series behave on the same time range.
|
||||
|
||||
4.Select points on plot that look anomalous.
|
||||
|
||||
5. Click "Add selected points" to add the marked points to the candidate list.
|
||||
|
||||
7. Once you decide that these are actual anomalies, save the resulting table to csv by clicking on "Download labels set" and continue to the next category.
|
||||
|
||||
#### Current limitations/issues
|
||||
It is currently impossible to have multiple selections on one plot. A workaround is to select one area, download the csv and select the next area. Each downloaded CSV has a random string so files won't override each other. Once labeling is finished, one option is to run the provided [prep_labels.py](https://github.com/Microsoft/TagAnomaly/blob/master/prep_labels.py) file in order to concatenate all of TagAnomaly's output file to one CSV.
|
||||
Points added but not saved will be lost in case the date slider or categories are changed, hence it is difficult to save multiple points from a complex time series. Once all segments are labeled, one can run the provided [prep_labels.py](https://github.com/Microsoft/TagAnomaly/blob/master/prep_labels.py) file in order to concatenate all of TagAnomaly's output file to one CSV.
|
||||
|
||||
|
||||
# Contributing
|
||||
|
||||
This project welcomes contributions and suggestions. Most contributions require you to agree to a
|
||||
|
|
Двоичные данные
assets/selected.png
Двоичные данные
assets/selected.png
Двоичный файл не отображается.
До Ширина: | Высота: | Размер: 66 KiB После Ширина: | Высота: | Размер: 32 KiB |
114
server.R
114
server.R
|
@ -5,7 +5,6 @@ library(parsedate) # for cases when the provided date wasn't in a specific patte
|
|||
library(DT)
|
||||
library(ggplot2)
|
||||
|
||||
|
||||
## Extend the max file upload size
|
||||
options(shiny.maxRequestSize=150*1024^2)
|
||||
|
||||
|
@ -13,7 +12,9 @@ options(shiny.maxRequestSize=150*1024^2)
|
|||
## Shiny server function
|
||||
server <- function(input,output, session) {
|
||||
|
||||
## Reactive values
|
||||
####---- Reactive values ---####
|
||||
|
||||
|
||||
# A boolean checking if the provided dataset contains multiple categories or not. This affects the UI
|
||||
hasCategories <- reactiveVal(value = T,label='hasCategories')
|
||||
|
||||
|
@ -23,10 +24,46 @@ server <- function(input,output, session) {
|
|||
# Whether the first column holds a numeric value (TRUE), or a date value (FALSE)
|
||||
numericTimestamp <- reactiveVal(value = F,label = 'numericTimestamp')
|
||||
|
||||
####---- Time-Series data handling ----####
|
||||
selectedPoints <- reactiveVal(value = data.frame(),label='selectedPoints')
|
||||
brushed <- reactive({
|
||||
brushedPoints(getTimeFilteredCategoryDataset(), input$user_brush)
|
||||
})
|
||||
|
||||
## Read CSV input file
|
||||
#### Event observers ####
|
||||
|
||||
# Update selected points when the user clicks 'Add'
|
||||
observeEvent(input$add, {
|
||||
selectedPoints(selectedPoints() %>% bind_rows(brushed()))
|
||||
})
|
||||
|
||||
# Update selected points when the user clicks 'Remove'
|
||||
observeEvent(input$delete, {
|
||||
if (dim(selectedPoints())[1] > 0) {
|
||||
selectedPoints(selectedPoints()%>% anti_join(brushed()))
|
||||
}
|
||||
})
|
||||
|
||||
####---- Time-Series data injestion and handling ----####
|
||||
|
||||
|
||||
getDataset <- reactive({
|
||||
## Get time-series dataset from file upload
|
||||
|
||||
if(is.null(input$timeseriesfile)) return(NULL)
|
||||
dataset <- tryReadFile()
|
||||
|
||||
|
||||
validate(
|
||||
need(nrow(dataset) > 0, "Input file is empty"),
|
||||
need(('date' %in% names(dataset)),"date column not found. Consider renaming your timestamp column to date"),
|
||||
need(('value' %in% names(dataset)),"value column not found. Consider renaming your value column to value")
|
||||
|
||||
)
|
||||
dataset
|
||||
})
|
||||
|
||||
tryReadFile <- function() {
|
||||
## Read CSV input file from the user provided path
|
||||
out <- tryCatch(
|
||||
{
|
||||
read.csv(input$timeseriesfile$datapath,stringsAsFactors = F)
|
||||
|
@ -46,6 +83,7 @@ server <- function(input,output, session) {
|
|||
}
|
||||
|
||||
padMissingDates <- function(dataset,padValue = 0, timeSeriesGapValue){
|
||||
## Interpolate missing time/date values
|
||||
category <- dataset[1,'category'] %>% unlist()
|
||||
|
||||
pad <- data.frame(date = seq(from = min(dataset$date),to = max(dataset$date),by = timeSeriesGapValue))
|
||||
|
@ -59,24 +97,12 @@ server <- function(input,output, session) {
|
|||
|
||||
dataset
|
||||
}
|
||||
|
||||
## Get time-series dataset from file upload
|
||||
getDataset <- reactive({
|
||||
if(is.null(input$timeseriesfile)) return(NULL)
|
||||
|
||||
dataset <- tryReadFile()
|
||||
|
||||
validate(
|
||||
need(nrow(dataset) > 0, "Input file is empty"),
|
||||
need(('date' %in% names(dataset)),"date column not found. Consider renaming your timestamp column to date"),
|
||||
need(('value' %in% names(dataset)),"value column not found. Consider renaming your value column to value")
|
||||
|
||||
)
|
||||
dataset
|
||||
})
|
||||
|
||||
|
||||
|
||||
getTimeSeriesDataset <- reactive({
|
||||
### Turn dataset into a time series by transforming the date column into POSIXct.
|
||||
### If dataset is numeric, turn numericTimestamp flag to TRUE.
|
||||
dataset <- getDataset()
|
||||
if(is.null(dataset)) return(NULL)
|
||||
|
||||
|
@ -122,8 +148,9 @@ server <- function(input,output, session) {
|
|||
dataset
|
||||
})
|
||||
|
||||
## Get a dataset for a specific category
|
||||
|
||||
getCategoryDataset <- reactive({
|
||||
## Get a dataset for a specific category
|
||||
ts <- getTimeSeriesDataset()
|
||||
if(is.null(ts)) return(NULL)
|
||||
|
||||
|
@ -143,8 +170,9 @@ server <- function(input,output, session) {
|
|||
dataset
|
||||
})
|
||||
|
||||
## Get the entire dataset, filtered by the slider range
|
||||
|
||||
getTimeFilteredDataset <- reactive({
|
||||
## Get the entire dataset, filtered by the slider range
|
||||
dataset <- getTimeSeriesDataset()
|
||||
if(is.null(dataset)) return(NULL)
|
||||
if(is.null(input$slider)) return(NULL)
|
||||
|
@ -152,20 +180,26 @@ server <- function(input,output, session) {
|
|||
dataset %>% filter(date >= input$slider[1], date <= input$slider[2])
|
||||
})
|
||||
|
||||
## Get category dataset, filtered by the slider range
|
||||
|
||||
getTimeFilteredCategoryDataset <- reactive({
|
||||
## Get category dataset, filtered by the slider range
|
||||
dataset <- getCategoryDataset()
|
||||
if(is.null(dataset)) return(NULL)
|
||||
if(is.null(input$slider)) return(NULL)
|
||||
|
||||
|
||||
session$resetBrush("input$user_brush")
|
||||
selectedPoints(data.frame())
|
||||
dataset %>% filter(date >= input$slider[1], date <= input$slider[2])
|
||||
})
|
||||
|
||||
####---- Raw data handling ----####
|
||||
|
||||
## Get raw data (an additional dataset for which the time-series dataset is an aggregation)
|
||||
## See R/create_sample_data.R for a script that creates demo time-series and raw datasets
|
||||
|
||||
getRawData <- reactive({
|
||||
## Get raw data (an additional dataset for which the time-series dataset is an aggregation)
|
||||
## See R/create_sample_data.R for a script that creates demo time-series and raw datasets
|
||||
|
||||
cate <- input$category
|
||||
|
||||
if(is.null(input$rawfile)) return(NULL)
|
||||
|
@ -192,8 +226,9 @@ server <- function(input,output, session) {
|
|||
raw
|
||||
})
|
||||
|
||||
## get raw data for a sample selected by the user
|
||||
|
||||
getRawDataForSample <- reactive({
|
||||
## get raw data for a sample selected by the user
|
||||
lastclicked <- input$summaryTable_rows_selected
|
||||
if(is.null(lastclicked)) return(NULL)
|
||||
|
||||
|
@ -276,6 +311,7 @@ server <- function(input,output, session) {
|
|||
|
||||
## Select input for categories, based on the categories found in the time series dataset
|
||||
output$category <- renderUI({
|
||||
|
||||
if(hasCategories()==TRUE){
|
||||
req(input$timeseriesfile)
|
||||
dataset <- getTimeSeriesDataset()
|
||||
|
@ -303,25 +339,16 @@ server <- function(input,output, session) {
|
|||
panel.grid.minor = element_blank(),
|
||||
text = element_text(size = 14))
|
||||
|
||||
|
||||
|
||||
if (dim(selectedPoints())[1] > 0) {
|
||||
g <- g + geom_point(aes(date, value), data = selectedPoints(), color = "red")
|
||||
}
|
||||
|
||||
g
|
||||
},message = "Rendering plot...")
|
||||
})
|
||||
|
||||
|
||||
|
||||
|
||||
## Capture the selected points on the graph
|
||||
selectedPoints <- reactive({
|
||||
user_brush <- input$user_brush
|
||||
pts <- brushedPoints(getTimeFilteredCategoryDataset(), user_brush, xvar = "date", yvar = "value")
|
||||
if(is.null(pts)) return(NULL)
|
||||
if(hasCategories()){
|
||||
pts %>% select(date, category, value)
|
||||
} else {
|
||||
pts %>% select(date, value)
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
## Plot showing all categories
|
||||
output$allplot <- renderPlot({
|
||||
|
||||
|
@ -458,6 +485,7 @@ server <- function(input,output, session) {
|
|||
####---- Data output ----####
|
||||
|
||||
## download selected points
|
||||
|
||||
output$mydownload <- downloadHandler(
|
||||
filename = function(){
|
||||
random_string <- paste0(paste0(sample(LETTERS,2 , TRUE),collapse=''),sample(999, 1, TRUE), paste0(sample(LETTERS,2 , TRUE),collapse=''),collapse = '')
|
||||
|
@ -467,9 +495,9 @@ server <- function(input,output, session) {
|
|||
} else{
|
||||
paste0(gsub(".csv",replacement = "",input$timeseriesfile$name),'-',random_string,'-labels.csv')
|
||||
}
|
||||
},
|
||||
content = function(file) {
|
||||
}, content = function(file) {
|
||||
write.csv(selectedPoints(),file)
|
||||
}
|
||||
)
|
||||
|
||||
}
|
13
ui.R
13
ui.R
|
@ -23,10 +23,11 @@ header <- dashboardHeader(title = 'Taganomaly - Anomaly detection labeling tool'
|
|||
- value</P><P></P><P>
|
||||
<B>2. Select category</B> (if exists)</P><P>
|
||||
<B>3. Select time range on slider</B></P><P>
|
||||
<B>4.Select points on plot that look anomalous</B>.
|
||||
<BR>Optional (1): click on one time range on the table below the plot to see raw data on this time range
|
||||
<BR>Optional (2): Open the "All Categories" tab to see how other time series behave on the same time range.
|
||||
<BR><B>5.</B> Once you decide that these are actual anomalies, save the resulting table to csv by clicking on "Download labels set" and continue to the next category.</P>'
|
||||
<B>4.Select points on plot</B> that look anomalous.</P><P>
|
||||
<B>5. Click "Add selected points"</B> to add the marked points to the candidate list.</P><P>
|
||||
b <BR>Optional (1): click on one time range on the table below the plot to see raw data on this time range
|
||||
<BR>Optional (2): Open the "All Categories" tab to see how other time series behave on the same time range.<P>
|
||||
<B>7.</B> Once you decide that these are actual anomalies, save the resulting table to csv by clicking on "Download labels set" and continue to the next category.</P>'
|
||||
)))
|
||||
)
|
||||
|
||||
|
@ -63,7 +64,9 @@ body <- dashboardBody(
|
|||
h2('Time Series for labeling:'),
|
||||
h5("Graph might take a few moments to load"),
|
||||
plotOutput("plot", brush = "user_brush"),
|
||||
h2('Selected points:'),
|
||||
actionButton("add", "Add selected points"),
|
||||
actionButton("delete", "Remove selected points"),
|
||||
h2('Currently marked points:'),
|
||||
dataTableOutput("summaryTable"),
|
||||
h2('Inspect raw data:'),
|
||||
h5('Select a point or more on the graph, then select a record on the \"Selected Points\" table to see raw data'),
|
||||
|
|
Загрузка…
Ссылка в новой задаче