431 строка
14 KiB
R
431 строка
14 KiB
R
# General ----
|
|
|
|
# Generate vega specs
|
|
# Faceted and unfaceted ones need to be generated separately because the encoding is embedded in the faceted case
|
|
generate_vega_specs <- function(.data, mapping, meta, spec_encoding, facet_encoding, height, width, facet_dims, column = FALSE, row = FALSE, color = FALSE, errorbar = FALSE, y_type = NULL) {
|
|
if (!column & !row) { # No facets
|
|
generate_unfacet_vega_specs(.data, meta, spec_encoding, height, width, color, errorbar, y_type)
|
|
} else { # Facets
|
|
generate_facet_vega_specs(.data, mapping, meta, spec_encoding, facet_encoding, height, width, facet_dims, column, row, color, errorbar, y_type)
|
|
}
|
|
}
|
|
|
|
# Generate vega specs that are not faceted
|
|
generate_unfacet_vega_specs <- function(.data, meta, spec_encoding, height, width, color = FALSE, errorbar = FALSE, y_type = NULL) {
|
|
|
|
# Remove color encoding if it's flagged not to be shown, OR if it's just not in the mapping
|
|
# So even if color = TRUE, if it's not there, it'll be removed!
|
|
if (!color & "color" %in% names(spec_encoding)) {
|
|
spec_encoding <- within(spec_encoding, rm("color"))
|
|
}
|
|
|
|
spec_encoding <- purrr::compact(spec_encoding)
|
|
|
|
mark_type <- "point"
|
|
|
|
# TODO - handle color
|
|
|
|
# If there's no errorbar, everything can be on one layer
|
|
if (!errorbar) {
|
|
list(
|
|
height = height,
|
|
width = width,
|
|
`$schema` = vegawidget::vega_schema(),
|
|
meta = meta,
|
|
data = list(values = .data),
|
|
mark = list(type = mark_type, filled = TRUE, strokeWidth = 1),
|
|
encoding = spec_encoding
|
|
) %>%
|
|
vegawidget::as_vegaspec()
|
|
} else { # If there is an error bar, then we need to put the points and error bars on multiple layers
|
|
|
|
# The errorbar has its own encoding, and it uses data y_raw (the actual raw values) to calculate the errorbar
|
|
errorbar_spec_encoding <- spec_encoding
|
|
errorbar_spec_encoding$y$field <- Y_RAW_FIELD_CHR
|
|
|
|
list(
|
|
height = height,
|
|
width = width,
|
|
`$schema` = vegawidget::vega_schema(),
|
|
meta = meta,
|
|
data = list(values = .data),
|
|
layer = list(
|
|
# Errorbar layer
|
|
list(
|
|
mark = "errorbar",
|
|
encoding = errorbar_spec_encoding
|
|
),
|
|
# Point layer
|
|
list(
|
|
mark = list(type = mark_type, filled = TRUE, strokeWidth = 1),
|
|
encoding = spec_encoding
|
|
)
|
|
)
|
|
) %>%
|
|
vegawidget::as_vegaspec()
|
|
}
|
|
}
|
|
|
|
# Generate vega specs that are faceted
|
|
generate_facet_vega_specs <- function(.data, mapping, meta, spec_encoding, facet_encoding, height, width, facet_dims, column = FALSE, row = FALSE, color = FALSE, errorbar = FALSE, y_type = NULL) {
|
|
|
|
# Remove color encoding if it's flagged not to be shown, OR if it's just not in the mapping
|
|
# So even if color = TRUE, if it's not there, it'll be removed!
|
|
if ((is.null(mapping$color) | !color) & "color" %in% names(spec_encoding)) {
|
|
spec_encoding <- within(spec_encoding, rm("color"))
|
|
}
|
|
|
|
spec_encoding <- purrr::compact(spec_encoding)
|
|
|
|
mark_type <- "point"
|
|
|
|
# Remove facet encoding(s) if they're flagged not to be shown, or aren't in the mapping
|
|
# So even if e.g. row = TRUE, if there's no variable to facet by, it'll be removed!
|
|
if (is.null(mapping$column) | !column) {
|
|
facet_encoding <- facet_encoding[names(facet_encoding) != "column"]
|
|
}
|
|
|
|
if (is.null(mapping$row) | !row) {
|
|
facet_encoding <- facet_encoding[names(facet_encoding) != "row"]
|
|
}
|
|
|
|
# If there's no rows in this spec, set nrow = 1
|
|
if (!row) {
|
|
facet_dims$nrow <- 1
|
|
}
|
|
|
|
# If there's no errorbar, everything can be on one layer
|
|
if (!errorbar) {
|
|
list(
|
|
`$schema` = vegawidget::vega_schema(),
|
|
meta = meta,
|
|
data = list(values = .data),
|
|
facet = facet_encoding,
|
|
spec = list(
|
|
height = height / facet_dims[["nrow"]],
|
|
width = width / facet_dims[["ncol"]],
|
|
mark = list(type = mark_type, filled = TRUE, strokeWidth = 1),
|
|
encoding = spec_encoding
|
|
)
|
|
) %>%
|
|
vegawidget::as_vegaspec()
|
|
} else {
|
|
# If there is an error bar, then we need to put the points and error bars on multiple layers
|
|
|
|
# The errorbar has its own encoding, and it uses data y_raw (the actual raw values) to calculate the errorbar
|
|
errorbar_spec_encoding <- spec_encoding
|
|
errorbar_spec_encoding$y$field <- Y_RAW_FIELD_CHR
|
|
|
|
list(
|
|
`$schema` = vegawidget::vega_schema(),
|
|
meta = meta,
|
|
data = list(values = .data),
|
|
facet = facet_encoding,
|
|
spec = list(
|
|
height = height / facet_dims[["nrow"]],
|
|
width = width / facet_dims[["ncol"]],
|
|
layer = list(
|
|
# Errorbar layer
|
|
list(
|
|
mark = "errorbar",
|
|
encoding = errorbar_spec_encoding
|
|
),
|
|
# Point layer
|
|
list(
|
|
mark = list(type = mark_type, filled = TRUE, strokeWidth = 1),
|
|
encoding = spec_encoding
|
|
)
|
|
)
|
|
)
|
|
) %>%
|
|
vegawidget::as_vegaspec()
|
|
}
|
|
}
|
|
|
|
# Group by ----
|
|
|
|
# Generate a description for group by steps
|
|
generate_group_by_description <- function(mapping, ...) {
|
|
mapping_sep <- mapping[c(...)] %>%
|
|
unlist() %>%
|
|
paste0(collapse = ", ")
|
|
glue::glue("Group by {mapping_sep}")
|
|
}
|
|
|
|
generate_group_by_tooltip <- function(.data) {
|
|
tooltip_vars <- .data %>%
|
|
dplyr::select(-one_of("n", "gemini_ids")) %>%
|
|
names()
|
|
|
|
purrr::map(tooltip_vars, ~ list(field = .x, type = "nominal"))
|
|
}
|
|
|
|
# Handle NAs in data by coercing to literal "NA", and order grouping levels alphabetically (with "NA" last) so that they appear consistently (and get consistent IDs) across frames
|
|
arrange_by_groups_coalesce_na <- function(.data, group_vars, group_vars_chr) {
|
|
.data %>%
|
|
# Remove any existing grouping
|
|
dplyr::ungroup() %>%
|
|
# Arrange once to get alphabetical order
|
|
dplyr::arrange(!!!group_vars) %>%
|
|
dplyr::mutate_at(dplyr::all_of(group_vars_chr), function(x) {
|
|
x <- x %>%
|
|
as.character() %>% # Convert numeric grouping variable to character
|
|
dplyr::coalesce("NA") # NA to "NA"
|
|
|
|
if (any(x == "NA")) {
|
|
x %>%
|
|
forcats::fct_inorder() %>% # Order factor alphabetically
|
|
forcats::fct_relevel("NA", after = Inf) # Place any NAs last in factor
|
|
} else {
|
|
x %>%
|
|
forcats::fct_inorder() %>%
|
|
forcats::as_factor()
|
|
}
|
|
}) %>%
|
|
# then again to get new order, with NAs last
|
|
dplyr::arrange(!!!group_vars)
|
|
}
|
|
|
|
# Calculate the dimensions (number of rows and columns) of a faceted plot to set the size of each, since we want sizing to approximately be consistent across frames
|
|
calculate_facet_dimensions <- function(.data, group_vars, mapping) {
|
|
.group_keys <- .data %>%
|
|
dplyr::group_by(!!!group_vars) %>%
|
|
dplyr::group_keys()
|
|
|
|
if (!is.null(mapping$column)) {
|
|
col_facets <- .group_keys %>%
|
|
dplyr::pull(tidyselect::all_of(mapping$column)) %>%
|
|
unique()
|
|
|
|
n_col_facets <- col_facets %>%
|
|
length()
|
|
} else {
|
|
n_col_facets <- 1
|
|
}
|
|
|
|
if (!is.null(mapping$row)) {
|
|
row_facets <- .group_keys %>%
|
|
dplyr::pull(tidyselect::all_of(mapping$row)) %>%
|
|
unique()
|
|
|
|
n_row_facets <- row_facets %>%
|
|
length()
|
|
} else {
|
|
n_row_facets <- 1
|
|
}
|
|
|
|
list(
|
|
nrow = n_row_facets,
|
|
ncol = n_col_facets
|
|
)
|
|
}
|
|
|
|
# Summarize ----
|
|
|
|
# Create an expression to label axes with values instead of numbers
|
|
# Similar to breaks = c(1, 2, 3), labels = c("One", "Two", "Three") etc in ggplot
|
|
# But more of an expression like an ifelse()
|
|
generate_labelsExpr <- function(data) {
|
|
if (is.null(data)) {
|
|
return(list(
|
|
breaks = c(1, 1), # Do 1 twice, otherwise it gets auto unboxed which doesn't actually work!
|
|
labelExpr = ""
|
|
))
|
|
}
|
|
|
|
# handling for undefined only needs to happen for character vector of labels
|
|
if(is.numeric(data[["label"]])) {
|
|
data <- data %>%
|
|
dplyr::arrange(.data$datamations_x)
|
|
}
|
|
else {
|
|
data <- data %>%
|
|
dplyr::mutate(label = dplyr::coalesce(.data$label, "undefined")) %>%
|
|
dplyr::arrange(.data$datamations_x)
|
|
}
|
|
|
|
n_breaks <- nrow(data)
|
|
breaks <- data[[X_FIELD_CHR]]
|
|
labels <- data[["label"]]
|
|
|
|
labelExpr <- c(glue::glue("round(datum.label) == {ceiling(breaks[1:(n_breaks - 1)])} ? '{labels[1:(n_breaks - 1)]}'"), glue::glue("'{labels[n_breaks]}'")) %>% paste0(collapse = " : ")
|
|
|
|
list(breaks = breaks, labelExpr = labelExpr)
|
|
}
|
|
|
|
# Generate the x-axis domain
|
|
# They have values e.g. 1, 2, 3, but we want some padding on either end so it goes from 0.5 to 3.5
|
|
# And this ensures a consistent domain across frames
|
|
generate_x_domain <- function(data) {
|
|
if (is.null(data)) {
|
|
list(domain = c(0.5, 1.5))
|
|
} else {
|
|
list(domain = c(min(data[[X_FIELD_CHR]]) - 1.0, max(data[[X_FIELD_CHR]]) + 1.0))
|
|
}
|
|
}
|
|
|
|
# Generate description for summarize steps
|
|
# Depending on whether there's errorbars, any groups, etc.
|
|
generate_summarize_description <- function(summary_variable, summary_function = NULL, errorbar = FALSE, group_by = TRUE) {
|
|
if (errorbar) {
|
|
return(glue::glue("Plot mean {summary_variable}{group_description}, with errorbar",
|
|
group_description = ifelse(group_by, " of each group", "")
|
|
))
|
|
}
|
|
|
|
if (is.null(summary_function)) {
|
|
glue::glue("Plot {summary_variable}{group_description}",
|
|
group_description = ifelse(group_by, " within each group", "")
|
|
)
|
|
} else if (is.null(summary_variable)) {
|
|
glue::glue("Plot {summary_function}(){group_description}",
|
|
group_description = ifelse(group_by, " of each group", "")
|
|
)
|
|
} else {
|
|
glue::glue("Plot {summary_function} {summary_variable}{group_description}",
|
|
group_description = ifelse(group_by, " of each group", "")
|
|
)
|
|
}
|
|
}
|
|
|
|
generate_summarize_tooltip <- function(.data, summary_variable, summary_function = NULL) {
|
|
if (is.null(summary_function)) {
|
|
y_tooltip <- list(field = Y_TOOLTIP_FIELD_CHR, type = "quantitative", title = summary_variable)
|
|
} else {
|
|
if (!is.null(summary_variable)) {
|
|
y_tooltip <- list(field = Y_TOOLTIP_FIELD_CHR, type = "quantitative", title = glue::glue("{summary_function}({summary_variable})"))
|
|
} else {
|
|
y_tooltip <- list(field = Y_TOOLTIP_FIELD_CHR, type = "quantitative")
|
|
}
|
|
}
|
|
|
|
tooltip_vars <- .data %>%
|
|
dplyr::select(-tidyselect::any_of(c("gemini_id", X_FIELD_CHR, Y_FIELD_CHR, Y_TOOLTIP_FIELD_CHR, "stroke", "gemini_ids"))) %>%
|
|
names()
|
|
|
|
tooltip <- purrr::map(tooltip_vars, ~ list(field = .x, type = "nominal"))
|
|
|
|
append(list(y_tooltip), tooltip)
|
|
}
|
|
|
|
|
|
# Generate description for summarize steps
|
|
# Depending on whether there's errorbars, any groups, etc.
|
|
generate_generic_description <- function(variable, function_obj = NULL, group_by = TRUE) {
|
|
|
|
if (is.null(function_obj)) {
|
|
glue::glue("Plot variable {variable}{group_description}",
|
|
group_description = ifelse(group_by, " within each group", "")
|
|
)
|
|
} else if (is.null(variable)) {
|
|
glue::glue("Plot {variable}(){group_description}",
|
|
group_description = ifelse(group_by, " of each group", "")
|
|
)
|
|
} else {
|
|
glue::glue("Plot {variable} of the function {function_obj}{group_description}",
|
|
group_description = ifelse(group_by, " of each group", "")
|
|
)
|
|
}
|
|
}
|
|
|
|
# Generate description for summarize steps
|
|
# Depending on whether there's errorbars, any groups, etc.
|
|
generate_mutation_description <- function(mutation_variable, mutation_function = NULL, group_by = TRUE) {
|
|
|
|
if (is.null(mutation_function)) {
|
|
glue::glue("Create new variable {mutation_variable}{group_description}",
|
|
group_description = ifelse(group_by, " within each group", "")
|
|
)
|
|
} else if (is.null(mutation_variable)) {
|
|
glue::glue("Plot {mutation_function}(){group_description}",
|
|
group_description = ifelse(group_by, " of each group", "")
|
|
)
|
|
} else {
|
|
glue::glue("Create new variable {mutation_variable} as `{mutation_function}`{group_description}",
|
|
group_description = ifelse(group_by, " of each group", "")
|
|
)
|
|
}
|
|
}
|
|
|
|
generate_mutation_tooltip <- function(.data, mutation_variable, mutation_function = NULL) {
|
|
if (is.null(mutation_function)) {
|
|
y_tooltip <- list(field = Y_TOOLTIP_FIELD_CHR, type = "quantitative", title = mutation_variable)
|
|
} else {
|
|
if (!is.null(mutation_variable)) {
|
|
y_tooltip <- list(field = Y_TOOLTIP_FIELD_CHR, type = "quantitative", title = glue::glue("{mutation_function}({mutation_variable})"))
|
|
} else {
|
|
y_tooltip <- list(field = Y_TOOLTIP_FIELD_CHR, type = "quantitative")
|
|
}
|
|
}
|
|
|
|
tooltip_vars <- .data %>%
|
|
dplyr::select(-tidyselect::any_of(c("gemini_id", X_FIELD_CHR, Y_FIELD_CHR, Y_TOOLTIP_FIELD_CHR, "stroke", "gemini_ids"))) %>%
|
|
names()
|
|
|
|
tooltip <- purrr::map(tooltip_vars, ~ list(field = .x, type = "nominal"))
|
|
|
|
append(list(y_tooltip), tooltip)
|
|
}
|
|
|
|
split_strings <- function(string, max_characters, min_final) {
|
|
|
|
if(nchar(string) < max_characters) return (string)
|
|
|
|
number_of_chops = trunc(nchar(string) / max_characters)
|
|
split_starts <- seq(1, nchar(string), by = max_characters)
|
|
last_split <- split_starts[length(split_starts)]
|
|
|
|
# TODO UPDATE CUTOFF HANDLING HERE
|
|
#if ((nchar(string) - last)<min_final) split_starts <- setdiff(split_starts, last_split)
|
|
|
|
string_vec <- sapply(split_starts, function(ii) {
|
|
substr(string, ii, ii+max_characters-1)
|
|
})
|
|
|
|
return(string_vec)
|
|
|
|
}
|
|
|
|
split_string_sensibly <- function(string, max_characters, min_final) {
|
|
|
|
if(nchar(string) < max_characters) {return(string)}
|
|
|
|
if(stringr::str_detect(string, '_')) {
|
|
split_string <- strsplit(string, "(?<=[_])", perl = TRUE)[[1]]
|
|
} else {
|
|
split_string <- string
|
|
}
|
|
|
|
if(any(nchar(split_string)>max_characters)) {
|
|
strings_cutoff <- lapply(split_string, split_strings, max_characters, min_final)
|
|
strings_flattened <- unlist(strings_cutoff)
|
|
|
|
prelim_string <- strings_flattened
|
|
|
|
} else {
|
|
prelim_string <- split_string
|
|
}
|
|
|
|
final_string <- list()
|
|
|
|
# post adjustments
|
|
# Look at biterms and check character count, combine if less than threshold
|
|
for (i in seq(1, length(prelim_string), 2)) {
|
|
# if we are not at the final string
|
|
if(!is.na(prelim_string[i+1])) {
|
|
# if the character count of the biterm is less than the max
|
|
if(nchar(prelim_string[i]) + nchar(prelim_string[i+1]) < max_characters) {
|
|
final_string[i] <- paste0(prelim_string[i], prelim_string[i+1])
|
|
} else {
|
|
final_string[i] <- prelim_string[i]
|
|
final_string[i+1] <- prelim_string[i+1]
|
|
}
|
|
} else {
|
|
final_string[i] <- prelim_string[i]
|
|
}
|
|
}
|
|
return(unlist(final_string))
|
|
|
|
}
|
|
|