feat: correct labels in Euler diagrams

This commit is contained in:
Andreas Gammelgaard Damsbo 2025-10-02 11:14:35 +02:00
parent 15c7392a17
commit 2c39313ffb
No known key found for this signature in database
4 changed files with 155 additions and 110 deletions

View file

@ -154,7 +154,8 @@ dummy_Imports <- function() {
parameters::ci(),
DT::addRow(),
bslib::accordion(),
NHANES::NHANES()
NHANES::NHANES(),
stRoke::add_padding()
)
# https://github.com/hadley/r-pkgs/issues/828
}
@ -668,3 +669,84 @@ is_identical_to_previous <- function(data, no.name = TRUE) {
simple_snake <- function(data){
gsub("[\\s+]","_",gsub("[^\\w\\s:-]", "", tolower(data), perl=TRUE), perl=TRUE)
}
#' Data type assessment.
#'
#' @description
#' These are more overall than the native typeof. This is used to assess a more
#' meaningful "clinical" data type.
#'
#' @param data vector or data.frame. if data frame, each column is evaluated.
#'
#' @returns outcome type
#' @export
#'
#' @examples
#' mtcars |>
#' default_parsing() |>
#' lapply(data_type)
#' mtcars |>
#' default_parsing() |>
#' data_type()
#' c(1, 2) |> data_type()
#' 1 |> data_type()
#' c(rep(NA, 10)) |> data_type()
#' sample(1:100, 50) |> data_type()
#' factor(letters[1:20]) |> data_type()
#' as.Date(1:20) |> data_type()
data_type <- function(data) {
if (is.data.frame(data)) {
sapply(data, data_type)
} else {
cl_d <- class(data)
l_unique <- length(unique(na.omit(data)))
if (all(is.na(data))) {
out <- "empty"
} else if (l_unique < 2) {
out <- "monotone"
} else if (any(c("factor", "logical") %in% cl_d) | l_unique == 2) {
if (identical("logical", cl_d) | l_unique == 2) {
out <- "dichotomous"
} else {
# if (is.ordered(data)) {
# out <- "ordinal"
# } else {
out <- "categorical"
# }
}
} else if (identical(cl_d, "character")) {
out <- "text"
} else if (any(c("hms", "Date", "POSIXct", "POSIXt") %in% cl_d)) {
out <- "datetime"
} else if (l_unique > 2) {
## Previously had all thinkable classes
## Now just assumes the class has not been defined above
## any(c("numeric", "integer", "hms", "Date", "timediff") %in% cl_d) &
out <- "continuous"
} else {
out <- "unknown"
}
out
}
}
#' Recognised data types from data_type
#'
#' @returns vector
#' @export
#'
#' @examples
#' data_types()
data_types <- function() {
list(
"empty" = list(descr="Variable of all NAs",classes="Any class"),
"monotone" = list(descr="Variable with only one unique value",classes="Any class"),
"dichotomous" = list(descr="Variable with only two unique values",classes="Any class"),
"categorical"= list(descr="Factor variable",classes="factor (ordered or unordered)"),
"text"= list(descr="Character variable",classes="character"),
"datetime"= list(descr="Variable of time, date or datetime values",classes="hms, Date, POSIXct and POSIXt"),
"continuous"= list(descr="Numeric variable",classes="numeric, integer or double"),
"unknown"= list(descr="Anything not falling within the previous",classes="Any other class")
)
}

View file

@ -1,7 +1,7 @@
#' Area proportional venn diagrams
#'
#' @description
#' THis is slightly modified from https://gist.github.com/danlooo/d23d8bcf8856c7dd8e86266097404ded
#' This is slightly modified from https://gist.github.com/danlooo/d23d8bcf8856c7dd8e86266097404ded
#'
#' This functions uses eulerr::euler to plot area proportional venn diagramms
#' but plots it using ggplot2
@ -11,18 +11,27 @@
#' @param show_quantities whether to show number of intersecting elements
#' @param show_labels whether to show set names
#' @param ... further arguments passed to eulerr::euler
#'
#' @include data_plots.R
ggeulerr <- function(
combinations,
show_quantities = TRUE,
show_labels = TRUE,
...) {
# browser()
## Extracting labels
labs <- sapply(names(combinations),\(.x){
# browser()
get_label(combinations,.x)
})
data <-
eulerr::euler(combinations = combinations, ...) |>
## Set labels as variable names for nicer plotting
setNames(as.data.frame(combinations),labs) |>
eulerr::euler(...) |>
plot(quantities = show_quantities) |>
purrr::pluck("data")
tibble::as_tibble(data$ellipses, rownames = "Variables") |>
ggplot2::ggplot() +
ggforce::geom_ellipse(
@ -38,7 +47,8 @@ ggeulerr <- function(
dplyr::mutate(
label = labels |> purrr::map2(quantities, ~ {
if (!is.na(.x) && !is.na(.y) && show_labels) {
paste0(.x, "\n", sprintf(.y, fmt = "%.2g"))
paste0(.x, "\n", sprintf(.y, fmt = "%.4g"))
# glue::glue("{.x}\n{round(.y,0)}")
} else if (!is.na(.x) && show_labels) {
.x
} else if (!is.na(.y)) {
@ -77,6 +87,21 @@ ggeulerr <- function(
#' ) |> plot_euler("A", c("B", "C"), "D", seed = 4)
#' mtcars |> plot_euler("vs", "am", seed = 1)
#' mtcars |> plot_euler("vs", "am", "cyl", seed = 1)
#' stRoke::trial |>
#' dplyr::mutate(
#' mfi_cut = cut(mfi_6, c(0, 12, max(mfi_6, na.rm = TRUE))),
#' mdi_cut = cut(mdi_6, c(0, 20, max(mdi_6, na.rm = TRUE)))
#' ) |>
#' purrr::map2(
#' c(sapply(stRoke::trial, \(.x)REDCapCAST::get_attr(.x, attr = "label")), "Fatigue", "Depression"),
#' \(.x, .y){
#' REDCapCAST::set_attr(.x, .y, "label")
#' }
#' ) |>
#' dplyr::bind_cols() |>
#' plot_euler("mfi_cut", "mdi_cut")
#' stRoke::trial |>
#' plot_euler(pri="male", sec=c("hypertension"))
plot_euler <- function(data, pri, sec, ter = NULL, seed = 2103) {
set.seed(seed = seed)
if (!is.null(ter)) {
@ -84,16 +109,13 @@ plot_euler <- function(data, pri, sec, ter = NULL, seed = 2103) {
} else {
ds <- list(data)
}
out <- lapply(ds, \(.x){
.x[c(pri, sec)] |>
as.data.frame() |>
na.omit() |>
plot_euler_single()
})
# browser()
wrap_plot_list(out,title=glue::glue("Grouped by {get_label(data,ter)}"))
# patchwork::wrap_plots(out)
wrap_plot_list(out, title = glue::glue(i18n$t("Grouped by {get_label(data,ter)}")))
}
#' Easily plot single euler diagrams

View file

@ -72,13 +72,15 @@ regression_ui <- function(id, ...) {
shiny::radioButtons(
inputId = ns("all"),
label = i18n$t("Specify covariables"),
inline = TRUE, selected = 2,
inline = TRUE,
selected = 2,
choiceNames = c(
"Yes",
"No"
),
choiceValues = c(1, 2)
),
# shiny::uiOutput(outputId = ns("all")),
shiny::conditionalPanel(
condition = "input.all==1",
shiny::uiOutput(outputId = ns("regression_vars")),
@ -131,7 +133,7 @@ regression_ui <- function(id, ...) {
)
),
bslib::nav_panel(
title = "Coefficient plot",
title = i18n$t("Coefficient plot"),
bslib::layout_sidebar(
sidebar = bslib::sidebar(
bslib::accordion(
@ -243,11 +245,6 @@ regression_server <- function(id,
}
})
shiny::observe({
bslib::accordion_panel_update(id = "acc_reg", target = "acc_pan_reg", title = i18n$t("Regression"))
bslib::accordion_panel_update(id = "acc_coef_plot", target = "acc_pan_coef_plot", title = i18n$t("Coefficients plot"))
bslib::accordion_panel_update(id = "acc_checks", target = "acc_pan_checks", title = i18n$t("Checks"))
})
output$data_info <- shiny::renderUI({
shiny::req(regression_vars())
@ -255,6 +252,31 @@ regression_server <- function(id,
data_description(data_r()[regression_vars()])
})
## Update on laguage change
shiny::observe({
bslib::accordion_panel_update(id = "acc_reg", target = "acc_pan_reg", title = i18n$t("Regression"))
bslib::accordion_panel_update(id = "acc_coef_plot", target = "acc_pan_coef_plot", title = i18n$t("Coefficients plot"))
bslib::accordion_panel_update(id = "acc_checks", target = "acc_pan_checks", title = i18n$t("Checks"))
})
# shiny::observe({
# shiny::updateRadioButtons(
# session = session,
# inputId = "all",
# label = i18n$t("Specify covariables"),
# # inline = TRUE,
# # selected = 2,
# choiceNames = c(
# i18n$t("Yes"),
# i18n$t("No")
# ),
# choiceValues = c(1, 2)
# )
# })
##############################################################################
#########
######### Input fields
@ -278,7 +300,7 @@ regression_server <- function(id,
columnSelectInput(
inputId = ns("outcome_var"),
selected = NULL,
label = "Select outcome variable",
label = i18n$t("Select outcome variable"),
data = data_r(),
multiple = FALSE
)
@ -288,7 +310,7 @@ regression_server <- function(id,
shiny::req(input$outcome_var)
shiny::selectizeInput(
inputId = ns("regression_type"),
label = "Choose regression analysis",
label = i18n$t("Choose regression analysis"),
## The below ifelse statement handles the case of loading a new dataset
choices = possible_functions(
data = dplyr::select(
@ -307,7 +329,7 @@ regression_server <- function(id,
shiny::selectizeInput(
inputId = ns("factor_vars"),
selected = colnames(data_r())[sapply(data_r(), is.factor)],
label = "Covariables to format as categorical",
label = i18n$t("Covariables to format as categorical"),
choices = colnames(data_r()),
multiple = TRUE
)
@ -327,7 +349,7 @@ regression_server <- function(id,
columnSelectInput(
inputId = ns("strat_var"),
selected = "none",
label = "Select variable to stratify baseline",
label = i18n$t("Select variable to stratify baseline"),
data = data_r(),
col_subset = c(
"none",
@ -342,7 +364,7 @@ regression_server <- function(id,
shiny::selectInput(
inputId = ns("plot_model"),
selected = 1,
label = "Select models to plot",
label = i18n$t("Select models to plot"),
choices = names(rv$list$regression$tables),
multiple = TRUE
)
@ -392,7 +414,7 @@ regression_server <- function(id,
rv$list$regression$models <- model_lists
},
error = function(err) {
showNotification(paste0("Creating regression models failed with the following error: ", err), type = "err")
showNotification(paste(i18n$t("Creating regression models failed with the following error:"), err), type = "err")
}
)
}
@ -457,7 +479,7 @@ regression_server <- function(id,
showNotification(paste0(warn), type = "warning")
},
error = function(err) {
showNotification(paste0("Creating a regression table failed with the following error: ", err), type = "err")
showNotification(paste(i18n$t("Creating a regression table failed with the following error:"), err), type = "err")
}
)
}
@ -558,7 +580,7 @@ regression_server <- function(id,
output$download_plot <- shiny::downloadHandler(
filename = paste0("regression_plot.", input$plot_type),
content = function(file) {
shiny::withProgress(message = "Saving the plot. Hold on for a moment..", {
shiny::withProgress(message = i18n$t("Saving the plot. Hold on for a moment.."), {
ggplot2::ggsave(
filename = file,
plot = rv$plot,
@ -595,7 +617,7 @@ regression_server <- function(id,
# showNotification(paste0(warn), type = "warning")
# },
error = function(err) {
showNotification(paste0("Running model assumptions checks failed with the following error: ", err), type = "err")
showNotification(paste(i18n$t("Running model assumptions checks failed with the following error:"), err), type = "err")
}
)
}
@ -616,7 +638,7 @@ regression_server <- function(id,
vectorSelectInput(
inputId = ns("plot_checks"),
selected = 1,
label = "Select checks to plot",
label = i18n$t("Select checks to plot"),
choices = names,
multiple = TRUE
)
@ -631,7 +653,7 @@ regression_server <- function(id,
if (!is.null(rv$list$regression$tables)) {
p <- rv$check_plot() +
# patchwork::wrap_plots() +
patchwork::plot_annotation(title = "Multivariable regression model checks")
patchwork::plot_annotation(title = i18n$t("Multivariable regression model checks"))
layout <- sapply(seq_len(length(p)), \(.x){

View file

@ -242,87 +242,6 @@ regression_model_uv <- function(data,
### HELPERS
#' Data type assessment.
#'
#' @description
#' These are more overall than the native typeof. This is used to assess a more
#' meaningful "clinical" data type.
#'
#' @param data vector or data.frame. if data frame, each column is evaluated.
#'
#' @returns outcome type
#' @export
#'
#' @examples
#' mtcars |>
#' default_parsing() |>
#' lapply(data_type)
#' mtcars |>
#' default_parsing() |>
#' data_type()
#' c(1, 2) |> data_type()
#' 1 |> data_type()
#' c(rep(NA, 10)) |> data_type()
#' sample(1:100, 50) |> data_type()
#' factor(letters[1:20]) |> data_type()
#' as.Date(1:20) |> data_type()
data_type <- function(data) {
if (is.data.frame(data)) {
sapply(data, data_type)
} else {
cl_d <- class(data)
l_unique <- length(unique(na.omit(data)))
if (all(is.na(data))) {
out <- "empty"
} else if (l_unique < 2) {
out <- "monotone"
} else if (any(c("factor", "logical") %in% cl_d) | l_unique == 2) {
if (identical("logical", cl_d) | l_unique == 2) {
out <- "dichotomous"
} else {
# if (is.ordered(data)) {
# out <- "ordinal"
# } else {
out <- "categorical"
# }
}
} else if (identical(cl_d, "character")) {
out <- "text"
} else if (any(c("hms", "Date", "POSIXct", "POSIXt") %in% cl_d)) {
out <- "datetime"
} else if (l_unique > 2) {
## Previously had all thinkable classes
## Now just assumes the class has not been defined above
## any(c("numeric", "integer", "hms", "Date", "timediff") %in% cl_d) &
out <- "continuous"
} else {
out <- "unknown"
}
out
}
}
#' Recognised data types from data_type
#'
#' @returns vector
#' @export
#'
#' @examples
#' data_types()
data_types <- function() {
list(
"empty" = list(descr="Variable of all NAs",classes="Any class"),
"monotone" = list(descr="Variable with only one unique value",classes="Any class"),
"dichotomous" = list(descr="Variable with only two unique values",classes="Any class"),
"categorical"= list(descr="Factor variable",classes="factor (ordered or unordered)"),
"text"= list(descr="Character variable",classes="character"),
"datetime"= list(descr="Variable of time, date or datetime values",classes="hms, Date, POSIXct and POSIXt"),
"continuous"= list(descr="Numeric variable",classes="numeric, integer or double"),
"unknown"= list(descr="Anything not falling within the previous",classes="Any other class")
)
}
#' Implemented functions
#'