FreesearchR/R/correlations-module.R

178 lines
5.4 KiB
R
Raw Normal View History

#' Data correlations evaluation module
#'
#' @param id Module id. (Use 'ns("id")')
#'
#' @name data-correlations
#' @returns Shiny ui module
#' @export
data_correlations_ui <- function(id, ...) {
2025-02-19 13:17:16 +01:00
ns <- shiny::NS(id)
shiny::tagList(
shiny::textOutput(outputId = ns("suggest")),
shiny::plotOutput(outputId = ns("correlation_plot"), ...)
)
}
#'
2025-09-11 15:21:04 +02:00
#' @param id id
#' @param data data
2025-09-11 15:21:04 +02:00
#' @param include.class character vector of classes to include. Default is NULL
#' @param cutoff numeric
#' @param warning_str Character string. Exposed to allow dynamic translations
#' @param warning_no_str Character string. Exposed to allow dynamic translations
#' @param and_strCharacter string. Exposed to allow dynamic translations
#' @param ... arguments passed to toastui::datagrid
#'
#' @name data-correlations
#' @returns shiny server module
#' @export
data_correlations_server <- function(id,
data,
include.class = NULL,
cutoff = .7,
2025-09-11 15:21:04 +02:00
warning_str = i18n$t("The following variable pairs are highly correlated: {sentence_paste(.x,and_str)}.\nConsider excluding one {more}from the dataset to ensure variables are independent."),
warning_no_str = i18n$t("No variables have a correlation measure above the threshold."),
and_str = i18n$t("and"),
...) {
shiny::moduleServer(
id = id,
module = function(input, output, session) {
# ns <- session$ns
rv <- shiny::reactiveValues(
2025-09-23 12:24:17 +02:00
data = NULL,
pairs = NULL
)
rv$data <- shiny::reactive({
shiny::req(data)
if (!is.null(include.class)) {
filter <- sapply(data(), class) %in% include.class
out <- data()[filter]
} else {
out <- data()
}
# out |> dplyr::mutate(dplyr::across(tidyselect::everything(),as.numeric))
2025-09-23 12:24:17 +02:00
sapply(out, as.numeric)
2025-03-13 14:13:18 +01:00
# as.numeric()
})
# rv <- list()
# rv$data <- mtcars
2025-09-23 12:24:17 +02:00
rv$pairs <- shiny::reactive({
shiny::req(rv$data)
shiny::req(cutoff)
# tryCatch(
# {
correlation_pairs(rv$data(), threshold = cutoff())
# },
# error = function(err) {
# showNotification(paste0(i18n$t("The following error occured on determining correlations: "), err), type = "err")
# }
# )
})
output$suggest <- shiny::renderPrint({
shiny::req(rv$data)
shiny::req(cutoff)
2025-09-23 12:24:17 +02:00
pairs <- rv$pairs()
2025-09-11 15:21:04 +02:00
more <- ifelse(nrow(pairs) > 1, i18n$t("from each pair"), "")
if (nrow(pairs) == 0) {
2025-09-11 15:21:04 +02:00
out <- glue::glue(warning_no_str)
} else {
out <- pairs |>
apply(1, \(.x){
2025-09-11 15:21:04 +02:00
glue::glue("'{.x[1]}'x'{.x[2]}' ({round(as.numeric(.x[3]),2)})")
}) |>
(\(.x){
2025-09-11 15:21:04 +02:00
glue::glue(warning_str)
})()
}
out
})
output$correlation_plot <- shiny::renderPlot({
2025-03-13 14:13:18 +01:00
ggcorrplot::ggcorrplot(cor(rv$data())) +
# ggplot2::theme_void() +
ggplot2::theme(
# legend.position = "none",
legend.title = ggplot2::element_text(size = 20),
legend.text = ggplot2::element_text(size = 14),
# panel.grid.major = element_blank(),
# panel.grid.minor = element_blank(),
# axis.text.y = element_blank(),
# axis.title.y = element_blank(),
axis.text.x = ggplot2::element_text(size = 20),
axis.text.y = ggplot2::element_text(size = 20),
# text = element_text(size = 5),
# plot.title = element_blank(),
# panel.background = ggplot2::element_rect(fill = "white"),
# plot.background = ggplot2::element_rect(fill = "white"),
panel.border = ggplot2::element_blank()
)
# psych::pairs.panels(rv$data())
})
2025-09-23 12:24:17 +02:00
return(shiny::isolate(rv$pairs))
}
)
}
2025-09-23 12:24:17 +02:00
#' Determine significant correlations in the data set
#'
#' @param data data.frame
#' @param threshold correlation threshold
#'
#' @returns data.frame
#' @export
#'
#' @examples
#' correlation_pairs(mtcars)
#' correlation_pairs(mtcars,.9)
#' correlation_pairs(mtcars[c(1:4),])
correlation_pairs <- function(data, threshold = .8) {
data <- as.data.frame(data)[!sapply(as.data.frame(data), is.character)]
2025-09-23 12:24:17 +02:00
data <- sapply(data, \(.x)if (is.factor(.x)) as.numeric(.x) else .x) |> as.data.frame()
# data <- data |> dplyr::mutate(dplyr::across(dplyr::where(is.factor), as.numeric))
2025-09-23 12:24:17 +02:00
if (nrow(data) > 4) {
cor <- Hmisc::rcorr(as.matrix(data))
r <- cor$r %>% as.table()
d <- r |>
as.data.frame() |>
dplyr::filter(abs(Freq) > threshold, Freq != 1)
} else {
expand.grid(names(data),names(data))
d <- data.frame(matrix(ncol = 3))
d <- d[!is.na(d[3]),]
}
d[1:2] |>
apply(1, \(.x){
sort(unname(.x))
},
simplify = logical(1)
) |>
duplicated() |>
(\(.x){
d[!.x, ]
})() |>
setNames(c("var1", "var2", "cor"))
}
sentence_paste <- function(data, and.str = "and") {
and.str <- gsub(" ", "", and.str)
if (length(data) < 2) {
data
} else if (length(data) == 2) {
paste(data, collapse = glue::glue(" {and.str} "))
} else if (length(data) > 2) {
2025-04-03 14:31:34 +02:00
paste(paste(data[-length(data)], collapse = ", "), data[length(data)], sep = glue::glue(" {and.str} "))
}
}