FreesearchR/R/correlations-module.R

147 lines
4.6 KiB
R
Raw Normal View History

#' Data correlations evaluation module
#'
#' @param id Module id. (Use 'ns("id")')
#'
#' @name data-correlations
#' @returns Shiny ui module
#' @export
data_correlations_ui <- function(id, ...) {
2025-02-19 13:17:16 +01:00
ns <- shiny::NS(id)
shiny::tagList(
shiny::textOutput(outputId = ns("suggest")),
shiny::plotOutput(outputId = ns("correlation_plot"), ...)
)
}
#'
2025-09-11 15:21:04 +02:00
#' @param id id
#' @param data data
2025-09-11 15:21:04 +02:00
#' @param include.class character vector of classes to include. Default is NULL
#' @param cutoff numeric
#' @param warning_str Character string. Exposed to allow dynamic translations
#' @param warning_no_str Character string. Exposed to allow dynamic translations
#' @param and_strCharacter string. Exposed to allow dynamic translations
#' @param ... arguments passed to toastui::datagrid
#'
#' @name data-correlations
#' @returns shiny server module
#' @export
data_correlations_server <- function(id,
data,
include.class = NULL,
cutoff = .7,
2025-09-11 15:21:04 +02:00
warning_str = i18n$t("The following variable pairs are highly correlated: {sentence_paste(.x,and_str)}.\nConsider excluding one {more}from the dataset to ensure variables are independent."),
warning_no_str = i18n$t("No variables have a correlation measure above the threshold."),
and_str = i18n$t("and"),
...) {
shiny::moduleServer(
id = id,
module = function(input, output, session) {
# ns <- session$ns
rv <- shiny::reactiveValues(
data = NULL
)
rv$data <- shiny::reactive({
shiny::req(data)
if (!is.null(include.class)) {
filter <- sapply(data(), class) %in% include.class
out <- data()[filter]
} else {
out <- data()
}
# out |> dplyr::mutate(dplyr::across(tidyselect::everything(),as.numeric))
2025-04-15 16:27:43 +02:00
sapply(out,as.numeric)
2025-03-13 14:13:18 +01:00
# as.numeric()
})
# rv <- list()
# rv$data <- mtcars
output$suggest <- shiny::renderPrint({
shiny::req(rv$data)
shiny::req(cutoff)
pairs <- correlation_pairs(rv$data(), threshold = cutoff())
2025-09-11 15:21:04 +02:00
more <- ifelse(nrow(pairs) > 1, i18n$t("from each pair"), "")
if (nrow(pairs) == 0) {
2025-09-11 15:21:04 +02:00
out <- glue::glue(warning_no_str)
} else {
out <- pairs |>
apply(1, \(.x){
2025-09-11 15:21:04 +02:00
glue::glue("'{.x[1]}'x'{.x[2]}' ({round(as.numeric(.x[3]),2)})")
}) |>
(\(.x){
2025-09-11 15:21:04 +02:00
glue::glue(warning_str)
})()
}
out
})
output$correlation_plot <- shiny::renderPlot({
2025-03-13 14:13:18 +01:00
ggcorrplot::ggcorrplot(cor(rv$data())) +
# ggplot2::theme_void() +
ggplot2::theme(
# legend.position = "none",
legend.title = ggplot2::element_text(size = 20),
legend.text = ggplot2::element_text(size = 14),
# panel.grid.major = element_blank(),
# panel.grid.minor = element_blank(),
# axis.text.y = element_blank(),
# axis.title.y = element_blank(),
axis.text.x = ggplot2::element_text(size = 20),
axis.text.y = ggplot2::element_text(size = 20),
# text = element_text(size = 5),
# plot.title = element_blank(),
# panel.background = ggplot2::element_rect(fill = "white"),
# plot.background = ggplot2::element_rect(fill = "white"),
panel.border = ggplot2::element_blank()
)
# psych::pairs.panels(rv$data())
})
}
)
}
correlation_pairs <- function(data, threshold = .8) {
data <- as.data.frame(data)[!sapply(as.data.frame(data), is.character)]
data <- sapply(data,\(.x)if (is.factor(.x)) as.numeric(.x) else .x) |> as.data.frame()
# data <- data |> dplyr::mutate(dplyr::across(dplyr::where(is.factor), as.numeric))
cor <- Hmisc::rcorr(as.matrix(data))
r <- cor$r %>% as.table()
d <- r |>
as.data.frame() |>
dplyr::filter(abs(Freq) > threshold, Freq != 1)
d[1:2] |>
apply(1, \(.x){
sort(unname(.x))
},
simplify = logical(1)
) |>
duplicated() |>
(\(.x){
d[!.x, ]
})() |>
setNames(c("var1", "var2", "cor"))
}
sentence_paste <- function(data, and.str = "and") {
and.str <- gsub(" ", "", and.str)
if (length(data) < 2) {
data
} else if (length(data) == 2) {
paste(data, collapse = glue::glue(" {and.str} "))
} else if (length(data) > 2) {
2025-04-03 14:31:34 +02:00
paste(paste(data[-length(data)], collapse = ", "), data[length(data)], sep = glue::glue(" {and.str} "))
}
}