mirror of
https://github.com/agdamsbo/FreesearchR.git
synced 2025-09-12 18:09:39 +02:00
137 lines
3.5 KiB
R
137 lines
3.5 KiB
R
|
#' Data correlations evaluation module
|
||
|
#'
|
||
|
#' @param id Module id. (Use 'ns("id")')
|
||
|
#'
|
||
|
#' @name data-correlations
|
||
|
#' @returns Shiny ui module
|
||
|
#' @export
|
||
|
data_correlations_ui <- function(id, ...) {
|
||
|
ns <- NS(id)
|
||
|
|
||
|
shiny::tagList(
|
||
|
shiny::textOutput(outputId = ns("suggest")),
|
||
|
shiny::plotOutput(outputId = ns("correlation_plot"), ...)
|
||
|
)
|
||
|
}
|
||
|
|
||
|
|
||
|
#'
|
||
|
#' @param data data
|
||
|
#' @param color.main main color
|
||
|
#' @param color.sec secondary color
|
||
|
#' @param ... arguments passed to toastui::datagrid
|
||
|
#'
|
||
|
#' @name data-correlations
|
||
|
#' @returns shiny server module
|
||
|
#' @export
|
||
|
data_correlations_server <- function(id,
|
||
|
data,
|
||
|
include.class = NULL,
|
||
|
cutoff = .7,
|
||
|
...) {
|
||
|
shiny::moduleServer(
|
||
|
id = id,
|
||
|
module = function(input, output, session) {
|
||
|
# ns <- session$ns
|
||
|
|
||
|
rv <- shiny::reactiveValues(
|
||
|
data = NULL
|
||
|
)
|
||
|
|
||
|
rv$data <- shiny::reactive({
|
||
|
shiny::req(data)
|
||
|
if (!is.null(include.class)) {
|
||
|
filter <- sapply(data(), class) %in% include.class
|
||
|
out <- data()[filter]
|
||
|
} else {
|
||
|
out <- data()
|
||
|
}
|
||
|
out
|
||
|
})
|
||
|
|
||
|
# rv <- list()
|
||
|
# rv$data <- mtcars
|
||
|
|
||
|
output$suggest <- shiny::renderPrint({
|
||
|
shiny::req(rv$data)
|
||
|
shiny::req(cutoff)
|
||
|
pairs <- correlation_pairs(rv$data(), threshold = cutoff())
|
||
|
|
||
|
more <- ifelse(nrow(pairs) > 1, "from each pair ", "")
|
||
|
|
||
|
if (nrow(pairs) == 0) {
|
||
|
out <- glue::glue("No variables have a correlation measure above the threshold.")
|
||
|
} else {
|
||
|
out <- pairs |>
|
||
|
apply(1, \(.x){
|
||
|
glue::glue("'{.x[1]}'x'{.x[2]}'({round(as.numeric(.x[3]),2)})")
|
||
|
}) |>
|
||
|
(\(.x){
|
||
|
glue::glue("The following variable pairs are highly correlated: {sentence_paste(.x)}.\nConsider excluding one {more}from the dataset to ensure variables are independent.")
|
||
|
})()
|
||
|
}
|
||
|
out
|
||
|
})
|
||
|
|
||
|
output$correlation_plot <- shiny::renderPlot({
|
||
|
psych::pairs.panels(rv$data())
|
||
|
})
|
||
|
}
|
||
|
)
|
||
|
}
|
||
|
|
||
|
correlation_pairs <- function(data, threshold = .8) {
|
||
|
data <- data[!sapply(data, is.character)]
|
||
|
data <- data |> dplyr::mutate(dplyr::across(dplyr::where(is.factor), as.numeric))
|
||
|
cor <- Hmisc::rcorr(as.matrix(data))
|
||
|
r <- cor$r %>% as.table()
|
||
|
d <- r |>
|
||
|
as.data.frame() |>
|
||
|
dplyr::filter(abs(Freq) > threshold, Freq != 1)
|
||
|
|
||
|
d[1:2] |>
|
||
|
apply(1, \(.x){
|
||
|
sort(unname(.x))
|
||
|
},
|
||
|
simplify = logical(1)
|
||
|
) |>
|
||
|
duplicated() |>
|
||
|
(\(.x){
|
||
|
d[!.x, ]
|
||
|
})() |>
|
||
|
setNames(c("var1", "var2", "cor"))
|
||
|
}
|
||
|
|
||
|
sentence_paste <- function(data, and.str = "and") {
|
||
|
and.str <- gsub(" ", "", and.str)
|
||
|
if (length(data) < 2) {
|
||
|
data
|
||
|
} else if (length(data) == 2) {
|
||
|
paste(data, collapse = glue::glue(" {and.str} "))
|
||
|
} else if (length(data) > 2) {
|
||
|
paste(paste(data[-length(data)], collapse = ", "), data[length(data)], collapse = glue::glue(" {and.str} "))
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
cor_app <- function() {
|
||
|
ui <- shiny::fluidPage(
|
||
|
shiny::sliderInput(
|
||
|
inputId = "cor_cutoff",
|
||
|
label = "Correlation cut-off",
|
||
|
min = 0,
|
||
|
max = 1,
|
||
|
step = .1,
|
||
|
value = .7,
|
||
|
ticks = FALSE
|
||
|
),
|
||
|
data_correlations_ui("data", height = 600)
|
||
|
)
|
||
|
server <- function(input, output, session) {
|
||
|
data_correlations_server("data", data = shiny::reactive(mtcars), cutoff = shiny::reactive(input$cor_cutoff))
|
||
|
}
|
||
|
shiny::shinyApp(ui, server)
|
||
|
}
|
||
|
|
||
|
cor_app()
|