2025-02-07 16:24:09 +01:00
#' Data correlations evaluation module
#'
#' @param id Module id. (Use 'ns("id")')
#'
#' @name data-correlations
#' @returns Shiny ui module
#' @export
data_correlations_ui <- function ( id , ... ) {
2025-02-19 13:17:16 +01:00
ns <- shiny :: NS ( id )
2025-02-07 16:24:09 +01:00
shiny :: tagList (
shiny :: textOutput ( outputId = ns ( " suggest" ) ) ,
shiny :: plotOutput ( outputId = ns ( " correlation_plot" ) , ... )
)
}
#'
#' @param data data
#' @param color.main main color
#' @param color.sec secondary color
#' @param ... arguments passed to toastui::datagrid
#'
#' @name data-correlations
#' @returns shiny server module
#' @export
data_correlations_server <- function ( id ,
data ,
include.class = NULL ,
cutoff = .7 ,
... ) {
shiny :: moduleServer (
id = id ,
module = function ( input , output , session ) {
# ns <- session$ns
rv <- shiny :: reactiveValues (
data = NULL
)
rv $ data <- shiny :: reactive ( {
shiny :: req ( data )
if ( ! is.null ( include.class ) ) {
filter <- sapply ( data ( ) , class ) %in% include.class
out <- data ( ) [filter ]
} else {
out <- data ( )
}
out
} )
# rv <- list()
# rv$data <- mtcars
output $ suggest <- shiny :: renderPrint ( {
shiny :: req ( rv $ data )
shiny :: req ( cutoff )
pairs <- correlation_pairs ( rv $ data ( ) , threshold = cutoff ( ) )
more <- ifelse ( nrow ( pairs ) > 1 , " from each pair " , " " )
if ( nrow ( pairs ) == 0 ) {
out <- glue :: glue ( " No variables have a correlation measure above the threshold." )
} else {
out <- pairs | >
apply ( 1 , \ ( .x ) {
glue :: glue ( " '{.x[1]}'x'{.x[2]}'({round(as.numeric(.x[3]),2)})" )
} ) | >
( \ ( .x ) {
glue :: glue ( " The following variable pairs are highly correlated: {sentence_paste(.x)}.\nConsider excluding one {more}from the dataset to ensure variables are independent." )
} ) ( )
}
out
} )
output $ correlation_plot <- shiny :: renderPlot ( {
psych :: pairs.panels ( rv $ data ( ) )
} )
}
)
}
correlation_pairs <- function ( data , threshold = .8 ) {
data <- data [ ! sapply ( data , is.character ) ]
data <- data | > dplyr :: mutate ( dplyr :: across ( dplyr :: where ( is.factor ) , as.numeric ) )
cor <- Hmisc :: rcorr ( as.matrix ( data ) )
r <- cor $ r %>% as.table ( )
d <- r | >
as.data.frame ( ) | >
dplyr :: filter ( abs ( Freq ) > threshold , Freq != 1 )
d [1 : 2 ] | >
apply ( 1 , \ ( .x ) {
sort ( unname ( .x ) )
} ,
simplify = logical ( 1 )
) | >
duplicated ( ) | >
( \ ( .x ) {
d [ ! .x , ]
} ) ( ) | >
setNames ( c ( " var1" , " var2" , " cor" ) )
}
sentence_paste <- function ( data , and.str = " and" ) {
and.str <- gsub ( " " , " " , and.str )
if ( length ( data ) < 2 ) {
data
} else if ( length ( data ) == 2 ) {
paste ( data , collapse = glue :: glue ( " {and.str} " ) )
} else if ( length ( data ) > 2 ) {
paste ( paste ( data [ - length ( data ) ] , collapse = " , " ) , data [length ( data ) ] , collapse = glue :: glue ( " {and.str} " ) )
}
}
cor_app <- function ( ) {
ui <- shiny :: fluidPage (
shiny :: sliderInput (
inputId = " cor_cutoff" ,
label = " Correlation cut-off" ,
min = 0 ,
max = 1 ,
step = .1 ,
value = .7 ,
ticks = FALSE
) ,
data_correlations_ui ( " data" , height = 600 )
)
server <- function ( input , output , session ) {
data_correlations_server ( " data" , data = shiny :: reactive ( mtcars ) , cutoff = shiny :: reactive ( input $ cor_cutoff ) )
}
shiny :: shinyApp ( ui , server )
}
cor_app ( )