new completeness filter, analyses have been split, correlation plot included.

This commit is contained in:
Andreas Gammelgaard Damsbo 2025-02-07 16:24:09 +01:00
commit b268b90aae
No known key found for this signature in database
17 changed files with 10547 additions and 2479 deletions

View file

@ -1 +1 @@
app_version <- function()'250130_1152'
app_version <- function()'250207_1622'

136
R/correlations-module.R Normal file
View file

@ -0,0 +1,136 @@
#' Data correlations evaluation module
#'
#' @param id Module id. (Use 'ns("id")')
#'
#' @name data-correlations
#' @returns Shiny ui module
#' @export
data_correlations_ui <- function(id, ...) {
ns <- NS(id)
shiny::tagList(
shiny::textOutput(outputId = ns("suggest")),
shiny::plotOutput(outputId = ns("correlation_plot"), ...)
)
}
#'
#' @param data data
#' @param color.main main color
#' @param color.sec secondary color
#' @param ... arguments passed to toastui::datagrid
#'
#' @name data-correlations
#' @returns shiny server module
#' @export
data_correlations_server <- function(id,
data,
include.class = NULL,
cutoff = .7,
...) {
shiny::moduleServer(
id = id,
module = function(input, output, session) {
# ns <- session$ns
rv <- shiny::reactiveValues(
data = NULL
)
rv$data <- shiny::reactive({
shiny::req(data)
if (!is.null(include.class)) {
filter <- sapply(data(), class) %in% include.class
out <- data()[filter]
} else {
out <- data()
}
out
})
# rv <- list()
# rv$data <- mtcars
output$suggest <- shiny::renderPrint({
shiny::req(rv$data)
shiny::req(cutoff)
pairs <- correlation_pairs(rv$data(), threshold = cutoff())
more <- ifelse(nrow(pairs) > 1, "from each pair ", "")
if (nrow(pairs) == 0) {
out <- glue::glue("No variables have a correlation measure above the threshold.")
} else {
out <- pairs |>
apply(1, \(.x){
glue::glue("'{.x[1]}'x'{.x[2]}'({round(as.numeric(.x[3]),2)})")
}) |>
(\(.x){
glue::glue("The following variable pairs are highly correlated: {sentence_paste(.x)}.\nConsider excluding one {more}from the dataset to ensure variables are independent.")
})()
}
out
})
output$correlation_plot <- shiny::renderPlot({
psych::pairs.panels(rv$data())
})
}
)
}
correlation_pairs <- function(data, threshold = .8) {
data <- data[!sapply(data, is.character)]
data <- data |> dplyr::mutate(dplyr::across(dplyr::where(is.factor), as.numeric))
cor <- Hmisc::rcorr(as.matrix(data))
r <- cor$r %>% as.table()
d <- r |>
as.data.frame() |>
dplyr::filter(abs(Freq) > threshold, Freq != 1)
d[1:2] |>
apply(1, \(.x){
sort(unname(.x))
},
simplify = logical(1)
) |>
duplicated() |>
(\(.x){
d[!.x, ]
})() |>
setNames(c("var1", "var2", "cor"))
}
sentence_paste <- function(data, and.str = "and") {
and.str <- gsub(" ", "", and.str)
if (length(data) < 2) {
data
} else if (length(data) == 2) {
paste(data, collapse = glue::glue(" {and.str} "))
} else if (length(data) > 2) {
paste(paste(data[-length(data)], collapse = ", "), data[length(data)], collapse = glue::glue(" {and.str} "))
}
}
cor_app <- function() {
ui <- shiny::fluidPage(
shiny::sliderInput(
inputId = "cor_cutoff",
label = "Correlation cut-off",
min = 0,
max = 1,
step = .1,
value = .7,
ticks = FALSE
),
data_correlations_ui("data", height = 600)
)
server <- function(input, output, session) {
data_correlations_server("data", data = shiny::reactive(mtcars), cutoff = shiny::reactive(input$cor_cutoff))
}
shiny::shinyApp(ui, server)
}
cor_app()

View file

@ -249,3 +249,20 @@ remove_na_attr <- function(data,attr="label"){
dplyr::bind_cols(out)
}
#' Removes columns with completenes below cutoff
#'
#' @param data data frame
#' @param cutoff numeric
#'
#' @returns data frame
#' @export
#'
#' @examples
#'data.frame(a=1:10,b=NA, c=c(2,NA)) |> remove_empty_cols(cutoff=.5)
remove_empty_cols <- function(data,cutoff=.7){
filter <- apply(X = data,MARGIN = 2,FUN = \(.x){
sum(as.numeric(!is.na(.x)))/length(.x)
}) >= cutoff
data[filter]
}

View file

@ -79,3 +79,4 @@ modify_qmd <- function(file, format) {
specify_qmd_format(fileformat = "all") |>
writeLines(paste0(tools::file_path_sans_ext(file), "_format.", tools::file_ext(file)))
}

View file

@ -25,6 +25,7 @@ custom_theme <- function(...,
){
bslib::bs_theme(
...,
"navbar-bg" = primary,
version = version,
primary = primary,
secondary = secondary,

View file

@ -461,6 +461,15 @@ update_variables_datagrid <- function(data, height = NULL, selectionId = NULL, b
fontStyle = "italic"
)
grid <- toastui::grid_filters(
grid = grid,
column = "name",
# columns = unname(std_names[std_names!="vals"]),
showApplyBtn = FALSE,
showClearBtn = TRUE,
type = "text"
)
# grid <- toastui::grid_columns(
# grid = grid,
# columns = "name_toset",
@ -571,6 +580,7 @@ convert_to <- function(data,
new_class <- match.arg(new_class, several.ok = TRUE)
stopifnot(length(new_class) == length(variable))
args <- list(...)
args$format <- clean_sep(args$format)
if (length(variable) > 1) {
for (i in seq_along(variable)) {
data <- convert_to(data, variable[i], new_class[i], ...)
@ -602,10 +612,10 @@ convert_to <- function(data,
setNames(list(expr(as.integer(!!sym(variable)))), variable)
)
} else if (identical(new_class, "date")) {
data[[variable]] <- as.Date(x = data[[variable]], ...)
data[[variable]] <- as.Date(x = clean_date(data[[variable]]), ...)
attr(data, "code_03_convert") <- c(
attr(data, "code_03_convert"),
setNames(list(expr(as.Date(!!sym(variable), origin = !!args$origin))), variable)
setNames(list(expr(as.Date(clean_date(!!sym(variable)), origin = !!args$origin, format=clean_sep(!!args$format)))), variable)
)
} else if (identical(new_class, "datetime")) {
data[[variable]] <- as.POSIXct(x = data[[variable]], ...)
@ -710,3 +720,39 @@ get_vars_to_convert <- function(vars, classes_input) {
}
#' gsub wrapper for piping with default values for separator substituting
#'
#' @param data character vector
#' @param old.sep old separator
#' @param new.sep new separator
#'
#' @returns character vector
#' @export
#'
clean_sep <- function(data,old.sep="[-.,/]",new.sep="-"){
gsub(old.sep,new.sep,data)
}
#' Attempts at applying uniform date format
#'
#' @param data character string vector of possible dates
#'
#' @returns character string
#' @export
#'
clean_date <- function(data){
data |>
clean_sep() |>
sapply(\(.x){
if (is.na(.x)){
.x
} else {
strsplit(.x,"-") |>
unlist()|>
lapply(\(.y){
if (nchar(.y)==1) paste0("0",.y) else .y
}) |> paste(collapse="-")
}
}) |>
unname()
}