mirror of
https://github.com/agdamsbo/FreesearchR.git
synced 2026-06-21 05:19:07 +02:00
new completeness filter, analyses have been split, correlation plot included.
This commit is contained in:
parent
f728bb1e8e
commit
b268b90aae
17 changed files with 10547 additions and 2479 deletions
|
|
@ -1 +1 @@
|
|||
app_version <- function()'250130_1152'
|
||||
app_version <- function()'250207_1622'
|
||||
|
|
|
|||
136
R/correlations-module.R
Normal file
136
R/correlations-module.R
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
#' Data correlations evaluation module
|
||||
#'
|
||||
#' @param id Module id. (Use 'ns("id")')
|
||||
#'
|
||||
#' @name data-correlations
|
||||
#' @returns Shiny ui module
|
||||
#' @export
|
||||
data_correlations_ui <- function(id, ...) {
|
||||
ns <- NS(id)
|
||||
|
||||
shiny::tagList(
|
||||
shiny::textOutput(outputId = ns("suggest")),
|
||||
shiny::plotOutput(outputId = ns("correlation_plot"), ...)
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
#'
|
||||
#' @param data data
|
||||
#' @param color.main main color
|
||||
#' @param color.sec secondary color
|
||||
#' @param ... arguments passed to toastui::datagrid
|
||||
#'
|
||||
#' @name data-correlations
|
||||
#' @returns shiny server module
|
||||
#' @export
|
||||
data_correlations_server <- function(id,
|
||||
data,
|
||||
include.class = NULL,
|
||||
cutoff = .7,
|
||||
...) {
|
||||
shiny::moduleServer(
|
||||
id = id,
|
||||
module = function(input, output, session) {
|
||||
# ns <- session$ns
|
||||
|
||||
rv <- shiny::reactiveValues(
|
||||
data = NULL
|
||||
)
|
||||
|
||||
rv$data <- shiny::reactive({
|
||||
shiny::req(data)
|
||||
if (!is.null(include.class)) {
|
||||
filter <- sapply(data(), class) %in% include.class
|
||||
out <- data()[filter]
|
||||
} else {
|
||||
out <- data()
|
||||
}
|
||||
out
|
||||
})
|
||||
|
||||
# rv <- list()
|
||||
# rv$data <- mtcars
|
||||
|
||||
output$suggest <- shiny::renderPrint({
|
||||
shiny::req(rv$data)
|
||||
shiny::req(cutoff)
|
||||
pairs <- correlation_pairs(rv$data(), threshold = cutoff())
|
||||
|
||||
more <- ifelse(nrow(pairs) > 1, "from each pair ", "")
|
||||
|
||||
if (nrow(pairs) == 0) {
|
||||
out <- glue::glue("No variables have a correlation measure above the threshold.")
|
||||
} else {
|
||||
out <- pairs |>
|
||||
apply(1, \(.x){
|
||||
glue::glue("'{.x[1]}'x'{.x[2]}'({round(as.numeric(.x[3]),2)})")
|
||||
}) |>
|
||||
(\(.x){
|
||||
glue::glue("The following variable pairs are highly correlated: {sentence_paste(.x)}.\nConsider excluding one {more}from the dataset to ensure variables are independent.")
|
||||
})()
|
||||
}
|
||||
out
|
||||
})
|
||||
|
||||
output$correlation_plot <- shiny::renderPlot({
|
||||
psych::pairs.panels(rv$data())
|
||||
})
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
correlation_pairs <- function(data, threshold = .8) {
|
||||
data <- data[!sapply(data, is.character)]
|
||||
data <- data |> dplyr::mutate(dplyr::across(dplyr::where(is.factor), as.numeric))
|
||||
cor <- Hmisc::rcorr(as.matrix(data))
|
||||
r <- cor$r %>% as.table()
|
||||
d <- r |>
|
||||
as.data.frame() |>
|
||||
dplyr::filter(abs(Freq) > threshold, Freq != 1)
|
||||
|
||||
d[1:2] |>
|
||||
apply(1, \(.x){
|
||||
sort(unname(.x))
|
||||
},
|
||||
simplify = logical(1)
|
||||
) |>
|
||||
duplicated() |>
|
||||
(\(.x){
|
||||
d[!.x, ]
|
||||
})() |>
|
||||
setNames(c("var1", "var2", "cor"))
|
||||
}
|
||||
|
||||
sentence_paste <- function(data, and.str = "and") {
|
||||
and.str <- gsub(" ", "", and.str)
|
||||
if (length(data) < 2) {
|
||||
data
|
||||
} else if (length(data) == 2) {
|
||||
paste(data, collapse = glue::glue(" {and.str} "))
|
||||
} else if (length(data) > 2) {
|
||||
paste(paste(data[-length(data)], collapse = ", "), data[length(data)], collapse = glue::glue(" {and.str} "))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
cor_app <- function() {
|
||||
ui <- shiny::fluidPage(
|
||||
shiny::sliderInput(
|
||||
inputId = "cor_cutoff",
|
||||
label = "Correlation cut-off",
|
||||
min = 0,
|
||||
max = 1,
|
||||
step = .1,
|
||||
value = .7,
|
||||
ticks = FALSE
|
||||
),
|
||||
data_correlations_ui("data", height = 600)
|
||||
)
|
||||
server <- function(input, output, session) {
|
||||
data_correlations_server("data", data = shiny::reactive(mtcars), cutoff = shiny::reactive(input$cor_cutoff))
|
||||
}
|
||||
shiny::shinyApp(ui, server)
|
||||
}
|
||||
|
||||
cor_app()
|
||||
17
R/helpers.R
17
R/helpers.R
|
|
@ -249,3 +249,20 @@ remove_na_attr <- function(data,attr="label"){
|
|||
|
||||
dplyr::bind_cols(out)
|
||||
}
|
||||
|
||||
#' Removes columns with completenes below cutoff
|
||||
#'
|
||||
#' @param data data frame
|
||||
#' @param cutoff numeric
|
||||
#'
|
||||
#' @returns data frame
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
#'data.frame(a=1:10,b=NA, c=c(2,NA)) |> remove_empty_cols(cutoff=.5)
|
||||
remove_empty_cols <- function(data,cutoff=.7){
|
||||
filter <- apply(X = data,MARGIN = 2,FUN = \(.x){
|
||||
sum(as.numeric(!is.na(.x)))/length(.x)
|
||||
}) >= cutoff
|
||||
data[filter]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -79,3 +79,4 @@ modify_qmd <- function(file, format) {
|
|||
specify_qmd_format(fileformat = "all") |>
|
||||
writeLines(paste0(tools::file_path_sans_ext(file), "_format.", tools::file_ext(file)))
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ custom_theme <- function(...,
|
|||
){
|
||||
bslib::bs_theme(
|
||||
...,
|
||||
"navbar-bg" = primary,
|
||||
version = version,
|
||||
primary = primary,
|
||||
secondary = secondary,
|
||||
|
|
|
|||
|
|
@ -461,6 +461,15 @@ update_variables_datagrid <- function(data, height = NULL, selectionId = NULL, b
|
|||
fontStyle = "italic"
|
||||
)
|
||||
|
||||
grid <- toastui::grid_filters(
|
||||
grid = grid,
|
||||
column = "name",
|
||||
# columns = unname(std_names[std_names!="vals"]),
|
||||
showApplyBtn = FALSE,
|
||||
showClearBtn = TRUE,
|
||||
type = "text"
|
||||
)
|
||||
|
||||
# grid <- toastui::grid_columns(
|
||||
# grid = grid,
|
||||
# columns = "name_toset",
|
||||
|
|
@ -571,6 +580,7 @@ convert_to <- function(data,
|
|||
new_class <- match.arg(new_class, several.ok = TRUE)
|
||||
stopifnot(length(new_class) == length(variable))
|
||||
args <- list(...)
|
||||
args$format <- clean_sep(args$format)
|
||||
if (length(variable) > 1) {
|
||||
for (i in seq_along(variable)) {
|
||||
data <- convert_to(data, variable[i], new_class[i], ...)
|
||||
|
|
@ -602,10 +612,10 @@ convert_to <- function(data,
|
|||
setNames(list(expr(as.integer(!!sym(variable)))), variable)
|
||||
)
|
||||
} else if (identical(new_class, "date")) {
|
||||
data[[variable]] <- as.Date(x = data[[variable]], ...)
|
||||
data[[variable]] <- as.Date(x = clean_date(data[[variable]]), ...)
|
||||
attr(data, "code_03_convert") <- c(
|
||||
attr(data, "code_03_convert"),
|
||||
setNames(list(expr(as.Date(!!sym(variable), origin = !!args$origin))), variable)
|
||||
setNames(list(expr(as.Date(clean_date(!!sym(variable)), origin = !!args$origin, format=clean_sep(!!args$format)))), variable)
|
||||
)
|
||||
} else if (identical(new_class, "datetime")) {
|
||||
data[[variable]] <- as.POSIXct(x = data[[variable]], ...)
|
||||
|
|
@ -710,3 +720,39 @@ get_vars_to_convert <- function(vars, classes_input) {
|
|||
}
|
||||
|
||||
|
||||
#' gsub wrapper for piping with default values for separator substituting
|
||||
#'
|
||||
#' @param data character vector
|
||||
#' @param old.sep old separator
|
||||
#' @param new.sep new separator
|
||||
#'
|
||||
#' @returns character vector
|
||||
#' @export
|
||||
#'
|
||||
clean_sep <- function(data,old.sep="[-.,/]",new.sep="-"){
|
||||
gsub(old.sep,new.sep,data)
|
||||
}
|
||||
|
||||
#' Attempts at applying uniform date format
|
||||
#'
|
||||
#' @param data character string vector of possible dates
|
||||
#'
|
||||
#' @returns character string
|
||||
#' @export
|
||||
#'
|
||||
clean_date <- function(data){
|
||||
data |>
|
||||
clean_sep() |>
|
||||
sapply(\(.x){
|
||||
if (is.na(.x)){
|
||||
.x
|
||||
} else {
|
||||
strsplit(.x,"-") |>
|
||||
unlist()|>
|
||||
lapply(\(.y){
|
||||
if (nchar(.y)==1) paste0("0",.y) else .y
|
||||
}) |> paste(collapse="-")
|
||||
}
|
||||
}) |>
|
||||
unname()
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue