FreesearchR/R/helpers.R

619 lines
15 KiB
R
Raw Normal View History

2024-11-08 15:13:33 +01:00
#' Resolve a function given as a "pkg::fun" string
#'
#' @description
#' Wrapper to get a function from a character string referring to a function
#' in a namespace, without attaching the package. The result can be passed to
#' `do.call()`. This function follows the idea from this comment:
#' https://stackoverflow.com/questions/38983179/do-call-a-function-in-r-without-loading-the-package
#'
#' @param x function or function name. Names of the form `"pkg::fun"` are
#' looked up among the exported objects of `pkg`.
#'
#' @return The exported object when `x` is a `"pkg::fun"` string; otherwise
#' `x` unchanged (a function, or a plain function name as character vector).
#' @export
#'
#' @examples
#' getfun("stats::lm")
#' getfun("mean")
getfun <- function(x) {
  # Anything that is not a namespaced name is handed back untouched, so a
  # plain name such as "mean" stays usable in do.call() instead of becoming
  # NULL.
  if (!is.character(x) || length(grep("::", x, fixed = TRUE)) == 0) {
    return(x)
  }
  parts <- strsplit(x, "::", fixed = TRUE)[[1]]
  # Load (without attaching) the namespace before looking up the export
  requireNamespace(parts[1])
  getExportedValue(parts[1], parts[2])
}
#' Save data as RDS and render a Quarto document that loads it
#'
#' @param data list to pass to qmd
#' @param ... Passed to `quarto::quarto_render()`
#'
#' @return the value returned by `quarto::quarto_render()`
#' @export
#'
write_quarto <- function(data, ...) {
  # The data is written to a fixed file in the "www" folder. Writing to a
  # tempfile() instead was considered (assumed to be more secure than putting
  # it in "www" and deleting on session end), but is not what is done here.
  rds_path <- "www/web_data.rds"
  readr::write_rds(data, file = rds_path)

  ## Specifying a output path will make the rendering fail
  ## Ref: https://github.com/quarto-dev/quarto-cli/discussions/4041
  ## Outputs to the same as the .qmd file
  ##
  ## The document receives the bare file name through its `data.file`
  ## parameter.
  render_params <- list(data.file = "web_data.rds")
  quarto::quarto_render(execute_params = render_params, ...)
}
2025-03-24 14:40:30 +01:00
# Save data as RDS and render an R Markdown document that loads it.
#
# `data` is written with readr::write_rds(); `...` is passed on to
# rmarkdown::render(). Returns the value of rmarkdown::render().
write_rmd <- function(data, ...) {
  # The data is written to a fixed file in the "www" folder. Writing to a
  # tempfile() instead was considered (assumed to be more secure than putting
  # it in "www" and deleting on session end), but is not what is done here.
  rds_path <- "www/web_data.rds"
  readr::write_rds(data, file = rds_path)

  ## Specifying a output path will make the rendering fail
  ## Ref: https://github.com/quarto-dev/quarto-cli/discussions/4041
  ## NOTE(review): the remark above was written for the Quarto variant of
  ## this wrapper - confirm that it also applies to rmarkdown::render().
  render_params <- list(data.file = "web_data.rds")
  rmarkdown::render(params = render_params, ...)
}
2024-11-08 15:13:33 +01:00
#' Flexible file import based on extension
#'
#' @description
#' Picks the reader from the file extension, which is matched
#' case-insensitively. Supported extensions are csv, xls, xlsx, dta, ods and
#' rds.
#'
#' @param file file name
#' @param consider.na character vector of strings to consider as NAs. Only
#' used by the csv and xls/xlsx readers.
#'
#' @return the object returned by the reader chosen for the extension
#' (documented upstream as a tibble)
#' @export
#'
#' @examples
#' read_input("https://raw.githubusercontent.com/agdamsbo/cognitive.index.lookup/main/data/sample.csv")
read_input <- function(file, consider.na = c("NA", '""', "")) {
  # Lower-casing lets "DATA.CSV" or "sheet.XLSX" pick the right reader
  ext <- tolower(tools::file_ext(file))

  if (ext == "csv") {
    df <- readr::read_csv(file = file, na = consider.na)
  } else if (ext %in% c("xls", "xlsx")) {
    df <- openxlsx2::read_xlsx(file = file, na.strings = consider.na)
  } else if (ext == "dta") {
    df <- haven::read_dta(file = file)
  } else if (ext == "ods") {
    df <- readODS::read_ods(path = file)
  } else if (ext == "rds") {
    df <- readr::read_rds(file = file)
  } else {
    stop(
      "Input file format has to be one of: ",
      "'.csv', '.xls', '.xlsx', '.dta', '.ods' or '.rds'"
    )
  }

  df
}
#' Convert string of arguments to list of arguments
#'
#' @description
#' The string is wrapped in `list(...)`, parsed and evaluated.
#' Idea from the answer: https://stackoverflow.com/a/62979238
#'
#' @param string string to convert to list to use with do.call
#'
#' @return list
#' @export
#'
#' @examples
#' argsstring2list("A=1:5,b=2:4")
#'
argsstring2list <- function(string) {
  # NOTE(review): `string` is evaluated as R code, so it must only ever come
  # from a trusted source.
  eval(
    parse(
      text = paste("list(", string, ")", sep = "")
    )
  )
}
#' Factorize variables in data.frame
#'
#' @description
#' Applies `REDCapCAST::as_factor()` to the selected columns. When `vars` is
#' `NULL` the data is returned untouched.
#'
#' @param data data.frame
#' @param vars variables to force factorize
#'
#' @return data.frame
#' @export
#'
#' @examples
#' factorize(mtcars,names(mtcars))
factorize <- function(data, vars) {
  # Nothing selected: hand the data back as is
  if (is.null(vars)) {
    return(data)
  }

  dplyr::mutate(
    data,
    dplyr::across(dplyr::all_of(vars), REDCapCAST::as_factor)
  )
}
# Builds a list from one argument-less call to a function in each of eight
# packages (MASS, broom, broom.helpers, here, cardx, parameters, DT, bslib).
#
# NOTE(review): presumably this function is never meant to be called and only
# exists so that each of these packages is referenced from the package code
# (see the linked r-pkgs issue below) - confirm before removing or changing
# any entry.
dummy_Imports <- function() {
list(
MASS::as.fractions(),
broom::augment(),
broom.helpers::all_categorical(),
here::here(),
cardx::all_of(),
parameters::ci(),
DT::addRow(),
bslib::accordion()
)
# Background for this construct:
# https://github.com/hadley/r-pkgs/issues/828
}
#' Export data in the requested output format
#'
#' @description
#' Depending on `output.format` the data is returned
#' - `"df"`: passed through `default_parsing()`,
#' - `"teal"`: as a `teal_data()` object in which the parsed data is assigned
#'   to `filename`,
#' - `"list"`: as a list with elements `data` and `name`, followed by
#'   anything supplied through `...`.
#'
#' @param data data
#' @param output.format output format, one of "df", "teal" or "list"
#' @param filename filename. Any "-" is replaced by "_".
#' @param ... appended to the returned list when `output.format = "list"`
#'
#' @returns data in the requested format
#' @export
#'
file_export <- function(data, output.format = c("df", "teal", "list"), filename, ...) {
  output.format <- match.arg(output.format)
  filename <- gsub("-", "_", filename)

  if (output.format == "df") {
    return(default_parsing(data))
  }

  if (output.format == "list") {
    return(c(list(data = data, name = filename), ...))
  }

  # Only "teal" is left at this point
  out <- within(
    teal_data(),
    {
      assign(name, value |>
        dplyr::bind_cols(.name_repair = "unique_quiet") |>
        default_parsing())
    },
    value = data,
    name = filename
  )
  datanames(out) <- filename
  out
}
#' Default data parsing
#'
#' @description
#' Makes column names syntactically valid and unique, drops nested list
#' columns, runs the data through the REDCapCAST parsing and factor/logical
#' conversion helpers, and finally restores the column labels that were
#' present on the input.
#'
#' @param data data
#'
#' @returns data.frame or tibble
#' @export
#'
#' @examples
#' mtcars |> str()
#' mtcars |>
#'   default_parsing() |>
#'   str()
#' head(dplyr::starwars, 5) |> str()
#' dplyr::starwars |>
#'   default_parsing() |>
#'   head(5) |>
#'   str()
default_parsing <- function(data) {
  ## Temporary step to avoid nested list and crashing
  flat <- data |>
    setNames(make.names(names(data), unique = TRUE)) |>
    remove_nested_list()

  # Labels are collected AFTER nested list columns have been dropped. They
  # are re-attached by position below, so collecting them from the original
  # data would shift every label following a dropped column.
  name_labels <- lapply(flat, \(.x) REDCapCAST::get_attr(.x, attr = "label"))

  out <- flat |>
    REDCapCAST::parse_data() |>
    REDCapCAST::as_factor() |>
    REDCapCAST::numchar2fct(numeric.threshold = 8, character.throshold = 10) |>
    REDCapCAST::as_logical() |>
    REDCapCAST::fct_drop()

  set_column_label(out, setNames(name_labels, names(out)), overwrite = FALSE)
}
2025-04-15 08:55:35 +02:00
#' Remove NA attributes
#'
#' @description
#' Drops every attribute for which `is.na()` is `TRUE`. Data frames and lists
#' are processed element by element; data frames are re-assembled with
#' `dplyr::bind_cols()`.
#'
#' @param data data
#'
#' @returns data of same class as input
#' @export
#'
#' @examples
#' ds <- mtcars |> lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |> dplyr::bind_cols()
#' ds |>
#'   remove_empty_attr() |>
#'   str()
#' mtcars |> lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |> remove_empty_attr() |>
#'   str()
#'
remove_empty_attr <- function(data) {
  # Data frames are checked first, as they are lists as well
  if (is.data.frame(data)) {
    return(dplyr::bind_cols(lapply(data, remove_empty_attr)))
  }
  if (is.list(data)) {
    return(lapply(data, remove_empty_attr))
  }

  # Plain vector: keep only the attributes that are not NA
  attrs <- attributes(data)
  attributes(data) <- attrs[!is.na(attrs)]
  data
}
#' Removes columns with completeness below cutoff
#'
#' @description
#' Completeness is the fraction of non-missing values in a column. Columns
#' with completeness at or above `cutoff` are kept. Data without rows is
#' returned unchanged, as completeness can not be evaluated.
#'
#' @param data data frame
#' @param cutoff numeric. Minimum fraction of non-missing values needed to
#' keep a column.
#'
#' @returns data frame
#' @export
#'
#' @examples
#' data.frame(a = 1:10, b = NA, c = c(2, NA)) |> remove_empty_cols(cutoff = .5)
remove_empty_cols <- function(data, cutoff = .7) {
  # With no rows the completeness is 0/0 (NaN) and would give an NA index
  if (NROW(data) == 0) {
    return(data)
  }

  # Column-wise iteration; avoids coercing the data frame to a matrix
  keep <- vapply(
    data,
    \(.x) mean(!is.na(.x)) >= cutoff,
    logical(1)
  )
  data[keep]
}
#' Append list with named index
#'
#' @description
#' Adds `data` to `list` under the name `index`. If an element of that name
#' already exists it is overwritten without warning.
#'
#' @param data data to add to list
#' @param list list
#' @param index index name
#'
#' @returns list
#' @export
#'
#' @examples
#' ls_d <- list(test = c(1:20))
#' ls_d <- list()
#' data.frame(letters[1:20], 1:20) |> append_list(ls_d, "letters")
#' letters[1:20] |> append_list(ls_d, "letters")
append_list <- function(data, list, index) {
  ## This will overwrite and not warn
  ## Not very safe, but convenient to append code to list
  if (index %in% names(list)) {
    list[[index]] <- data
    return(list)
  }

  # The name is attached to the new element itself before combining, so it
  # can not end up on an existing element when `list` has no names.
  c(list, setNames(list(data), index))
}
2025-03-12 18:27:46 +01:00
#' Get missingness fraction
#'
#' @description
#' Fraction of missing values among all values in `data`. For a data frame
#' this is the fraction of missing cells, so the result is always between 0
#' and 1.
#'
#' @param data data
#'
#' @returns numeric vector
#' @export
#'
#' @examples
#' c(NA, 1:10, rep(NA, 3)) |> missing_fraction()
missing_fraction <- function(data) {
  # is.na() gives one logical per value (per cell for data frames); the mean
  # of these is the fraction missing.
  mean(is.na(data))
}
#' Ultra short data description
#'
#' @description
#' One sentence stating the number of observations, variables and complete
#' cases in the data.
#'
#' @param data data frame, or a shiny reactive returning one
#' @param data_text text used as the subject of the sentence
#'
#' @returns character vector
#' @export
#'
#' @examples
#' data.frame(
#'   sample(1:8, 20, TRUE),
#'   sample(c(1:8, NA), 20, TRUE)
#' ) |> data_description()
data_description <- function(data, data_text = "Data") {
  # Reactives are resolved to their current value first
  if (shiny::is.reactive(data)) {
    data <- data()
  }

  n_obs <- nrow(data)
  complete_n <- sum(complete.cases(data))
  # Percentage of complete cases with three significant digits
  complete_pct <- signif(100 * (complete_n / n_obs), 3)

  sprintf(
    "%s has %s observations and %s variables, with %s (%s%%) complete cases.",
    data_text,
    n_obs,
    ncol(data),
    complete_n,
    complete_pct
  )
}
2025-03-31 14:37:28 +02:00
2025-04-14 10:10:33 +02:00
#' Filter function to filter data set by variable type
#'
#' @description
#' Keeps the columns whose `data_type()` is among `type`. The call that
#' reproduces the filtering is stored in the "code" attribute of the result.
#' With `type = NULL` the data is returned unchanged.
#'
#' @param data data frame
#' @param type vector of data types (recognised: data_types)
#'
#' @returns data.frame
#' @export
#'
#' @examples
#' default_parsing(mtcars) |>
#'   data_type_filter(type = c("categorical", "continuous")) |>
#'   attributes()
#' default_parsing(mtcars) |>
#'   data_type_filter(type = NULL) |>
#'   attributes()
#' \dontrun{
#' default_parsing(mtcars) |> data_type_filter(type = c("test", "categorical", "continuous"))
#' }
data_type_filter <- function(data, type) {
  ## Please ensure to only provide recognised data types
  assertthat::assert_that(all(type %in% data_types()))

  # No types requested: nothing to filter and no code to record
  if (is.null(type)) {
    return(data)
  }

  filtered <- data[data_type(data) %in% type]
  attr(filtered, "code") <- rlang::call2(
    "data_type_filter",
    !!!list(type = type),
    .ns = "FreesearchR"
  )
  filtered
}
2025-03-31 14:37:28 +02:00
#' Drop-in replacement for the base::sort_by with option to remove NAs
#'
#' @param x x
#' @param y y
#' @param na.rm remove NAs from the sorted result
#' @param ... passed to `base::sort_by()`
#'
#' @returns vector
#' @export
#'
#' @examples
#' sort_by(c("Multivariable", "Univariable"), c("Univariable", "Minimal", "Multivariable"))
sort_by <- function(x, y, na.rm = FALSE, ...) {
  sorted <- base::sort_by(x, y, ...)

  # Optionally drop missing values after sorting
  if (na.rm == TRUE) {
    sorted <- sorted[!is.na(sorted)]
  }

  sorted
}
2025-04-10 15:46:42 +02:00
# Extract one entry from the `labels` element of a ggplot object.
#
# `data` has to pass `ggplot2::is.ggplot()`, otherwise the assertion fails.
# `label` is used with `[[` on `data$labels`, and whatever is stored there is
# returned.
# NOTE(review): presumably `label` is a label name such as "x" or "title" -
# confirm against the callers.
get_ggplot_label <- function(data, label) {
assertthat::assert_that(ggplot2::is.ggplot(data))
data$labels[[label]]
}
#' Return if available
#'
#' @description
#' Returns `data` unless it is missing (`NULL`, zero-length or `NA`), in which
#' case `default` is returned. Vectors are handled element by element: each
#' missing element is replaced by `default`, which means missing elements are
#' dropped when `default` is `NULL`.
#'
#' @param data vector
#' @param default assigned value for missings
#'
#' @returns vector
#' @export
#'
#' @examples
#' NULL |> if_not_missing("new")
#' c(2, "a", NA) |> if_not_missing()
#' c(2, "a", NA) |> if_not_missing("missing")
#' "See" |> if_not_missing()
if_not_missing <- function(data, default = NULL) {
  if (length(data) > 1) {
    # `default` has to be forwarded, otherwise missing elements are always
    # dropped no matter what the caller asked for.
    return(Reduce(c, lapply(data, if_not_missing, default = default)))
  }

  # The length check comes first: it covers NULL and other empty input, for
  # which is.na() returns a zero-length result that can not be used in `if`.
  if (length(data) == 0L || is.na(data)) {
    return(default)
  }

  data
}
2025-04-09 12:31:08 +02:00
#' Merge list of expressions
#'
#' @description
#' Chains the elements of the list, left to right, into one expression in
#' which consecutive elements are joined by `%>%`.
#'
#' @param data list
#'
#' @returns expression
#' @export
#'
#' @examples
#' list(
#'   rlang::call2(.fn = "select", !!!list(c("cyl", "disp")), .ns = "dplyr"),
#'   rlang::call2(.fn = "default_parsing", .ns = "FreesearchR")
#' ) |> merge_expression()
merge_expression <- function(data) {
  # Joins what has been chained so far (lhs) with the next element (rhs)
  chain_pair <- function(lhs, rhs) {
    rlang::expr(!!lhs %>% !!rhs)
  }

  Reduce(f = chain_pair, x = data)
}
#' Reduce list of strings to one string joined by the native pipe operator
#'
#' @description
#' The elements of a list are glued together, left to right, with `collapse`
#' in between. Anything that is not a list is returned as is.
#'
#' @param data list
#' @param collapse string placed between consecutive elements
#'
#' @returns character string
#' @export
#'
#' @examples
#' list(
#'   "mtcars",
#'   rlang::call2(.fn = "select", !!!list(c("cyl", "disp")), .ns = "dplyr"),
#'   rlang::call2(.fn = "default_parsing", .ns = "FreesearchR")
#' ) |>
#'   lapply(expression_string) |>
#'   pipe_string() |>
#'   expression_string("data<-")
pipe_string <- function(data, collapse = "|>\n") {
  if (!is.list(data)) {
    return(data)
  }

  # Joins what has been glued so far (x) with the next element (y)
  join_pair <- function(x, y) {
    glue::glue("{x}{collapse}{y}")
  }

  Reduce(f = join_pair, x = data)
}
2025-04-09 12:31:08 +02:00
#' Deparses expression as string, substitutes native pipe and adds assign
#'
#' @description
#' Calls are deparsed, anything else is used as is. Double quotes are
#' replaced by single quotes, `%>%` by the native pipe followed by a line
#' break, and `assign.str` is put in front. Finally ALL spaces and backticks
#' are removed, including any inside quoted strings.
#'
#' @param data expression
#' @param assign.str string to put in front of the expression, e.g. "data<-"
#'
#' @returns string
#' @export
#'
#' @examples
#' list(
#'   as.symbol(paste0("mtcars$", "mpg")),
#'   rlang::call2(.fn = "select", !!!list(c("cyl", "disp")), .ns = "dplyr"),
#'   rlang::call2(.fn = "default_parsing", .ns = "FreesearchR")
#' ) |>
#'   merge_expression() |>
#'   expression_string()
expression_string <- function(data, assign.str = "") {
  if (is.call(data)) {
    exp.str <- deparse(data)
  } else {
    exp.str <- data
  }

  # deparse() may return several lines; these are joined to one string
  flat <- paste(exp.str, collapse = "")
  single_quoted <- paste(gsub('"', "'", flat), collapse = "")
  piped <- gsub("%>%", "|>\n", single_quoted)

  gsub(" |`", "", paste0(assign.str, piped))
}
2025-04-10 15:46:42 +02:00
2025-04-15 08:55:35 +02:00
#' Very simple function to remove nested lists, like when uploading .rds
#'
#' @description
#' Drops every column for which `is.list()` is `TRUE`.
#'
#' @param data data
#'
#' @returns data.frame
#' @export
#'
#' @examples
#' dplyr::tibble(a = 1:10, b = rep(list("a"), 10)) |> remove_nested_list()
#' dplyr::tibble(a = 1:10, b = rep(list(c("a", "b")), 10)) |> as.data.frame()
remove_nested_list <- function(data) {
  # vapply() always gives a logical vector, also for data without columns,
  # where sapply() would return an empty list that can not be negated.
  is_nested <- vapply(data, is.list, logical(1))
  data[!is_nested]
}
#' (Re)label columns in data.frame
#'
#' @description
#' Sets the "label" attribute on each column that has an entry in `label`.
#' Empty labels ("") are treated as `NA`, and attributes that are `NA`
#' afterwards are removed again. Columns without an entry are left untouched.
#'
#' @param data data.frame to be labelled
#' @param label named list or vector
#' @param overwrite passed on to `REDCapCAST::set_attr()`
#'
#' @returns data.frame
#' @export
#'
#' @examples
#' ls <- list("mpg" = "", "cyl" = "Cylinders", "disp" = "", "hp" = "", "drat" = "", "wt" = "", "qsec" = "", "vs" = "", "am" = "", "gear" = "", "carb" = "")
#' ls2 <- c("mpg" = "", "cyl" = "Cylinders", "disp" = "", "hp" = "Horses", "drat" = "", "wt" = "", "qsec" = "", "vs" = "", "am" = "", "gear" = "", "carb" = "")
#' ls3 <- c("mpg" = "", "cyl" = "", "disp" = "", "hp" = "Horses", "drat" = "", "wt" = "", "qsec" = "", "vs" = "", "am" = "", "gear" = "", "carb" = "")
#' mtcars |>
#'   set_column_label(ls) |>
#'   set_column_label(ls2) |>
#'   set_column_label(ls3)
#' rlang::expr(FreesearchR::set_column_label(label = !!ls3)) |> expression_string()
set_column_label <- function(data, label, overwrite = TRUE) {
  # Labels one column; `column_name` is the name of that column in `data`
  label_column <- function(column, column_name) {
    labels <- if (is.list(label)) unlist(label) else label
    labels[labels == ""] <- NA

    if (!column_name %in% names(labels)) {
      return(column)
    }

    labelled <- REDCapCAST::set_attr(
      column,
      unname(labels[column_name]),
      attr = "label",
      overwrite = overwrite
    )
    remove_empty_attr(labelled)
  }

  columns <- purrr::imap(data, label_column)
  dplyr::bind_cols(columns, .name_repair = "unique_quiet")
}
#' Append a column to a data.frame
#'
#' @description
#' Inserts `column` into `data` at the position given by `index`. If `name`
#' conflicts with existing column names, a new unique name is created.
#'
#' @param data data
#' @param column new column (vector) or data.frame with 1 column
#' @param name new name (pre-fix)
#' @param index desired location. May be "left", "right" or numeric index.
#' A numeric index beyond the last column, or any other value, places the
#' column to the right.
#'
#' @returns data.frame
#' @export
#'
#' @examples
#' mtcars |>
#'   dplyr::mutate(mpg_cut = mpg) |>
#'   append_column(mtcars$mpg, "mpg_cutter")
append_column <- function(data, column, name, index = "right") {
  assertthat::assert_that(NCOL(column) == 1)
  assertthat::assert_that(length(index) == 1)

  if (index == "right") {
    index <- ncol(data) + 1
  } else if (index == "left") {
    index <- 1
  } else if (is.numeric(index)) {
    if (index > ncol(data)) {
      index <- ncol(data) + 1
    }
  } else {
    index <- ncol(data) + 1
  }

  ## Identifying potential naming conflicts
  nm_conflicts <- names(data)[startsWith(names(data), name)]

  ## Simple attempt to create new unique name
  if (length(nm_conflicts) > 0) {
    name <- glue::glue("{name}_{length(nm_conflicts)+1}")
  }

  ## If the above not achieves a unique name, the generic approach is used.
  ## The new name is put LAST: make.names(unique = TRUE) leaves the first
  ## occurrence of a duplicated name untouched and only alters later ones.
  if (name %in% names(data)) {
    candidates <- make.names(c(names(data), name), unique = TRUE)
    name <- candidates[length(candidates)]
  }

  new_df <- setNames(data.frame(column), name)

  list(
    data[seq_len(index - 1)],
    new_df,
    # Columns to the right of the new one, if any
    if (!index > ncol(data)) data[index:ncol(data)]
  ) |>
    dplyr::bind_cols()
}