2024-11-28 21:02:23 +01:00
|
|
|
#' Create a regression model programatically
|
2024-11-08 15:13:33 +01:00
|
|
|
#'
|
|
|
|
#' @param data data set
|
|
|
|
#' @param fun Name of function as character vector or function to use for model creation.
|
|
|
|
#' @param vars character vector of variables to include
|
|
|
|
#' @param outcome.str Name of outcome variable. Character vector.
|
2024-11-21 12:34:27 +01:00
|
|
|
#' @param auto.mode Make assumptions on function dependent on outcome data format. Overwrites other arguments.
|
2024-11-08 15:13:33 +01:00
|
|
|
#' @param formula.str Formula as string. Passed through 'glue::glue'. If given, 'outcome.str' and 'vars' are ignored. Optional.
|
|
|
|
#' @param args.list List of arguments passed to 'fun' with 'do.call'.
|
2024-11-28 21:02:23 +01:00
|
|
|
#' @param ... ignored for now
|
2024-11-08 15:13:33 +01:00
|
|
|
#'
|
|
|
|
#' @importFrom stats as.formula
|
|
|
|
#'
|
|
|
|
#' @return object of standard class for fun
|
|
|
|
#' @export
|
2025-02-19 13:17:16 +01:00
|
|
|
#' @rdname regression_model
|
2024-11-08 15:13:33 +01:00
|
|
|
#'
|
|
|
|
#' @examples
|
|
|
|
#' gtsummary::trial |>
|
2024-11-21 12:34:27 +01:00
|
|
|
#' regression_model(outcome.str = "age")
|
2024-11-08 15:13:33 +01:00
|
|
|
#' gtsummary::trial |>
|
|
|
|
#' regression_model(
|
|
|
|
#' outcome.str = "age",
|
2024-11-21 12:34:27 +01:00
|
|
|
#' auto.mode = FALSE,
|
2024-11-08 15:13:33 +01:00
|
|
|
#' fun = "stats::lm",
|
|
|
|
#' formula.str = "{outcome.str}~.",
|
|
|
|
#' args.list = NULL
|
|
|
|
#' )
|
2025-01-30 14:31:38 +01:00
|
|
|
#' gtsummary::trial |>
|
|
|
|
#' default_parsing() |>
|
|
|
|
#' regression_model(
|
|
|
|
#' outcome.str = "trt",
|
|
|
|
#' auto.mode = FALSE,
|
|
|
|
#' fun = "stats::glm",
|
|
|
|
#' args.list = list(family = binomial(link = "logit"))
|
|
|
|
#' )
|
|
|
|
#' m <- mtcars |>
|
2025-01-17 15:59:24 +01:00
|
|
|
#' default_parsing() |>
|
|
|
|
#' regression_model(
|
|
|
|
#' outcome.str = "mpg",
|
|
|
|
#' auto.mode = FALSE,
|
|
|
|
#' fun = "stats::lm",
|
|
|
|
#' formula.str = "{outcome.str}~{paste(vars,collapse='+')}",
|
|
|
|
#' args.list = NULL,
|
|
|
|
#' vars = c("mpg", "cyl")
|
2025-01-30 14:31:38 +01:00
|
|
|
#' )
|
2025-03-20 11:46:02 +01:00
|
|
|
#' broom::tidy(m)
|
2024-11-08 15:13:33 +01:00
|
|
|
regression_model <- function(data,
|
2025-04-11 13:23:18 +02:00
|
|
|
outcome.str = NULL,
|
2025-01-17 15:59:24 +01:00
|
|
|
auto.mode = FALSE,
|
2024-11-08 15:13:33 +01:00
|
|
|
formula.str = NULL,
|
|
|
|
args.list = NULL,
|
|
|
|
fun = NULL,
|
2024-11-28 21:02:23 +01:00
|
|
|
vars = NULL,
|
|
|
|
...) {
|
2024-11-21 12:34:27 +01:00
|
|
|
if (!is.null(formula.str)) {
|
|
|
|
if (formula.str == "") {
|
|
|
|
formula.str <- NULL
|
|
|
|
}
|
2024-11-15 22:22:17 +01:00
|
|
|
}
|
|
|
|
|
2025-01-30 14:31:38 +01:00
|
|
|
## This will handle if outcome is not in data for nicer shiny behavior
|
2025-04-11 13:23:18 +02:00
|
|
|
if (isTRUE(!outcome.str %in% names(data))) {
|
2025-01-30 14:31:38 +01:00
|
|
|
outcome.str <- names(data)[1]
|
2025-04-11 13:23:18 +02:00
|
|
|
print("Outcome variable is not in data, first column is used")
|
2025-01-17 15:59:24 +01:00
|
|
|
}
|
|
|
|
|
2024-11-15 22:22:17 +01:00
|
|
|
if (!is.null(formula.str)) {
|
2025-01-17 15:59:24 +01:00
|
|
|
formula.glue <- glue::glue(formula.str)
|
2025-04-11 13:23:18 +02:00
|
|
|
outcome.str <- NULL
|
2024-11-08 15:13:33 +01:00
|
|
|
} else {
|
|
|
|
assertthat::assert_that(outcome.str %in% names(data),
|
|
|
|
msg = "Outcome variable is not present in the provided dataset"
|
|
|
|
)
|
2025-01-17 15:59:24 +01:00
|
|
|
formula.glue <- glue::glue("{outcome.str}~{paste(vars,collapse='+')}")
|
2024-11-08 15:13:33 +01:00
|
|
|
}
|
|
|
|
|
2025-04-11 13:23:18 +02:00
|
|
|
if (is.null(vars)) {
|
|
|
|
vars <- names(data)[!names(data) %in% outcome.str]
|
|
|
|
} else if (!is.null(outcome.str)) {
|
|
|
|
if (outcome.str %in% vars) {
|
|
|
|
vars <- vars[!vars %in% outcome.str]
|
|
|
|
}
|
|
|
|
data <- data |> dplyr::select(dplyr::all_of(c(vars, outcome.str)))
|
|
|
|
}
|
|
|
|
|
2024-11-08 15:13:33 +01:00
|
|
|
# Formatting character variables as factor
|
|
|
|
# Improvement should add a missing vector to format as NA
|
2024-11-28 21:02:23 +01:00
|
|
|
data <- data |>
|
|
|
|
purrr::map(\(.x){
|
|
|
|
if (is.character(.x)) {
|
2024-12-19 21:21:29 +01:00
|
|
|
suppressWarnings(REDCapCAST::as_factor(.x))
|
2024-11-28 21:02:23 +01:00
|
|
|
} else {
|
|
|
|
.x
|
|
|
|
}
|
|
|
|
}) |>
|
2025-01-15 16:21:38 +01:00
|
|
|
dplyr::bind_cols(.name_repair = "unique_quiet")
|
2024-11-28 21:02:23 +01:00
|
|
|
|
|
|
|
if (is.null(fun)) auto.mode <- TRUE
|
2024-11-08 15:13:33 +01:00
|
|
|
|
2025-04-09 12:31:08 +02:00
|
|
|
if (isTRUE(auto.mode)) {
|
2024-11-15 21:57:38 +01:00
|
|
|
if (is.numeric(data[[outcome.str]])) {
|
2024-11-08 15:13:33 +01:00
|
|
|
fun <- "stats::lm"
|
2024-11-15 21:57:38 +01:00
|
|
|
} else if (is.factor(data[[outcome.str]])) {
|
|
|
|
if (length(levels(data[[outcome.str]])) == 2) {
|
2024-11-08 15:13:33 +01:00
|
|
|
fun <- "stats::glm"
|
2024-11-21 12:34:27 +01:00
|
|
|
args.list <- list(family = stats::binomial(link = "logit"))
|
2024-11-15 21:57:38 +01:00
|
|
|
} else if (length(levels(data[[outcome.str]])) > 2) {
|
2024-11-08 15:13:33 +01:00
|
|
|
fun <- "MASS::polr"
|
2024-11-15 21:57:38 +01:00
|
|
|
args.list <- list(
|
2024-11-08 15:13:33 +01:00
|
|
|
Hess = TRUE,
|
|
|
|
method = "logistic"
|
|
|
|
)
|
|
|
|
} else {
|
|
|
|
stop("The provided output variable only has one level")
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
stop("Output variable should be either numeric or factor for auto.mode")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
assertthat::assert_that("character" %in% class(fun),
|
|
|
|
msg = "Please provide the function as a character vector."
|
|
|
|
)
|
|
|
|
|
|
|
|
out <- do.call(
|
|
|
|
getfun(fun),
|
|
|
|
c(
|
2025-01-30 14:31:38 +01:00
|
|
|
list(
|
|
|
|
data = data,
|
|
|
|
formula = as.formula(formula.glue)
|
|
|
|
),
|
2024-11-08 15:13:33 +01:00
|
|
|
args.list
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
2025-04-02 11:31:04 +02:00
|
|
|
# out <- REDCapCAST::set_attr(out,label = fun,attr = "fun.call")
|
|
|
|
|
2024-11-08 15:13:33 +01:00
|
|
|
# Recreating the call
|
|
|
|
# out$call <- match.call(definition=eval(parse(text=fun)), call(fun, data = 'data',formula = as.formula(formula.str),args.list))
|
|
|
|
|
2024-11-28 21:02:23 +01:00
|
|
|
return(out)
|
|
|
|
}
|
|
|
|
|
|
|
|
#' Create a regression model programatically
|
|
|
|
#'
|
|
|
|
#' @param data data set
|
|
|
|
#' @param fun Name of function as character vector or function to use for model creation.
|
|
|
|
#' @param vars character vector of variables to include
|
|
|
|
#' @param outcome.str Name of outcome variable. Character vector.
|
|
|
|
#' @param args.list List of arguments passed to 'fun' with 'do.call'.
|
|
|
|
#' @param ... ignored for now
|
|
|
|
#'
|
|
|
|
#' @importFrom stats as.formula
|
2025-02-19 13:17:16 +01:00
|
|
|
#' @rdname regression_model
|
2024-11-28 21:02:23 +01:00
|
|
|
#'
|
|
|
|
#' @return object of standard class for fun
|
|
|
|
#' @export
|
|
|
|
#'
|
|
|
|
#' @examples
|
|
|
|
#' \dontrun{
|
|
|
|
#' gtsummary::trial |>
|
|
|
|
#' regression_model_uv(outcome.str = "age")
|
|
|
|
#' gtsummary::trial |>
|
|
|
|
#' regression_model_uv(
|
|
|
|
#' outcome.str = "age",
|
|
|
|
#' fun = "stats::lm",
|
|
|
|
#' args.list = NULL
|
|
|
|
#' )
|
2025-01-30 14:31:38 +01:00
|
|
|
#' m <- gtsummary::trial |> regression_model_uv(
|
2024-11-28 21:02:23 +01:00
|
|
|
#' outcome.str = "trt",
|
|
|
|
#' fun = "stats::glm",
|
|
|
|
#' args.list = list(family = stats::binomial(link = "logit"))
|
|
|
|
#' )
|
2025-03-20 11:46:02 +01:00
|
|
|
#' lapply(m, broom::tidy) |> dplyr::bind_rows()
|
2024-11-28 21:02:23 +01:00
|
|
|
#' }
|
|
|
|
regression_model_uv <- function(data,
|
|
|
|
outcome.str,
|
|
|
|
args.list = NULL,
|
|
|
|
fun = NULL,
|
|
|
|
vars = NULL,
|
|
|
|
...) {
|
2025-01-30 14:31:38 +01:00
|
|
|
## This will handle if outcome is not in data for nicer shiny behavior
|
2025-03-20 11:46:02 +01:00
|
|
|
if (!outcome.str %in% names(data)) {
|
2025-01-30 14:31:38 +01:00
|
|
|
outcome.str <- names(data)[1]
|
|
|
|
print("outcome is not in data, first column is used")
|
|
|
|
}
|
|
|
|
|
2024-11-28 21:02:23 +01:00
|
|
|
if (!is.null(vars)) {
|
|
|
|
data <- data |>
|
|
|
|
dplyr::select(dplyr::all_of(
|
|
|
|
unique(c(outcome.str, vars))
|
|
|
|
))
|
|
|
|
}
|
|
|
|
|
|
|
|
if (is.null(args.list)) {
|
|
|
|
args.list <- list()
|
|
|
|
}
|
|
|
|
|
|
|
|
if (is.null(fun)) {
|
|
|
|
if (is.numeric(data[[outcome.str]])) {
|
|
|
|
fun <- "stats::lm"
|
|
|
|
} else if (is.factor(data[[outcome.str]])) {
|
|
|
|
if (length(levels(data[[outcome.str]])) == 2) {
|
|
|
|
fun <- "stats::glm"
|
|
|
|
args.list <- list(family = stats::binomial(link = "logit"))
|
|
|
|
} else if (length(levels(data[[outcome.str]])) > 2) {
|
|
|
|
fun <- "MASS::polr"
|
|
|
|
args.list <- list(
|
|
|
|
Hess = TRUE,
|
|
|
|
method = "logistic"
|
|
|
|
)
|
|
|
|
} else {
|
|
|
|
stop("The provided output variable only has one level")
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
stop("Output variable should be either numeric or factor for auto.mode")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
assertthat::assert_that("character" %in% class(fun),
|
|
|
|
msg = "Please provide the function as a character vector."
|
|
|
|
)
|
|
|
|
|
|
|
|
out <- names(data)[!names(data) %in% outcome.str] |>
|
|
|
|
purrr::map(\(.var){
|
|
|
|
do.call(
|
|
|
|
regression_model,
|
|
|
|
c(
|
2025-01-30 14:31:38 +01:00
|
|
|
list(
|
|
|
|
data = data[match(c(outcome.str, .var), names(data))],
|
|
|
|
outcome.str = outcome.str
|
|
|
|
),
|
|
|
|
args.list
|
2024-11-28 21:02:23 +01:00
|
|
|
)
|
|
|
|
)
|
|
|
|
})
|
|
|
|
|
2024-11-08 15:13:33 +01:00
|
|
|
return(out)
|
|
|
|
}
|
2024-11-29 14:30:02 +01:00
|
|
|
|
2025-01-17 15:59:24 +01:00
|
|
|
|
|
|
|
### HELPERS
|
|
|
|
|
2025-04-14 10:10:33 +02:00
|
|
|
#' Data type assessment.
|
2025-01-17 15:59:24 +01:00
|
|
|
#'
|
2025-04-14 10:10:33 +02:00
|
|
|
#' @description
|
|
|
|
#' These are more overall than the native typeof. This is used to assess a more
|
|
|
|
#' meaningful "clinical" data type.
|
|
|
|
#'
|
|
|
|
#' @param data vector or data.frame. if data frame, each column is evaluated.
|
2025-01-17 15:59:24 +01:00
|
|
|
#'
|
|
|
|
#' @returns outcome type
|
|
|
|
#' @export
|
|
|
|
#'
|
|
|
|
#' @examples
|
|
|
|
#' mtcars |>
|
|
|
|
#' default_parsing() |>
|
2025-03-20 11:46:02 +01:00
|
|
|
#' lapply(data_type)
|
2025-04-14 10:10:33 +02:00
|
|
|
#' mtcars |>
|
|
|
|
#' default_parsing() |>
|
|
|
|
#' data_type()
|
2025-03-20 11:46:02 +01:00
|
|
|
#' c(1, 2) |> data_type()
|
|
|
|
#' 1 |> data_type()
|
|
|
|
#' c(rep(NA, 10)) |> data_type()
|
|
|
|
#' sample(1:100, 50) |> data_type()
|
|
|
|
#' factor(letters[1:20]) |> data_type()
|
2025-04-14 10:10:33 +02:00
|
|
|
#' as.Date(1:20) |> data_type()
|
2025-03-20 11:46:02 +01:00
|
|
|
data_type <- function(data) {
|
2025-04-14 10:10:33 +02:00
|
|
|
if (is.data.frame(data)) {
|
|
|
|
sapply(data, data_type)
|
|
|
|
} else {
|
|
|
|
cl_d <- class(data)
|
2025-05-10 13:02:04 +02:00
|
|
|
l_unique <- length(unique(na.omit(data)))
|
2025-04-14 10:10:33 +02:00
|
|
|
if (all(is.na(data))) {
|
|
|
|
out <- "empty"
|
2025-05-10 13:02:04 +02:00
|
|
|
} else if (l_unique < 2) {
|
2025-04-14 10:10:33 +02:00
|
|
|
out <- "monotone"
|
2025-05-10 13:02:04 +02:00
|
|
|
} else if (any(c("factor", "logical") %in% cl_d) | l_unique == 2) {
|
|
|
|
if (identical("logical", cl_d) | l_unique == 2) {
|
2025-04-14 10:10:33 +02:00
|
|
|
out <- "dichotomous"
|
2025-03-20 11:46:02 +01:00
|
|
|
} else {
|
2025-04-22 09:58:34 +02:00
|
|
|
# if (is.ordered(data)) {
|
|
|
|
# out <- "ordinal"
|
|
|
|
# } else {
|
2025-04-14 10:10:33 +02:00
|
|
|
out <- "categorical"
|
2025-04-22 09:58:34 +02:00
|
|
|
# }
|
2025-03-20 11:46:02 +01:00
|
|
|
}
|
2025-04-14 10:10:33 +02:00
|
|
|
} else if (identical(cl_d, "character")) {
|
|
|
|
out <- "text"
|
|
|
|
} else if (any(c("hms", "Date", "POSIXct", "POSIXt") %in% cl_d)) {
|
|
|
|
out <- "datetime"
|
2025-05-10 13:02:04 +02:00
|
|
|
} else if (l_unique > 2) {
|
2025-04-14 10:10:33 +02:00
|
|
|
## Previously had all thinkable classes
|
|
|
|
## Now just assumes the class has not been defined above
|
|
|
|
## any(c("numeric", "integer", "hms", "Date", "timediff") %in% cl_d) &
|
|
|
|
out <- "continuous"
|
|
|
|
} else {
|
|
|
|
out <- "unknown"
|
2025-01-17 15:59:24 +01:00
|
|
|
}
|
2025-04-14 10:10:33 +02:00
|
|
|
|
|
|
|
out
|
2025-01-17 15:59:24 +01:00
|
|
|
}
|
2025-04-14 10:10:33 +02:00
|
|
|
}
|
2025-01-17 15:59:24 +01:00
|
|
|
|
2025-04-14 10:10:33 +02:00
|
|
|
#' Recognised data types from data_type
|
|
|
|
#'
|
|
|
|
#' @returns vector
|
|
|
|
#' @export
|
|
|
|
#'
|
|
|
|
#' @examples
|
|
|
|
#' data_types()
|
|
|
|
data_types <- function() {
|
2025-04-22 09:58:34 +02:00
|
|
|
list(
|
|
|
|
"empty" = list(descr="Variable of all NAs",classes="Any class"),
|
|
|
|
"monotone" = list(descr="Variable with only one unique value",classes="Any class"),
|
|
|
|
"dichotomous" = list(descr="Variable with only two unique values",classes="Any class"),
|
|
|
|
"categorical"= list(descr="Factor variable",classes="factor (ordered or unordered)"),
|
|
|
|
"text"= list(descr="Character variable",classes="character"),
|
|
|
|
"datetime"= list(descr="Variable of time, date or datetime values",classes="hms, Date, POSIXct and POSIXt"),
|
|
|
|
"continuous"= list(descr="Numeric variable",classes="numeric, integer or double"),
|
|
|
|
"unknown"= list(descr="Anything not falling within the previous",classes="Any other class")
|
|
|
|
)
|
2025-01-17 15:59:24 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#' Implemented functions
|
|
|
|
#'
|
|
|
|
#' @description
|
|
|
|
#' Library of supported functions. The list name and "descr" element should be
|
|
|
|
#' unique for each element on list.
|
|
|
|
#'
|
|
|
|
#'
|
|
|
|
#' @returns list
|
|
|
|
#' @export
|
|
|
|
#'
|
|
|
|
#' @examples
|
|
|
|
#' supported_functions()
|
|
|
|
supported_functions <- function() {
|
|
|
|
list(
|
|
|
|
lm = list(
|
|
|
|
descr = "Linear regression model",
|
|
|
|
design = "cross-sectional",
|
|
|
|
out.type = "continuous",
|
|
|
|
fun = "stats::lm",
|
|
|
|
args.list = NULL,
|
2025-01-23 08:44:38 +01:00
|
|
|
formula.str = "{outcome.str}~{paste(vars,collapse='+')}",
|
2025-01-30 14:31:38 +01:00
|
|
|
table.fun = "gtsummary::tbl_regression",
|
|
|
|
table.args.list = list(exponentiate = FALSE)
|
2025-01-17 15:59:24 +01:00
|
|
|
),
|
|
|
|
glm = list(
|
|
|
|
descr = "Logistic regression model",
|
|
|
|
design = "cross-sectional",
|
|
|
|
out.type = "dichotomous",
|
|
|
|
fun = "stats::glm",
|
2025-04-09 12:31:08 +02:00
|
|
|
args.list = list(family = "binomial"),
|
2025-01-23 08:44:38 +01:00
|
|
|
formula.str = "{outcome.str}~{paste(vars,collapse='+')}",
|
2025-01-30 14:31:38 +01:00
|
|
|
table.fun = "gtsummary::tbl_regression",
|
|
|
|
table.args.list = list()
|
2025-01-17 15:59:24 +01:00
|
|
|
),
|
|
|
|
polr = list(
|
|
|
|
descr = "Ordinal logistic regression model",
|
|
|
|
design = "cross-sectional",
|
2025-04-22 09:58:34 +02:00
|
|
|
out.type = c("categorical"),
|
2025-01-17 15:59:24 +01:00
|
|
|
fun = "MASS::polr",
|
|
|
|
args.list = list(
|
|
|
|
Hess = TRUE,
|
|
|
|
method = "logistic"
|
|
|
|
),
|
2025-01-23 08:44:38 +01:00
|
|
|
formula.str = "{outcome.str}~{paste(vars,collapse='+')}",
|
2025-01-30 14:31:38 +01:00
|
|
|
table.fun = "gtsummary::tbl_regression",
|
|
|
|
table.args.list = list()
|
2025-01-17 15:59:24 +01:00
|
|
|
)
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#' Get possible regression models
|
|
|
|
#'
|
|
|
|
#' @param data data
|
|
|
|
#'
|
2025-02-19 13:17:16 +01:00
|
|
|
#' @returns character vector
|
2025-01-17 15:59:24 +01:00
|
|
|
#' @export
|
|
|
|
#'
|
|
|
|
#' @examples
|
|
|
|
#' mtcars |>
|
|
|
|
#' default_parsing() |>
|
|
|
|
#' dplyr::pull("cyl") |>
|
|
|
|
#' possible_functions(design = "cross-sectional")
|
|
|
|
#'
|
|
|
|
#' mtcars |>
|
|
|
|
#' default_parsing() |>
|
|
|
|
#' dplyr::select("cyl") |>
|
|
|
|
#' possible_functions(design = "cross-sectional")
|
|
|
|
possible_functions <- function(data, design = c("cross-sectional")) {
|
2025-04-11 13:23:18 +02:00
|
|
|
#
|
2025-03-20 13:13:14 +01:00
|
|
|
# data <- if (is.reactive(data)) data() else data
|
2025-01-17 15:59:24 +01:00
|
|
|
if (is.data.frame(data)) {
|
|
|
|
data <- data[[1]]
|
|
|
|
}
|
|
|
|
|
|
|
|
design <- match.arg(design)
|
2025-03-20 11:46:02 +01:00
|
|
|
type <- data_type(data)
|
2025-01-17 15:59:24 +01:00
|
|
|
|
|
|
|
design_ls <- supported_functions() |>
|
|
|
|
lapply(\(.x){
|
|
|
|
if (design %in% .x$design) {
|
|
|
|
.x
|
|
|
|
}
|
|
|
|
})
|
|
|
|
|
|
|
|
if (type == "unknown") {
|
|
|
|
out <- type
|
|
|
|
} else {
|
|
|
|
out <- design_ls |>
|
|
|
|
lapply(\(.x){
|
|
|
|
if (type %in% .x$out.type) {
|
|
|
|
.x$descr
|
|
|
|
}
|
|
|
|
}) |>
|
|
|
|
unlist()
|
|
|
|
}
|
|
|
|
unname(out)
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#' Get the function options based on the selected function description
|
|
|
|
#'
|
|
|
|
#' @param data vector
|
|
|
|
#'
|
|
|
|
#' @returns list
|
|
|
|
#' @export
|
|
|
|
#'
|
|
|
|
#' @examples
|
|
|
|
#' mtcars |>
|
|
|
|
#' default_parsing() |>
|
|
|
|
#' dplyr::pull(mpg) |>
|
|
|
|
#' possible_functions(design = "cross-sectional") |>
|
|
|
|
#' (\(.x){
|
|
|
|
#' .x[[1]]
|
|
|
|
#' })() |>
|
|
|
|
#' get_fun_options()
|
|
|
|
get_fun_options <- function(data) {
|
|
|
|
descrs <- supported_functions() |>
|
|
|
|
lapply(\(.x){
|
|
|
|
.x$descr
|
|
|
|
}) |>
|
|
|
|
unlist()
|
|
|
|
supported_functions() |>
|
|
|
|
(\(.x){
|
|
|
|
.x[match(data, descrs)]
|
|
|
|
})()
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#' Wrapper to create regression model based on supported models
|
|
|
|
#'
|
|
|
|
#' @description
|
|
|
|
#' Output is a concatenated list of model information and model
|
|
|
|
#'
|
|
|
|
#'
|
|
|
|
#' @param data data
|
|
|
|
#' @param outcome.str name of outcome variable
|
|
|
|
#' @param fun.descr Description of chosen function matching description in
|
|
|
|
#' "supported_functions()"
|
|
|
|
#' @param fun name of custom function. Default is NULL.
|
|
|
|
#' @param formula.str custom formula glue string. Default is NULL.
|
|
|
|
#' @param args.list custom character string to be converted using
|
|
|
|
#' argsstring2list() or list of arguments. Default is NULL.
|
|
|
|
#' @param ... ignored
|
|
|
|
#'
|
2025-02-19 13:17:16 +01:00
|
|
|
#' @returns list
|
2025-01-17 15:59:24 +01:00
|
|
|
#' @export
|
2025-02-19 13:17:16 +01:00
|
|
|
#' @rdname regression_model
|
2025-01-17 15:59:24 +01:00
|
|
|
#'
|
|
|
|
#' @examples
|
2025-01-30 14:31:38 +01:00
|
|
|
#' \dontrun{
|
2025-01-17 15:59:24 +01:00
|
|
|
#' gtsummary::trial |>
|
|
|
|
#' regression_model(
|
|
|
|
#' outcome.str = "age",
|
|
|
|
#' fun = "stats::lm",
|
|
|
|
#' formula.str = "{outcome.str}~.",
|
|
|
|
#' args.list = NULL
|
|
|
|
#' )
|
|
|
|
#' ls <- regression_model_list(data = default_parsing(mtcars), outcome.str = "cyl", fun.descr = "Ordinal logistic regression model")
|
|
|
|
#' summary(ls$model)
|
2025-04-09 12:31:08 +02:00
|
|
|
#' ls <- regression_model_list(data = default_parsing(mtcars), outcome.str = "mpg", fun.descr = "Linear regression model")
|
2025-01-30 14:31:38 +01:00
|
|
|
#'
|
|
|
|
#' ls <- regression_model_list(data = default_parsing(gtsummary::trial), outcome.str = "trt", fun.descr = "Logistic regression model")
|
|
|
|
#' tbl <- gtsummary::tbl_regression(ls$model, exponentiate = TRUE)
|
|
|
|
#' m <- gtsummary::trial |>
|
|
|
|
#' default_parsing() |>
|
|
|
|
#' regression_model(
|
|
|
|
#' outcome.str = "trt",
|
|
|
|
#' fun = "stats::glm",
|
|
|
|
#' formula.str = "{outcome.str}~.",
|
2025-04-09 12:31:08 +02:00
|
|
|
#' args.list = list(family = "binomial")
|
2025-01-30 14:31:38 +01:00
|
|
|
#' )
|
|
|
|
#' tbl2 <- gtsummary::tbl_regression(m, exponentiate = TRUE)
|
|
|
|
#' broom::tidy(ls$model)
|
|
|
|
#' broom::tidy(m)
|
|
|
|
#' }
|
2025-01-17 15:59:24 +01:00
|
|
|
regression_model_list <- function(data,
|
|
|
|
outcome.str,
|
|
|
|
fun.descr,
|
|
|
|
fun = NULL,
|
|
|
|
formula.str = NULL,
|
|
|
|
args.list = NULL,
|
|
|
|
vars = NULL,
|
|
|
|
...) {
|
|
|
|
options <- get_fun_options(fun.descr) |>
|
|
|
|
(\(.x){
|
|
|
|
.x[[1]]
|
|
|
|
})()
|
|
|
|
|
|
|
|
## Custom, specific fun, args and formula options
|
|
|
|
|
|
|
|
if (is.null(formula.str)) {
|
|
|
|
formula.str.c <- options$formula.str
|
|
|
|
} else {
|
|
|
|
formula.str.c <- formula.str
|
|
|
|
}
|
|
|
|
|
|
|
|
if (is.null(fun)) {
|
|
|
|
fun.c <- options$fun
|
|
|
|
} else {
|
|
|
|
fun.c <- fun
|
|
|
|
}
|
|
|
|
|
|
|
|
if (is.null(args.list)) {
|
|
|
|
args.list.c <- options$args.list
|
|
|
|
} else {
|
|
|
|
args.list.c <- args.list
|
|
|
|
}
|
|
|
|
|
|
|
|
if (is.character(args.list.c)) args.list.c <- argsstring2list(args.list.c)
|
|
|
|
|
|
|
|
## Handling vars to print code
|
|
|
|
|
|
|
|
if (is.null(vars)) {
|
|
|
|
vars <- names(data)[!names(data) %in% outcome.str]
|
|
|
|
} else {
|
|
|
|
if (outcome.str %in% vars) {
|
|
|
|
vars <- vars[!vars %in% outcome.str]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2025-04-09 12:31:08 +02:00
|
|
|
parameters <- list(
|
2025-04-11 13:23:18 +02:00
|
|
|
data = data,
|
2025-04-09 12:31:08 +02:00
|
|
|
fun = fun.c,
|
2025-04-11 13:23:18 +02:00
|
|
|
formula.str = glue::glue(formula.str.c),
|
2025-04-09 12:31:08 +02:00
|
|
|
args.list = args.list.c
|
|
|
|
)
|
|
|
|
|
2025-01-17 15:59:24 +01:00
|
|
|
model <- do.call(
|
|
|
|
regression_model,
|
2025-04-11 13:23:18 +02:00
|
|
|
parameters
|
2025-01-17 15:59:24 +01:00
|
|
|
)
|
|
|
|
|
2025-04-11 13:23:18 +02:00
|
|
|
parameters_code <- Filter(
|
|
|
|
length,
|
|
|
|
modifyList(parameters, list(
|
2025-04-14 10:10:33 +02:00
|
|
|
data = as.symbol("df"),
|
2025-04-11 13:23:18 +02:00
|
|
|
formula.str = as.character(glue::glue(formula.str.c)),
|
|
|
|
outcome.str = NULL
|
|
|
|
# args.list = NULL,
|
2025-04-14 10:10:33 +02:00
|
|
|
))
|
|
|
|
)
|
2025-04-09 12:31:08 +02:00
|
|
|
|
2025-04-11 13:23:18 +02:00
|
|
|
## The easiest solution was to simple paste as a string
|
|
|
|
## The rlang::call2 or rlang::expr functions would probably work as well
|
|
|
|
# code <- glue::glue("FreesearchR::regression_model({parameters_print}, args.list=list({list2str(args.list.c)}))", .null = "NULL")
|
2025-04-14 10:10:33 +02:00
|
|
|
code <- rlang::call2("regression_model", !!!parameters_code, .ns = "FreesearchR")
|
2025-01-17 15:59:24 +01:00
|
|
|
|
|
|
|
list(
|
|
|
|
options = options,
|
|
|
|
model = model,
|
2025-04-11 13:23:18 +02:00
|
|
|
code = expression_string(code)
|
2025-01-17 15:59:24 +01:00
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
list2str <- function(data) {
|
|
|
|
out <- purrr::imap(data, \(.x, .i){
|
|
|
|
if (is.logical(.x)) {
|
|
|
|
arg <- .x
|
|
|
|
} else {
|
|
|
|
arg <- glue::glue("'{.x}'")
|
|
|
|
}
|
|
|
|
glue::glue("{.i} = {arg}")
|
|
|
|
}) |>
|
|
|
|
unlist() |>
|
|
|
|
paste(collapse = (", "))
|
|
|
|
|
2025-01-30 14:31:38 +01:00
|
|
|
if (out == "") {
|
2025-01-17 15:59:24 +01:00
|
|
|
return(NULL)
|
|
|
|
} else {
|
|
|
|
out
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2025-01-30 14:31:38 +01:00
|
|
|
#' @returns list
|
2025-01-17 15:59:24 +01:00
|
|
|
#' @export
|
2025-02-19 13:17:16 +01:00
|
|
|
#' @rdname regression_model
|
2025-01-17 15:59:24 +01:00
|
|
|
#'
|
|
|
|
#' @examples
|
2025-01-30 14:31:38 +01:00
|
|
|
#' \dontrun{
|
2025-03-20 11:46:02 +01:00
|
|
|
#' gtsummary::trial |>
|
|
|
|
#' regression_model_uv(
|
|
|
|
#' outcome.str = "trt",
|
|
|
|
#' fun = "stats::glm",
|
|
|
|
#' args.list = list(family = stats::binomial(link = "logit"))
|
|
|
|
#' ) |>
|
|
|
|
#' lapply(broom::tidy) |>
|
|
|
|
#' dplyr::bind_rows()
|
2025-01-17 15:59:24 +01:00
|
|
|
#' ms <- regression_model_uv_list(data = default_parsing(mtcars), outcome.str = "mpg", fun.descr = "Linear regression model")
|
2025-04-09 12:31:08 +02:00
|
|
|
#' ms$code
|
2025-04-11 13:23:18 +02:00
|
|
|
#' ls <- regression_model_uv_list(data = default_parsing(mtcars), outcome.str = "am", fun.descr = "Logistic regression model")
|
|
|
|
#' ls$code
|
2025-03-20 11:46:02 +01:00
|
|
|
#' lapply(ms$model, broom::tidy) |> dplyr::bind_rows()
|
2025-01-30 14:31:38 +01:00
|
|
|
#' }
|
2025-01-17 15:59:24 +01:00
|
|
|
regression_model_uv_list <- function(data,
|
|
|
|
outcome.str,
|
|
|
|
fun.descr,
|
|
|
|
fun = NULL,
|
|
|
|
formula.str = NULL,
|
|
|
|
args.list = NULL,
|
|
|
|
vars = NULL,
|
|
|
|
...) {
|
|
|
|
options <- get_fun_options(fun.descr) |>
|
|
|
|
(\(.x){
|
|
|
|
.x[[1]]
|
|
|
|
})()
|
|
|
|
|
|
|
|
## Custom, specific fun, args and formula options
|
|
|
|
|
|
|
|
if (is.null(formula.str)) {
|
|
|
|
formula.str.c <- options$formula.str
|
|
|
|
} else {
|
|
|
|
formula.str.c <- formula.str
|
|
|
|
}
|
|
|
|
|
|
|
|
if (is.null(fun)) {
|
|
|
|
fun.c <- options$fun
|
|
|
|
} else {
|
|
|
|
fun.c <- fun
|
|
|
|
}
|
|
|
|
|
|
|
|
if (is.null(args.list)) {
|
|
|
|
args.list.c <- options$args.list
|
|
|
|
} else {
|
|
|
|
args.list.c <- args.list
|
|
|
|
}
|
|
|
|
|
|
|
|
if (is.character(args.list.c)) args.list.c <- argsstring2list(args.list.c)
|
|
|
|
|
|
|
|
## Handling vars to print code
|
|
|
|
|
|
|
|
if (is.null(vars)) {
|
|
|
|
vars <- names(data)[!names(data) %in% outcome.str]
|
|
|
|
} else {
|
|
|
|
if (outcome.str %in% vars) {
|
|
|
|
vars <- vars[!vars %in% outcome.str]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
# assertthat::assert_that("character" %in% class(fun),
|
|
|
|
# msg = "Please provide the function as a character vector."
|
|
|
|
# )
|
|
|
|
|
|
|
|
# model <- do.call(
|
|
|
|
# regression_model,
|
|
|
|
# c(
|
|
|
|
# list(data = data),
|
|
|
|
# list(outcome.str = outcome.str),
|
|
|
|
# list(fun = fun.c),
|
|
|
|
# list(formula.str = formula.str.c),
|
|
|
|
# args.list.c
|
|
|
|
# )
|
|
|
|
# )
|
|
|
|
|
|
|
|
model <- vars |>
|
|
|
|
lapply(\(.var){
|
2025-04-11 13:23:18 +02:00
|
|
|
parameters <-
|
|
|
|
list(
|
|
|
|
fun = fun.c,
|
|
|
|
data = data[c(outcome.str, .var)],
|
|
|
|
formula.str = as.character(glue::glue(gsub("vars", ".var", formula.str.c))),
|
|
|
|
args.list = args.list.c
|
|
|
|
)
|
|
|
|
|
2025-04-09 12:31:08 +02:00
|
|
|
out <- do.call(
|
2025-01-17 15:59:24 +01:00
|
|
|
regression_model,
|
2025-04-11 13:23:18 +02:00
|
|
|
parameters
|
2025-01-17 15:59:24 +01:00
|
|
|
)
|
|
|
|
|
2025-04-09 12:31:08 +02:00
|
|
|
## This is the very long version
|
|
|
|
## Handles deeply nested glue string
|
2025-04-11 13:23:18 +02:00
|
|
|
# code <- glue::glue("FreesearchR::regression_model(data=df,{list2str(modifyList(parameters,list(data=NULL,args.list=list2str(args.list.c))))})")
|
2025-04-14 10:10:33 +02:00
|
|
|
code <- rlang::call2("regression_model", !!!modifyList(parameters, list(data = as.symbol("df"), args.list = args.list.c)), .ns = "FreesearchR")
|
2025-04-09 12:31:08 +02:00
|
|
|
REDCapCAST::set_attr(out, code, "code")
|
|
|
|
})
|
2025-01-17 15:59:24 +01:00
|
|
|
|
2025-04-09 12:31:08 +02:00
|
|
|
code <- model |>
|
|
|
|
lapply(\(.x)REDCapCAST::get_attr(.x, "code")) |>
|
2025-04-11 13:23:18 +02:00
|
|
|
lapply(expression_string) |>
|
|
|
|
pipe_string(collapse = ",\n") |>
|
2025-04-09 12:31:08 +02:00
|
|
|
(\(.x){
|
2025-04-11 13:23:18 +02:00
|
|
|
paste0("list(\n", .x, ")")
|
2025-04-09 12:31:08 +02:00
|
|
|
})()
|
2025-01-17 15:59:24 +01:00
|
|
|
|
|
|
|
|
|
|
|
list(
|
|
|
|
options = options,
|
|
|
|
model = model,
|
|
|
|
code = code
|
|
|
|
)
|
|
|
|
}
|
2025-04-11 13:23:18 +02:00
|
|
|
|
|
|
|
|
|
|
|
# regression_model(mtcars, fun = "stats::lm", formula.str = "mpg~cyl")
|