From aaceb55fe80b9310824ad4df9eb5f8549ea34c27 Mon Sep 17 00:00:00 2001 From: Andreas Gammelgaard Damsbo Date: Tue, 22 Apr 2025 09:58:34 +0200 Subject: [PATCH] revised data types --- R/app_version.R | 2 +- R/data-summary.R | 7 +++--- R/data_plots.R | 36 ++++++++++++++-------------- R/helpers.R | 56 ++++++++++++++++++++++++++++++++++++-------- R/regression_model.R | 21 ++++++++++++----- 5 files changed, 84 insertions(+), 38 deletions(-) diff --git a/R/app_version.R b/R/app_version.R index e562fc1..d49df0c 100644 --- a/R/app_version.R +++ b/R/app_version.R @@ -1 +1 @@ -app_version <- function()'Version: 25.4.3.250415_1627' +app_version <- function()'Version: 25.4.3.250422' diff --git a/R/data-summary.R b/R/data-summary.R index e70eb50..f0e6be3 100644 --- a/R/data-summary.R +++ b/R/data-summary.R @@ -155,8 +155,8 @@ overview_vars <- function(data) { data <- as.data.frame(data) dplyr::tibble( - icon = data_type(data), - type = icon, + icon = get_classes(data), + class = icon, name = names(data), n_missing = unname(colSums(is.na(data))), p_complete = 1 - n_missing / nrow(data), @@ -189,6 +189,7 @@ create_overview_datagrid <- function(data,...) { std_names <- c( "Name" = "name", "Icon" = "icon", + "Class" = "class", "Type" = "type", "Missings" = "n_missing", "Complete" = "p_complete", @@ -235,7 +236,7 @@ create_overview_datagrid <- function(data,...) { grid <- add_class_icon( grid = grid, column = "icon", - fun = type_icons + fun = class_icons ) grid <- toastui::grid_format( diff --git a/R/data_plots.R b/R/data_plots.R index 783a8d4..55856ca 100644 --- a/R/data_plots.R +++ b/R/data_plots.R @@ -408,7 +408,7 @@ all_but <- function(data, ...) { #' #' @examples #' default_parsing(mtcars) |> subset_types("ordinal") -#' default_parsing(mtcars) |> subset_types(c("dichotomous", "ordinal", "categorical")) +#' default_parsing(mtcars) |> subset_types(c("dichotomous", "categorical")) #' #' default_parsing(mtcars) |> subset_types("factor",class) subset_types <- function(data, types, type.fun = data_type) { data[sapply(data, type.fun) %in% types] @@ -443,58 +443,58 @@ supported_plots <- function() { fun = "plot_hbars", descr = "Stacked horizontal bars", note = "A classical way of visualising the distribution of an ordinal scale like the modified Ranking Scale and known as Grotta bars", - primary.type = c("dichotomous", "ordinal", "categorical"), - secondary.type = c("dichotomous", "ordinal", "categorical"), + primary.type = c("dichotomous", "categorical"), + secondary.type = c("dichotomous", "categorical"), secondary.multi = FALSE, - tertiary.type = c("dichotomous", "ordinal", "categorical"), + tertiary.type = c("dichotomous", "categorical"), secondary.extra = "none" ), plot_violin = list( fun = "plot_violin", descr = "Violin plot", note = "A modern alternative to the classic boxplot to visualise data distribution", - primary.type = c("datatime", "continuous", "dichotomous", "ordinal", "categorical"), - secondary.type = c("dichotomous", "ordinal", "categorical"), + primary.type = c("datatime", "continuous", "dichotomous", "categorical"), + secondary.type = c("dichotomous", "categorical"), secondary.multi = FALSE, secondary.extra = "none", - tertiary.type = c("dichotomous", "ordinal", "categorical") + tertiary.type = c("dichotomous", "categorical") ), # plot_ridge = list( # descr = "Ridge plot", # note = "An alternative option to visualise data distribution", # primary.type = "continuous", - # secondary.type = c("dichotomous", "ordinal" ,"categorical"), - # tertiary.type = c("dichotomous", "ordinal" ,"categorical"), + # secondary.type = c("dichotomous" ,"categorical"), + # tertiary.type = c("dichotomous" ,"categorical"), # secondary.extra = NULL # ), plot_sankey = list( fun = "plot_sankey", descr = "Sankey plot", note = "A way of visualising change between groups", - primary.type = c("dichotomous", "ordinal", "categorical"), - secondary.type = c("dichotomous", "ordinal", "categorical"), + primary.type = c("dichotomous", "categorical"), + secondary.type = c("dichotomous", "categorical"), secondary.multi = FALSE, secondary.extra = NULL, - tertiary.type = c("dichotomous", "ordinal", "categorical") + tertiary.type = c("dichotomous", "categorical") ), plot_scatter = list( fun = "plot_scatter", descr = "Scatter plot", note = "A classic way of showing the association between to variables", primary.type = c("datatime", "continuous"), - secondary.type = c("datatime", "continuous", "ordinal", "categorical"), + secondary.type = c("datatime", "continuous", "categorical"), secondary.multi = FALSE, - tertiary.type = c("dichotomous", "ordinal", "categorical"), + tertiary.type = c("dichotomous", "categorical"), secondary.extra = NULL ), plot_box = list( fun = "plot_box", descr = "Box plot", note = "A classic way to plot data distribution by groups", - primary.type = c("datatime", "continuous", "dichotomous", "ordinal", "categorical"), - secondary.type = c("dichotomous", "ordinal", "categorical"), + primary.type = c("datatime", "continuous", "dichotomous", "categorical"), + secondary.type = c("dichotomous", "categorical"), secondary.multi = FALSE, - tertiary.type = c("dichotomous", "ordinal", "categorical"), + tertiary.type = c("dichotomous", "categorical"), secondary.extra = "none" ), plot_euler = list( @@ -505,7 +505,7 @@ supported_plots <- function() { secondary.type = "dichotomous", secondary.multi = TRUE, secondary.max = 4, - tertiary.type = c("dichotomous", "ordinal", "categorical"), + tertiary.type = c("dichotomous", "categorical"), secondary.extra = NULL ) ) diff --git a/R/helpers.R b/R/helpers.R index d906c76..aed6b82 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -129,7 +129,7 @@ argsstring2list <- function(string) { #' @export #' #' @examples -#' factorize(mtcars,names(mtcars)) +#' factorize(mtcars, names(mtcars)) factorize <- function(data, vars) { if (!is.null(vars)) { data |> @@ -258,21 +258,27 @@ default_parsing <- function(data) { #' @export #' #' @examples -#' ds <- mtcars |> lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |> dplyr::bind_cols() +#' ds <- mtcars |> +#' lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |> +#' dplyr::bind_cols() #' ds |> #' remove_empty_attr() |> #' str() -#' mtcars |> lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |> remove_empty_attr() |> +#' mtcars |> +#' lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |> +#' remove_empty_attr() |> #' str() #' remove_empty_attr <- function(data) { - if (is.data.frame(data)){ - data |> lapply(remove_empty_attr) |> dplyr::bind_cols() - } else if (is.list(data)){ + if (is.data.frame(data)) { + data |> + lapply(remove_empty_attr) |> + dplyr::bind_cols() + } else if (is.list(data)) { data |> lapply(remove_empty_attr) - }else{ - attributes(data)[is.na(attributes(data))] <- NULL - data + } else { + attributes(data)[is.na(attributes(data))] <- NULL + data } } @@ -387,7 +393,7 @@ data_description <- function(data, data_text = "Data") { #' } data_type_filter <- function(data, type) { ## Please ensure to only provide recognised data types - assertthat::assert_that(all(type %in% data_types())) + assertthat::assert_that(all(type %in% names(data_types()))) if (!is.null(type)) { out <- data[data_type(data) %in% type] @@ -616,3 +622,33 @@ append_column <- function(data, column, name, index = "right") { ) |> dplyr::bind_cols() } + + + +#' Test if element is identical to the previous +#' +#' @param data data. vector, data.frame or list +#' @param no.name logical to remove names attribute before testing +#' +#' @returns logical vector +#' @export +#' +#' @examples +#' c(1, 1, 2, 3, 3, 2, 4, 4) |> is_identical_to_previous() +#' mtcars[c(1, 1, 2, 3, 3, 2, 4, 4)] |> is_identical_to_previous() +#' list(1, 1, list(2), "A", "a", "a") |> is_identical_to_previous() +is_identical_to_previous <- function(data, no.name = TRUE) { + if (is.data.frame(data)) { + lagged <- data.frame(FALSE, data[seq_len(length(data) - 1)]) + } else { + lagged <- c(FALSE, data[seq_len(length(data) - 1)]) + } + + vapply(seq_len(length(data)), \(.x){ + if (isTRUE(no.name)) { + identical(unname(lagged[.x]), unname(data[.x])) + } else { + identical(lagged[.x], data[.x]) + } + }, FUN.VALUE = logical(1)) +} diff --git a/R/regression_model.R b/R/regression_model.R index 4ad0d59..252cbf1 100644 --- a/R/regression_model.R +++ b/R/regression_model.R @@ -279,11 +279,11 @@ data_type <- function(data) { if (identical("logical", cl_d) | length(unique(data)) == 2) { out <- "dichotomous" } else { - if (is.ordered(data)) { - out <- "ordinal" - } else { + # if (is.ordered(data)) { + # out <- "ordinal" + # } else { out <- "categorical" - } + # } } } else if (identical(cl_d, "character")) { out <- "text" @@ -310,7 +310,16 @@ data_type <- function(data) { #' @examples #' data_types() data_types <- function() { - c("dichotomous", "ordinal", "categorical", "datatime", "continuous", "text", "empty", "monotone", "unknown") + list( + "empty" = list(descr="Variable of all NAs",classes="Any class"), + "monotone" = list(descr="Variable with only one unique value",classes="Any class"), + "dichotomous" = list(descr="Variable with only two unique values",classes="Any class"), + "categorical"= list(descr="Factor variable",classes="factor (ordered or unordered)"), + "text"= list(descr="Character variable",classes="character"), + "datetime"= list(descr="Variable of time, date or datetime values",classes="hms, Date, POSIXct and POSIXt"), + "continuous"= list(descr="Numeric variable",classes="numeric, integer or double"), + "unknown"= list(descr="Anything not falling within the previous",classes="Any other class") + ) } @@ -351,7 +360,7 @@ supported_functions <- function() { polr = list( descr = "Ordinal logistic regression model", design = "cross-sectional", - out.type = c("ordinal", "categorical"), + out.type = c("categorical"), fun = "MASS::polr", args.list = list( Hess = TRUE,