revised data types

This commit is contained in:
Andreas Gammelgaard Damsbo 2025-04-22 09:58:34 +02:00
parent 07e94f4401
commit aaceb55fe8
No known key found for this signature in database
5 changed files with 84 additions and 38 deletions

View file

@ -1 +1 @@
app_version <- function()'Version: 25.4.3.250415_1627' app_version <- function()'Version: 25.4.3.250422'

View file

@ -155,8 +155,8 @@ overview_vars <- function(data) {
data <- as.data.frame(data) data <- as.data.frame(data)
dplyr::tibble( dplyr::tibble(
icon = data_type(data), icon = get_classes(data),
type = icon, class = icon,
name = names(data), name = names(data),
n_missing = unname(colSums(is.na(data))), n_missing = unname(colSums(is.na(data))),
p_complete = 1 - n_missing / nrow(data), p_complete = 1 - n_missing / nrow(data),
@ -189,6 +189,7 @@ create_overview_datagrid <- function(data,...) {
std_names <- c( std_names <- c(
"Name" = "name", "Name" = "name",
"Icon" = "icon", "Icon" = "icon",
"Class" = "class",
"Type" = "type", "Type" = "type",
"Missings" = "n_missing", "Missings" = "n_missing",
"Complete" = "p_complete", "Complete" = "p_complete",
@ -235,7 +236,7 @@ create_overview_datagrid <- function(data,...) {
grid <- add_class_icon( grid <- add_class_icon(
grid = grid, grid = grid,
column = "icon", column = "icon",
fun = type_icons fun = class_icons
) )
grid <- toastui::grid_format( grid <- toastui::grid_format(

View file

@ -408,7 +408,7 @@ all_but <- function(data, ...) {
#' #'
#' @examples #' @examples
#' default_parsing(mtcars) |> subset_types("ordinal") #' default_parsing(mtcars) |> subset_types("ordinal")
#' default_parsing(mtcars) |> subset_types(c("dichotomous", "ordinal", "categorical")) #' default_parsing(mtcars) |> subset_types(c("dichotomous", "categorical"))
#' #' default_parsing(mtcars) |> subset_types("factor",class) #' #' default_parsing(mtcars) |> subset_types("factor",class)
subset_types <- function(data, types, type.fun = data_type) { subset_types <- function(data, types, type.fun = data_type) {
data[sapply(data, type.fun) %in% types] data[sapply(data, type.fun) %in% types]
@ -443,58 +443,58 @@ supported_plots <- function() {
fun = "plot_hbars", fun = "plot_hbars",
descr = "Stacked horizontal bars", descr = "Stacked horizontal bars",
note = "A classical way of visualising the distribution of an ordinal scale like the modified Ranking Scale and known as Grotta bars", note = "A classical way of visualising the distribution of an ordinal scale like the modified Ranking Scale and known as Grotta bars",
primary.type = c("dichotomous", "ordinal", "categorical"), primary.type = c("dichotomous", "categorical"),
secondary.type = c("dichotomous", "ordinal", "categorical"), secondary.type = c("dichotomous", "categorical"),
secondary.multi = FALSE, secondary.multi = FALSE,
tertiary.type = c("dichotomous", "ordinal", "categorical"), tertiary.type = c("dichotomous", "categorical"),
secondary.extra = "none" secondary.extra = "none"
), ),
plot_violin = list( plot_violin = list(
fun = "plot_violin", fun = "plot_violin",
descr = "Violin plot", descr = "Violin plot",
note = "A modern alternative to the classic boxplot to visualise data distribution", note = "A modern alternative to the classic boxplot to visualise data distribution",
primary.type = c("datatime", "continuous", "dichotomous", "ordinal", "categorical"), primary.type = c("datatime", "continuous", "dichotomous", "categorical"),
secondary.type = c("dichotomous", "ordinal", "categorical"), secondary.type = c("dichotomous", "categorical"),
secondary.multi = FALSE, secondary.multi = FALSE,
secondary.extra = "none", secondary.extra = "none",
tertiary.type = c("dichotomous", "ordinal", "categorical") tertiary.type = c("dichotomous", "categorical")
), ),
# plot_ridge = list( # plot_ridge = list(
# descr = "Ridge plot", # descr = "Ridge plot",
# note = "An alternative option to visualise data distribution", # note = "An alternative option to visualise data distribution",
# primary.type = "continuous", # primary.type = "continuous",
# secondary.type = c("dichotomous", "ordinal" ,"categorical"), # secondary.type = c("dichotomous" ,"categorical"),
# tertiary.type = c("dichotomous", "ordinal" ,"categorical"), # tertiary.type = c("dichotomous" ,"categorical"),
# secondary.extra = NULL # secondary.extra = NULL
# ), # ),
plot_sankey = list( plot_sankey = list(
fun = "plot_sankey", fun = "plot_sankey",
descr = "Sankey plot", descr = "Sankey plot",
note = "A way of visualising change between groups", note = "A way of visualising change between groups",
primary.type = c("dichotomous", "ordinal", "categorical"), primary.type = c("dichotomous", "categorical"),
secondary.type = c("dichotomous", "ordinal", "categorical"), secondary.type = c("dichotomous", "categorical"),
secondary.multi = FALSE, secondary.multi = FALSE,
secondary.extra = NULL, secondary.extra = NULL,
tertiary.type = c("dichotomous", "ordinal", "categorical") tertiary.type = c("dichotomous", "categorical")
), ),
plot_scatter = list( plot_scatter = list(
fun = "plot_scatter", fun = "plot_scatter",
descr = "Scatter plot", descr = "Scatter plot",
note = "A classic way of showing the association between to variables", note = "A classic way of showing the association between to variables",
primary.type = c("datatime", "continuous"), primary.type = c("datatime", "continuous"),
secondary.type = c("datatime", "continuous", "ordinal", "categorical"), secondary.type = c("datatime", "continuous", "categorical"),
secondary.multi = FALSE, secondary.multi = FALSE,
tertiary.type = c("dichotomous", "ordinal", "categorical"), tertiary.type = c("dichotomous", "categorical"),
secondary.extra = NULL secondary.extra = NULL
), ),
plot_box = list( plot_box = list(
fun = "plot_box", fun = "plot_box",
descr = "Box plot", descr = "Box plot",
note = "A classic way to plot data distribution by groups", note = "A classic way to plot data distribution by groups",
primary.type = c("datatime", "continuous", "dichotomous", "ordinal", "categorical"), primary.type = c("datatime", "continuous", "dichotomous", "categorical"),
secondary.type = c("dichotomous", "ordinal", "categorical"), secondary.type = c("dichotomous", "categorical"),
secondary.multi = FALSE, secondary.multi = FALSE,
tertiary.type = c("dichotomous", "ordinal", "categorical"), tertiary.type = c("dichotomous", "categorical"),
secondary.extra = "none" secondary.extra = "none"
), ),
plot_euler = list( plot_euler = list(
@ -505,7 +505,7 @@ supported_plots <- function() {
secondary.type = "dichotomous", secondary.type = "dichotomous",
secondary.multi = TRUE, secondary.multi = TRUE,
secondary.max = 4, secondary.max = 4,
tertiary.type = c("dichotomous", "ordinal", "categorical"), tertiary.type = c("dichotomous", "categorical"),
secondary.extra = NULL secondary.extra = NULL
) )
) )

View file

@ -129,7 +129,7 @@ argsstring2list <- function(string) {
#' @export #' @export
#' #'
#' @examples #' @examples
#' factorize(mtcars,names(mtcars)) #' factorize(mtcars, names(mtcars))
factorize <- function(data, vars) { factorize <- function(data, vars) {
if (!is.null(vars)) { if (!is.null(vars)) {
data |> data |>
@ -258,19 +258,25 @@ default_parsing <- function(data) {
#' @export #' @export
#' #'
#' @examples #' @examples
#' ds <- mtcars |> lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |> dplyr::bind_cols() #' ds <- mtcars |>
#' lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |>
#' dplyr::bind_cols()
#' ds |> #' ds |>
#' remove_empty_attr() |> #' remove_empty_attr() |>
#' str() #' str()
#' mtcars |> lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |> remove_empty_attr() |> #' mtcars |>
#' lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |>
#' remove_empty_attr() |>
#' str() #' str()
#' #'
remove_empty_attr <- function(data) { remove_empty_attr <- function(data) {
if (is.data.frame(data)){ if (is.data.frame(data)) {
data |> lapply(remove_empty_attr) |> dplyr::bind_cols() data |>
} else if (is.list(data)){ lapply(remove_empty_attr) |>
dplyr::bind_cols()
} else if (is.list(data)) {
data |> lapply(remove_empty_attr) data |> lapply(remove_empty_attr)
}else{ } else {
attributes(data)[is.na(attributes(data))] <- NULL attributes(data)[is.na(attributes(data))] <- NULL
data data
} }
@ -387,7 +393,7 @@ data_description <- function(data, data_text = "Data") {
#' } #' }
data_type_filter <- function(data, type) { data_type_filter <- function(data, type) {
## Please ensure to only provide recognised data types ## Please ensure to only provide recognised data types
assertthat::assert_that(all(type %in% data_types())) assertthat::assert_that(all(type %in% names(data_types())))
if (!is.null(type)) { if (!is.null(type)) {
out <- data[data_type(data) %in% type] out <- data[data_type(data) %in% type]
@ -616,3 +622,33 @@ append_column <- function(data, column, name, index = "right") {
) |> ) |>
dplyr::bind_cols() dplyr::bind_cols()
} }
#' Test if element is identical to the previous
#'
#' @param data data. vector, data.frame or list
#' @param no.name logical to remove names attribute before testing
#'
#' @returns logical vector
#' @export
#'
#' @examples
#' c(1, 1, 2, 3, 3, 2, 4, 4) |> is_identical_to_previous()
#' mtcars[c(1, 1, 2, 3, 3, 2, 4, 4)] |> is_identical_to_previous()
#' list(1, 1, list(2), "A", "a", "a") |> is_identical_to_previous()
is_identical_to_previous <- function(data, no.name = TRUE) {
if (is.data.frame(data)) {
lagged <- data.frame(FALSE, data[seq_len(length(data) - 1)])
} else {
lagged <- c(FALSE, data[seq_len(length(data) - 1)])
}
vapply(seq_len(length(data)), \(.x){
if (isTRUE(no.name)) {
identical(unname(lagged[.x]), unname(data[.x]))
} else {
identical(lagged[.x], data[.x])
}
}, FUN.VALUE = logical(1))
}

View file

@ -279,11 +279,11 @@ data_type <- function(data) {
if (identical("logical", cl_d) | length(unique(data)) == 2) { if (identical("logical", cl_d) | length(unique(data)) == 2) {
out <- "dichotomous" out <- "dichotomous"
} else { } else {
if (is.ordered(data)) { # if (is.ordered(data)) {
out <- "ordinal" # out <- "ordinal"
} else { # } else {
out <- "categorical" out <- "categorical"
} # }
} }
} else if (identical(cl_d, "character")) { } else if (identical(cl_d, "character")) {
out <- "text" out <- "text"
@ -310,7 +310,16 @@ data_type <- function(data) {
#' @examples #' @examples
#' data_types() #' data_types()
data_types <- function() { data_types <- function() {
c("dichotomous", "ordinal", "categorical", "datatime", "continuous", "text", "empty", "monotone", "unknown") list(
"empty" = list(descr="Variable of all NAs",classes="Any class"),
"monotone" = list(descr="Variable with only one unique value",classes="Any class"),
"dichotomous" = list(descr="Variable with only two unique values",classes="Any class"),
"categorical"= list(descr="Factor variable",classes="factor (ordered or unordered)"),
"text"= list(descr="Character variable",classes="character"),
"datetime"= list(descr="Variable of time, date or datetime values",classes="hms, Date, POSIXct and POSIXt"),
"continuous"= list(descr="Numeric variable",classes="numeric, integer or double"),
"unknown"= list(descr="Anything not falling within the previous",classes="Any other class")
)
} }
@ -351,7 +360,7 @@ supported_functions <- function() {
polr = list( polr = list(
descr = "Ordinal logistic regression model", descr = "Ordinal logistic regression model",
design = "cross-sectional", design = "cross-sectional",
out.type = c("ordinal", "categorical"), out.type = c("categorical"),
fun = "MASS::polr", fun = "MASS::polr",
args.list = list( args.list = list(
Hess = TRUE, Hess = TRUE,