revised data types

2026-06-19 12:37:30 +02:00 · 2025-04-22 09:58:34 +02:00 · 2025-04-22 09:58:34 +02:00 · aaceb55fe8
commit aaceb55fe8
parent 07e94f4401
5 changed files with 84 additions and 38 deletions
--- a/R/app_version.R
+++ b/R/app_version.R
@ -1 +1 @@
-app_version <- function()'Version: 25.4.3.250415_1627'
+app_version <- function()'Version: 25.4.3.250422'
--- a/R/data-summary.R
+++ b/R/data-summary.R
@ -155,8 +155,8 @@ overview_vars <- function(data) {
  data <- as.data.frame(data)

  dplyr::tibble(
-    icon = data_type(data),
-    type = icon,
+    icon = get_classes(data),
+    class = icon,
    name = names(data),
    n_missing = unname(colSums(is.na(data))),
    p_complete = 1 - n_missing / nrow(data),
@ -189,6 +189,7 @@ create_overview_datagrid <- function(data,...) {
  std_names <- c(
    "Name" = "name",
    "Icon" = "icon",
+    "Class" = "class",
    "Type" = "type",
    "Missings" = "n_missing",
    "Complete" = "p_complete",
@ -235,7 +236,7 @@ create_overview_datagrid <- function(data,...) {
  grid <- add_class_icon(
    grid = grid,
    column = "icon",
-    fun = type_icons
+    fun = class_icons
  )

  grid <- toastui::grid_format(
--- a/R/data_plots.R
+++ b/R/data_plots.R
@ -408,7 +408,7 @@ all_but <- function(data, ...) {
 #'
 #' @examples
 #' default_parsing(mtcars) |> subset_types("ordinal")
-#' default_parsing(mtcars) |> subset_types(c("dichotomous", "ordinal", "categorical"))
+#' default_parsing(mtcars) |> subset_types(c("dichotomous", "categorical"))
 #' #' default_parsing(mtcars) |> subset_types("factor",class)
 subset_types <- function(data, types, type.fun = data_type) {
  data[sapply(data, type.fun) %in% types]
@ -443,58 +443,58 @@ supported_plots <- function() {
      fun = "plot_hbars",
      descr = "Stacked horizontal bars",
      note = "A classical way of visualising the distribution of an ordinal scale like the modified Ranking Scale and known as Grotta bars",
-      primary.type = c("dichotomous", "ordinal", "categorical"),
-      secondary.type = c("dichotomous", "ordinal", "categorical"),
+      primary.type = c("dichotomous", "categorical"),
+      secondary.type = c("dichotomous", "categorical"),
      secondary.multi = FALSE,
-      tertiary.type = c("dichotomous", "ordinal", "categorical"),
+      tertiary.type = c("dichotomous", "categorical"),
      secondary.extra = "none"
    ),
    plot_violin = list(
      fun = "plot_violin",
      descr = "Violin plot",
      note = "A modern alternative to the classic boxplot to visualise data distribution",
-      primary.type = c("datatime", "continuous", "dichotomous", "ordinal", "categorical"),
-      secondary.type = c("dichotomous", "ordinal", "categorical"),
+      primary.type = c("datatime", "continuous", "dichotomous", "categorical"),
+      secondary.type = c("dichotomous", "categorical"),
      secondary.multi = FALSE,
      secondary.extra = "none",
-      tertiary.type = c("dichotomous", "ordinal", "categorical")
+      tertiary.type = c("dichotomous", "categorical")
    ),
    # plot_ridge = list(
    #   descr = "Ridge plot",
    #   note = "An alternative option to visualise data distribution",
    #   primary.type = "continuous",
-    #   secondary.type = c("dichotomous", "ordinal" ,"categorical"),
-    #   tertiary.type = c("dichotomous", "ordinal" ,"categorical"),
+    #   secondary.type = c("dichotomous" ,"categorical"),
+    #   tertiary.type = c("dichotomous" ,"categorical"),
    #   secondary.extra = NULL
    # ),
    plot_sankey = list(
      fun = "plot_sankey",
      descr = "Sankey plot",
      note = "A way of visualising change between groups",
-      primary.type = c("dichotomous", "ordinal", "categorical"),
-      secondary.type = c("dichotomous", "ordinal", "categorical"),
+      primary.type = c("dichotomous", "categorical"),
+      secondary.type = c("dichotomous", "categorical"),
      secondary.multi = FALSE,
      secondary.extra = NULL,
-      tertiary.type = c("dichotomous", "ordinal", "categorical")
+      tertiary.type = c("dichotomous", "categorical")
    ),
    plot_scatter = list(
      fun = "plot_scatter",
      descr = "Scatter plot",
      note = "A classic way of showing the association between to variables",
      primary.type = c("datatime", "continuous"),
-      secondary.type = c("datatime", "continuous", "ordinal", "categorical"),
+      secondary.type = c("datatime", "continuous", "categorical"),
      secondary.multi = FALSE,
-      tertiary.type = c("dichotomous", "ordinal", "categorical"),
+      tertiary.type = c("dichotomous", "categorical"),
      secondary.extra = NULL
    ),
    plot_box = list(
      fun = "plot_box",
      descr = "Box plot",
      note = "A classic way to plot data distribution by groups",
-      primary.type = c("datatime", "continuous", "dichotomous", "ordinal", "categorical"),
-      secondary.type = c("dichotomous", "ordinal", "categorical"),
+      primary.type = c("datatime", "continuous", "dichotomous", "categorical"),
+      secondary.type = c("dichotomous", "categorical"),
      secondary.multi = FALSE,
-      tertiary.type = c("dichotomous", "ordinal", "categorical"),
+      tertiary.type = c("dichotomous", "categorical"),
      secondary.extra = "none"
    ),
    plot_euler = list(
@ -505,7 +505,7 @@ supported_plots <- function() {
      secondary.type = "dichotomous",
      secondary.multi = TRUE,
      secondary.max = 4,
-      tertiary.type = c("dichotomous", "ordinal", "categorical"),
+      tertiary.type = c("dichotomous", "categorical"),
      secondary.extra = NULL
    )
  )
--- a/R/helpers.R
+++ b/R/helpers.R
@ -129,7 +129,7 @@ argsstring2list <- function(string) {
 #' @export
 #'
 #' @examples
-#' factorize(mtcars,names(mtcars))
+#' factorize(mtcars, names(mtcars))
 factorize <- function(data, vars) {
  if (!is.null(vars)) {
    data |>
@ -258,19 +258,25 @@ default_parsing <- function(data) {
 #' @export
 #'
 #' @examples
-#' ds <- mtcars |> lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |> dplyr::bind_cols()
+#' ds <- mtcars |>
+#'   lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |>
+#'   dplyr::bind_cols()
 #' ds |>
 #'   remove_empty_attr() |>
 #'   str()
-#'  mtcars |> lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |> remove_empty_attr() |>
+#' mtcars |>
+#'   lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |>
+#'   remove_empty_attr() |>
 #'   str()
 #'
 remove_empty_attr <- function(data) {
-  if (is.data.frame(data)){
-    data |> lapply(remove_empty_attr) |> dplyr::bind_cols()
-  } else if (is.list(data)){
+  if (is.data.frame(data)) {
+    data |>
+      lapply(remove_empty_attr) |>
+      dplyr::bind_cols()
+  } else if (is.list(data)) {
    data |> lapply(remove_empty_attr)
-  }else{
+  } else {
    attributes(data)[is.na(attributes(data))] <- NULL
    data
  }
@ -387,7 +393,7 @@ data_description <- function(data, data_text = "Data") {
 #' }
 data_type_filter <- function(data, type) {
  ## Please ensure to only provide recognised data types
-  assertthat::assert_that(all(type %in% data_types()))
+  assertthat::assert_that(all(type %in% names(data_types())))

  if (!is.null(type)) {
    out <- data[data_type(data) %in% type]
@ -616,3 +622,33 @@ append_column <- function(data, column, name, index = "right") {
  ) |>
    dplyr::bind_cols()
 }
+
+
+
+#' Test if element is identical to the previous
+#'
+#' @param data data. vector, data.frame or list
+#' @param no.name logical to remove names attribute before testing
+#'
+#' @returns logical vector
+#' @export
+#'
+#' @examples
+#' c(1, 1, 2, 3, 3, 2, 4, 4) |> is_identical_to_previous()
+#' mtcars[c(1, 1, 2, 3, 3, 2, 4, 4)] |> is_identical_to_previous()
+#' list(1, 1, list(2), "A", "a", "a") |> is_identical_to_previous()
+is_identical_to_previous <- function(data, no.name = TRUE) {
+  if (is.data.frame(data)) {
+    lagged <- data.frame(FALSE, data[seq_len(length(data) - 1)])
+  } else {
+    lagged <- c(FALSE, data[seq_len(length(data) - 1)])
+  }
+
+  vapply(seq_len(length(data)), \(.x){
+    if (isTRUE(no.name)) {
+      identical(unname(lagged[.x]), unname(data[.x]))
+    } else {
+      identical(lagged[.x], data[.x])
+    }
+  }, FUN.VALUE = logical(1))
+}
--- a/R/regression_model.R
+++ b/R/regression_model.R
@ -279,11 +279,11 @@ data_type <- function(data) {
      if (identical("logical", cl_d) | length(unique(data)) == 2) {
        out <- "dichotomous"
      } else {
-        if (is.ordered(data)) {
-          out <- "ordinal"
-        } else {
+        # if (is.ordered(data)) {
+        #   out <- "ordinal"
+        # } else {
          out <- "categorical"
-        }
+        # }
      }
    } else if (identical(cl_d, "character")) {
      out <- "text"
@ -310,7 +310,16 @@ data_type <- function(data) {
 #' @examples
 #' data_types()
 data_types <- function() {
-  c("dichotomous", "ordinal", "categorical", "datatime", "continuous", "text", "empty", "monotone", "unknown")
+  list(
+    "empty" = list(descr="Variable of all NAs",classes="Any class"),
+    "monotone" = list(descr="Variable with only one unique value",classes="Any class"),
+    "dichotomous" = list(descr="Variable with only two unique values",classes="Any class"),
+    "categorical"= list(descr="Factor variable",classes="factor (ordered or unordered)"),
+    "text"= list(descr="Character variable",classes="character"),
+    "datetime"= list(descr="Variable of time, date or datetime values",classes="hms, Date, POSIXct and POSIXt"),
+    "continuous"= list(descr="Numeric variable",classes="numeric, integer or double"),
+    "unknown"= list(descr="Anything not falling within the previous",classes="Any other class")
+    )
 }


@ -351,7 +360,7 @@ supported_functions <- function() {
    polr = list(
      descr = "Ordinal logistic regression model",
      design = "cross-sectional",
-      out.type = c("ordinal", "categorical"),
+      out.type = c("categorical"),
      fun = "MASS::polr",
      args.list = list(
        Hess = TRUE,