From aaceb55fe80b9310824ad4df9eb5f8549ea34c27 Mon Sep 17 00:00:00 2001
From: Andreas Gammelgaard Damsbo <andreas@gdamsbo.dk>
Date: Tue, 22 Apr 2025 09:58:34 +0200
Subject: [PATCH] revised data types

---
 R/app_version.R      |  2 +-
 R/data-summary.R     |  7 +++---
 R/data_plots.R       | 36 ++++++++++++++--------------
 R/helpers.R          | 56 ++++++++++++++++++++++++++++++++++++--------
 R/regression_model.R | 21 ++++++++++++-----
 5 files changed, 84 insertions(+), 38 deletions(-)

diff --git a/R/app_version.R b/R/app_version.R
index e562fc1..d49df0c 100644
--- a/R/app_version.R
+++ b/R/app_version.R
@@ -1 +1 @@
-app_version <- function()'Version: 25.4.3.250415_1627'
+app_version <- function()'Version: 25.4.3.250422'
diff --git a/R/data-summary.R b/R/data-summary.R
index e70eb50..f0e6be3 100644
--- a/R/data-summary.R
+++ b/R/data-summary.R
@@ -155,8 +155,8 @@ overview_vars <- function(data) {
   data <- as.data.frame(data)
 
   dplyr::tibble(
-    icon = data_type(data),
-    type = icon,
+    icon = get_classes(data),
+    class = icon,
     name = names(data),
     n_missing = unname(colSums(is.na(data))),
     p_complete = 1 - n_missing / nrow(data),
@@ -189,6 +189,7 @@ create_overview_datagrid <- function(data,...) {
   std_names <- c(
     "Name" = "name",
     "Icon" = "icon",
+    "Class" = "class",
     "Type" = "type",
     "Missings" = "n_missing",
     "Complete" = "p_complete",
@@ -235,7 +236,7 @@ create_overview_datagrid <- function(data,...) {
   grid <- add_class_icon(
     grid = grid,
     column = "icon",
-    fun = type_icons
+    fun = class_icons
   )
 
   grid <- toastui::grid_format(
diff --git a/R/data_plots.R b/R/data_plots.R
index 783a8d4..55856ca 100644
--- a/R/data_plots.R
+++ b/R/data_plots.R
@@ -408,7 +408,7 @@ all_but <- function(data, ...) {
 #'
 #' @examples
 #' default_parsing(mtcars) |> subset_types("ordinal")
-#' default_parsing(mtcars) |> subset_types(c("dichotomous", "ordinal", "categorical"))
+#' default_parsing(mtcars) |> subset_types(c("dichotomous", "categorical"))
 #' #' default_parsing(mtcars) |> subset_types("factor",class)
 subset_types <- function(data, types, type.fun = data_type) {
   data[sapply(data, type.fun) %in% types]
@@ -443,58 +443,58 @@ supported_plots <- function() {
       fun = "plot_hbars",
       descr = "Stacked horizontal bars",
       note = "A classical way of visualising the distribution of an ordinal scale like the modified Ranking Scale and known as Grotta bars",
-      primary.type = c("dichotomous", "ordinal", "categorical"),
-      secondary.type = c("dichotomous", "ordinal", "categorical"),
+      primary.type = c("dichotomous", "categorical"),
+      secondary.type = c("dichotomous", "categorical"),
       secondary.multi = FALSE,
-      tertiary.type = c("dichotomous", "ordinal", "categorical"),
+      tertiary.type = c("dichotomous", "categorical"),
       secondary.extra = "none"
     ),
     plot_violin = list(
       fun = "plot_violin",
       descr = "Violin plot",
       note = "A modern alternative to the classic boxplot to visualise data distribution",
-      primary.type = c("datatime", "continuous", "dichotomous", "ordinal", "categorical"),
-      secondary.type = c("dichotomous", "ordinal", "categorical"),
+      primary.type = c("datatime", "continuous", "dichotomous", "categorical"),
+      secondary.type = c("dichotomous", "categorical"),
       secondary.multi = FALSE,
       secondary.extra = "none",
-      tertiary.type = c("dichotomous", "ordinal", "categorical")
+      tertiary.type = c("dichotomous", "categorical")
     ),
     # plot_ridge = list(
     #   descr = "Ridge plot",
     #   note = "An alternative option to visualise data distribution",
     #   primary.type = "continuous",
-    #   secondary.type = c("dichotomous", "ordinal" ,"categorical"),
-    #   tertiary.type = c("dichotomous", "ordinal" ,"categorical"),
+    #   secondary.type = c("dichotomous" ,"categorical"),
+    #   tertiary.type = c("dichotomous" ,"categorical"),
     #   secondary.extra = NULL
     # ),
     plot_sankey = list(
       fun = "plot_sankey",
       descr = "Sankey plot",
       note = "A way of visualising change between groups",
-      primary.type = c("dichotomous", "ordinal", "categorical"),
-      secondary.type = c("dichotomous", "ordinal", "categorical"),
+      primary.type = c("dichotomous", "categorical"),
+      secondary.type = c("dichotomous", "categorical"),
       secondary.multi = FALSE,
       secondary.extra = NULL,
-      tertiary.type = c("dichotomous", "ordinal", "categorical")
+      tertiary.type = c("dichotomous", "categorical")
     ),
     plot_scatter = list(
       fun = "plot_scatter",
       descr = "Scatter plot",
       note = "A classic way of showing the association between to variables",
       primary.type = c("datatime", "continuous"),
-      secondary.type = c("datatime", "continuous", "ordinal", "categorical"),
+      secondary.type = c("datatime", "continuous", "categorical"),
       secondary.multi = FALSE,
-      tertiary.type = c("dichotomous", "ordinal", "categorical"),
+      tertiary.type = c("dichotomous", "categorical"),
       secondary.extra = NULL
     ),
     plot_box = list(
       fun = "plot_box",
       descr = "Box plot",
       note = "A classic way to plot data distribution by groups",
-      primary.type = c("datatime", "continuous", "dichotomous", "ordinal", "categorical"),
-      secondary.type = c("dichotomous", "ordinal", "categorical"),
+      primary.type = c("datatime", "continuous", "dichotomous", "categorical"),
+      secondary.type = c("dichotomous", "categorical"),
       secondary.multi = FALSE,
-      tertiary.type = c("dichotomous", "ordinal", "categorical"),
+      tertiary.type = c("dichotomous", "categorical"),
       secondary.extra = "none"
     ),
     plot_euler = list(
@@ -505,7 +505,7 @@ supported_plots <- function() {
       secondary.type = "dichotomous",
       secondary.multi = TRUE,
       secondary.max = 4,
-      tertiary.type = c("dichotomous", "ordinal", "categorical"),
+      tertiary.type = c("dichotomous", "categorical"),
       secondary.extra = NULL
     )
   )
diff --git a/R/helpers.R b/R/helpers.R
index d906c76..aed6b82 100644
--- a/R/helpers.R
+++ b/R/helpers.R
@@ -129,7 +129,7 @@ argsstring2list <- function(string) {
 #' @export
 #'
 #' @examples
-#' factorize(mtcars,names(mtcars))
+#' factorize(mtcars, names(mtcars))
 factorize <- function(data, vars) {
   if (!is.null(vars)) {
     data |>
@@ -258,21 +258,27 @@ default_parsing <- function(data) {
 #' @export
 #'
 #' @examples
-#' ds <- mtcars |> lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |> dplyr::bind_cols()
+#' ds <- mtcars |>
+#'   lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |>
+#'   dplyr::bind_cols()
 #' ds |>
 #'   remove_empty_attr() |>
 #'   str()
-#'  mtcars |> lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |> remove_empty_attr() |>
+#' mtcars |>
+#'   lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |>
+#'   remove_empty_attr() |>
 #'   str()
 #'
 remove_empty_attr <- function(data) {
-  if (is.data.frame(data)){
-    data |> lapply(remove_empty_attr) |> dplyr::bind_cols()
-  } else if (is.list(data)){
+  if (is.data.frame(data)) {
+    data |>
+      lapply(remove_empty_attr) |>
+      dplyr::bind_cols()
+  } else if (is.list(data)) {
     data |> lapply(remove_empty_attr)
-  }else{
-  attributes(data)[is.na(attributes(data))] <- NULL
-  data
+  } else {
+    attributes(data)[is.na(attributes(data))] <- NULL
+    data
   }
 }
 
@@ -387,7 +393,7 @@ data_description <- function(data, data_text = "Data") {
 #' }
 data_type_filter <- function(data, type) {
   ## Please ensure to only provide recognised data types
-  assertthat::assert_that(all(type %in% data_types()))
+  assertthat::assert_that(all(type %in% names(data_types())))
 
   if (!is.null(type)) {
     out <- data[data_type(data) %in% type]
@@ -616,3 +622,33 @@ append_column <- function(data, column, name, index = "right") {
   ) |>
     dplyr::bind_cols()
 }
+
+
+
+#' Test if element is identical to the previous
+#'
+#' @param data data. vector, data.frame or list
+#' @param no.name logical to remove names attribute before testing
+#'
+#' @returns logical vector
+#' @export
+#'
+#' @examples
+#' c(1, 1, 2, 3, 3, 2, 4, 4) |> is_identical_to_previous()
+#' mtcars[c(1, 1, 2, 3, 3, 2, 4, 4)] |> is_identical_to_previous()
+#' list(1, 1, list(2), "A", "a", "a") |> is_identical_to_previous()
+is_identical_to_previous <- function(data, no.name = TRUE) {
+  if (is.data.frame(data)) {
+    lagged <- data.frame(FALSE, data[seq_len(length(data) - 1)])
+  } else {
+    lagged <- c(FALSE, data[seq_len(length(data) - 1)])
+  }
+
+  vapply(seq_len(length(data)), \(.x){
+    if (isTRUE(no.name)) {
+      identical(unname(lagged[.x]), unname(data[.x]))
+    } else {
+      identical(lagged[.x], data[.x])
+    }
+  }, FUN.VALUE = logical(1))
+}
diff --git a/R/regression_model.R b/R/regression_model.R
index 4ad0d59..252cbf1 100644
--- a/R/regression_model.R
+++ b/R/regression_model.R
@@ -279,11 +279,11 @@ data_type <- function(data) {
       if (identical("logical", cl_d) | length(unique(data)) == 2) {
         out <- "dichotomous"
       } else {
-        if (is.ordered(data)) {
-          out <- "ordinal"
-        } else {
+        # if (is.ordered(data)) {
+        #   out <- "ordinal"
+        # } else {
           out <- "categorical"
-        }
+        # }
       }
     } else if (identical(cl_d, "character")) {
       out <- "text"
@@ -310,7 +310,16 @@ data_type <- function(data) {
 #' @examples
 #' data_types()
 data_types <- function() {
-  c("dichotomous", "ordinal", "categorical", "datatime", "continuous", "text", "empty", "monotone", "unknown")
+  list(
+    "empty" = list(descr="Variable of all NAs",classes="Any class"),
+    "monotone" = list(descr="Variable with only one unique value",classes="Any class"),
+    "dichotomous" = list(descr="Variable with only two unique values",classes="Any class"),
+    "categorical"= list(descr="Factor variable",classes="factor (ordered or unordered)"),
+    "text"= list(descr="Character variable",classes="character"),
+    "datetime"= list(descr="Variable of time, date or datetime values",classes="hms, Date, POSIXct and POSIXt"),
+    "continuous"= list(descr="Numeric variable",classes="numeric, integer or double"),
+    "unknown"= list(descr="Anything not falling within the previous",classes="Any other class")
+    )
 }
 
 
@@ -351,7 +360,7 @@ supported_functions <- function() {
     polr = list(
       descr = "Ordinal logistic regression model",
       design = "cross-sectional",
-      out.type = c("ordinal", "categorical"),
+      out.type = c("categorical"),
       fun = "MASS::polr",
       args.list = list(
         Hess = TRUE,