variable type filter

2026-06-19 04:27:30 +02:00 · 2025-04-14 10:10:33 +02:00 · 2025-04-14 10:10:33 +02:00 · 9b966e9b9c
commit 9b966e9b9c
parent 7d9e5a8f00
12 changed files with 308 additions and 757 deletions
--- a/R/app_version.R
+++ b/R/app_version.R
@ -1 +1 @@
-app_version <- function()'Version: 25.4.1.250411_1313'
+app_version <- function()'Version: 25.4.2.250414_1007'
--- a/R/data-summary.R
+++ b/R/data-summary.R
@ -156,7 +156,7 @@ overview_vars <- function(data) {

  dplyr::tibble(
    class = get_classes(data),
-    type = get_classes(data),
+    type = data_type(data),
    name = names(data),
    n_missing = unname(colSums(is.na(data))),
    p_complete = 1 - n_missing / nrow(data),
--- a/R/data_plots.R
+++ b/R/data_plots.R
@ -23,6 +23,7 @@ data_visuals_ui <- function(id, tab_title = "Plots", ...) {
          icon = bsicons::bs_icon("graph-up"),
          shiny::uiOutput(outputId = ns("primary")),
          shiny::helpText('Only non-text variables are available for plotting. Go the "Data" to reclass data to plot.'),
+          shiny::tags$br(),
          shiny::uiOutput(outputId = ns("type")),
          shiny::uiOutput(outputId = ns("secondary")),
          shiny::uiOutput(outputId = ns("tertiary")),
@ -459,7 +460,7 @@ supported_plots <- function() {
      fun = "plot_violin",
      descr = "Violin plot",
      note = "A modern alternative to the classic boxplot to visualise data distribution",
-      primary.type = c("continuous", "dichotomous", "ordinal" ,"categorical"),
+      primary.type = c("datatime","continuous", "dichotomous", "ordinal" ,"categorical"),
      secondary.type = c("dichotomous", "ordinal" ,"categorical"),
      secondary.multi = FALSE,
      secondary.extra = "none",
@ -487,8 +488,8 @@ supported_plots <- function() {
      fun = "plot_scatter",
      descr = "Scatter plot",
      note = "A classic way of showing the association between to variables",
-      primary.type = "continuous",
-      secondary.type = c("continuous", "ordinal" ,"categorical"),
+      primary.type = c("datatime","continuous"),
+      secondary.type = c("datatime","continuous", "ordinal" ,"categorical"),
      secondary.multi = FALSE,
      tertiary.type = c("dichotomous", "ordinal" ,"categorical"),
      secondary.extra = NULL
@ -497,7 +498,7 @@ supported_plots <- function() {
      fun = "plot_box",
      descr = "Box plot",
      note = "A classic way to plot data distribution by groups",
-      primary.type = c("continuous", "dichotomous", "ordinal" ,"categorical"),
+      primary.type = c("datatime","continuous", "dichotomous", "ordinal" ,"categorical"),
      secondary.type = c("dichotomous", "ordinal" ,"categorical"),
      secondary.multi = FALSE,
      tertiary.type = c("dichotomous", "ordinal" ,"categorical"),
--- a/R/helpers.R
+++ b/R/helpers.R
@ -340,7 +340,7 @@ missing_fraction <- function(data) {
 #'   sample(1:8, 20, TRUE),
 #'   sample(c(1:8, NA), 20, TRUE)
 #' ) |> data_description()
-data_description <- function(data) {
+data_description <- function(data, data_text = "Data") {
  data <- if (shiny::is.reactive(data)) data() else data

  n <- nrow(data)
@ -349,7 +349,8 @@ data_description <- function(data) {
  p_complete <- n_complete / n

  sprintf(
-    i18n("Data has %s observations and %s variables, with %s (%s%%) complete cases."),
+    i18n("%s has %s observations and %s variables, with %s (%s%%) complete cases."),
+    data_text,
    n,
    n_var,
    n_complete,
@ -357,6 +358,32 @@ data_description <- function(data) {
  )
 }

+
+#' Filter function to filter data set by variable type
+#'
+#' @param data data frame
+#' @param type vector of data types (recognised: data_types)
+#'
+#' @returns data.frame
+#' @export
+#'
+#' @examples
+#' default_parsing(mtcars) |> data_type_filter(type=c("categorical","continuous")) |> attributes()
+#' \dontrun{
+#' default_parsing(mtcars) |> data_type_filter(type=c("test","categorical","continuous"))
+#' }
+data_type_filter <- function(data,type){
+  ## Please ensure to only provide recognised data types
+  assertthat::assert_that(all(type %in% data_types()))
+
+  out <- data[data_type(data) %in% type]
+  code <- rlang::call2("data_type_filter",!!!list(type=type),.ns = "FreesearchR")
+  if (!is.null(code)){
+    attr(out, "code") <- code
+  }
+  out
+}
+
 #' Drop-in replacement for the base::sort_by with option to remove NAs
 #'
 #' @param x x
--- a/R/regression_model.R
+++ b/R/regression_model.R
@ -242,9 +242,13 @@ regression_model_uv <- function(data,

 ### HELPERS

-#' Data type assessment
+#' Data type assessment.
 #'
-#' @param data data
+#' @description
+#' These are more overall than the native typeof. This is used to assess a more
+#' meaningful "clinical" data type.
+#'
+#' @param data vector or data.frame. if data frame, each column is evaluated.
 #'
 #' @returns outcome type
 #' @export
@ -253,39 +257,60 @@ regression_model_uv <- function(data,
 #' mtcars |>
 #'   default_parsing() |>
 #'   lapply(data_type)
+#' mtcars |>
+#'   default_parsing() |>
+#'   data_type()
 #' c(1, 2) |> data_type()
 #' 1 |> data_type()
 #' c(rep(NA, 10)) |> data_type()
 #' sample(1:100, 50) |> data_type()
 #' factor(letters[1:20]) |> data_type()
+#' as.Date(1:20) |> data_type()
 data_type <- function(data) {
-  cl_d <- class(data)
-  if (all(is.na(data))) {
-    out <- "empty"
-  } else if (length(unique(data)) < 2) {
-    out <- "monotone"
-  } else if (any(c("factor", "logical") %in% cl_d) | length(unique(data)) == 2) {
-    if (identical("logical", cl_d) | length(unique(data)) == 2) {
-      out <- "dichotomous"
-    } else {
-      if (is.ordered(data)) {
-        out <- "ordinal"
-      } else {
-        out <- "categorical"
-      }
-    }
-  } else if (identical(cl_d, "character")) {
-    out <- "text"
-  } else if (!length(unique(data)) == 2) {
-    ## Previously had all thinkable classes
-    ## Now just assumes the class has not been defined above
-    ## any(c("numeric", "integer", "hms", "Date", "timediff") %in% cl_d) &
-    out <- "continuous"
+  if (is.data.frame(data)) {
+    sapply(data, data_type)
  } else {
-    out <- "unknown"
-  }
+    cl_d <- class(data)
+    if (all(is.na(data))) {
+      out <- "empty"
+    } else if (length(unique(data)) < 2) {
+      out <- "monotone"
+    } else if (any(c("factor", "logical") %in% cl_d) | length(unique(data)) == 2) {
+      if (identical("logical", cl_d) | length(unique(data)) == 2) {
+        out <- "dichotomous"
+      } else {
+        if (is.ordered(data)) {
+          out <- "ordinal"
+        } else {
+          out <- "categorical"
+        }
+      }
+    } else if (identical(cl_d, "character")) {
+      out <- "text"
+    } else if (any(c("hms", "Date", "POSIXct", "POSIXt") %in% cl_d)) {
+      out <- "datetime"
+    } else if (!length(unique(data)) == 2) {
+      ## Previously had all thinkable classes
+      ## Now just assumes the class has not been defined above
+      ## any(c("numeric", "integer", "hms", "Date", "timediff") %in% cl_d) &
+      out <- "continuous"
+    } else {
+      out <- "unknown"
+    }

-  out
+    out
+  }
+}
+
+#' Recognised data types from data_type
+#'
+#' @returns vector
+#' @export
+#'
+#' @examples
+#' data_types()
+data_types <- function() {
+  c("dichotomous", "ordinal", "categorical", "datatime", "continuous", "text", "empty", "monotone", "unknown")
 }


@ -525,17 +550,17 @@ regression_model_list <- function(data,
  parameters_code <- Filter(
    length,
    modifyList(parameters, list(
-      data=as.symbol("df"),
+      data = as.symbol("df"),
      formula.str = as.character(glue::glue(formula.str.c)),
      outcome.str = NULL
      # args.list = NULL,
-    )
-  ))
+    ))
+  )

  ## The easiest solution was to simple paste as a string
  ## The rlang::call2 or rlang::expr functions would probably work as well
  # code <- glue::glue("FreesearchR::regression_model({parameters_print}, args.list=list({list2str(args.list.c)}))", .null = "NULL")
-  code <- rlang::call2("regression_model",!!!parameters_code,.ns = "FreesearchR")
+  code <- rlang::call2("regression_model", !!!parameters_code, .ns = "FreesearchR")

  list(
    options = options,
@ -646,7 +671,6 @@ regression_model_uv_list <- function(data,

  model <- vars |>
    lapply(\(.var){
-
      parameters <-
        list(
          fun = fun.c,
@ -663,7 +687,7 @@ regression_model_uv_list <- function(data,
      ## This is the very long version
      ## Handles deeply nested glue string
      # code <- glue::glue("FreesearchR::regression_model(data=df,{list2str(modifyList(parameters,list(data=NULL,args.list=list2str(args.list.c))))})")
-      code <- rlang::call2("regression_model",!!!modifyList(parameters,list(data=as.symbol("df"),args.list=args.list.c)),.ns = "FreesearchR")
+      code <- rlang::call2("regression_model", !!!modifyList(parameters, list(data = as.symbol("df"), args.list = args.list.c)), .ns = "FreesearchR")
      REDCapCAST::set_attr(out, code, "code")
    })