feat: new cut option to simplify factors to only the top N levels

2025-12-15 17:12:09 +01:00 · 2025-10-21 21:30:07 +03:00 · 2025-10-21 21:30:07 +03:00 · 0c2b061708
commit 0c2b061708
parent a06177481b
12 changed files with 337 additions and 236 deletions
--- a/3
+++ b/3
@ -99,7 +99,8 @@ Collate:
    'correlations-module.R'
    'create-column-mod.R'
    'custom_SelectInput.R'
-    'cut-variable-dates.R'
+    'cut-variable-ext.R'
+    'cut_var.R'
    'data-summary.R'
    'data_plots.R'
    'datagrid-infos-mod.R'
--- a/1
+++ b/1
@ -1,6 +1,7 @@
 # Generated by roxygen2: do not edit by hand

 S3method(cut_var,default)
+S3method(cut_var,factor)
 S3method(cut_var,hms)
 S3method(plot,tbl_regression)
 export(FreesearchR_palette)
--- a/R/cut-variable-dates.R
+++ b/R/cut-variable-dates.R
@ -1,188 +1,3 @@
-#' Extended cutting function with fall-back to the native base::cut
-#'
-#' @param x an object inheriting from class "hms"
-#' @param ... passed on
-#'
-#' @export
-#' @name cut_var
-cut_var <- function(x, ...) {
-  UseMethod("cut_var")
-}
-
-#' @export
-#' @name cut_var
-cut_var.default <- function(x, ...) {
-  base::cut(x, ...)
-}
-
-#' @name cut_var
-#'
-#' @return factor
-#' @export
-#'
-#' @examples
-#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(2)
-#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var("min")
-#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(breaks = "hour")
-#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(breaks = hms::as_hms(c("01:00:00", "03:01:20", "9:20:20")))
-#' d_t <- readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA))
-#' f <- d_t |> cut_var(2)
-#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA)) |> cut_var(breaks = lubridate::as_datetime(c(hms::as_hms(levels(f)), hms::as_hms(max(d_t, na.rm = TRUE) + 1))), right = FALSE)
-cut_var.hms <- function(x, breaks, ...) {
-  ## as_hms keeps returning warnings on tz(); ignored
-  suppressWarnings({
-    if (hms::is_hms(breaks)) {
-      breaks <- lubridate::as_datetime(breaks)
-    }
-    x <- lubridate::as_datetime(x)
-    out <- cut_var.POSIXt(x, breaks = breaks, ...)
-    attr(out, which = "brks") <- hms::as_hms(lubridate::as_datetime(attr(out, which = "brks")))
-    attr(out, which = "levels") <- as.character(hms::as_hms(lubridate::as_datetime(attr(out, which = "levels"))))
-  })
-  out
-}
-
-#' @name cut_var
-#' @param x an object inheriting from class "POSIXt" or "Date"
-#'
-#' @examples
-#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2)
-#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday")
-#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "month_only")
-#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks=NULL,format = "%A-%H")
-#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks=NULL,format = "%W")
-cut_var.POSIXt <- function(x, breaks, right = FALSE, include.lowest = TRUE, start.on.monday = TRUE, ...) {
-  breaks_o <- breaks
-  args <- list(...)
-  # browser()
-  if (is.numeric(breaks)) {
-    breaks <- quantile(
-      x,
-      probs = seq(0, 1, 1 / breaks),
-      right = right,
-      include.lowest = include.lowest,
-      na.rm = TRUE
-    )
-  }
-
-  if ("format" %in% names(args)){
-    assertthat::assert_that(is.character(args$format))
-    out <- forcats::as_factor(format(x,format=args$format))
-  } else if (identical(breaks, "weekday")) {
-    ## This is
-    ds <- as.Date(1:7) |>
-      (\(.x){
-        sort_by(format(.x,"%A"),as.numeric(format(.x,"%w")))
-      })()
-
-    if (start.on.monday) {
-      ds <- ds[c(7, 1:6)]
-    }
-    out <- factor(weekdays(x), levels = ds) |> forcats::fct_drop()
-  } else if (identical(breaks, "month_only")) {
-    ## Simplest way to create a vector of all months in order
-    ## which will also follow the locale of the machine
-    ms <- paste0("1970-", 1:12, "-01") |>
-      as.Date() |>
-      months()
-
-    out <- factor(months(x), levels = ms) |> forcats::fct_drop()
-  } else {
-    ## Doesn't really work very well for breaks other than the special character cases as right border is excluded
-    out <- base::cut.POSIXt(x, breaks = breaks, right = right, ...) |> forcats::fct_drop()
-    # browser()
-  }
-  l <- levels(out)
-  if (is.numeric(breaks_o)) {
-    l <- breaks
-  } else if (is.character(breaks) && length(breaks) == 1 && !(identical(breaks, "weekday") | identical(breaks, "month_only"))) {
-    if (include.lowest) {
-      if (right) {
-        l <- c(l, min(as.character(x)))
-      } else {
-        l <- c(l, max(as.character(x)))
-      }
-    }
-  } else if (length(l) < length(breaks_o)) {
-    l <- breaks_o
-  }
-
-  attr(out, which = "brks") <- l
-  out
-}
-
-#' @name cut_var
-#' @param x an object inheriting from class "POSIXct"
-cut_var.POSIXct <- cut_var.POSIXt
-
-#' @name cut_var
-#' @param x an object inheriting from class "POSIXct"
-#'
-#' @examples
-#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2)
-#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday")
-#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(format = "%W")
-cut_var.Date <- function(x, breaks=NULL, start.on.monday = TRUE, ...) {
-  args <- list(...)
-
-  if ("format" %in% names(args)){
-    assertthat::assert_that(is.character(args$format))
-    out <- forcats::as_factor(format(x,format=args$format))
-  } else if (identical(breaks, "weekday")) {
-    ds <- as.Date(1:7) |>
-      (\(.x){
-        sort_by(format(.x,"%A"),as.numeric(format(.x,"%w")))
-      })()
-
-    if (start.on.monday) {
-      ds <- ds[c(7, 1:6)]
-    }
-    out <- factor(weekdays(x), levels = ds) |> forcats::fct_drop()
-  } else if (identical(breaks, "month_only")) {
-    ms <- paste0("1970-", 1:12, "-01") |>
-      as.Date() |>
-      months()
-
-    out <- factor(months(x), levels = ms) |> forcats::fct_drop()
-  } else {
-    ## Doesn't really work very well for breaks other than the special character cases as right border is excluded
-    out <- base::cut.Date(x, breaks = breaks, ...) |> forcats::fct_drop()
-    # browser()
-  }
-  out
-}
-
-#' Test class
-#'
-#' @param data data
-#' @param class.vec vector of class names to test
-#'
-#' @return factor
-#' @export
-#'
-#' @examples
-#' \dontrun{
-#' vapply(REDCapCAST::redcapcast_data, \(.x){
-#'   is_any_class(.x, c("hms", "Date", "POSIXct", "POSIXt"))
-#' }, logical(1))
-#' }
-is_any_class <- function(data, class.vec) {
-  any(class(data) %in% class.vec)
-}
-
-#' Test is date/datetime/time
-#'
-#' @param data data
-#'
-#' @return factor
-#' @export
-#'
-#' @examples
-#' vapply(REDCapCAST::redcapcast_data, is_datetime, logical(1))
-is_datetime <- function(data) {
-  is_any_class(data, class.vec = c("hms", "Date", "POSIXct", "POSIXt"))
-}
-
 #' @title Module to Convert Numeric to Factor
 #'
 #' @description
@ -206,6 +21,7 @@ cut_variable_ui <- function(id) {
    shiny::fluidRow(
      column(
        width = 3,
+        # shiny::uiOutput(outputId = ns("variable"))
        shinyWidgets::virtualSelectInput(
          inputId = ns("variable"),
          label = i18n$t("Variable to cut:"),
@ -279,9 +95,20 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
        data <- data_r()
        rv$data <- data
        vars_num <- vapply(data, \(.x){
-          is.numeric(.x) || is_datetime(.x)
+          is.numeric(.x) || is_datetime(.x) || (is.factor(.x) && length(levels(.x)) > 2)
        }, logical(1))
        vars_num <- names(vars_num)[vars_num]
+
+        # shiny::renderUI(
+        # columnSelectInput(
+        #   inputId = "variable",
+        #   data = data,
+        #   label = i18n$t("Variable to cut:"),
+        #   width = "100%",
+        #   choices = vars_num,
+        #   selected = if (isTruthy(input$variable)) input$variable else vars_num[1]
+        # ))
+
        shinyWidgets::updateVirtualSelect(
          inputId = "variable",
          choices = vars_num,
@ -294,7 +121,7 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
        variable <- req(input$variable)
        req(hasName(data, variable))

-        if (is_datetime(data[[variable]])) {
+        if (is_datetime(data[[variable]]) || is.factor(data[[variable]])) {
          brks <- cut_var(data[[variable]],
            breaks = input$n_breaks
          )$brks
@ -339,7 +166,7 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
          # "quantile"
        )

-        if (any(c("hms","POSIXct") %in% class(data[[variable]]))) {
+        if (any(c("hms", "POSIXct") %in% class(data[[variable]]))) {
          choices <- c(choices, "hour")
        } else if (any(c("POSIXt", "Date") %in% class(data[[variable]]))) {
          choices <- c(
@ -353,11 +180,17 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
            "quarter",
            "year"
          )
+        } else if ("factor" %in% class(data[[variable]])) {
+          choices <- c(
+            choices,
+            "top" # ,
+            # "bottom",
+          )
        } else {
          choices <- c(
            choices,
            "fixed",
-            "quantile",
+            "quantile" # ,
            # "sd",
            # "equal",
            # "pretty",
@ -366,7 +199,7 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
            # "bclust",
            # "fisher",
            # "jenks",
-            "headtails" # ,
+            # "headtails" # ,
            # "maximum",
            # "box"
          )
@ -429,16 +262,23 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
          # cut.POSIXct <- cut.POSIXt
          f <- cut_var(data[[variable]], breaks = input$method)
          list(var = f, brks = levels(f))
+        } else if (input$method %in% c(
+          "top",
+          "bottom"
+        )) {
+          # This allows factor simplification to get the top or bottom count
+          f <- cut_var(data[[variable]], breaks = input$n_breaks)
+          list(var = f, brks = input$n_breaks, type = input$method)
        } else if (input$method %in% c("hour")) {
          # To enable datetime cutting
          # cut.POSIXct <- cut.POSIXt
          f <- cut_var(data[[variable]], breaks = "hour")
          list(var = f, brks = levels(f))
-        # } else if (input$method %in% c("week_only")) {
-        #   # As a proof of concept a single option to use "format" parameter
-        #   # https://www.stat.berkeley.edu/~s133/dates.html
-        #   f <- cut_var(data[[variable]], format = "%W")
-        #   list(var = f, brks = levels(f))
+          # } else if (input$method %in% c("week_only")) {
+          #   # As a proof of concept a single option to use "format" parameter
+          #   # https://www.stat.berkeley.edu/~s133/dates.html
+          #   f <- cut_var(data[[variable]], format = "%W")
+          #   list(var = f, brks = levels(f))
        } else {
          classInt::classIntervals(
            var = as.numeric(data[[variable]]),
@ -462,7 +302,17 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
        variable <- req(input$variable)


-        if (input$method %in% c("day", "weekday", "week", "month", "month_only", "quarter", "year", "hour")) {
+        if (input$method %in% c(
+          "day",
+          "weekday",
+          "week",
+          "month",
+          "month_only",
+          "quarter",
+          "year",
+          "hour"
+        )
+        ) {
          breaks <- input$method
        } else {
          breaks <- breaks_r()$brks
@ -475,47 +325,34 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
          right = input$right
        )

+        if ("type" %in% names(breaks_r())) {
+          parameters <- modifyList(parameters, list(type = breaks_r()$type))
+        }
+
        new_variable <- tryCatch(
          {
            rlang::exec(cut_var, !!!parameters)
          },
          error = function(err) {
-            showNotification(paste0("We encountered the following error creating your report: ", err), type = "err")
+            showNotification(paste("We encountered the following error creating the new factor:", err), type = "err")
          }
        )

-        # new_variable <- do.call(
-        #   cut,
-        #   parameters
-        # )
-
-
        data <- append_column(data, column = new_variable, name = paste0(variable, "_cut"), index = "right")

-        # setNames(paste0(variable, "_cut"))
-        #
-        #   data <- dplyr::bind_cols(data, new_variable, .name_repair = "unique_quiet")
-
-        # rv$new_var_name <- names(data)[length(data)]
-        # browser()
-
-        # browser()
        code <- rlang::call2(
          "append_column",
          !!!list(
            column = rlang::call2("cut_var",
-                                  !!!modifyList(parameters, list(x = as.symbol(paste0("data$", variable)))),
-                                  .ns = "FreesearchR"),
+              !!!modifyList(parameters, list(x = as.symbol(paste0("data$", variable)))),
+              .ns = "FreesearchR"
+            ),
            name = paste0(variable, "_cut"), index = "right"
          ),
          .ns = "FreesearchR"
        )
        attr(data, "code") <- code

-        # attr(data, "code") <- Reduce(
-        #   f = function(x, y) expr(!!x %>% !!y),
-        #   x = c(attr(data, "code"), code)
-        # )
        data
      })

@ -590,12 +427,11 @@ modal_cut_variable <- function(id,


 #' @importFrom graphics abline axis hist par plot.new plot.window
-plot_histogram <- function(data, column=NULL, bins = 30, breaks = NULL, color = "#112466") {
-  if (is.vector(data)){
+plot_histogram <- function(data, column = NULL, bins = 30, breaks = NULL, color = "#112466") {
+  if (is.vector(data)) {
    x <- data
  } else {
-  x <- data[[column]]
-
+    x <- data[[column]]
  }
  x <- as.numeric(x)
  op <- par(mar = rep(1.5, 4))
@ -610,4 +446,3 @@ plot_histogram <- function(data, column=NULL, bins = 30, breaks = NULL, color =
  abline(v = breaks, col = "#FFFFFF", lty = 1, lwd = 1.5)
  abline(v = breaks, col = "#2E2E2E", lty = 2, lwd = 1.5)
 }
-
--- a/R/cut_var.R
+++ b/R/cut_var.R
@ -0,0 +1,232 @@
+#' Extended cutting function with fall-back to the native base::cut
+#'
+#' @param x an object inheriting from class "hms"
+#' @param ... passed on
+#'
+#' @export
+#' @name cut_var
+cut_var <- function(x, ...) {
+  UseMethod("cut_var")
+}
+
+#' @export
+#' @name cut_var
+cut_var.default <- function(x, ...) {
+  base::cut(x, ...)
+}
+
+#' @name cut_var
+#'
+#' @return factor
+#' @export
+#'
+#' @examples
+#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(2)
+#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var("min")
+#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(breaks = "hour")
+#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(breaks = hms::as_hms(c("01:00:00", "03:01:20", "9:20:20")))
+#' d_t <- readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA))
+#' f <- d_t |> cut_var(2)
+#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA)) |> cut_var(breaks = lubridate::as_datetime(c(hms::as_hms(levels(f)), hms::as_hms(max(d_t, na.rm = TRUE) + 1))), right = FALSE)
+cut_var.hms <- function(x, breaks, ...) {
+  ## as_hms keeps returning warnings on tz(); ignored
+  suppressWarnings({
+    if (hms::is_hms(breaks)) {
+      breaks <- lubridate::as_datetime(breaks)
+    }
+    x <- lubridate::as_datetime(x)
+    out <- cut_var.POSIXt(x, breaks = breaks, ...)
+    attr(out, which = "brks") <- hms::as_hms(lubridate::as_datetime(attr(out, which = "brks")))
+    attr(out, which = "levels") <- as.character(hms::as_hms(lubridate::as_datetime(attr(out, which = "levels"))))
+  })
+  out
+}
+
+#' @name cut_var
+#' @param x an object inheriting from class "POSIXt" or "Date"
+#'
+#' @examples
+#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2)
+#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday")
+#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "month_only")
+#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = NULL, format = "%A-%H")
+#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = NULL, format = "%W")
+cut_var.POSIXt <- function(x, breaks, right = FALSE, include.lowest = TRUE, start.on.monday = TRUE, ...) {
+  breaks_o <- breaks
+  args <- list(...)
+  # browser()
+  if (is.numeric(breaks)) {
+    breaks <- quantile(
+      x,
+      probs = seq(0, 1, 1 / breaks),
+      right = right,
+      include.lowest = include.lowest,
+      na.rm = TRUE
+    )
+  }
+
+  if ("format" %in% names(args)) {
+    assertthat::assert_that(is.character(args$format))
+    out <- forcats::as_factor(format(x, format = args$format))
+  } else if (identical(breaks, "weekday")) {
+    ## This is
+    ds <- as.Date(1:7) |>
+      (\(.x){
+        sort_by(format(.x, "%A"), as.numeric(format(.x, "%w")))
+      })()
+
+    if (start.on.monday) {
+      ds <- ds[c(7, 1:6)]
+    }
+    out <- factor(weekdays(x), levels = ds) |> forcats::fct_drop()
+  } else if (identical(breaks, "month_only")) {
+    ## Simplest way to create a vector of all months in order
+    ## which will also follow the locale of the machine
+    ms <- paste0("1970-", 1:12, "-01") |>
+      as.Date() |>
+      months()
+
+    out <- factor(months(x), levels = ms) |> forcats::fct_drop()
+  } else {
+    ## Doesn't really work very well for breaks other than the special character cases as right border is excluded
+    out <- base::cut.POSIXt(x, breaks = breaks, right = right, ...) |> forcats::fct_drop()
+    # browser()
+  }
+  l <- levels(out)
+  if (is.numeric(breaks_o)) {
+    l <- breaks
+  } else if (is.character(breaks) && length(breaks) == 1 && !(identical(breaks, "weekday") | identical(breaks, "month_only"))) {
+    if (include.lowest) {
+      if (right) {
+        l <- c(l, min(as.character(x)))
+      } else {
+        l <- c(l, max(as.character(x)))
+      }
+    }
+  } else if (length(l) < length(breaks_o)) {
+    l <- breaks_o
+  }
+
+  attr(out, which = "brks") <- l
+  out
+}
+
+#' @name cut_var
+#' @param x an object inheriting from class "POSIXct"
+cut_var.POSIXct <- cut_var.POSIXt
+
+#' @name cut_var
+#' @param x an object inheriting from class "POSIXct"
+#'
+#' @examples
+#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2)
+#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday")
+#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(format = "%W")
+cut_var.Date <- function(x, breaks = NULL, start.on.monday = TRUE, ...) {
+  args <- list(...)
+
+  if ("format" %in% names(args)) {
+    assertthat::assert_that(is.character(args$format))
+    out <- forcats::as_factor(format(x, format = args$format))
+  } else if (identical(breaks, "weekday")) {
+    ds <- as.Date(1:7) |>
+      (\(.x){
+        sort_by(format(.x, "%A"), as.numeric(format(.x, "%w")))
+      })()
+
+    if (start.on.monday) {
+      ds <- ds[c(7, 1:6)]
+    }
+    out <- factor(weekdays(x), levels = ds) |> forcats::fct_drop()
+  } else if (identical(breaks, "month_only")) {
+    ms <- paste0("1970-", 1:12, "-01") |>
+      as.Date() |>
+      months()
+
+    out <- factor(months(x), levels = ms) |> forcats::fct_drop()
+  } else {
+    ## Doesn't really work very well for breaks other than the special character cases as right border is excluded
+    out <- base::cut.Date(x, breaks = breaks, ...) |> forcats::fct_drop()
+    # browser()
+  }
+  out
+}
+
+
+#' Simplify a factor to only the top or bottom n levels
+#'
+#' @param type
+#'
+#' @name cut_var
+#'
+#' @returns factor
+#' @export
+#'
+#' @examples
+#' mtcars$carb |>
+#'   as.factor() |>
+#'   cut_var(2) |>
+#'   table()
+cut_var.factor <- function(x, breaks = NULL, type = c("top", "bottom"), other = "Other", ...) {
+  args <- list(...)
+
+  if (is.null(breaks)) {
+    return(x)
+  }
+
+  type <- match.arg(type)
+
+  if (type == "top") {
+    lvls <- names(sort(table(x), decreasing = TRUE)[seq_len(breaks)])
+  } else if (type == "bottom") {
+    lvls <- names(sort(table(x), decreasing = FALSE)[seq_len(breaks)])
+  }
+
+  if (other %in% lvls) {
+    other <- paste(other, "_freesearchr")
+  }
+
+  ## Relabel and relevel
+  out <- forcats::fct_relabel(
+    x,
+    \(.x){
+      ifelse(.x %in% lvls, .x, other)
+    }
+  ) |>
+    forcats::fct_relevel(lvls,other)
+
+  attr(out, which = "brks") <- breaks
+  out
+}
+
+
+#' Test class
+#'
+#' @param data data
+#' @param class.vec vector of class names to test
+#'
+#' @return factor
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' vapply(REDCapCAST::redcapcast_data, \(.x){
+#'   is_any_class(.x, c("hms", "Date", "POSIXct", "POSIXt"))
+#' }, logical(1))
+#' }
+is_any_class <- function(data, class.vec) {
+  any(class(data) %in% class.vec)
+}
+
+#' Test is date/datetime/time
+#'
+#' @param data data
+#'
+#' @return factor
+#' @export
+#'
+#' @examples
+#' vapply(REDCapCAST::redcapcast_data, is_datetime, logical(1))
+is_datetime <- function(data) {
+  is_any_class(data, class.vec = c("hms", "Date", "POSIXct", "POSIXt"))
+}
--- a/R/hosted_version.R
+++ b/R/hosted_version.R
@ -1 +1 @@
-hosted_version <- function()'v25.10.3-251008'
+hosted_version <- function()'v25.10.3-251021'
--- a/R/plot_sankey.R
+++ b/R/plot_sankey.R
@ -83,7 +83,7 @@ str_remove_last <- function(data, pattern = "\n") {
 #' mtcars |>
 #'   default_parsing() |>
 #'   plot_sankey("cyl", "gear", "vs", color.group = "pri")
-plot_sankey <- function(data, pri, sec, ter = NULL, color.group = "pri", colors = NULL) {
+plot_sankey <- function(data, pri, sec, ter = NULL, color.group = "pri", colors = NULL,missing.level="Missing") {
  if (!is.null(ter)) {
    ds <- split(data, data[ter])
  } else {
@ -91,7 +91,7 @@ plot_sankey <- function(data, pri, sec, ter = NULL, color.group = "pri", colors
  }

  out <- lapply(ds, \(.ds){
-    plot_sankey_single(.ds, pri = pri, sec = sec, color.group = color.group, colors = colors)
+    plot_sankey_single(.ds, pri = pri, sec = sec, color.group = color.group, colors = colors,missing.level=missing.level)
  })

  patchwork::wrap_plots(out)
@ -120,14 +120,21 @@ plot_sankey <- function(data, pri, sec, ter = NULL, color.group = "pri", colors
 #' mtcars |>
 #'   default_parsing() |>
 #'   plot_sankey_single("cyl", "vs", color.group = "pri")
-plot_sankey_single <- function(data, pri, sec, color.group = c("pri", "sec"), colors = NULL, ...) {
+#' stRoke::trial |>
+#'   default_parsing() |>
+#'   plot_sankey_single("diabetes", "hypertension")
+plot_sankey_single <- function(data, pri, sec, color.group = c("pri", "sec"), colors = NULL,missing.level="Missing", ...) {
  color.group <- match.arg(color.group)

+  browser()
  data_orig <- data
  data[c(pri, sec)] <- data[c(pri, sec)] |>
-    dplyr::mutate(dplyr::across(dplyr::where(is.factor), forcats::fct_drop))
+    dplyr::mutate(
+      # dplyr::across(dplyr::where(is.logical), as.factor),
+      dplyr::across(dplyr::where(is.factor), forcats::fct_drop)#,
+      # dplyr::across(dplyr::where(is.factor), \(.x){forcats::fct_na_value_to_level(.x,missing.level)})
+    )

-  # browser()

  data <- data |> sankey_ready(pri = pri, sec = sec, ...)

--- a/man/cut-variable.Rd
+++ b/man/cut-variable.Rd
@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/cut-variable-dates.R
+% Please edit documentation in R/cut-variable-ext.R
 \name{cut-variable}
 \alias{cut-variable}
 \alias{cut_variable_ui}
--- a/man/cut_var.Rd
+++ b/man/cut_var.Rd
@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/cut-variable-dates.R
+% Please edit documentation in R/cut_var.R
 \name{cut_var}
 \alias{cut_var}
 \alias{cut_var.default}
@ -7,6 +7,7 @@
 \alias{cut_var.POSIXt}
 \alias{cut_var.POSIXct}
 \alias{cut_var.Date}
+\alias{cut_var.factor}
 \title{Extended cutting function with fall-back to the native base::cut}
 \usage{
 cut_var(x, ...)
@ -34,17 +35,25 @@ cut_var(x, ...)
 )

 \method{cut_var}{Date}(x, breaks = NULL, start.on.monday = TRUE, ...)
+
+\method{cut_var}{factor}(x, breaks = NULL, type = c("top", "bottom"), other = "Other", ...)
 }
 \arguments{
 \item{x}{an object inheriting from class "POSIXct"}

 \item{...}{passed on}
+
+\item{type}{}
 }
 \value{
+factor
+
 factor
 }
 \description{
 Extended cutting function with fall-back to the native base::cut
+
+Simplify a factor to only the top or bottom n levels
 }
 \examples{
 readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(2)
@ -57,9 +66,13 @@ readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA)) |> cut_
 readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2)
 readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday")
 readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "month_only")
-readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks=NULL,format = "\%A-\%H")
-readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks=NULL,format = "\%W")
+readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = NULL, format = "\%A-\%H")
+readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = NULL, format = "\%W")
 as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2)
 as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday")
 as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(format = "\%W")
+mtcars$carb |>
+  as.factor() |>
+  cut_var(2) |>
+  table()
 }
--- a/man/data-plots.Rd
+++ b/man/data-plots.Rd
@ -32,7 +32,15 @@ plot_ridge(data, x, y, z = NULL, ...)

 sankey_ready(data, pri, sec, numbers = "count", ...)

-plot_sankey(data, pri, sec, ter = NULL, color.group = "pri", colors = NULL)
+plot_sankey(
+  data,
+  pri,
+  sec,
+  ter = NULL,
+  color.group = "pri",
+  colors = NULL,
+  missing.level = "Missing"
+)

 plot_scatter(data, pri, sec, ter = NULL)

--- a/man/is_any_class.Rd
+++ b/man/is_any_class.Rd
@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/cut-variable-dates.R
+% Please edit documentation in R/cut_var.R
 \name{is_any_class}
 \alias{is_any_class}
 \title{Test class}
--- a/man/is_datetime.Rd
+++ b/man/is_datetime.Rd
@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/cut-variable-dates.R
+% Please edit documentation in R/cut_var.R
 \name{is_datetime}
 \alias{is_datetime}
 \title{Test is date/datetime/time}
--- a/man/plot_sankey_single.Rd
+++ b/man/plot_sankey_single.Rd
@ -10,6 +10,7 @@ plot_sankey_single(
  sec,
  color.group = c("pri", "sec"),
  colors = NULL,
+  missing.level = "Missing",
  ...
 )
 }
@ -40,4 +41,7 @@ data.frame(
 mtcars |>
  default_parsing() |>
  plot_sankey_single("cyl", "vs", color.group = "pri")
+stRoke::trial |>
+  default_parsing() |>
+  plot_sankey_single("diabetes", "hypertension")
 }