From f2c1c974e0dd85615cc3872573a1e5d44d451f1a Mon Sep 17 00:00:00 2001 From: Andreas Gammelgaard Damsbo Date: Wed, 3 Dec 2025 22:01:03 +0100 Subject: [PATCH] feat: new cut methods --- R/cut-variable-ext.R | 30 +++++++++++++++++++++++---- R/cut_var.R | 48 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 69 insertions(+), 9 deletions(-) diff --git a/R/cut-variable-ext.R b/R/cut-variable-ext.R index cb27543c..508e846c 100644 --- a/R/cut-variable-ext.R +++ b/R/cut-variable-ext.R @@ -40,7 +40,7 @@ cut_variable_ui <- function(id) { column( width = 3, shiny::conditionalPanel( - condition = "input.method != 'top' && input.method != 'bottom'", + condition = "input.method != 'top' && input.method != 'bottom' && input.method != 'words' && input.method != 'characters'", ns = ns, checkboxInput( inputId = ns("right"), @@ -94,7 +94,7 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) { data <- data_r() rv$data <- data vars_num <- vapply(data, \(.x){ - is.numeric(.x) || is_datetime(.x) || (is.factor(.x) && length(levels(.x)) > 2) + is.numeric(.x) || is_datetime(.x) || (is.factor(.x) && length(levels(.x)) > 2) || is.character(.x) }, logical(1)) vars_num <- names(vars_num)[vars_num] @@ -216,6 +216,12 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) { "top", "bottom" ) + } else if ("character" %in% class(data[[variable]])) { + choices <- c( + choices, + "characters", + "words" + ) } else { choices <- c( choices, @@ -294,7 +300,9 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) { list(var = f, brks = levels(f)) } else if (input$method %in% c( "top", - "bottom" + "bottom", + "characters", + "words" )) { # This allows factor simplification to get the top or bottom count f <- cut_var(data[[variable]], breaks = input$n_breaks) @@ -409,7 +417,6 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) { responseName = "count" ) count_data$freq <- paste(signif(count_data$count / nrow(data) * 100, 3), "%") - # browser() gridTheme <- getOption("datagrid.theme") if (length(gridTheme) < 1) { datamods:::apply_grid_theme() @@ -476,6 +483,9 @@ plot_histogram <- function(data, column = NULL, bins = 30, breaks = NULL, color } else { x <- data[[column]] } + if (is.character(x)){ + x <- REDCapCAST::as_factor(x) + } x <- as.numeric(x) op <- par(mar = rep(1.5, 4)) on.exit(par(op)) @@ -551,6 +561,18 @@ cut_methods <- function() { min = 1, max = 50 ), + "characters" = list( + descr = i18n$t("Shorten to first letters"), + breaks = i18n$t("Letters"), + min = 1, + max = 20 + ), + "words" = list( + descr = i18n$t("Shorten to first words"), + breaks = i18n$t("Words"), + min = 1, + max = 50 + ), "fixed" = list( descr = i18n$t("By specified numbers"), breaks = i18n$t("Breaks"), diff --git a/R/cut_var.R b/R/cut_var.R index d2fab621..16635694 100644 --- a/R/cut_var.R +++ b/R/cut_var.R @@ -184,21 +184,24 @@ cut_var.factor <- function(x, breaks = NULL, type = c("top", "bottom"), other = tbl <- sort(table(x), decreasing = TRUE) if (type == "top") { - if (length(levels(x)) <= breaks){ + if (length(levels(x)) <= breaks) { return(x) } lvls <- names(tbl[seq_len(breaks)]) } else if (type == "bottom") { freqs_check <- tbl / NROW(x) * 100 < breaks - if (!any(freqs_check)){ + if (!any(freqs_check)) { return(x) } lvls <- names(tbl)[!freqs_check] } - if (other %in% lvls) { - other <- paste(other, "_freesearchr") - } + # if (other %in% lvls) { + # other <- paste(other, "_freesearchr") + # } + + # Ensure unique new level name + other <- unique_names(other, lvls) ## Relabel and relevel out <- forcats::fct_relabel( @@ -214,6 +217,41 @@ cut_var.factor <- function(x, breaks = NULL, type = c("top", "bottom"), other = } +#' Subset first part of string to factor +#' +#' @name cut_var +#' +#' @returns factor +#' @export +#' +#' @examples +#' c("Sunday", "This week is short") |> cut_var(breaks = 3) +cut_var.character <- function(x, breaks = NULL, type = c("characters", "words"), ...) { + args <- list(...) + + if (is.null(breaks)) { + return(x) + } + + type <- match.arg(type) + + if (type == "characters") { + out <- substr(x, start = 1, stop = breaks) + } else if (type == "words") { + out <- strsplit(x, " ") |> + sapply(\(.x){ + if (length(.x) > breaks) { + paste(.x[seq_len(breaks)], collapse = " ") + } else { + paste(.x, collapse = " ") + } + }) + } + + attr(out, which = "brks") <- breaks + REDCapCAST::as_factor(out) +} + #' Test class #' #' @param data data