feat: new cut methods

This commit is contained in:
Andreas Gammelgaard Damsbo 2025-12-03 22:01:03 +01:00
parent cc853b2ede
commit f2c1c974e0
No known key found for this signature in database
2 changed files with 69 additions and 9 deletions

View file

@ -40,7 +40,7 @@ cut_variable_ui <- function(id) {
column(
width = 3,
shiny::conditionalPanel(
condition = "input.method != 'top' && input.method != 'bottom'",
condition = "input.method != 'top' && input.method != 'bottom' && input.method != 'words' && input.method != 'characters'",
ns = ns,
checkboxInput(
inputId = ns("right"),
@ -94,7 +94,7 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
data <- data_r()
rv$data <- data
vars_num <- vapply(data, \(.x){
is.numeric(.x) || is_datetime(.x) || (is.factor(.x) && length(levels(.x)) > 2)
is.numeric(.x) || is_datetime(.x) || (is.factor(.x) && length(levels(.x)) > 2) || is.character(.x)
}, logical(1))
vars_num <- names(vars_num)[vars_num]
@ -216,6 +216,12 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
"top",
"bottom"
)
} else if ("character" %in% class(data[[variable]])) {
choices <- c(
choices,
"characters",
"words"
)
} else {
choices <- c(
choices,
@ -294,7 +300,9 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
list(var = f, brks = levels(f))
} else if (input$method %in% c(
"top",
"bottom"
"bottom",
"characters",
"words"
)) {
# This allows factor simplification to get the top or bottom count
f <- cut_var(data[[variable]], breaks = input$n_breaks)
@ -409,7 +417,6 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
responseName = "count"
)
count_data$freq <- paste(signif(count_data$count / nrow(data) * 100, 3), "%")
# browser()
gridTheme <- getOption("datagrid.theme")
if (length(gridTheme) < 1) {
datamods:::apply_grid_theme()
@ -476,6 +483,9 @@ plot_histogram <- function(data, column = NULL, bins = 30, breaks = NULL, color
} else {
x <- data[[column]]
}
if (is.character(x)){
x <- REDCapCAST::as_factor(x)
}
x <- as.numeric(x)
op <- par(mar = rep(1.5, 4))
on.exit(par(op))
@ -551,6 +561,18 @@ cut_methods <- function() {
min = 1,
max = 50
),
"characters" = list(
descr = i18n$t("Shorten to first letters"),
breaks = i18n$t("Letters"),
min = 1,
max = 20
),
"words" = list(
descr = i18n$t("Shorten to first words"),
breaks = i18n$t("Words"),
min = 1,
max = 50
),
"fixed" = list(
descr = i18n$t("By specified numbers"),
breaks = i18n$t("Breaks"),

View file

@ -184,21 +184,24 @@ cut_var.factor <- function(x, breaks = NULL, type = c("top", "bottom"), other =
tbl <- sort(table(x), decreasing = TRUE)
if (type == "top") {
if (length(levels(x)) <= breaks){
if (length(levels(x)) <= breaks) {
return(x)
}
lvls <- names(tbl[seq_len(breaks)])
} else if (type == "bottom") {
freqs_check <- tbl / NROW(x) * 100 < breaks
if (!any(freqs_check)){
if (!any(freqs_check)) {
return(x)
}
lvls <- names(tbl)[!freqs_check]
}
if (other %in% lvls) {
other <- paste(other, "_freesearchr")
}
# if (other %in% lvls) {
# other <- paste(other, "_freesearchr")
# }
# Ensure unique new level name
other <- unique_names(other, lvls)
## Relabel and relevel
out <- forcats::fct_relabel(
@ -214,6 +217,41 @@ cut_var.factor <- function(x, breaks = NULL, type = c("top", "bottom"), other =
}
#' Subset first part of string to factor
#'
#' @name cut_var
#'
#' @returns factor
#' @export
#'
#' @examples
#' c("Sunday", "This week is short") |> cut_var(breaks = 3)
cut_var.character <- function(x, breaks = NULL, type = c("characters", "words"), ...) {
args <- list(...)
if (is.null(breaks)) {
return(x)
}
type <- match.arg(type)
if (type == "characters") {
out <- substr(x, start = 1, stop = breaks)
} else if (type == "words") {
out <- strsplit(x, " ") |>
sapply(\(.x){
if (length(.x) > breaks) {
paste(.x[seq_len(breaks)], collapse = " ")
} else {
paste(.x, collapse = " ")
}
})
}
attr(out, which = "brks") <- breaks
REDCapCAST::as_factor(out)
}
#' Test class
#'
#' @param data data