From 0c2b0617081a573e2fd67ec7bf4556c4886bac7f Mon Sep 17 00:00:00 2001 From: Andreas Gammelgaard Damsbo Date: Tue, 21 Oct 2025 21:30:07 +0300 Subject: [PATCH] feat: new cut option to simplify factors to only the top N levels --- DESCRIPTION | 3 +- NAMESPACE | 1 + ...ut-variable-dates.R => cut-variable-ext.R} | 279 ++++-------------- R/cut_var.R | 232 +++++++++++++++ R/hosted_version.R | 2 +- R/plot_sankey.R | 17 +- man/cut-variable.Rd | 2 +- man/cut_var.Rd | 19 +- man/data-plots.Rd | 10 +- man/is_any_class.Rd | 2 +- man/is_datetime.Rd | 2 +- man/plot_sankey_single.Rd | 4 + 12 files changed, 337 insertions(+), 236 deletions(-) rename R/{cut-variable-dates.R => cut-variable-ext.R} (57%) create mode 100644 R/cut_var.R diff --git a/DESCRIPTION b/DESCRIPTION index fb3d3fd6..54c35c36 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -99,7 +99,8 @@ Collate: 'correlations-module.R' 'create-column-mod.R' 'custom_SelectInput.R' - 'cut-variable-dates.R' + 'cut-variable-ext.R' + 'cut_var.R' 'data-summary.R' 'data_plots.R' 'datagrid-infos-mod.R' diff --git a/NAMESPACE b/NAMESPACE index b4592f0b..f5e24008 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand S3method(cut_var,default) +S3method(cut_var,factor) S3method(cut_var,hms) S3method(plot,tbl_regression) export(FreesearchR_palette) diff --git a/R/cut-variable-dates.R b/R/cut-variable-ext.R similarity index 57% rename from R/cut-variable-dates.R rename to R/cut-variable-ext.R index 05dcd5b9..97f0f854 100644 --- a/R/cut-variable-dates.R +++ b/R/cut-variable-ext.R @@ -1,188 +1,3 @@ -#' Extended cutting function with fall-back to the native base::cut -#' -#' @param x an object inheriting from class "hms" -#' @param ... passed on -#' -#' @export -#' @name cut_var -cut_var <- function(x, ...) { - UseMethod("cut_var") -} - -#' @export -#' @name cut_var -cut_var.default <- function(x, ...) { - base::cut(x, ...) -} - -#' @name cut_var -#' -#' @return factor -#' @export -#' -#' @examples -#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(2) -#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var("min") -#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(breaks = "hour") -#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(breaks = hms::as_hms(c("01:00:00", "03:01:20", "9:20:20"))) -#' d_t <- readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA)) -#' f <- d_t |> cut_var(2) -#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA)) |> cut_var(breaks = lubridate::as_datetime(c(hms::as_hms(levels(f)), hms::as_hms(max(d_t, na.rm = TRUE) + 1))), right = FALSE) -cut_var.hms <- function(x, breaks, ...) { - ## as_hms keeps returning warnings on tz(); ignored - suppressWarnings({ - if (hms::is_hms(breaks)) { - breaks <- lubridate::as_datetime(breaks) - } - x <- lubridate::as_datetime(x) - out <- cut_var.POSIXt(x, breaks = breaks, ...) - attr(out, which = "brks") <- hms::as_hms(lubridate::as_datetime(attr(out, which = "brks"))) - attr(out, which = "levels") <- as.character(hms::as_hms(lubridate::as_datetime(attr(out, which = "levels")))) - }) - out -} - -#' @name cut_var -#' @param x an object inheriting from class "POSIXt" or "Date" -#' -#' @examples -#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2) -#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday") -#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "month_only") -#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks=NULL,format = "%A-%H") -#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks=NULL,format = "%W") -cut_var.POSIXt <- function(x, breaks, right = FALSE, include.lowest = TRUE, start.on.monday = TRUE, ...) { - breaks_o <- breaks - args <- list(...) - # browser() - if (is.numeric(breaks)) { - breaks <- quantile( - x, - probs = seq(0, 1, 1 / breaks), - right = right, - include.lowest = include.lowest, - na.rm = TRUE - ) - } - - if ("format" %in% names(args)){ - assertthat::assert_that(is.character(args$format)) - out <- forcats::as_factor(format(x,format=args$format)) - } else if (identical(breaks, "weekday")) { - ## This is - ds <- as.Date(1:7) |> - (\(.x){ - sort_by(format(.x,"%A"),as.numeric(format(.x,"%w"))) - })() - - if (start.on.monday) { - ds <- ds[c(7, 1:6)] - } - out <- factor(weekdays(x), levels = ds) |> forcats::fct_drop() - } else if (identical(breaks, "month_only")) { - ## Simplest way to create a vector of all months in order - ## which will also follow the locale of the machine - ms <- paste0("1970-", 1:12, "-01") |> - as.Date() |> - months() - - out <- factor(months(x), levels = ms) |> forcats::fct_drop() - } else { - ## Doesn't really work very well for breaks other than the special character cases as right border is excluded - out <- base::cut.POSIXt(x, breaks = breaks, right = right, ...) |> forcats::fct_drop() - # browser() - } - l <- levels(out) - if (is.numeric(breaks_o)) { - l <- breaks - } else if (is.character(breaks) && length(breaks) == 1 && !(identical(breaks, "weekday") | identical(breaks, "month_only"))) { - if (include.lowest) { - if (right) { - l <- c(l, min(as.character(x))) - } else { - l <- c(l, max(as.character(x))) - } - } - } else if (length(l) < length(breaks_o)) { - l <- breaks_o - } - - attr(out, which = "brks") <- l - out -} - -#' @name cut_var -#' @param x an object inheriting from class "POSIXct" -cut_var.POSIXct <- cut_var.POSIXt - -#' @name cut_var -#' @param x an object inheriting from class "POSIXct" -#' -#' @examples -#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2) -#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday") -#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(format = "%W") -cut_var.Date <- function(x, breaks=NULL, start.on.monday = TRUE, ...) { - args <- list(...) - - if ("format" %in% names(args)){ - assertthat::assert_that(is.character(args$format)) - out <- forcats::as_factor(format(x,format=args$format)) - } else if (identical(breaks, "weekday")) { - ds <- as.Date(1:7) |> - (\(.x){ - sort_by(format(.x,"%A"),as.numeric(format(.x,"%w"))) - })() - - if (start.on.monday) { - ds <- ds[c(7, 1:6)] - } - out <- factor(weekdays(x), levels = ds) |> forcats::fct_drop() - } else if (identical(breaks, "month_only")) { - ms <- paste0("1970-", 1:12, "-01") |> - as.Date() |> - months() - - out <- factor(months(x), levels = ms) |> forcats::fct_drop() - } else { - ## Doesn't really work very well for breaks other than the special character cases as right border is excluded - out <- base::cut.Date(x, breaks = breaks, ...) |> forcats::fct_drop() - # browser() - } - out -} - -#' Test class -#' -#' @param data data -#' @param class.vec vector of class names to test -#' -#' @return factor -#' @export -#' -#' @examples -#' \dontrun{ -#' vapply(REDCapCAST::redcapcast_data, \(.x){ -#' is_any_class(.x, c("hms", "Date", "POSIXct", "POSIXt")) -#' }, logical(1)) -#' } -is_any_class <- function(data, class.vec) { - any(class(data) %in% class.vec) -} - -#' Test is date/datetime/time -#' -#' @param data data -#' -#' @return factor -#' @export -#' -#' @examples -#' vapply(REDCapCAST::redcapcast_data, is_datetime, logical(1)) -is_datetime <- function(data) { - is_any_class(data, class.vec = c("hms", "Date", "POSIXct", "POSIXt")) -} - #' @title Module to Convert Numeric to Factor #' #' @description @@ -206,6 +21,7 @@ cut_variable_ui <- function(id) { shiny::fluidRow( column( width = 3, + # shiny::uiOutput(outputId = ns("variable")) shinyWidgets::virtualSelectInput( inputId = ns("variable"), label = i18n$t("Variable to cut:"), @@ -279,9 +95,20 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) { data <- data_r() rv$data <- data vars_num <- vapply(data, \(.x){ - is.numeric(.x) || is_datetime(.x) + is.numeric(.x) || is_datetime(.x) || (is.factor(.x) && length(levels(.x)) > 2) }, logical(1)) vars_num <- names(vars_num)[vars_num] + + # shiny::renderUI( + # columnSelectInput( + # inputId = "variable", + # data = data, + # label = i18n$t("Variable to cut:"), + # width = "100%", + # choices = vars_num, + # selected = if (isTruthy(input$variable)) input$variable else vars_num[1] + # )) + shinyWidgets::updateVirtualSelect( inputId = "variable", choices = vars_num, @@ -294,7 +121,7 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) { variable <- req(input$variable) req(hasName(data, variable)) - if (is_datetime(data[[variable]])) { + if (is_datetime(data[[variable]]) || is.factor(data[[variable]])) { brks <- cut_var(data[[variable]], breaks = input$n_breaks )$brks @@ -339,7 +166,7 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) { # "quantile" ) - if (any(c("hms","POSIXct") %in% class(data[[variable]]))) { + if (any(c("hms", "POSIXct") %in% class(data[[variable]]))) { choices <- c(choices, "hour") } else if (any(c("POSIXt", "Date") %in% class(data[[variable]]))) { choices <- c( @@ -353,11 +180,17 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) { "quarter", "year" ) + } else if ("factor" %in% class(data[[variable]])) { + choices <- c( + choices, + "top" # , + # "bottom", + ) } else { choices <- c( choices, "fixed", - "quantile", + "quantile" # , # "sd", # "equal", # "pretty", @@ -366,7 +199,7 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) { # "bclust", # "fisher", # "jenks", - "headtails" # , + # "headtails" # , # "maximum", # "box" ) @@ -429,16 +262,23 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) { # cut.POSIXct <- cut.POSIXt f <- cut_var(data[[variable]], breaks = input$method) list(var = f, brks = levels(f)) + } else if (input$method %in% c( + "top", + "bottom" + )) { + # This allows factor simplification to get the top or bottom count + f <- cut_var(data[[variable]], breaks = input$n_breaks) + list(var = f, brks = input$n_breaks, type = input$method) } else if (input$method %in% c("hour")) { # To enable datetime cutting # cut.POSIXct <- cut.POSIXt f <- cut_var(data[[variable]], breaks = "hour") list(var = f, brks = levels(f)) - # } else if (input$method %in% c("week_only")) { - # # As a proof of concept a single option to use "format" parameter - # # https://www.stat.berkeley.edu/~s133/dates.html - # f <- cut_var(data[[variable]], format = "%W") - # list(var = f, brks = levels(f)) + # } else if (input$method %in% c("week_only")) { + # # As a proof of concept a single option to use "format" parameter + # # https://www.stat.berkeley.edu/~s133/dates.html + # f <- cut_var(data[[variable]], format = "%W") + # list(var = f, brks = levels(f)) } else { classInt::classIntervals( var = as.numeric(data[[variable]]), @@ -462,7 +302,17 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) { variable <- req(input$variable) - if (input$method %in% c("day", "weekday", "week", "month", "month_only", "quarter", "year", "hour")) { + if (input$method %in% c( + "day", + "weekday", + "week", + "month", + "month_only", + "quarter", + "year", + "hour" + ) + ) { breaks <- input$method } else { breaks <- breaks_r()$brks @@ -475,47 +325,34 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) { right = input$right ) + if ("type" %in% names(breaks_r())) { + parameters <- modifyList(parameters, list(type = breaks_r()$type)) + } + new_variable <- tryCatch( { rlang::exec(cut_var, !!!parameters) }, error = function(err) { - showNotification(paste0("We encountered the following error creating your report: ", err), type = "err") + showNotification(paste("We encountered the following error creating the new factor:", err), type = "err") } ) - # new_variable <- do.call( - # cut, - # parameters - # ) - - data <- append_column(data, column = new_variable, name = paste0(variable, "_cut"), index = "right") - # setNames(paste0(variable, "_cut")) - # - # data <- dplyr::bind_cols(data, new_variable, .name_repair = "unique_quiet") - - # rv$new_var_name <- names(data)[length(data)] - # browser() - - # browser() code <- rlang::call2( "append_column", !!!list( column = rlang::call2("cut_var", - !!!modifyList(parameters, list(x = as.symbol(paste0("data$", variable)))), - .ns = "FreesearchR"), + !!!modifyList(parameters, list(x = as.symbol(paste0("data$", variable)))), + .ns = "FreesearchR" + ), name = paste0(variable, "_cut"), index = "right" ), .ns = "FreesearchR" ) attr(data, "code") <- code - # attr(data, "code") <- Reduce( - # f = function(x, y) expr(!!x %>% !!y), - # x = c(attr(data, "code"), code) - # ) data }) @@ -590,12 +427,11 @@ modal_cut_variable <- function(id, #' @importFrom graphics abline axis hist par plot.new plot.window -plot_histogram <- function(data, column=NULL, bins = 30, breaks = NULL, color = "#112466") { - if (is.vector(data)){ +plot_histogram <- function(data, column = NULL, bins = 30, breaks = NULL, color = "#112466") { + if (is.vector(data)) { x <- data } else { - x <- data[[column]] - + x <- data[[column]] } x <- as.numeric(x) op <- par(mar = rep(1.5, 4)) @@ -610,4 +446,3 @@ plot_histogram <- function(data, column=NULL, bins = 30, breaks = NULL, color = abline(v = breaks, col = "#FFFFFF", lty = 1, lwd = 1.5) abline(v = breaks, col = "#2E2E2E", lty = 2, lwd = 1.5) } - diff --git a/R/cut_var.R b/R/cut_var.R new file mode 100644 index 00000000..5b822372 --- /dev/null +++ b/R/cut_var.R @@ -0,0 +1,232 @@ +#' Extended cutting function with fall-back to the native base::cut +#' +#' @param x an object inheriting from class "hms" +#' @param ... passed on +#' +#' @export +#' @name cut_var +cut_var <- function(x, ...) { + UseMethod("cut_var") +} + +#' @export +#' @name cut_var +cut_var.default <- function(x, ...) { + base::cut(x, ...) +} + +#' @name cut_var +#' +#' @return factor +#' @export +#' +#' @examples +#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(2) +#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var("min") +#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(breaks = "hour") +#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(breaks = hms::as_hms(c("01:00:00", "03:01:20", "9:20:20"))) +#' d_t <- readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA)) +#' f <- d_t |> cut_var(2) +#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA)) |> cut_var(breaks = lubridate::as_datetime(c(hms::as_hms(levels(f)), hms::as_hms(max(d_t, na.rm = TRUE) + 1))), right = FALSE) +cut_var.hms <- function(x, breaks, ...) { + ## as_hms keeps returning warnings on tz(); ignored + suppressWarnings({ + if (hms::is_hms(breaks)) { + breaks <- lubridate::as_datetime(breaks) + } + x <- lubridate::as_datetime(x) + out <- cut_var.POSIXt(x, breaks = breaks, ...) + attr(out, which = "brks") <- hms::as_hms(lubridate::as_datetime(attr(out, which = "brks"))) + attr(out, which = "levels") <- as.character(hms::as_hms(lubridate::as_datetime(attr(out, which = "levels")))) + }) + out +} + +#' @name cut_var +#' @param x an object inheriting from class "POSIXt" or "Date" +#' +#' @examples +#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2) +#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday") +#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "month_only") +#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = NULL, format = "%A-%H") +#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = NULL, format = "%W") +cut_var.POSIXt <- function(x, breaks, right = FALSE, include.lowest = TRUE, start.on.monday = TRUE, ...) { + breaks_o <- breaks + args <- list(...) + # browser() + if (is.numeric(breaks)) { + breaks <- quantile( + x, + probs = seq(0, 1, 1 / breaks), + right = right, + include.lowest = include.lowest, + na.rm = TRUE + ) + } + + if ("format" %in% names(args)) { + assertthat::assert_that(is.character(args$format)) + out <- forcats::as_factor(format(x, format = args$format)) + } else if (identical(breaks, "weekday")) { + ## This is + ds <- as.Date(1:7) |> + (\(.x){ + sort_by(format(.x, "%A"), as.numeric(format(.x, "%w"))) + })() + + if (start.on.monday) { + ds <- ds[c(7, 1:6)] + } + out <- factor(weekdays(x), levels = ds) |> forcats::fct_drop() + } else if (identical(breaks, "month_only")) { + ## Simplest way to create a vector of all months in order + ## which will also follow the locale of the machine + ms <- paste0("1970-", 1:12, "-01") |> + as.Date() |> + months() + + out <- factor(months(x), levels = ms) |> forcats::fct_drop() + } else { + ## Doesn't really work very well for breaks other than the special character cases as right border is excluded + out <- base::cut.POSIXt(x, breaks = breaks, right = right, ...) |> forcats::fct_drop() + # browser() + } + l <- levels(out) + if (is.numeric(breaks_o)) { + l <- breaks + } else if (is.character(breaks) && length(breaks) == 1 && !(identical(breaks, "weekday") | identical(breaks, "month_only"))) { + if (include.lowest) { + if (right) { + l <- c(l, min(as.character(x))) + } else { + l <- c(l, max(as.character(x))) + } + } + } else if (length(l) < length(breaks_o)) { + l <- breaks_o + } + + attr(out, which = "brks") <- l + out +} + +#' @name cut_var +#' @param x an object inheriting from class "POSIXct" +cut_var.POSIXct <- cut_var.POSIXt + +#' @name cut_var +#' @param x an object inheriting from class "POSIXct" +#' +#' @examples +#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2) +#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday") +#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(format = "%W") +cut_var.Date <- function(x, breaks = NULL, start.on.monday = TRUE, ...) { + args <- list(...) + + if ("format" %in% names(args)) { + assertthat::assert_that(is.character(args$format)) + out <- forcats::as_factor(format(x, format = args$format)) + } else if (identical(breaks, "weekday")) { + ds <- as.Date(1:7) |> + (\(.x){ + sort_by(format(.x, "%A"), as.numeric(format(.x, "%w"))) + })() + + if (start.on.monday) { + ds <- ds[c(7, 1:6)] + } + out <- factor(weekdays(x), levels = ds) |> forcats::fct_drop() + } else if (identical(breaks, "month_only")) { + ms <- paste0("1970-", 1:12, "-01") |> + as.Date() |> + months() + + out <- factor(months(x), levels = ms) |> forcats::fct_drop() + } else { + ## Doesn't really work very well for breaks other than the special character cases as right border is excluded + out <- base::cut.Date(x, breaks = breaks, ...) |> forcats::fct_drop() + # browser() + } + out +} + + +#' Simplify a factor to only the top or bottom n levels +#' +#' @param type +#' +#' @name cut_var +#' +#' @returns factor +#' @export +#' +#' @examples +#' mtcars$carb |> +#' as.factor() |> +#' cut_var(2) |> +#' table() +cut_var.factor <- function(x, breaks = NULL, type = c("top", "bottom"), other = "Other", ...) { + args <- list(...) + + if (is.null(breaks)) { + return(x) + } + + type <- match.arg(type) + + if (type == "top") { + lvls <- names(sort(table(x), decreasing = TRUE)[seq_len(breaks)]) + } else if (type == "bottom") { + lvls <- names(sort(table(x), decreasing = FALSE)[seq_len(breaks)]) + } + + if (other %in% lvls) { + other <- paste(other, "_freesearchr") + } + + ## Relabel and relevel + out <- forcats::fct_relabel( + x, + \(.x){ + ifelse(.x %in% lvls, .x, other) + } + ) |> + forcats::fct_relevel(lvls,other) + + attr(out, which = "brks") <- breaks + out +} + + +#' Test class +#' +#' @param data data +#' @param class.vec vector of class names to test +#' +#' @return factor +#' @export +#' +#' @examples +#' \dontrun{ +#' vapply(REDCapCAST::redcapcast_data, \(.x){ +#' is_any_class(.x, c("hms", "Date", "POSIXct", "POSIXt")) +#' }, logical(1)) +#' } +is_any_class <- function(data, class.vec) { + any(class(data) %in% class.vec) +} + +#' Test is date/datetime/time +#' +#' @param data data +#' +#' @return factor +#' @export +#' +#' @examples +#' vapply(REDCapCAST::redcapcast_data, is_datetime, logical(1)) +is_datetime <- function(data) { + is_any_class(data, class.vec = c("hms", "Date", "POSIXct", "POSIXt")) +} diff --git a/R/hosted_version.R b/R/hosted_version.R index 4ab7eee3..85f134c2 100644 --- a/R/hosted_version.R +++ b/R/hosted_version.R @@ -1 +1 @@ -hosted_version <- function()'v25.10.3-251008' +hosted_version <- function()'v25.10.3-251021' diff --git a/R/plot_sankey.R b/R/plot_sankey.R index 41bd7624..07b70a2b 100644 --- a/R/plot_sankey.R +++ b/R/plot_sankey.R @@ -83,7 +83,7 @@ str_remove_last <- function(data, pattern = "\n") { #' mtcars |> #' default_parsing() |> #' plot_sankey("cyl", "gear", "vs", color.group = "pri") -plot_sankey <- function(data, pri, sec, ter = NULL, color.group = "pri", colors = NULL) { +plot_sankey <- function(data, pri, sec, ter = NULL, color.group = "pri", colors = NULL,missing.level="Missing") { if (!is.null(ter)) { ds <- split(data, data[ter]) } else { @@ -91,7 +91,7 @@ plot_sankey <- function(data, pri, sec, ter = NULL, color.group = "pri", colors } out <- lapply(ds, \(.ds){ - plot_sankey_single(.ds, pri = pri, sec = sec, color.group = color.group, colors = colors) + plot_sankey_single(.ds, pri = pri, sec = sec, color.group = color.group, colors = colors,missing.level=missing.level) }) patchwork::wrap_plots(out) @@ -120,14 +120,21 @@ plot_sankey <- function(data, pri, sec, ter = NULL, color.group = "pri", colors #' mtcars |> #' default_parsing() |> #' plot_sankey_single("cyl", "vs", color.group = "pri") -plot_sankey_single <- function(data, pri, sec, color.group = c("pri", "sec"), colors = NULL, ...) { +#' stRoke::trial |> +#' default_parsing() |> +#' plot_sankey_single("diabetes", "hypertension") +plot_sankey_single <- function(data, pri, sec, color.group = c("pri", "sec"), colors = NULL,missing.level="Missing", ...) { color.group <- match.arg(color.group) + browser() data_orig <- data data[c(pri, sec)] <- data[c(pri, sec)] |> - dplyr::mutate(dplyr::across(dplyr::where(is.factor), forcats::fct_drop)) + dplyr::mutate( + # dplyr::across(dplyr::where(is.logical), as.factor), + dplyr::across(dplyr::where(is.factor), forcats::fct_drop)#, + # dplyr::across(dplyr::where(is.factor), \(.x){forcats::fct_na_value_to_level(.x,missing.level)}) + ) - # browser() data <- data |> sankey_ready(pri = pri, sec = sec, ...) diff --git a/man/cut-variable.Rd b/man/cut-variable.Rd index 3b58888c..51600674 100644 --- a/man/cut-variable.Rd +++ b/man/cut-variable.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cut-variable-dates.R +% Please edit documentation in R/cut-variable-ext.R \name{cut-variable} \alias{cut-variable} \alias{cut_variable_ui} diff --git a/man/cut_var.Rd b/man/cut_var.Rd index c3226a6a..e1d41ec5 100644 --- a/man/cut_var.Rd +++ b/man/cut_var.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cut-variable-dates.R +% Please edit documentation in R/cut_var.R \name{cut_var} \alias{cut_var} \alias{cut_var.default} @@ -7,6 +7,7 @@ \alias{cut_var.POSIXt} \alias{cut_var.POSIXct} \alias{cut_var.Date} +\alias{cut_var.factor} \title{Extended cutting function with fall-back to the native base::cut} \usage{ cut_var(x, ...) @@ -34,17 +35,25 @@ cut_var(x, ...) ) \method{cut_var}{Date}(x, breaks = NULL, start.on.monday = TRUE, ...) + +\method{cut_var}{factor}(x, breaks = NULL, type = c("top", "bottom"), other = "Other", ...) } \arguments{ \item{x}{an object inheriting from class "POSIXct"} \item{...}{passed on} + +\item{type}{} } \value{ +factor + factor } \description{ Extended cutting function with fall-back to the native base::cut + +Simplify a factor to only the top or bottom n levels } \examples{ readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(2) @@ -57,9 +66,13 @@ readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA)) |> cut_ readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2) readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday") readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "month_only") -readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks=NULL,format = "\%A-\%H") -readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks=NULL,format = "\%W") +readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = NULL, format = "\%A-\%H") +readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = NULL, format = "\%W") as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2) as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday") as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(format = "\%W") +mtcars$carb |> + as.factor() |> + cut_var(2) |> + table() } diff --git a/man/data-plots.Rd b/man/data-plots.Rd index cf0cfff2..c77fa109 100644 --- a/man/data-plots.Rd +++ b/man/data-plots.Rd @@ -32,7 +32,15 @@ plot_ridge(data, x, y, z = NULL, ...) sankey_ready(data, pri, sec, numbers = "count", ...) -plot_sankey(data, pri, sec, ter = NULL, color.group = "pri", colors = NULL) +plot_sankey( + data, + pri, + sec, + ter = NULL, + color.group = "pri", + colors = NULL, + missing.level = "Missing" +) plot_scatter(data, pri, sec, ter = NULL) diff --git a/man/is_any_class.Rd b/man/is_any_class.Rd index d441653e..c655246e 100644 --- a/man/is_any_class.Rd +++ b/man/is_any_class.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cut-variable-dates.R +% Please edit documentation in R/cut_var.R \name{is_any_class} \alias{is_any_class} \title{Test class} diff --git a/man/is_datetime.Rd b/man/is_datetime.Rd index 02fb9948..30354d39 100644 --- a/man/is_datetime.Rd +++ b/man/is_datetime.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cut-variable-dates.R +% Please edit documentation in R/cut_var.R \name{is_datetime} \alias{is_datetime} \title{Test is date/datetime/time} diff --git a/man/plot_sankey_single.Rd b/man/plot_sankey_single.Rd index 5ba810b8..3ff729ac 100644 --- a/man/plot_sankey_single.Rd +++ b/man/plot_sankey_single.Rd @@ -10,6 +10,7 @@ plot_sankey_single( sec, color.group = c("pri", "sec"), colors = NULL, + missing.level = "Missing", ... ) } @@ -40,4 +41,7 @@ data.frame( mtcars |> default_parsing() |> plot_sankey_single("cyl", "vs", color.group = "pri") +stRoke::trial |> + default_parsing() |> + plot_sankey_single("diabetes", "hypertension") }