feat: new cut option to simplify factors to only the top N levels

This commit is contained in:
Andreas Gammelgaard Damsbo 2025-10-21 21:30:07 +03:00
parent a06177481b
commit 0c2b061708
No known key found for this signature in database
12 changed files with 337 additions and 236 deletions

View file

@ -99,7 +99,8 @@ Collate:
'correlations-module.R'
'create-column-mod.R'
'custom_SelectInput.R'
'cut-variable-dates.R'
'cut-variable-ext.R'
'cut_var.R'
'data-summary.R'
'data_plots.R'
'datagrid-infos-mod.R'

View file

@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand
S3method(cut_var,default)
S3method(cut_var,factor)
S3method(cut_var,hms)
S3method(plot,tbl_regression)
export(FreesearchR_palette)

View file

@ -1,188 +1,3 @@
#' Extended cutting function with fall-back to the native base::cut
#'
#' @param x an object inheriting from class "hms"
#' @param ... passed on
#'
#' @export
#' @name cut_var
cut_var <- function(x, ...) {
UseMethod("cut_var")
}
#' @export
#' @name cut_var
cut_var.default <- function(x, ...) {
base::cut(x, ...)
}
#' @name cut_var
#'
#' @return factor
#' @export
#'
#' @examples
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(2)
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var("min")
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(breaks = "hour")
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(breaks = hms::as_hms(c("01:00:00", "03:01:20", "9:20:20")))
#' d_t <- readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA))
#' f <- d_t |> cut_var(2)
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA)) |> cut_var(breaks = lubridate::as_datetime(c(hms::as_hms(levels(f)), hms::as_hms(max(d_t, na.rm = TRUE) + 1))), right = FALSE)
cut_var.hms <- function(x, breaks, ...) {
## as_hms keeps returning warnings on tz(); ignored
suppressWarnings({
if (hms::is_hms(breaks)) {
breaks <- lubridate::as_datetime(breaks)
}
x <- lubridate::as_datetime(x)
out <- cut_var.POSIXt(x, breaks = breaks, ...)
attr(out, which = "brks") <- hms::as_hms(lubridate::as_datetime(attr(out, which = "brks")))
attr(out, which = "levels") <- as.character(hms::as_hms(lubridate::as_datetime(attr(out, which = "levels"))))
})
out
}
#' @name cut_var
#' @param x an object inheriting from class "POSIXt" or "Date"
#'
#' @examples
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2)
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday")
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "month_only")
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks=NULL,format = "%A-%H")
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks=NULL,format = "%W")
cut_var.POSIXt <- function(x, breaks, right = FALSE, include.lowest = TRUE, start.on.monday = TRUE, ...) {
breaks_o <- breaks
args <- list(...)
# browser()
if (is.numeric(breaks)) {
breaks <- quantile(
x,
probs = seq(0, 1, 1 / breaks),
right = right,
include.lowest = include.lowest,
na.rm = TRUE
)
}
if ("format" %in% names(args)){
assertthat::assert_that(is.character(args$format))
out <- forcats::as_factor(format(x,format=args$format))
} else if (identical(breaks, "weekday")) {
## This is
ds <- as.Date(1:7) |>
(\(.x){
sort_by(format(.x,"%A"),as.numeric(format(.x,"%w")))
})()
if (start.on.monday) {
ds <- ds[c(7, 1:6)]
}
out <- factor(weekdays(x), levels = ds) |> forcats::fct_drop()
} else if (identical(breaks, "month_only")) {
## Simplest way to create a vector of all months in order
## which will also follow the locale of the machine
ms <- paste0("1970-", 1:12, "-01") |>
as.Date() |>
months()
out <- factor(months(x), levels = ms) |> forcats::fct_drop()
} else {
## Doesn't really work very well for breaks other than the special character cases as right border is excluded
out <- base::cut.POSIXt(x, breaks = breaks, right = right, ...) |> forcats::fct_drop()
# browser()
}
l <- levels(out)
if (is.numeric(breaks_o)) {
l <- breaks
} else if (is.character(breaks) && length(breaks) == 1 && !(identical(breaks, "weekday") | identical(breaks, "month_only"))) {
if (include.lowest) {
if (right) {
l <- c(l, min(as.character(x)))
} else {
l <- c(l, max(as.character(x)))
}
}
} else if (length(l) < length(breaks_o)) {
l <- breaks_o
}
attr(out, which = "brks") <- l
out
}
#' @name cut_var
#' @param x an object inheriting from class "POSIXct"
cut_var.POSIXct <- cut_var.POSIXt
#' @name cut_var
#' @param x an object inheriting from class "POSIXct"
#'
#' @examples
#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2)
#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday")
#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(format = "%W")
cut_var.Date <- function(x, breaks=NULL, start.on.monday = TRUE, ...) {
args <- list(...)
if ("format" %in% names(args)){
assertthat::assert_that(is.character(args$format))
out <- forcats::as_factor(format(x,format=args$format))
} else if (identical(breaks, "weekday")) {
ds <- as.Date(1:7) |>
(\(.x){
sort_by(format(.x,"%A"),as.numeric(format(.x,"%w")))
})()
if (start.on.monday) {
ds <- ds[c(7, 1:6)]
}
out <- factor(weekdays(x), levels = ds) |> forcats::fct_drop()
} else if (identical(breaks, "month_only")) {
ms <- paste0("1970-", 1:12, "-01") |>
as.Date() |>
months()
out <- factor(months(x), levels = ms) |> forcats::fct_drop()
} else {
## Doesn't really work very well for breaks other than the special character cases as right border is excluded
out <- base::cut.Date(x, breaks = breaks, ...) |> forcats::fct_drop()
# browser()
}
out
}
#' Test class
#'
#' @param data data
#' @param class.vec vector of class names to test
#'
#' @return factor
#' @export
#'
#' @examples
#' \dontrun{
#' vapply(REDCapCAST::redcapcast_data, \(.x){
#' is_any_class(.x, c("hms", "Date", "POSIXct", "POSIXt"))
#' }, logical(1))
#' }
is_any_class <- function(data, class.vec) {
any(class(data) %in% class.vec)
}
#' Test is date/datetime/time
#'
#' @param data data
#'
#' @return factor
#' @export
#'
#' @examples
#' vapply(REDCapCAST::redcapcast_data, is_datetime, logical(1))
is_datetime <- function(data) {
is_any_class(data, class.vec = c("hms", "Date", "POSIXct", "POSIXt"))
}
#' @title Module to Convert Numeric to Factor
#'
#' @description
@ -206,6 +21,7 @@ cut_variable_ui <- function(id) {
shiny::fluidRow(
column(
width = 3,
# shiny::uiOutput(outputId = ns("variable"))
shinyWidgets::virtualSelectInput(
inputId = ns("variable"),
label = i18n$t("Variable to cut:"),
@ -279,9 +95,20 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
data <- data_r()
rv$data <- data
vars_num <- vapply(data, \(.x){
is.numeric(.x) || is_datetime(.x)
is.numeric(.x) || is_datetime(.x) || (is.factor(.x) && length(levels(.x)) > 2)
}, logical(1))
vars_num <- names(vars_num)[vars_num]
# shiny::renderUI(
# columnSelectInput(
# inputId = "variable",
# data = data,
# label = i18n$t("Variable to cut:"),
# width = "100%",
# choices = vars_num,
# selected = if (isTruthy(input$variable)) input$variable else vars_num[1]
# ))
shinyWidgets::updateVirtualSelect(
inputId = "variable",
choices = vars_num,
@ -294,7 +121,7 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
variable <- req(input$variable)
req(hasName(data, variable))
if (is_datetime(data[[variable]])) {
if (is_datetime(data[[variable]]) || is.factor(data[[variable]])) {
brks <- cut_var(data[[variable]],
breaks = input$n_breaks
)$brks
@ -339,7 +166,7 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
# "quantile"
)
if (any(c("hms","POSIXct") %in% class(data[[variable]]))) {
if (any(c("hms", "POSIXct") %in% class(data[[variable]]))) {
choices <- c(choices, "hour")
} else if (any(c("POSIXt", "Date") %in% class(data[[variable]]))) {
choices <- c(
@ -353,11 +180,17 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
"quarter",
"year"
)
} else if ("factor" %in% class(data[[variable]])) {
choices <- c(
choices,
"top" # ,
# "bottom",
)
} else {
choices <- c(
choices,
"fixed",
"quantile",
"quantile" # ,
# "sd",
# "equal",
# "pretty",
@ -366,7 +199,7 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
# "bclust",
# "fisher",
# "jenks",
"headtails" # ,
# "headtails" # ,
# "maximum",
# "box"
)
@ -429,16 +262,23 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
# cut.POSIXct <- cut.POSIXt
f <- cut_var(data[[variable]], breaks = input$method)
list(var = f, brks = levels(f))
} else if (input$method %in% c(
"top",
"bottom"
)) {
# This allows factor simplification to get the top or bottom count
f <- cut_var(data[[variable]], breaks = input$n_breaks)
list(var = f, brks = input$n_breaks, type = input$method)
} else if (input$method %in% c("hour")) {
# To enable datetime cutting
# cut.POSIXct <- cut.POSIXt
f <- cut_var(data[[variable]], breaks = "hour")
list(var = f, brks = levels(f))
# } else if (input$method %in% c("week_only")) {
# # As a proof of concept a single option to use "format" parameter
# # https://www.stat.berkeley.edu/~s133/dates.html
# f <- cut_var(data[[variable]], format = "%W")
# list(var = f, brks = levels(f))
# } else if (input$method %in% c("week_only")) {
# # As a proof of concept a single option to use "format" parameter
# # https://www.stat.berkeley.edu/~s133/dates.html
# f <- cut_var(data[[variable]], format = "%W")
# list(var = f, brks = levels(f))
} else {
classInt::classIntervals(
var = as.numeric(data[[variable]]),
@ -462,7 +302,17 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
variable <- req(input$variable)
if (input$method %in% c("day", "weekday", "week", "month", "month_only", "quarter", "year", "hour")) {
if (input$method %in% c(
"day",
"weekday",
"week",
"month",
"month_only",
"quarter",
"year",
"hour"
)
) {
breaks <- input$method
} else {
breaks <- breaks_r()$brks
@ -475,47 +325,34 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
right = input$right
)
if ("type" %in% names(breaks_r())) {
parameters <- modifyList(parameters, list(type = breaks_r()$type))
}
new_variable <- tryCatch(
{
rlang::exec(cut_var, !!!parameters)
},
error = function(err) {
showNotification(paste0("We encountered the following error creating your report: ", err), type = "err")
showNotification(paste("We encountered the following error creating the new factor:", err), type = "err")
}
)
# new_variable <- do.call(
# cut,
# parameters
# )
data <- append_column(data, column = new_variable, name = paste0(variable, "_cut"), index = "right")
# setNames(paste0(variable, "_cut"))
#
# data <- dplyr::bind_cols(data, new_variable, .name_repair = "unique_quiet")
# rv$new_var_name <- names(data)[length(data)]
# browser()
# browser()
code <- rlang::call2(
"append_column",
!!!list(
column = rlang::call2("cut_var",
!!!modifyList(parameters, list(x = as.symbol(paste0("data$", variable)))),
.ns = "FreesearchR"),
!!!modifyList(parameters, list(x = as.symbol(paste0("data$", variable)))),
.ns = "FreesearchR"
),
name = paste0(variable, "_cut"), index = "right"
),
.ns = "FreesearchR"
)
attr(data, "code") <- code
# attr(data, "code") <- Reduce(
# f = function(x, y) expr(!!x %>% !!y),
# x = c(attr(data, "code"), code)
# )
data
})
@ -590,12 +427,11 @@ modal_cut_variable <- function(id,
#' @importFrom graphics abline axis hist par plot.new plot.window
plot_histogram <- function(data, column=NULL, bins = 30, breaks = NULL, color = "#112466") {
if (is.vector(data)){
plot_histogram <- function(data, column = NULL, bins = 30, breaks = NULL, color = "#112466") {
if (is.vector(data)) {
x <- data
} else {
x <- data[[column]]
x <- data[[column]]
}
x <- as.numeric(x)
op <- par(mar = rep(1.5, 4))
@ -610,4 +446,3 @@ plot_histogram <- function(data, column=NULL, bins = 30, breaks = NULL, color =
abline(v = breaks, col = "#FFFFFF", lty = 1, lwd = 1.5)
abline(v = breaks, col = "#2E2E2E", lty = 2, lwd = 1.5)
}

232
R/cut_var.R Normal file
View file

@ -0,0 +1,232 @@
#' Extended cutting function with fall-back to the native base::cut
#'
#' @param x an object inheriting from class "hms"
#' @param ... passed on
#'
#' @export
#' @name cut_var
cut_var <- function(x, ...) {
UseMethod("cut_var")
}
#' @export
#' @name cut_var
cut_var.default <- function(x, ...) {
base::cut(x, ...)
}
#' @name cut_var
#'
#' @return factor
#' @export
#'
#' @examples
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(2)
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var("min")
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(breaks = "hour")
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(breaks = hms::as_hms(c("01:00:00", "03:01:20", "9:20:20")))
#' d_t <- readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA))
#' f <- d_t |> cut_var(2)
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA)) |> cut_var(breaks = lubridate::as_datetime(c(hms::as_hms(levels(f)), hms::as_hms(max(d_t, na.rm = TRUE) + 1))), right = FALSE)
cut_var.hms <- function(x, breaks, ...) {
## as_hms keeps returning warnings on tz(); ignored
suppressWarnings({
if (hms::is_hms(breaks)) {
breaks <- lubridate::as_datetime(breaks)
}
x <- lubridate::as_datetime(x)
out <- cut_var.POSIXt(x, breaks = breaks, ...)
attr(out, which = "brks") <- hms::as_hms(lubridate::as_datetime(attr(out, which = "brks")))
attr(out, which = "levels") <- as.character(hms::as_hms(lubridate::as_datetime(attr(out, which = "levels"))))
})
out
}
#' @name cut_var
#' @param x an object inheriting from class "POSIXt" or "Date"
#'
#' @examples
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2)
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday")
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "month_only")
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = NULL, format = "%A-%H")
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = NULL, format = "%W")
cut_var.POSIXt <- function(x, breaks, right = FALSE, include.lowest = TRUE, start.on.monday = TRUE, ...) {
breaks_o <- breaks
args <- list(...)
# browser()
if (is.numeric(breaks)) {
breaks <- quantile(
x,
probs = seq(0, 1, 1 / breaks),
right = right,
include.lowest = include.lowest,
na.rm = TRUE
)
}
if ("format" %in% names(args)) {
assertthat::assert_that(is.character(args$format))
out <- forcats::as_factor(format(x, format = args$format))
} else if (identical(breaks, "weekday")) {
## This is
ds <- as.Date(1:7) |>
(\(.x){
sort_by(format(.x, "%A"), as.numeric(format(.x, "%w")))
})()
if (start.on.monday) {
ds <- ds[c(7, 1:6)]
}
out <- factor(weekdays(x), levels = ds) |> forcats::fct_drop()
} else if (identical(breaks, "month_only")) {
## Simplest way to create a vector of all months in order
## which will also follow the locale of the machine
ms <- paste0("1970-", 1:12, "-01") |>
as.Date() |>
months()
out <- factor(months(x), levels = ms) |> forcats::fct_drop()
} else {
## Doesn't really work very well for breaks other than the special character cases as right border is excluded
out <- base::cut.POSIXt(x, breaks = breaks, right = right, ...) |> forcats::fct_drop()
# browser()
}
l <- levels(out)
if (is.numeric(breaks_o)) {
l <- breaks
} else if (is.character(breaks) && length(breaks) == 1 && !(identical(breaks, "weekday") | identical(breaks, "month_only"))) {
if (include.lowest) {
if (right) {
l <- c(l, min(as.character(x)))
} else {
l <- c(l, max(as.character(x)))
}
}
} else if (length(l) < length(breaks_o)) {
l <- breaks_o
}
attr(out, which = "brks") <- l
out
}
#' @name cut_var
#' @param x an object inheriting from class "POSIXct"
cut_var.POSIXct <- cut_var.POSIXt
#' @name cut_var
#' @param x an object inheriting from class "POSIXct"
#'
#' @examples
#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2)
#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday")
#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(format = "%W")
cut_var.Date <- function(x, breaks = NULL, start.on.monday = TRUE, ...) {
args <- list(...)
if ("format" %in% names(args)) {
assertthat::assert_that(is.character(args$format))
out <- forcats::as_factor(format(x, format = args$format))
} else if (identical(breaks, "weekday")) {
ds <- as.Date(1:7) |>
(\(.x){
sort_by(format(.x, "%A"), as.numeric(format(.x, "%w")))
})()
if (start.on.monday) {
ds <- ds[c(7, 1:6)]
}
out <- factor(weekdays(x), levels = ds) |> forcats::fct_drop()
} else if (identical(breaks, "month_only")) {
ms <- paste0("1970-", 1:12, "-01") |>
as.Date() |>
months()
out <- factor(months(x), levels = ms) |> forcats::fct_drop()
} else {
## Doesn't really work very well for breaks other than the special character cases as right border is excluded
out <- base::cut.Date(x, breaks = breaks, ...) |> forcats::fct_drop()
# browser()
}
out
}
#' Simplify a factor to only the top or bottom n levels
#'
#' @param type
#'
#' @name cut_var
#'
#' @returns factor
#' @export
#'
#' @examples
#' mtcars$carb |>
#' as.factor() |>
#' cut_var(2) |>
#' table()
cut_var.factor <- function(x, breaks = NULL, type = c("top", "bottom"), other = "Other", ...) {
args <- list(...)
if (is.null(breaks)) {
return(x)
}
type <- match.arg(type)
if (type == "top") {
lvls <- names(sort(table(x), decreasing = TRUE)[seq_len(breaks)])
} else if (type == "bottom") {
lvls <- names(sort(table(x), decreasing = FALSE)[seq_len(breaks)])
}
if (other %in% lvls) {
other <- paste(other, "_freesearchr")
}
## Relabel and relevel
out <- forcats::fct_relabel(
x,
\(.x){
ifelse(.x %in% lvls, .x, other)
}
) |>
forcats::fct_relevel(lvls,other)
attr(out, which = "brks") <- breaks
out
}
#' Test class
#'
#' @param data data
#' @param class.vec vector of class names to test
#'
#' @return factor
#' @export
#'
#' @examples
#' \dontrun{
#' vapply(REDCapCAST::redcapcast_data, \(.x){
#' is_any_class(.x, c("hms", "Date", "POSIXct", "POSIXt"))
#' }, logical(1))
#' }
is_any_class <- function(data, class.vec) {
any(class(data) %in% class.vec)
}
#' Test is date/datetime/time
#'
#' @param data data
#'
#' @return factor
#' @export
#'
#' @examples
#' vapply(REDCapCAST::redcapcast_data, is_datetime, logical(1))
is_datetime <- function(data) {
is_any_class(data, class.vec = c("hms", "Date", "POSIXct", "POSIXt"))
}

View file

@ -1 +1 @@
hosted_version <- function()'v25.10.3-251008'
hosted_version <- function()'v25.10.3-251021'

View file

@ -83,7 +83,7 @@ str_remove_last <- function(data, pattern = "\n") {
#' mtcars |>
#' default_parsing() |>
#' plot_sankey("cyl", "gear", "vs", color.group = "pri")
plot_sankey <- function(data, pri, sec, ter = NULL, color.group = "pri", colors = NULL) {
plot_sankey <- function(data, pri, sec, ter = NULL, color.group = "pri", colors = NULL,missing.level="Missing") {
if (!is.null(ter)) {
ds <- split(data, data[ter])
} else {
@ -91,7 +91,7 @@ plot_sankey <- function(data, pri, sec, ter = NULL, color.group = "pri", colors
}
out <- lapply(ds, \(.ds){
plot_sankey_single(.ds, pri = pri, sec = sec, color.group = color.group, colors = colors)
plot_sankey_single(.ds, pri = pri, sec = sec, color.group = color.group, colors = colors,missing.level=missing.level)
})
patchwork::wrap_plots(out)
@ -120,14 +120,21 @@ plot_sankey <- function(data, pri, sec, ter = NULL, color.group = "pri", colors
#' mtcars |>
#' default_parsing() |>
#' plot_sankey_single("cyl", "vs", color.group = "pri")
plot_sankey_single <- function(data, pri, sec, color.group = c("pri", "sec"), colors = NULL, ...) {
#' stRoke::trial |>
#' default_parsing() |>
#' plot_sankey_single("diabetes", "hypertension")
plot_sankey_single <- function(data, pri, sec, color.group = c("pri", "sec"), colors = NULL,missing.level="Missing", ...) {
color.group <- match.arg(color.group)
browser()
data_orig <- data
data[c(pri, sec)] <- data[c(pri, sec)] |>
dplyr::mutate(dplyr::across(dplyr::where(is.factor), forcats::fct_drop))
dplyr::mutate(
# dplyr::across(dplyr::where(is.logical), as.factor),
dplyr::across(dplyr::where(is.factor), forcats::fct_drop)#,
# dplyr::across(dplyr::where(is.factor), \(.x){forcats::fct_na_value_to_level(.x,missing.level)})
)
# browser()
data <- data |> sankey_ready(pri = pri, sec = sec, ...)

View file

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cut-variable-dates.R
% Please edit documentation in R/cut-variable-ext.R
\name{cut-variable}
\alias{cut-variable}
\alias{cut_variable_ui}

View file

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cut-variable-dates.R
% Please edit documentation in R/cut_var.R
\name{cut_var}
\alias{cut_var}
\alias{cut_var.default}
@ -7,6 +7,7 @@
\alias{cut_var.POSIXt}
\alias{cut_var.POSIXct}
\alias{cut_var.Date}
\alias{cut_var.factor}
\title{Extended cutting function with fall-back to the native base::cut}
\usage{
cut_var(x, ...)
@ -34,17 +35,25 @@ cut_var(x, ...)
)
\method{cut_var}{Date}(x, breaks = NULL, start.on.monday = TRUE, ...)
\method{cut_var}{factor}(x, breaks = NULL, type = c("top", "bottom"), other = "Other", ...)
}
\arguments{
\item{x}{an object inheriting from class "POSIXct"}
\item{...}{passed on}
\item{type}{}
}
\value{
factor
factor
}
\description{
Extended cutting function with fall-back to the native base::cut
Simplify a factor to only the top or bottom n levels
}
\examples{
readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(2)
@ -57,9 +66,13 @@ readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA)) |> cut_
readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2)
readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday")
readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "month_only")
readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks=NULL,format = "\%A-\%H")
readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks=NULL,format = "\%W")
readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = NULL, format = "\%A-\%H")
readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = NULL, format = "\%W")
as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2)
as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday")
as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(format = "\%W")
mtcars$carb |>
as.factor() |>
cut_var(2) |>
table()
}

View file

@ -32,7 +32,15 @@ plot_ridge(data, x, y, z = NULL, ...)
sankey_ready(data, pri, sec, numbers = "count", ...)
plot_sankey(data, pri, sec, ter = NULL, color.group = "pri", colors = NULL)
plot_sankey(
data,
pri,
sec,
ter = NULL,
color.group = "pri",
colors = NULL,
missing.level = "Missing"
)
plot_scatter(data, pri, sec, ter = NULL)

View file

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cut-variable-dates.R
% Please edit documentation in R/cut_var.R
\name{is_any_class}
\alias{is_any_class}
\title{Test class}

View file

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cut-variable-dates.R
% Please edit documentation in R/cut_var.R
\name{is_datetime}
\alias{is_datetime}
\title{Test is date/datetime/time}

View file

@ -10,6 +10,7 @@ plot_sankey_single(
sec,
color.group = c("pri", "sec"),
colors = NULL,
missing.level = "Missing",
...
)
}
@ -40,4 +41,7 @@ data.frame(
mtcars |>
default_parsing() |>
plot_sankey_single("cyl", "vs", color.group = "pri")
stRoke::trial |>
default_parsing() |>
plot_sankey_single("diabetes", "hypertension")
}