mirror of
https://github.com/agdamsbo/FreesearchR.git
synced 2025-12-15 17:12:09 +01:00
feat: new cut option to simplify factors to only the top N levels
This commit is contained in:
parent
a06177481b
commit
0c2b061708
12 changed files with 337 additions and 236 deletions
|
|
@ -99,7 +99,8 @@ Collate:
|
|||
'correlations-module.R'
|
||||
'create-column-mod.R'
|
||||
'custom_SelectInput.R'
|
||||
'cut-variable-dates.R'
|
||||
'cut-variable-ext.R'
|
||||
'cut_var.R'
|
||||
'data-summary.R'
|
||||
'data_plots.R'
|
||||
'datagrid-infos-mod.R'
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
# Generated by roxygen2: do not edit by hand
|
||||
|
||||
S3method(cut_var,default)
|
||||
S3method(cut_var,factor)
|
||||
S3method(cut_var,hms)
|
||||
S3method(plot,tbl_regression)
|
||||
export(FreesearchR_palette)
|
||||
|
|
|
|||
|
|
@ -1,188 +1,3 @@
|
|||
#' Extended cutting function with fall-back to the native base::cut
|
||||
#'
|
||||
#' @param x an object inheriting from class "hms"
|
||||
#' @param ... passed on
|
||||
#'
|
||||
#' @export
|
||||
#' @name cut_var
|
||||
cut_var <- function(x, ...) {
|
||||
UseMethod("cut_var")
|
||||
}
|
||||
|
||||
#' @export
|
||||
#' @name cut_var
|
||||
cut_var.default <- function(x, ...) {
|
||||
base::cut(x, ...)
|
||||
}
|
||||
|
||||
#' @name cut_var
|
||||
#'
|
||||
#' @return factor
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(2)
|
||||
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var("min")
|
||||
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(breaks = "hour")
|
||||
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(breaks = hms::as_hms(c("01:00:00", "03:01:20", "9:20:20")))
|
||||
#' d_t <- readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA))
|
||||
#' f <- d_t |> cut_var(2)
|
||||
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA)) |> cut_var(breaks = lubridate::as_datetime(c(hms::as_hms(levels(f)), hms::as_hms(max(d_t, na.rm = TRUE) + 1))), right = FALSE)
|
||||
cut_var.hms <- function(x, breaks, ...) {
|
||||
## as_hms keeps returning warnings on tz(); ignored
|
||||
suppressWarnings({
|
||||
if (hms::is_hms(breaks)) {
|
||||
breaks <- lubridate::as_datetime(breaks)
|
||||
}
|
||||
x <- lubridate::as_datetime(x)
|
||||
out <- cut_var.POSIXt(x, breaks = breaks, ...)
|
||||
attr(out, which = "brks") <- hms::as_hms(lubridate::as_datetime(attr(out, which = "brks")))
|
||||
attr(out, which = "levels") <- as.character(hms::as_hms(lubridate::as_datetime(attr(out, which = "levels"))))
|
||||
})
|
||||
out
|
||||
}
|
||||
|
||||
#' @name cut_var
|
||||
#' @param x an object inheriting from class "POSIXt" or "Date"
|
||||
#'
|
||||
#' @examples
|
||||
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2)
|
||||
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday")
|
||||
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "month_only")
|
||||
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks=NULL,format = "%A-%H")
|
||||
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks=NULL,format = "%W")
|
||||
cut_var.POSIXt <- function(x, breaks, right = FALSE, include.lowest = TRUE, start.on.monday = TRUE, ...) {
|
||||
breaks_o <- breaks
|
||||
args <- list(...)
|
||||
# browser()
|
||||
if (is.numeric(breaks)) {
|
||||
breaks <- quantile(
|
||||
x,
|
||||
probs = seq(0, 1, 1 / breaks),
|
||||
right = right,
|
||||
include.lowest = include.lowest,
|
||||
na.rm = TRUE
|
||||
)
|
||||
}
|
||||
|
||||
if ("format" %in% names(args)){
|
||||
assertthat::assert_that(is.character(args$format))
|
||||
out <- forcats::as_factor(format(x,format=args$format))
|
||||
} else if (identical(breaks, "weekday")) {
|
||||
## This is
|
||||
ds <- as.Date(1:7) |>
|
||||
(\(.x){
|
||||
sort_by(format(.x,"%A"),as.numeric(format(.x,"%w")))
|
||||
})()
|
||||
|
||||
if (start.on.monday) {
|
||||
ds <- ds[c(7, 1:6)]
|
||||
}
|
||||
out <- factor(weekdays(x), levels = ds) |> forcats::fct_drop()
|
||||
} else if (identical(breaks, "month_only")) {
|
||||
## Simplest way to create a vector of all months in order
|
||||
## which will also follow the locale of the machine
|
||||
ms <- paste0("1970-", 1:12, "-01") |>
|
||||
as.Date() |>
|
||||
months()
|
||||
|
||||
out <- factor(months(x), levels = ms) |> forcats::fct_drop()
|
||||
} else {
|
||||
## Doesn't really work very well for breaks other than the special character cases as right border is excluded
|
||||
out <- base::cut.POSIXt(x, breaks = breaks, right = right, ...) |> forcats::fct_drop()
|
||||
# browser()
|
||||
}
|
||||
l <- levels(out)
|
||||
if (is.numeric(breaks_o)) {
|
||||
l <- breaks
|
||||
} else if (is.character(breaks) && length(breaks) == 1 && !(identical(breaks, "weekday") | identical(breaks, "month_only"))) {
|
||||
if (include.lowest) {
|
||||
if (right) {
|
||||
l <- c(l, min(as.character(x)))
|
||||
} else {
|
||||
l <- c(l, max(as.character(x)))
|
||||
}
|
||||
}
|
||||
} else if (length(l) < length(breaks_o)) {
|
||||
l <- breaks_o
|
||||
}
|
||||
|
||||
attr(out, which = "brks") <- l
|
||||
out
|
||||
}
|
||||
|
||||
#' @name cut_var
|
||||
#' @param x an object inheriting from class "POSIXct"
|
||||
cut_var.POSIXct <- cut_var.POSIXt
|
||||
|
||||
#' @name cut_var
|
||||
#' @param x an object inheriting from class "POSIXct"
|
||||
#'
|
||||
#' @examples
|
||||
#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2)
|
||||
#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday")
|
||||
#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(format = "%W")
|
||||
cut_var.Date <- function(x, breaks=NULL, start.on.monday = TRUE, ...) {
|
||||
args <- list(...)
|
||||
|
||||
if ("format" %in% names(args)){
|
||||
assertthat::assert_that(is.character(args$format))
|
||||
out <- forcats::as_factor(format(x,format=args$format))
|
||||
} else if (identical(breaks, "weekday")) {
|
||||
ds <- as.Date(1:7) |>
|
||||
(\(.x){
|
||||
sort_by(format(.x,"%A"),as.numeric(format(.x,"%w")))
|
||||
})()
|
||||
|
||||
if (start.on.monday) {
|
||||
ds <- ds[c(7, 1:6)]
|
||||
}
|
||||
out <- factor(weekdays(x), levels = ds) |> forcats::fct_drop()
|
||||
} else if (identical(breaks, "month_only")) {
|
||||
ms <- paste0("1970-", 1:12, "-01") |>
|
||||
as.Date() |>
|
||||
months()
|
||||
|
||||
out <- factor(months(x), levels = ms) |> forcats::fct_drop()
|
||||
} else {
|
||||
## Doesn't really work very well for breaks other than the special character cases as right border is excluded
|
||||
out <- base::cut.Date(x, breaks = breaks, ...) |> forcats::fct_drop()
|
||||
# browser()
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
#' Test class
|
||||
#'
|
||||
#' @param data data
|
||||
#' @param class.vec vector of class names to test
|
||||
#'
|
||||
#' @return factor
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
#' \dontrun{
|
||||
#' vapply(REDCapCAST::redcapcast_data, \(.x){
|
||||
#' is_any_class(.x, c("hms", "Date", "POSIXct", "POSIXt"))
|
||||
#' }, logical(1))
|
||||
#' }
|
||||
is_any_class <- function(data, class.vec) {
|
||||
any(class(data) %in% class.vec)
|
||||
}
|
||||
|
||||
#' Test is date/datetime/time
|
||||
#'
|
||||
#' @param data data
|
||||
#'
|
||||
#' @return factor
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
#' vapply(REDCapCAST::redcapcast_data, is_datetime, logical(1))
|
||||
is_datetime <- function(data) {
|
||||
is_any_class(data, class.vec = c("hms", "Date", "POSIXct", "POSIXt"))
|
||||
}
|
||||
|
||||
#' @title Module to Convert Numeric to Factor
|
||||
#'
|
||||
#' @description
|
||||
|
|
@ -206,6 +21,7 @@ cut_variable_ui <- function(id) {
|
|||
shiny::fluidRow(
|
||||
column(
|
||||
width = 3,
|
||||
# shiny::uiOutput(outputId = ns("variable"))
|
||||
shinyWidgets::virtualSelectInput(
|
||||
inputId = ns("variable"),
|
||||
label = i18n$t("Variable to cut:"),
|
||||
|
|
@ -279,9 +95,20 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
|
|||
data <- data_r()
|
||||
rv$data <- data
|
||||
vars_num <- vapply(data, \(.x){
|
||||
is.numeric(.x) || is_datetime(.x)
|
||||
is.numeric(.x) || is_datetime(.x) || (is.factor(.x) && length(levels(.x)) > 2)
|
||||
}, logical(1))
|
||||
vars_num <- names(vars_num)[vars_num]
|
||||
|
||||
# shiny::renderUI(
|
||||
# columnSelectInput(
|
||||
# inputId = "variable",
|
||||
# data = data,
|
||||
# label = i18n$t("Variable to cut:"),
|
||||
# width = "100%",
|
||||
# choices = vars_num,
|
||||
# selected = if (isTruthy(input$variable)) input$variable else vars_num[1]
|
||||
# ))
|
||||
|
||||
shinyWidgets::updateVirtualSelect(
|
||||
inputId = "variable",
|
||||
choices = vars_num,
|
||||
|
|
@ -294,7 +121,7 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
|
|||
variable <- req(input$variable)
|
||||
req(hasName(data, variable))
|
||||
|
||||
if (is_datetime(data[[variable]])) {
|
||||
if (is_datetime(data[[variable]]) || is.factor(data[[variable]])) {
|
||||
brks <- cut_var(data[[variable]],
|
||||
breaks = input$n_breaks
|
||||
)$brks
|
||||
|
|
@ -339,7 +166,7 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
|
|||
# "quantile"
|
||||
)
|
||||
|
||||
if (any(c("hms","POSIXct") %in% class(data[[variable]]))) {
|
||||
if (any(c("hms", "POSIXct") %in% class(data[[variable]]))) {
|
||||
choices <- c(choices, "hour")
|
||||
} else if (any(c("POSIXt", "Date") %in% class(data[[variable]]))) {
|
||||
choices <- c(
|
||||
|
|
@ -353,11 +180,17 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
|
|||
"quarter",
|
||||
"year"
|
||||
)
|
||||
} else if ("factor" %in% class(data[[variable]])) {
|
||||
choices <- c(
|
||||
choices,
|
||||
"top" # ,
|
||||
# "bottom",
|
||||
)
|
||||
} else {
|
||||
choices <- c(
|
||||
choices,
|
||||
"fixed",
|
||||
"quantile",
|
||||
"quantile" # ,
|
||||
# "sd",
|
||||
# "equal",
|
||||
# "pretty",
|
||||
|
|
@ -366,7 +199,7 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
|
|||
# "bclust",
|
||||
# "fisher",
|
||||
# "jenks",
|
||||
"headtails" # ,
|
||||
# "headtails" # ,
|
||||
# "maximum",
|
||||
# "box"
|
||||
)
|
||||
|
|
@ -429,16 +262,23 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
|
|||
# cut.POSIXct <- cut.POSIXt
|
||||
f <- cut_var(data[[variable]], breaks = input$method)
|
||||
list(var = f, brks = levels(f))
|
||||
} else if (input$method %in% c(
|
||||
"top",
|
||||
"bottom"
|
||||
)) {
|
||||
# This allows factor simplification to get the top or bottom count
|
||||
f <- cut_var(data[[variable]], breaks = input$n_breaks)
|
||||
list(var = f, brks = input$n_breaks, type = input$method)
|
||||
} else if (input$method %in% c("hour")) {
|
||||
# To enable datetime cutting
|
||||
# cut.POSIXct <- cut.POSIXt
|
||||
f <- cut_var(data[[variable]], breaks = "hour")
|
||||
list(var = f, brks = levels(f))
|
||||
# } else if (input$method %in% c("week_only")) {
|
||||
# # As a proof of concept a single option to use "format" parameter
|
||||
# # https://www.stat.berkeley.edu/~s133/dates.html
|
||||
# f <- cut_var(data[[variable]], format = "%W")
|
||||
# list(var = f, brks = levels(f))
|
||||
# } else if (input$method %in% c("week_only")) {
|
||||
# # As a proof of concept a single option to use "format" parameter
|
||||
# # https://www.stat.berkeley.edu/~s133/dates.html
|
||||
# f <- cut_var(data[[variable]], format = "%W")
|
||||
# list(var = f, brks = levels(f))
|
||||
} else {
|
||||
classInt::classIntervals(
|
||||
var = as.numeric(data[[variable]]),
|
||||
|
|
@ -462,7 +302,17 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
|
|||
variable <- req(input$variable)
|
||||
|
||||
|
||||
if (input$method %in% c("day", "weekday", "week", "month", "month_only", "quarter", "year", "hour")) {
|
||||
if (input$method %in% c(
|
||||
"day",
|
||||
"weekday",
|
||||
"week",
|
||||
"month",
|
||||
"month_only",
|
||||
"quarter",
|
||||
"year",
|
||||
"hour"
|
||||
)
|
||||
) {
|
||||
breaks <- input$method
|
||||
} else {
|
||||
breaks <- breaks_r()$brks
|
||||
|
|
@ -475,47 +325,34 @@ cut_variable_server <- function(id, data_r = reactive(NULL)) {
|
|||
right = input$right
|
||||
)
|
||||
|
||||
if ("type" %in% names(breaks_r())) {
|
||||
parameters <- modifyList(parameters, list(type = breaks_r()$type))
|
||||
}
|
||||
|
||||
new_variable <- tryCatch(
|
||||
{
|
||||
rlang::exec(cut_var, !!!parameters)
|
||||
},
|
||||
error = function(err) {
|
||||
showNotification(paste0("We encountered the following error creating your report: ", err), type = "err")
|
||||
showNotification(paste("We encountered the following error creating the new factor:", err), type = "err")
|
||||
}
|
||||
)
|
||||
|
||||
# new_variable <- do.call(
|
||||
# cut,
|
||||
# parameters
|
||||
# )
|
||||
|
||||
|
||||
data <- append_column(data, column = new_variable, name = paste0(variable, "_cut"), index = "right")
|
||||
|
||||
# setNames(paste0(variable, "_cut"))
|
||||
#
|
||||
# data <- dplyr::bind_cols(data, new_variable, .name_repair = "unique_quiet")
|
||||
|
||||
# rv$new_var_name <- names(data)[length(data)]
|
||||
# browser()
|
||||
|
||||
# browser()
|
||||
code <- rlang::call2(
|
||||
"append_column",
|
||||
!!!list(
|
||||
column = rlang::call2("cut_var",
|
||||
!!!modifyList(parameters, list(x = as.symbol(paste0("data$", variable)))),
|
||||
.ns = "FreesearchR"),
|
||||
!!!modifyList(parameters, list(x = as.symbol(paste0("data$", variable)))),
|
||||
.ns = "FreesearchR"
|
||||
),
|
||||
name = paste0(variable, "_cut"), index = "right"
|
||||
),
|
||||
.ns = "FreesearchR"
|
||||
)
|
||||
attr(data, "code") <- code
|
||||
|
||||
# attr(data, "code") <- Reduce(
|
||||
# f = function(x, y) expr(!!x %>% !!y),
|
||||
# x = c(attr(data, "code"), code)
|
||||
# )
|
||||
data
|
||||
})
|
||||
|
||||
|
|
@ -590,12 +427,11 @@ modal_cut_variable <- function(id,
|
|||
|
||||
|
||||
#' @importFrom graphics abline axis hist par plot.new plot.window
|
||||
plot_histogram <- function(data, column=NULL, bins = 30, breaks = NULL, color = "#112466") {
|
||||
if (is.vector(data)){
|
||||
plot_histogram <- function(data, column = NULL, bins = 30, breaks = NULL, color = "#112466") {
|
||||
if (is.vector(data)) {
|
||||
x <- data
|
||||
} else {
|
||||
x <- data[[column]]
|
||||
|
||||
x <- data[[column]]
|
||||
}
|
||||
x <- as.numeric(x)
|
||||
op <- par(mar = rep(1.5, 4))
|
||||
|
|
@ -610,4 +446,3 @@ plot_histogram <- function(data, column=NULL, bins = 30, breaks = NULL, color =
|
|||
abline(v = breaks, col = "#FFFFFF", lty = 1, lwd = 1.5)
|
||||
abline(v = breaks, col = "#2E2E2E", lty = 2, lwd = 1.5)
|
||||
}
|
||||
|
||||
232
R/cut_var.R
Normal file
232
R/cut_var.R
Normal file
|
|
@ -0,0 +1,232 @@
|
|||
#' Extended cutting function with fall-back to the native base::cut
|
||||
#'
|
||||
#' @param x an object inheriting from class "hms"
|
||||
#' @param ... passed on
|
||||
#'
|
||||
#' @export
|
||||
#' @name cut_var
|
||||
cut_var <- function(x, ...) {
|
||||
UseMethod("cut_var")
|
||||
}
|
||||
|
||||
#' @export
|
||||
#' @name cut_var
|
||||
cut_var.default <- function(x, ...) {
|
||||
base::cut(x, ...)
|
||||
}
|
||||
|
||||
#' @name cut_var
|
||||
#'
|
||||
#' @return factor
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(2)
|
||||
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var("min")
|
||||
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(breaks = "hour")
|
||||
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(breaks = hms::as_hms(c("01:00:00", "03:01:20", "9:20:20")))
|
||||
#' d_t <- readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA))
|
||||
#' f <- d_t |> cut_var(2)
|
||||
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA)) |> cut_var(breaks = lubridate::as_datetime(c(hms::as_hms(levels(f)), hms::as_hms(max(d_t, na.rm = TRUE) + 1))), right = FALSE)
|
||||
cut_var.hms <- function(x, breaks, ...) {
|
||||
## as_hms keeps returning warnings on tz(); ignored
|
||||
suppressWarnings({
|
||||
if (hms::is_hms(breaks)) {
|
||||
breaks <- lubridate::as_datetime(breaks)
|
||||
}
|
||||
x <- lubridate::as_datetime(x)
|
||||
out <- cut_var.POSIXt(x, breaks = breaks, ...)
|
||||
attr(out, which = "brks") <- hms::as_hms(lubridate::as_datetime(attr(out, which = "brks")))
|
||||
attr(out, which = "levels") <- as.character(hms::as_hms(lubridate::as_datetime(attr(out, which = "levels"))))
|
||||
})
|
||||
out
|
||||
}
|
||||
|
||||
#' @name cut_var
|
||||
#' @param x an object inheriting from class "POSIXt" or "Date"
|
||||
#'
|
||||
#' @examples
|
||||
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2)
|
||||
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday")
|
||||
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "month_only")
|
||||
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = NULL, format = "%A-%H")
|
||||
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = NULL, format = "%W")
|
||||
cut_var.POSIXt <- function(x, breaks, right = FALSE, include.lowest = TRUE, start.on.monday = TRUE, ...) {
|
||||
breaks_o <- breaks
|
||||
args <- list(...)
|
||||
# browser()
|
||||
if (is.numeric(breaks)) {
|
||||
breaks <- quantile(
|
||||
x,
|
||||
probs = seq(0, 1, 1 / breaks),
|
||||
right = right,
|
||||
include.lowest = include.lowest,
|
||||
na.rm = TRUE
|
||||
)
|
||||
}
|
||||
|
||||
if ("format" %in% names(args)) {
|
||||
assertthat::assert_that(is.character(args$format))
|
||||
out <- forcats::as_factor(format(x, format = args$format))
|
||||
} else if (identical(breaks, "weekday")) {
|
||||
## This is
|
||||
ds <- as.Date(1:7) |>
|
||||
(\(.x){
|
||||
sort_by(format(.x, "%A"), as.numeric(format(.x, "%w")))
|
||||
})()
|
||||
|
||||
if (start.on.monday) {
|
||||
ds <- ds[c(7, 1:6)]
|
||||
}
|
||||
out <- factor(weekdays(x), levels = ds) |> forcats::fct_drop()
|
||||
} else if (identical(breaks, "month_only")) {
|
||||
## Simplest way to create a vector of all months in order
|
||||
## which will also follow the locale of the machine
|
||||
ms <- paste0("1970-", 1:12, "-01") |>
|
||||
as.Date() |>
|
||||
months()
|
||||
|
||||
out <- factor(months(x), levels = ms) |> forcats::fct_drop()
|
||||
} else {
|
||||
## Doesn't really work very well for breaks other than the special character cases as right border is excluded
|
||||
out <- base::cut.POSIXt(x, breaks = breaks, right = right, ...) |> forcats::fct_drop()
|
||||
# browser()
|
||||
}
|
||||
l <- levels(out)
|
||||
if (is.numeric(breaks_o)) {
|
||||
l <- breaks
|
||||
} else if (is.character(breaks) && length(breaks) == 1 && !(identical(breaks, "weekday") | identical(breaks, "month_only"))) {
|
||||
if (include.lowest) {
|
||||
if (right) {
|
||||
l <- c(l, min(as.character(x)))
|
||||
} else {
|
||||
l <- c(l, max(as.character(x)))
|
||||
}
|
||||
}
|
||||
} else if (length(l) < length(breaks_o)) {
|
||||
l <- breaks_o
|
||||
}
|
||||
|
||||
attr(out, which = "brks") <- l
|
||||
out
|
||||
}
|
||||
|
||||
#' @name cut_var
|
||||
#' @param x an object inheriting from class "POSIXct"
|
||||
cut_var.POSIXct <- cut_var.POSIXt
|
||||
|
||||
#' @name cut_var
|
||||
#' @param x an object inheriting from class "POSIXct"
|
||||
#'
|
||||
#' @examples
|
||||
#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2)
|
||||
#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday")
|
||||
#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(format = "%W")
|
||||
cut_var.Date <- function(x, breaks = NULL, start.on.monday = TRUE, ...) {
|
||||
args <- list(...)
|
||||
|
||||
if ("format" %in% names(args)) {
|
||||
assertthat::assert_that(is.character(args$format))
|
||||
out <- forcats::as_factor(format(x, format = args$format))
|
||||
} else if (identical(breaks, "weekday")) {
|
||||
ds <- as.Date(1:7) |>
|
||||
(\(.x){
|
||||
sort_by(format(.x, "%A"), as.numeric(format(.x, "%w")))
|
||||
})()
|
||||
|
||||
if (start.on.monday) {
|
||||
ds <- ds[c(7, 1:6)]
|
||||
}
|
||||
out <- factor(weekdays(x), levels = ds) |> forcats::fct_drop()
|
||||
} else if (identical(breaks, "month_only")) {
|
||||
ms <- paste0("1970-", 1:12, "-01") |>
|
||||
as.Date() |>
|
||||
months()
|
||||
|
||||
out <- factor(months(x), levels = ms) |> forcats::fct_drop()
|
||||
} else {
|
||||
## Doesn't really work very well for breaks other than the special character cases as right border is excluded
|
||||
out <- base::cut.Date(x, breaks = breaks, ...) |> forcats::fct_drop()
|
||||
# browser()
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
|
||||
#' Simplify a factor to only the top or bottom n levels
|
||||
#'
|
||||
#' @param type
|
||||
#'
|
||||
#' @name cut_var
|
||||
#'
|
||||
#' @returns factor
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
#' mtcars$carb |>
|
||||
#' as.factor() |>
|
||||
#' cut_var(2) |>
|
||||
#' table()
|
||||
cut_var.factor <- function(x, breaks = NULL, type = c("top", "bottom"), other = "Other", ...) {
|
||||
args <- list(...)
|
||||
|
||||
if (is.null(breaks)) {
|
||||
return(x)
|
||||
}
|
||||
|
||||
type <- match.arg(type)
|
||||
|
||||
if (type == "top") {
|
||||
lvls <- names(sort(table(x), decreasing = TRUE)[seq_len(breaks)])
|
||||
} else if (type == "bottom") {
|
||||
lvls <- names(sort(table(x), decreasing = FALSE)[seq_len(breaks)])
|
||||
}
|
||||
|
||||
if (other %in% lvls) {
|
||||
other <- paste(other, "_freesearchr")
|
||||
}
|
||||
|
||||
## Relabel and relevel
|
||||
out <- forcats::fct_relabel(
|
||||
x,
|
||||
\(.x){
|
||||
ifelse(.x %in% lvls, .x, other)
|
||||
}
|
||||
) |>
|
||||
forcats::fct_relevel(lvls,other)
|
||||
|
||||
attr(out, which = "brks") <- breaks
|
||||
out
|
||||
}
|
||||
|
||||
|
||||
#' Test class
|
||||
#'
|
||||
#' @param data data
|
||||
#' @param class.vec vector of class names to test
|
||||
#'
|
||||
#' @return factor
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
#' \dontrun{
|
||||
#' vapply(REDCapCAST::redcapcast_data, \(.x){
|
||||
#' is_any_class(.x, c("hms", "Date", "POSIXct", "POSIXt"))
|
||||
#' }, logical(1))
|
||||
#' }
|
||||
is_any_class <- function(data, class.vec) {
|
||||
any(class(data) %in% class.vec)
|
||||
}
|
||||
|
||||
#' Test is date/datetime/time
|
||||
#'
|
||||
#' @param data data
|
||||
#'
|
||||
#' @return factor
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
#' vapply(REDCapCAST::redcapcast_data, is_datetime, logical(1))
|
||||
is_datetime <- function(data) {
|
||||
is_any_class(data, class.vec = c("hms", "Date", "POSIXct", "POSIXt"))
|
||||
}
|
||||
|
|
@ -1 +1 @@
|
|||
hosted_version <- function()'v25.10.3-251008'
|
||||
hosted_version <- function()'v25.10.3-251021'
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ str_remove_last <- function(data, pattern = "\n") {
|
|||
#' mtcars |>
|
||||
#' default_parsing() |>
|
||||
#' plot_sankey("cyl", "gear", "vs", color.group = "pri")
|
||||
plot_sankey <- function(data, pri, sec, ter = NULL, color.group = "pri", colors = NULL) {
|
||||
plot_sankey <- function(data, pri, sec, ter = NULL, color.group = "pri", colors = NULL,missing.level="Missing") {
|
||||
if (!is.null(ter)) {
|
||||
ds <- split(data, data[ter])
|
||||
} else {
|
||||
|
|
@ -91,7 +91,7 @@ plot_sankey <- function(data, pri, sec, ter = NULL, color.group = "pri", colors
|
|||
}
|
||||
|
||||
out <- lapply(ds, \(.ds){
|
||||
plot_sankey_single(.ds, pri = pri, sec = sec, color.group = color.group, colors = colors)
|
||||
plot_sankey_single(.ds, pri = pri, sec = sec, color.group = color.group, colors = colors,missing.level=missing.level)
|
||||
})
|
||||
|
||||
patchwork::wrap_plots(out)
|
||||
|
|
@ -120,14 +120,21 @@ plot_sankey <- function(data, pri, sec, ter = NULL, color.group = "pri", colors
|
|||
#' mtcars |>
|
||||
#' default_parsing() |>
|
||||
#' plot_sankey_single("cyl", "vs", color.group = "pri")
|
||||
plot_sankey_single <- function(data, pri, sec, color.group = c("pri", "sec"), colors = NULL, ...) {
|
||||
#' stRoke::trial |>
|
||||
#' default_parsing() |>
|
||||
#' plot_sankey_single("diabetes", "hypertension")
|
||||
plot_sankey_single <- function(data, pri, sec, color.group = c("pri", "sec"), colors = NULL,missing.level="Missing", ...) {
|
||||
color.group <- match.arg(color.group)
|
||||
|
||||
browser()
|
||||
data_orig <- data
|
||||
data[c(pri, sec)] <- data[c(pri, sec)] |>
|
||||
dplyr::mutate(dplyr::across(dplyr::where(is.factor), forcats::fct_drop))
|
||||
dplyr::mutate(
|
||||
# dplyr::across(dplyr::where(is.logical), as.factor),
|
||||
dplyr::across(dplyr::where(is.factor), forcats::fct_drop)#,
|
||||
# dplyr::across(dplyr::where(is.factor), \(.x){forcats::fct_na_value_to_level(.x,missing.level)})
|
||||
)
|
||||
|
||||
# browser()
|
||||
|
||||
data <- data |> sankey_ready(pri = pri, sec = sec, ...)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/cut-variable-dates.R
|
||||
% Please edit documentation in R/cut-variable-ext.R
|
||||
\name{cut-variable}
|
||||
\alias{cut-variable}
|
||||
\alias{cut_variable_ui}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/cut-variable-dates.R
|
||||
% Please edit documentation in R/cut_var.R
|
||||
\name{cut_var}
|
||||
\alias{cut_var}
|
||||
\alias{cut_var.default}
|
||||
|
|
@ -7,6 +7,7 @@
|
|||
\alias{cut_var.POSIXt}
|
||||
\alias{cut_var.POSIXct}
|
||||
\alias{cut_var.Date}
|
||||
\alias{cut_var.factor}
|
||||
\title{Extended cutting function with fall-back to the native base::cut}
|
||||
\usage{
|
||||
cut_var(x, ...)
|
||||
|
|
@ -34,17 +35,25 @@ cut_var(x, ...)
|
|||
)
|
||||
|
||||
\method{cut_var}{Date}(x, breaks = NULL, start.on.monday = TRUE, ...)
|
||||
|
||||
\method{cut_var}{factor}(x, breaks = NULL, type = c("top", "bottom"), other = "Other", ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{an object inheriting from class "POSIXct"}
|
||||
|
||||
\item{...}{passed on}
|
||||
|
||||
\item{type}{}
|
||||
}
|
||||
\value{
|
||||
factor
|
||||
|
||||
factor
|
||||
}
|
||||
\description{
|
||||
Extended cutting function with fall-back to the native base::cut
|
||||
|
||||
Simplify a factor to only the top or bottom n levels
|
||||
}
|
||||
\examples{
|
||||
readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(2)
|
||||
|
|
@ -57,9 +66,13 @@ readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA)) |> cut_
|
|||
readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2)
|
||||
readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday")
|
||||
readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "month_only")
|
||||
readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks=NULL,format = "\%A-\%H")
|
||||
readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks=NULL,format = "\%W")
|
||||
readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = NULL, format = "\%A-\%H")
|
||||
readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = NULL, format = "\%W")
|
||||
as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2)
|
||||
as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday")
|
||||
as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(format = "\%W")
|
||||
mtcars$carb |>
|
||||
as.factor() |>
|
||||
cut_var(2) |>
|
||||
table()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,7 +32,15 @@ plot_ridge(data, x, y, z = NULL, ...)
|
|||
|
||||
sankey_ready(data, pri, sec, numbers = "count", ...)
|
||||
|
||||
plot_sankey(data, pri, sec, ter = NULL, color.group = "pri", colors = NULL)
|
||||
plot_sankey(
|
||||
data,
|
||||
pri,
|
||||
sec,
|
||||
ter = NULL,
|
||||
color.group = "pri",
|
||||
colors = NULL,
|
||||
missing.level = "Missing"
|
||||
)
|
||||
|
||||
plot_scatter(data, pri, sec, ter = NULL)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/cut-variable-dates.R
|
||||
% Please edit documentation in R/cut_var.R
|
||||
\name{is_any_class}
|
||||
\alias{is_any_class}
|
||||
\title{Test class}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/cut-variable-dates.R
|
||||
% Please edit documentation in R/cut_var.R
|
||||
\name{is_datetime}
|
||||
\alias{is_datetime}
|
||||
\title{Test is date/datetime/time}
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ plot_sankey_single(
|
|||
sec,
|
||||
color.group = c("pri", "sec"),
|
||||
colors = NULL,
|
||||
missing.level = "Missing",
|
||||
...
|
||||
)
|
||||
}
|
||||
|
|
@ -40,4 +41,7 @@ data.frame(
|
|||
mtcars |>
|
||||
default_parsing() |>
|
||||
plot_sankey_single("cyl", "vs", color.group = "pri")
|
||||
stRoke::trial |>
|
||||
default_parsing() |>
|
||||
plot_sankey_single("diabetes", "hypertension")
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue