mirror of
https://github.com/agdamsbo/FreesearchR.git
synced 2025-12-16 09:32:10 +01:00
233 lines
7.6 KiB
R
233 lines
7.6 KiB
R
|
|
#' Extended cutting function with fall-back to the native base::cut
|
||
|
|
#'
|
||
|
|
#' @param x an object inheriting from class "hms"
|
||
|
|
#' @param ... passed on
|
||
|
|
#'
|
||
|
|
#' @export
|
||
|
|
#' @name cut_var
|
||
|
|
cut_var <- function(x, ...) {
|
||
|
|
UseMethod("cut_var")
|
||
|
|
}
|
||
|
|
|
||
|
|
#' @export
|
||
|
|
#' @name cut_var
|
||
|
|
cut_var.default <- function(x, ...) {
|
||
|
|
base::cut(x, ...)
|
||
|
|
}
|
||
|
|
|
||
|
|
#' @name cut_var
|
||
|
|
#'
|
||
|
|
#' @return factor
|
||
|
|
#' @export
|
||
|
|
#'
|
||
|
|
#' @examples
|
||
|
|
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(2)
|
||
|
|
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var("min")
|
||
|
|
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(breaks = "hour")
|
||
|
|
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "08:20:20", "21:20:20", "03:02:20")) |> cut_var(breaks = hms::as_hms(c("01:00:00", "03:01:20", "9:20:20")))
|
||
|
|
#' d_t <- readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA))
|
||
|
|
#' f <- d_t |> cut_var(2)
|
||
|
|
#' readr::parse_time(c("01:00:20", "03:00:20", "01:20:20", "03:02:20", NA)) |> cut_var(breaks = lubridate::as_datetime(c(hms::as_hms(levels(f)), hms::as_hms(max(d_t, na.rm = TRUE) + 1))), right = FALSE)
|
||
|
|
cut_var.hms <- function(x, breaks, ...) {
|
||
|
|
## as_hms keeps returning warnings on tz(); ignored
|
||
|
|
suppressWarnings({
|
||
|
|
if (hms::is_hms(breaks)) {
|
||
|
|
breaks <- lubridate::as_datetime(breaks)
|
||
|
|
}
|
||
|
|
x <- lubridate::as_datetime(x)
|
||
|
|
out <- cut_var.POSIXt(x, breaks = breaks, ...)
|
||
|
|
attr(out, which = "brks") <- hms::as_hms(lubridate::as_datetime(attr(out, which = "brks")))
|
||
|
|
attr(out, which = "levels") <- as.character(hms::as_hms(lubridate::as_datetime(attr(out, which = "levels"))))
|
||
|
|
})
|
||
|
|
out
|
||
|
|
}
|
||
|
|
|
||
|
|
#' @name cut_var
|
||
|
|
#' @param x an object inheriting from class "POSIXt" or "Date"
|
||
|
|
#'
|
||
|
|
#' @examples
|
||
|
|
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2)
|
||
|
|
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday")
|
||
|
|
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "month_only")
|
||
|
|
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = NULL, format = "%A-%H")
|
||
|
|
#' readr::parse_datetime(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = NULL, format = "%W")
|
||
|
|
cut_var.POSIXt <- function(x, breaks, right = FALSE, include.lowest = TRUE, start.on.monday = TRUE, ...) {
|
||
|
|
breaks_o <- breaks
|
||
|
|
args <- list(...)
|
||
|
|
# browser()
|
||
|
|
if (is.numeric(breaks)) {
|
||
|
|
breaks <- quantile(
|
||
|
|
x,
|
||
|
|
probs = seq(0, 1, 1 / breaks),
|
||
|
|
right = right,
|
||
|
|
include.lowest = include.lowest,
|
||
|
|
na.rm = TRUE
|
||
|
|
)
|
||
|
|
}
|
||
|
|
|
||
|
|
if ("format" %in% names(args)) {
|
||
|
|
assertthat::assert_that(is.character(args$format))
|
||
|
|
out <- forcats::as_factor(format(x, format = args$format))
|
||
|
|
} else if (identical(breaks, "weekday")) {
|
||
|
|
## This is
|
||
|
|
ds <- as.Date(1:7) |>
|
||
|
|
(\(.x){
|
||
|
|
sort_by(format(.x, "%A"), as.numeric(format(.x, "%w")))
|
||
|
|
})()
|
||
|
|
|
||
|
|
if (start.on.monday) {
|
||
|
|
ds <- ds[c(7, 1:6)]
|
||
|
|
}
|
||
|
|
out <- factor(weekdays(x), levels = ds) |> forcats::fct_drop()
|
||
|
|
} else if (identical(breaks, "month_only")) {
|
||
|
|
## Simplest way to create a vector of all months in order
|
||
|
|
## which will also follow the locale of the machine
|
||
|
|
ms <- paste0("1970-", 1:12, "-01") |>
|
||
|
|
as.Date() |>
|
||
|
|
months()
|
||
|
|
|
||
|
|
out <- factor(months(x), levels = ms) |> forcats::fct_drop()
|
||
|
|
} else {
|
||
|
|
## Doesn't really work very well for breaks other than the special character cases as right border is excluded
|
||
|
|
out <- base::cut.POSIXt(x, breaks = breaks, right = right, ...) |> forcats::fct_drop()
|
||
|
|
# browser()
|
||
|
|
}
|
||
|
|
l <- levels(out)
|
||
|
|
if (is.numeric(breaks_o)) {
|
||
|
|
l <- breaks
|
||
|
|
} else if (is.character(breaks) && length(breaks) == 1 && !(identical(breaks, "weekday") | identical(breaks, "month_only"))) {
|
||
|
|
if (include.lowest) {
|
||
|
|
if (right) {
|
||
|
|
l <- c(l, min(as.character(x)))
|
||
|
|
} else {
|
||
|
|
l <- c(l, max(as.character(x)))
|
||
|
|
}
|
||
|
|
}
|
||
|
|
} else if (length(l) < length(breaks_o)) {
|
||
|
|
l <- breaks_o
|
||
|
|
}
|
||
|
|
|
||
|
|
attr(out, which = "brks") <- l
|
||
|
|
out
|
||
|
|
}
|
||
|
|
|
||
|
|
#' @name cut_var
|
||
|
|
#' @param x an object inheriting from class "POSIXct"
|
||
|
|
cut_var.POSIXct <- cut_var.POSIXt
|
||
|
|
|
||
|
|
#' @name cut_var
|
||
|
|
#' @param x an object inheriting from class "POSIXct"
|
||
|
|
#'
|
||
|
|
#' @examples
|
||
|
|
#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(2)
|
||
|
|
#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(breaks = "weekday")
|
||
|
|
#' as.Date(c("1992-02-01 01:00:20", "1992-02-06 03:00:20", "1992-05-01 01:20:20", "1992-09-01 08:20:20", "1999-02-01 21:20:20", "1992-12-01 03:02:20")) |> cut_var(format = "%W")
|
||
|
|
cut_var.Date <- function(x, breaks = NULL, start.on.monday = TRUE, ...) {
|
||
|
|
args <- list(...)
|
||
|
|
|
||
|
|
if ("format" %in% names(args)) {
|
||
|
|
assertthat::assert_that(is.character(args$format))
|
||
|
|
out <- forcats::as_factor(format(x, format = args$format))
|
||
|
|
} else if (identical(breaks, "weekday")) {
|
||
|
|
ds <- as.Date(1:7) |>
|
||
|
|
(\(.x){
|
||
|
|
sort_by(format(.x, "%A"), as.numeric(format(.x, "%w")))
|
||
|
|
})()
|
||
|
|
|
||
|
|
if (start.on.monday) {
|
||
|
|
ds <- ds[c(7, 1:6)]
|
||
|
|
}
|
||
|
|
out <- factor(weekdays(x), levels = ds) |> forcats::fct_drop()
|
||
|
|
} else if (identical(breaks, "month_only")) {
|
||
|
|
ms <- paste0("1970-", 1:12, "-01") |>
|
||
|
|
as.Date() |>
|
||
|
|
months()
|
||
|
|
|
||
|
|
out <- factor(months(x), levels = ms) |> forcats::fct_drop()
|
||
|
|
} else {
|
||
|
|
## Doesn't really work very well for breaks other than the special character cases as right border is excluded
|
||
|
|
out <- base::cut.Date(x, breaks = breaks, ...) |> forcats::fct_drop()
|
||
|
|
# browser()
|
||
|
|
}
|
||
|
|
out
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
#' Simplify a factor to only the top or bottom n levels
|
||
|
|
#'
|
||
|
|
#' @param type
|
||
|
|
#'
|
||
|
|
#' @name cut_var
|
||
|
|
#'
|
||
|
|
#' @returns factor
|
||
|
|
#' @export
|
||
|
|
#'
|
||
|
|
#' @examples
|
||
|
|
#' mtcars$carb |>
|
||
|
|
#' as.factor() |>
|
||
|
|
#' cut_var(2) |>
|
||
|
|
#' table()
|
||
|
|
cut_var.factor <- function(x, breaks = NULL, type = c("top", "bottom"), other = "Other", ...) {
|
||
|
|
args <- list(...)
|
||
|
|
|
||
|
|
if (is.null(breaks)) {
|
||
|
|
return(x)
|
||
|
|
}
|
||
|
|
|
||
|
|
type <- match.arg(type)
|
||
|
|
|
||
|
|
if (type == "top") {
|
||
|
|
lvls <- names(sort(table(x), decreasing = TRUE)[seq_len(breaks)])
|
||
|
|
} else if (type == "bottom") {
|
||
|
|
lvls <- names(sort(table(x), decreasing = FALSE)[seq_len(breaks)])
|
||
|
|
}
|
||
|
|
|
||
|
|
if (other %in% lvls) {
|
||
|
|
other <- paste(other, "_freesearchr")
|
||
|
|
}
|
||
|
|
|
||
|
|
## Relabel and relevel
|
||
|
|
out <- forcats::fct_relabel(
|
||
|
|
x,
|
||
|
|
\(.x){
|
||
|
|
ifelse(.x %in% lvls, .x, other)
|
||
|
|
}
|
||
|
|
) |>
|
||
|
|
forcats::fct_relevel(lvls,other)
|
||
|
|
|
||
|
|
attr(out, which = "brks") <- breaks
|
||
|
|
out
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
#' Test class
|
||
|
|
#'
|
||
|
|
#' @param data data
|
||
|
|
#' @param class.vec vector of class names to test
|
||
|
|
#'
|
||
|
|
#' @return factor
|
||
|
|
#' @export
|
||
|
|
#'
|
||
|
|
#' @examples
|
||
|
|
#' \dontrun{
|
||
|
|
#' vapply(REDCapCAST::redcapcast_data, \(.x){
|
||
|
|
#' is_any_class(.x, c("hms", "Date", "POSIXct", "POSIXt"))
|
||
|
|
#' }, logical(1))
|
||
|
|
#' }
|
||
|
|
is_any_class <- function(data, class.vec) {
|
||
|
|
any(class(data) %in% class.vec)
|
||
|
|
}
|
||
|
|
|
||
|
|
#' Test is date/datetime/time
|
||
|
|
#'
|
||
|
|
#' @param data data
|
||
|
|
#'
|
||
|
|
#' @return factor
|
||
|
|
#' @export
|
||
|
|
#'
|
||
|
|
#' @examples
|
||
|
|
#' vapply(REDCapCAST::redcapcast_data, is_datetime, logical(1))
|
||
|
|
is_datetime <- function(data) {
|
||
|
|
is_any_class(data, class.vec = c("hms", "Date", "POSIXct", "POSIXt"))
|
||
|
|
}
|