mirror of
https://github.com/agdamsbo/FreesearchR.git
synced 2025-09-12 09:59:39 +02:00
revised data types
This commit is contained in:
parent
07e94f4401
commit
aaceb55fe8
5 changed files with 84 additions and 38 deletions
|
@ -1 +1 @@
|
|||
app_version <- function()'Version: 25.4.3.250415_1627'
|
||||
app_version <- function()'Version: 25.4.3.250422'
|
||||
|
|
|
@ -155,8 +155,8 @@ overview_vars <- function(data) {
|
|||
data <- as.data.frame(data)
|
||||
|
||||
dplyr::tibble(
|
||||
icon = data_type(data),
|
||||
type = icon,
|
||||
icon = get_classes(data),
|
||||
class = icon,
|
||||
name = names(data),
|
||||
n_missing = unname(colSums(is.na(data))),
|
||||
p_complete = 1 - n_missing / nrow(data),
|
||||
|
@ -189,6 +189,7 @@ create_overview_datagrid <- function(data,...) {
|
|||
std_names <- c(
|
||||
"Name" = "name",
|
||||
"Icon" = "icon",
|
||||
"Class" = "class",
|
||||
"Type" = "type",
|
||||
"Missings" = "n_missing",
|
||||
"Complete" = "p_complete",
|
||||
|
@ -235,7 +236,7 @@ create_overview_datagrid <- function(data,...) {
|
|||
grid <- add_class_icon(
|
||||
grid = grid,
|
||||
column = "icon",
|
||||
fun = type_icons
|
||||
fun = class_icons
|
||||
)
|
||||
|
||||
grid <- toastui::grid_format(
|
||||
|
|
|
@ -408,7 +408,7 @@ all_but <- function(data, ...) {
|
|||
#'
|
||||
#' @examples
|
||||
#' default_parsing(mtcars) |> subset_types("ordinal")
|
||||
#' default_parsing(mtcars) |> subset_types(c("dichotomous", "ordinal", "categorical"))
|
||||
#' default_parsing(mtcars) |> subset_types(c("dichotomous", "categorical"))
|
||||
#' #' default_parsing(mtcars) |> subset_types("factor",class)
|
||||
subset_types <- function(data, types, type.fun = data_type) {
|
||||
data[sapply(data, type.fun) %in% types]
|
||||
|
@ -443,58 +443,58 @@ supported_plots <- function() {
|
|||
fun = "plot_hbars",
|
||||
descr = "Stacked horizontal bars",
|
||||
note = "A classical way of visualising the distribution of an ordinal scale like the modified Ranking Scale and known as Grotta bars",
|
||||
primary.type = c("dichotomous", "ordinal", "categorical"),
|
||||
secondary.type = c("dichotomous", "ordinal", "categorical"),
|
||||
primary.type = c("dichotomous", "categorical"),
|
||||
secondary.type = c("dichotomous", "categorical"),
|
||||
secondary.multi = FALSE,
|
||||
tertiary.type = c("dichotomous", "ordinal", "categorical"),
|
||||
tertiary.type = c("dichotomous", "categorical"),
|
||||
secondary.extra = "none"
|
||||
),
|
||||
plot_violin = list(
|
||||
fun = "plot_violin",
|
||||
descr = "Violin plot",
|
||||
note = "A modern alternative to the classic boxplot to visualise data distribution",
|
||||
primary.type = c("datatime", "continuous", "dichotomous", "ordinal", "categorical"),
|
||||
secondary.type = c("dichotomous", "ordinal", "categorical"),
|
||||
primary.type = c("datatime", "continuous", "dichotomous", "categorical"),
|
||||
secondary.type = c("dichotomous", "categorical"),
|
||||
secondary.multi = FALSE,
|
||||
secondary.extra = "none",
|
||||
tertiary.type = c("dichotomous", "ordinal", "categorical")
|
||||
tertiary.type = c("dichotomous", "categorical")
|
||||
),
|
||||
# plot_ridge = list(
|
||||
# descr = "Ridge plot",
|
||||
# note = "An alternative option to visualise data distribution",
|
||||
# primary.type = "continuous",
|
||||
# secondary.type = c("dichotomous", "ordinal" ,"categorical"),
|
||||
# tertiary.type = c("dichotomous", "ordinal" ,"categorical"),
|
||||
# secondary.type = c("dichotomous" ,"categorical"),
|
||||
# tertiary.type = c("dichotomous" ,"categorical"),
|
||||
# secondary.extra = NULL
|
||||
# ),
|
||||
plot_sankey = list(
|
||||
fun = "plot_sankey",
|
||||
descr = "Sankey plot",
|
||||
note = "A way of visualising change between groups",
|
||||
primary.type = c("dichotomous", "ordinal", "categorical"),
|
||||
secondary.type = c("dichotomous", "ordinal", "categorical"),
|
||||
primary.type = c("dichotomous", "categorical"),
|
||||
secondary.type = c("dichotomous", "categorical"),
|
||||
secondary.multi = FALSE,
|
||||
secondary.extra = NULL,
|
||||
tertiary.type = c("dichotomous", "ordinal", "categorical")
|
||||
tertiary.type = c("dichotomous", "categorical")
|
||||
),
|
||||
plot_scatter = list(
|
||||
fun = "plot_scatter",
|
||||
descr = "Scatter plot",
|
||||
note = "A classic way of showing the association between to variables",
|
||||
primary.type = c("datatime", "continuous"),
|
||||
secondary.type = c("datatime", "continuous", "ordinal", "categorical"),
|
||||
secondary.type = c("datatime", "continuous", "categorical"),
|
||||
secondary.multi = FALSE,
|
||||
tertiary.type = c("dichotomous", "ordinal", "categorical"),
|
||||
tertiary.type = c("dichotomous", "categorical"),
|
||||
secondary.extra = NULL
|
||||
),
|
||||
plot_box = list(
|
||||
fun = "plot_box",
|
||||
descr = "Box plot",
|
||||
note = "A classic way to plot data distribution by groups",
|
||||
primary.type = c("datatime", "continuous", "dichotomous", "ordinal", "categorical"),
|
||||
secondary.type = c("dichotomous", "ordinal", "categorical"),
|
||||
primary.type = c("datatime", "continuous", "dichotomous", "categorical"),
|
||||
secondary.type = c("dichotomous", "categorical"),
|
||||
secondary.multi = FALSE,
|
||||
tertiary.type = c("dichotomous", "ordinal", "categorical"),
|
||||
tertiary.type = c("dichotomous", "categorical"),
|
||||
secondary.extra = "none"
|
||||
),
|
||||
plot_euler = list(
|
||||
|
@ -505,7 +505,7 @@ supported_plots <- function() {
|
|||
secondary.type = "dichotomous",
|
||||
secondary.multi = TRUE,
|
||||
secondary.max = 4,
|
||||
tertiary.type = c("dichotomous", "ordinal", "categorical"),
|
||||
tertiary.type = c("dichotomous", "categorical"),
|
||||
secondary.extra = NULL
|
||||
)
|
||||
)
|
||||
|
|
44
R/helpers.R
44
R/helpers.R
|
@ -258,16 +258,22 @@ default_parsing <- function(data) {
|
|||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
#' ds <- mtcars |> lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |> dplyr::bind_cols()
|
||||
#' ds <- mtcars |>
|
||||
#' lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |>
|
||||
#' dplyr::bind_cols()
|
||||
#' ds |>
|
||||
#' remove_empty_attr() |>
|
||||
#' str()
|
||||
#' mtcars |> lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |> remove_empty_attr() |>
|
||||
#' mtcars |>
|
||||
#' lapply(\(.x) REDCapCAST::set_attr(.x, label = NA, attr = "label")) |>
|
||||
#' remove_empty_attr() |>
|
||||
#' str()
|
||||
#'
|
||||
remove_empty_attr <- function(data) {
|
||||
if (is.data.frame(data)) {
|
||||
data |> lapply(remove_empty_attr) |> dplyr::bind_cols()
|
||||
data |>
|
||||
lapply(remove_empty_attr) |>
|
||||
dplyr::bind_cols()
|
||||
} else if (is.list(data)) {
|
||||
data |> lapply(remove_empty_attr)
|
||||
} else {
|
||||
|
@ -387,7 +393,7 @@ data_description <- function(data, data_text = "Data") {
|
|||
#' }
|
||||
data_type_filter <- function(data, type) {
|
||||
## Please ensure to only provide recognised data types
|
||||
assertthat::assert_that(all(type %in% data_types()))
|
||||
assertthat::assert_that(all(type %in% names(data_types())))
|
||||
|
||||
if (!is.null(type)) {
|
||||
out <- data[data_type(data) %in% type]
|
||||
|
@ -616,3 +622,33 @@ append_column <- function(data, column, name, index = "right") {
|
|||
) |>
|
||||
dplyr::bind_cols()
|
||||
}
|
||||
|
||||
|
||||
|
||||
#' Test if element is identical to the previous
|
||||
#'
|
||||
#' @param data data. vector, data.frame or list
|
||||
#' @param no.name logical to remove names attribute before testing
|
||||
#'
|
||||
#' @returns logical vector
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
#' c(1, 1, 2, 3, 3, 2, 4, 4) |> is_identical_to_previous()
|
||||
#' mtcars[c(1, 1, 2, 3, 3, 2, 4, 4)] |> is_identical_to_previous()
|
||||
#' list(1, 1, list(2), "A", "a", "a") |> is_identical_to_previous()
|
||||
is_identical_to_previous <- function(data, no.name = TRUE) {
|
||||
if (is.data.frame(data)) {
|
||||
lagged <- data.frame(FALSE, data[seq_len(length(data) - 1)])
|
||||
} else {
|
||||
lagged <- c(FALSE, data[seq_len(length(data) - 1)])
|
||||
}
|
||||
|
||||
vapply(seq_len(length(data)), \(.x){
|
||||
if (isTRUE(no.name)) {
|
||||
identical(unname(lagged[.x]), unname(data[.x]))
|
||||
} else {
|
||||
identical(lagged[.x], data[.x])
|
||||
}
|
||||
}, FUN.VALUE = logical(1))
|
||||
}
|
||||
|
|
|
@ -279,11 +279,11 @@ data_type <- function(data) {
|
|||
if (identical("logical", cl_d) | length(unique(data)) == 2) {
|
||||
out <- "dichotomous"
|
||||
} else {
|
||||
if (is.ordered(data)) {
|
||||
out <- "ordinal"
|
||||
} else {
|
||||
# if (is.ordered(data)) {
|
||||
# out <- "ordinal"
|
||||
# } else {
|
||||
out <- "categorical"
|
||||
}
|
||||
# }
|
||||
}
|
||||
} else if (identical(cl_d, "character")) {
|
||||
out <- "text"
|
||||
|
@ -310,7 +310,16 @@ data_type <- function(data) {
|
|||
#' @examples
|
||||
#' data_types()
|
||||
data_types <- function() {
|
||||
c("dichotomous", "ordinal", "categorical", "datatime", "continuous", "text", "empty", "monotone", "unknown")
|
||||
list(
|
||||
"empty" = list(descr="Variable of all NAs",classes="Any class"),
|
||||
"monotone" = list(descr="Variable with only one unique value",classes="Any class"),
|
||||
"dichotomous" = list(descr="Variable with only two unique values",classes="Any class"),
|
||||
"categorical"= list(descr="Factor variable",classes="factor (ordered or unordered)"),
|
||||
"text"= list(descr="Character variable",classes="character"),
|
||||
"datetime"= list(descr="Variable of time, date or datetime values",classes="hms, Date, POSIXct and POSIXt"),
|
||||
"continuous"= list(descr="Numeric variable",classes="numeric, integer or double"),
|
||||
"unknown"= list(descr="Anything not falling within the previous",classes="Any other class")
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
|
@ -351,7 +360,7 @@ supported_functions <- function() {
|
|||
polr = list(
|
||||
descr = "Ordinal logistic regression model",
|
||||
design = "cross-sectional",
|
||||
out.type = c("ordinal", "categorical"),
|
||||
out.type = c("categorical"),
|
||||
fun = "MASS::polr",
|
||||
args.list = list(
|
||||
Hess = TRUE,
|
||||
|
|
Loading…
Add table
Reference in a new issue