mirror of
https://github.com/agdamsbo/FreesearchR.git
synced 2026-06-19 04:27:30 +02:00
fix: implement "with_labels" and added tests
This commit is contained in:
parent
342579c36f
commit
f928aee110
2 changed files with 150 additions and 13 deletions
29
R/helpers.R
29
R/helpers.R
|
|
@ -219,20 +219,23 @@ file_export <- function(data,
|
|||
#' head(5) |>
|
||||
#' str()
|
||||
default_parsing <- function(data) {
|
||||
name_labels <- lapply(data, \(.x) REDCapCAST::get_attr(.x, attr = "label"))
|
||||
# name_labels <- lapply(data, \(.x) REDCapCAST::get_attr(.x, attr = "label"))
|
||||
# browser()
|
||||
out <- data |>
|
||||
setNames(make.names(names(data), unique = TRUE)) |>
|
||||
## Temporary step to avoid nested list and crashing
|
||||
remove_nested_list() |>
|
||||
REDCapCAST::parse_data() |>
|
||||
REDCapCAST::as_factor() |>
|
||||
REDCapCAST::numchar2fct(numeric.threshold = 8,
|
||||
character.throshold = 10) |>
|
||||
REDCapCAST::as_logical() |>
|
||||
REDCapCAST::fct_drop()
|
||||
|
||||
set_column_label(out, setNames(name_labels, names(out)), overwrite = FALSE)
|
||||
with_labels(data,{
|
||||
data |>
|
||||
setNames(make.names(names(data), unique = TRUE)) |>
|
||||
## Temporary step to avoid nested list and crashing
|
||||
remove_nested_list() |>
|
||||
REDCapCAST::parse_data() |>
|
||||
REDCapCAST::as_factor() |>
|
||||
REDCapCAST::numchar2fct(numeric.threshold = 8,
|
||||
character.throshold = 10) |>
|
||||
REDCapCAST::as_logical() |>
|
||||
REDCapCAST::fct_drop()
|
||||
})
|
||||
# out <-
|
||||
#
|
||||
# set_column_label(out, setNames(name_labels, names(out)), overwrite = FALSE)
|
||||
|
||||
# purrr::map2(
|
||||
# out,
|
||||
|
|
|
|||
134
tests/testthat/test-default-parsing.R
Normal file
134
tests/testthat/test-default-parsing.R
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
test_that("default_parsing returns a data.frame", {
|
||||
result <- default_parsing(mtcars)
|
||||
expect_true(is.data.frame(result))
|
||||
})
|
||||
|
||||
test_that("default_parsing preserves row count", {
|
||||
result <- default_parsing(mtcars)
|
||||
expect_equal(nrow(result), nrow(mtcars))
|
||||
})
|
||||
|
||||
test_that("default_parsing preserves column count", {
|
||||
result <- default_parsing(mtcars)
|
||||
expect_equal(ncol(result), ncol(mtcars))
|
||||
})
|
||||
|
||||
test_that("default_parsing produces valid column names (make.names compatible)", {
|
||||
# Create data with problematic column names
|
||||
bad_names_df <- data.frame(
|
||||
`1bad` = 1:5,
|
||||
`has space` = letters[1:5],
|
||||
`good_name` = TRUE,
|
||||
check.names = FALSE
|
||||
)
|
||||
result <- default_parsing(bad_names_df)
|
||||
expect_true(all(make.names(names(result)) == names(result)))
|
||||
})
|
||||
|
||||
test_that("default_parsing handles duplicate column names", {
|
||||
dup_df <- data.frame(a = 1:5, b = 6:10)
|
||||
names(dup_df) <- c("x", "x")
|
||||
result <- default_parsing(dup_df)
|
||||
expect_equal(length(names(result)), 2)
|
||||
expect_true(all(!duplicated(names(result))))
|
||||
})
|
||||
|
||||
test_that("default_parsing converts low-cardinality numeric columns to factor", {
|
||||
# A numeric column with <= 8 unique values should become a factor
|
||||
df <- data.frame(
|
||||
group = c(1, 2, 3, 1, 2, 3, 1, 2), # 3 unique → factor
|
||||
value = rnorm(8) # 8 unique → stays numeric
|
||||
)
|
||||
result <- default_parsing(df)
|
||||
expect_true(is.factor(result$group))
|
||||
})
|
||||
|
||||
test_that("default_parsing converts low-cardinality character columns to factor", {
|
||||
# A character column with <= 10 unique values should become a factor
|
||||
df <- data.frame(
|
||||
category = rep(c("a", "b", "c"), 4), # 3 unique → factor
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
result <- default_parsing(df)
|
||||
expect_true(is.factor(result$category))
|
||||
})
|
||||
|
||||
test_that("default_parsing drops unused factor levels", {
|
||||
df <- data.frame(
|
||||
x = factor(c("a", "b", "a"), levels = c("a", "b", "c")) # "c" unused
|
||||
)
|
||||
result <- default_parsing(df)
|
||||
expect_false("c" %in% levels(result$x))
|
||||
})
|
||||
|
||||
test_that("default_parsing converts logical-like columns to logical", {
|
||||
df <- data.frame(
|
||||
flag = c(0L, 1L, 0L, 1L, 0L),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
result <- default_parsing(df)
|
||||
# as_logical should have converted 0/1 integer to logical
|
||||
expect_true(is.logical(result$flag))
|
||||
})
|
||||
|
||||
test_that("default_parsing preserves column labels when present", {
|
||||
df <- data.frame(a = 1:3, b = c("x", "y", "z"), stringsAsFactors = FALSE)
|
||||
attr(df$a, "label") <- "Column A Label"
|
||||
attr(df$b, "label") <- "Column B Label"
|
||||
|
||||
result <- default_parsing(df)
|
||||
|
||||
expect_equal(attr(result$a, "label"), "Column A Label")
|
||||
expect_equal(attr(result$b, "label"), "Column B Label")
|
||||
})
|
||||
|
||||
test_that("default_parsing handles columns with no label attribute", {
|
||||
df <- data.frame(a = 1:3, b = c("x", "y", "z"), stringsAsFactors = FALSE)
|
||||
result <- default_parsing(df)
|
||||
# Should not error; label attrs simply absent or NULL
|
||||
expect_null(attr(result$a, "label"))
|
||||
})
|
||||
|
||||
test_that("default_parsing handles a single-column data.frame", {
|
||||
df <- data.frame(x = 1:10)
|
||||
result <- default_parsing(df)
|
||||
expect_equal(ncol(result), 1)
|
||||
expect_equal(nrow(result), 10)
|
||||
})
|
||||
|
||||
test_that("default_parsing handles an empty data.frame gracefully", {
|
||||
df <- data.frame(a = integer(0), b = character(0), stringsAsFactors = FALSE)
|
||||
result <- default_parsing(df)
|
||||
expect_equal(nrow(result), 0)
|
||||
})
|
||||
|
||||
test_that("default_parsing handles all-NA columns without error", {
|
||||
df <- data.frame(a = NA_real_, b = NA_character_, stringsAsFactors = FALSE)
|
||||
expect_no_error(default_parsing(df))
|
||||
})
|
||||
|
||||
test_that("default_parsing removes nested list columns", {
|
||||
df <- data.frame(id = 1:3)
|
||||
df$nested <- list(list(1, 2), list(3), list(4, 5)) # nested list column
|
||||
# Should not crash; nested list column is removed by remove_nested_list()
|
||||
expect_no_error(default_parsing(df))
|
||||
})
|
||||
|
||||
test_that("default_parsing works with dplyr::starwars-like tibble", {
|
||||
skip_if_not_installed("dplyr")
|
||||
sw <- head(dplyr::starwars, 10)
|
||||
result <- default_parsing(sw)
|
||||
expect_true(is.data.frame(result))
|
||||
expect_equal(nrow(result), 10)
|
||||
})
|
||||
|
||||
test_that("default_parsing high-cardinality character column stays character or factor", {
|
||||
# > 10 unique values → should NOT be coerced to factor by numchar2fct
|
||||
df <- data.frame(
|
||||
id = paste0("id_", 1:20),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
result <- default_parsing(df)
|
||||
# high cardinality: remains character (not factor)
|
||||
expect_false(is.factor(result$id))
|
||||
})
|
||||
Loading…
Add table
Add a link
Reference in a new issue