All data parsing and formatting has been separated into individual functions

This commit is contained in:
Andreas Gammelgaard Damsbo 2024-11-18 14:40:32 +01:00
commit ea08a2066f
No known key found for this signature in database
7 changed files with 362 additions and 85 deletions

View file

@ -15,10 +15,7 @@ ds2dd_detailed(
field.label = NULL,
field.label.attr = "label",
field.validation = NULL,
metadata = names(REDCapCAST::redcapcast_meta),
validate.time = FALSE,
time.var.sel.pos = "[Tt]i[d(me)]",
time.var.sel.neg = "[Dd]at[eo]"
metadata = names(REDCapCAST::redcapcast_meta)
)
}
\arguments{
@ -58,14 +55,6 @@ file with `haven::read_dta()`).}
\item{metadata}{REDCap metadata headings. Default is
`names(REDCapCAST::redcapcast_meta)`.}
\item{validate.time}{Flag to validate guessed time columns}
\item{time.var.sel.pos}{Positive selection regex string passed to
`guess_time_only_filter()` as sel.pos.}
\item{time.var.sel.neg}{Negative selection regex string passed to
`guess_time_only_filter()` as sel.neg.}
}
\value{
list of length 2
@ -84,7 +73,6 @@ Ensure, that the data set is formatted with as much information as possible.
}
\examples{
data <- REDCapCAST::redcapcast_data
data |> ds2dd_detailed(validate.time = TRUE)
data |> ds2dd_detailed()
iris |> ds2dd_detailed(add.auto.id = TRUE)
iris |>

33
man/guess_time_only.Rd Normal file
View file

@ -0,0 +1,33 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ds2dd_detailed.R
\name{guess_time_only}
\alias{guess_time_only}
\title{Guess time variables based on naming pattern}
\usage{
guess_time_only(
data,
validate.time = FALSE,
time.var.sel.pos = "[Tt]i[d(me)]",
time.var.sel.neg = "[Dd]at[eo]"
)
}
\arguments{
\item{data}{data.frame or tibble}
\item{validate.time}{Flag to validate guessed time columns}
\item{time.var.sel.pos}{Positive selection regex string passed to
`guess_time_only_filter()` as sel.pos.}
\item{time.var.sel.neg}{Negative selection regex string passed to
`guess_time_only_filter()` as sel.neg.}
}
\value{
data.frame or tibble
}
\description{
Repairs data in which time variables have had the date "1970-01-01" appended.
}
\examples{
redcapcast_data |> guess_time_only(validate.time = TRUE)
}

24
man/haven_all_levels.Rd Normal file
View file

@ -0,0 +1,24 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ds2dd_detailed.R
\name{haven_all_levels}
\alias{haven_all_levels}
\title{Finish incomplete haven attributes substituting missings with values}
\usage{
haven_all_levels(data)
}
\arguments{
\item{data}{haven labelled variable}
}
\value{
named vector
}
\description{
Finish incomplete haven attributes, substituting missing labels with values.
}
\examples{
ds <- structure(c(1, 2, 3, 2, 10, 9),
labels = c(Unknown = 9, Refused = 10),
class = "haven_labelled"
)
ds |> haven_all_levels()
}

29
man/numchar2fct.Rd Normal file
View file

@ -0,0 +1,29 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ds2dd_detailed.R
\name{numchar2fct}
\alias{numchar2fct}
\title{Applying var2fct across data set}
\usage{
numchar2fct(data, numeric.threshold = 6, character.throshold = 6)
}
\arguments{
\item{data}{dataset. data.frame or tibble}
\item{numeric.threshold}{threshold for var2fct for numeric columns. Default
is 6.}
\item{character.throshold}{threshold for var2fct for character columns.
Default is 6.}
}
\value{
data.frame or tibble
}
\description{
Individual thresholds for character and numeric columns
}
\examples{
mtcars |> str()
mtcars |>
numchar2fct(numeric.threshold = 6) |>
str()
}

39
man/parse_data.Rd Normal file
View file

@ -0,0 +1,39 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ds2dd_detailed.R
\name{parse_data}
\alias{parse_data}
\title{Helper to auto-parse un-formatted data with haven and readr}
\usage{
parse_data(
data,
guess_type = TRUE,
col_types = NULL,
locale = readr::default_locale(),
ignore.vars = "cpr",
...
)
}
\arguments{
\item{data}{data.frame or tibble}
\item{guess_type}{logical to guess type with readr}
\item{col_types}{specify col_types using readr semantics. Ignored if guess_type is TRUE}
\item{locale}{option to specify locale. Defaults to readr::default_locale().}
\item{ignore.vars}{specify column names of columns to ignore when parsing}
\item{...}{ignored}
}
\value{
data.frame or tibble
}
\description{
Helper to auto-parse un-formatted data with haven and readr
}
\examples{
mtcars |>
parse_data() |>
str()
}

29
man/var2fct.Rd Normal file
View file

@ -0,0 +1,29 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ds2dd_detailed.R
\name{var2fct}
\alias{var2fct}
\title{Convert vector to factor based on threshold of number of unique levels}
\usage{
var2fct(data, unique.n)
}
\arguments{
\item{data}{vector or data.frame column}
\item{unique.n}{threshold to convert class to factor}
}
\value{
vector
}
\description{
This is a wrapper of forcats::as_factor, which sorts numeric vectors before
factoring, but levels character vectors in order of appearance.
}
\examples{
sample(seq_len(4), 20, TRUE) |>
var2fct(6) |>
summary()
sample(letters, 20) |>
var2fct(6) |>
summary()
sample(letters[1:4], 20, TRUE) |> var2fct(6)
}