mirror of
https://github.com/agdamsbo/FreesearchR.git
synced 2026-06-19 12:37:30 +02:00
This commit is contained in:
parent
6c44be558d
commit
912fff7474
32 changed files with 2340 additions and 273 deletions
153
R/wide2long.R
Normal file
153
R/wide2long.R
Normal file
|
|
@ -0,0 +1,153 @@
|
|||
#' Alternative pivoting method for easily pivoting based on name pattern
|
||||
#'
|
||||
#' @description
|
||||
#' This function requires and assumes a systematic naming of variables.
|
||||
#' For now only supports one level pivoting. Adding more levels would require
|
||||
#' an added "ignore" string pattern or similarly. Example 2.
|
||||
#'
|
||||
#'
|
||||
#' @param data data
|
||||
#' @param pattern pattern(s) to match. Character vector of length 1 or more.
|
||||
#' @param type type of match. can be one of "prefix","infix" or "suffix".
|
||||
#' @param id.col ID column. Will fill ID for all. Column name or numeric index.
|
||||
#' Default is "1", first column.
|
||||
#' @param instance.name
|
||||
#'
|
||||
#' @returns data.frame
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
#' data.frame(
|
||||
#' 1:20, sample(70:80, 20, TRUE),
|
||||
#' sample(70:100, 20, TRUE),
|
||||
#' sample(70:100, 20, TRUE),
|
||||
#' sample(170:200, 20, TRUE)
|
||||
#' ) |>
|
||||
#' setNames(c("id", "age", "weight_0", "weight_1", "height_1")) |>
|
||||
#' wide2long(pattern = c("_0", "_1"), type = "suffix")
|
||||
#' data.frame(
|
||||
#' 1:20, sample(70:80, 20, TRUE),
|
||||
#' sample(70:100, 20, TRUE),
|
||||
#' sample(70:100, 20, TRUE),
|
||||
#' sample(170:200, 20, TRUE)
|
||||
#' ) |>
|
||||
#' setNames(c("id", "age", "weight_0", "weight_a_1", "height_b_1")) |>
|
||||
#' wide2long(pattern = c("_0", "_1"), type = "suffix")
|
||||
#' # Optional filling of missing values by last observation carried forward
|
||||
#' # Needed for mmrm analyses
|
||||
#' long_missings |>
|
||||
#' # Fills record ID assuming none are missing
|
||||
#' tidyr::fill(record_id) |>
|
||||
#' # Grouping by ID for the last step
|
||||
#' dplyr::group_by(record_id) |>
|
||||
#' # Filling missing data by ID
|
||||
#' tidyr::fill(names(long_missings)[!names(long_missings) %in% new_names]) |>
|
||||
#' # Remove grouping
|
||||
#' dplyr::ungroup()
|
||||
wide2long <- function(
|
||||
data,
|
||||
pattern,
|
||||
type = c("prefix", "infix", "suffix"),
|
||||
id.col = 1,
|
||||
instance.name = "instance") {
|
||||
type <- match.arg(type)
|
||||
|
||||
## Give the unique suffix names to use for identifying repeated measures
|
||||
# suffixes <- c("_0", "_1")
|
||||
|
||||
## If no ID column is present, one is added
|
||||
if (id.col == "none" | is.null(id.col)) {
|
||||
data <- stats::setNames(
|
||||
data.frame(seq_len(nrow(data)), data),
|
||||
make.names(c("id", names(data)), unique = TRUE)
|
||||
)
|
||||
id.col <- 1
|
||||
}
|
||||
# browser()
|
||||
## Relevant columns are determined based on suffixes
|
||||
cols <- names(data)[grepl_fix(names(data), pattern = pattern, type = type)]
|
||||
|
||||
## New colnames are created by removing suffixes
|
||||
new_names <- unique(gsub(paste(pattern, collapse = "|"), "", cols))
|
||||
|
||||
out <- split(data, seq_len(nrow(data))) |> # Splits dataset by row
|
||||
# Starts data modifications for each subject
|
||||
lapply(\(.x){
|
||||
## Pivots data with repeated measures as determined by the defined suffixes
|
||||
long_ls <- split.default(
|
||||
# Subset only repeated data
|
||||
.x[cols],
|
||||
# ... and split by meassure
|
||||
gsub(paste(new_names, collapse = "|"), "", cols)
|
||||
) |>
|
||||
# Sort data by order of given suffixes to ensure chronology
|
||||
sort_by(pattern) |>
|
||||
# New colnames are applied
|
||||
lapply(\(.y){
|
||||
setNames(
|
||||
.y,
|
||||
gsub(paste(pattern, collapse = "|"), "", names(.y))
|
||||
)
|
||||
})
|
||||
|
||||
# Subsets non-pivotted data (this is assumed to belong to same )
|
||||
single <- .x[-match(cols, names(.x))]
|
||||
|
||||
# Extends with empty rows to get same dimensions as long data
|
||||
single[(nrow(single) + 1):length(long_ls), ] <- NA
|
||||
|
||||
# Fills ID col
|
||||
single[id.col] <- single[1, id.col]
|
||||
|
||||
# Everything is merged together
|
||||
merged <- dplyr::bind_cols(
|
||||
single,
|
||||
# Instance names are defined as suffixes without leading non-characters
|
||||
REDCapCAST::as_factor(data.frame(gsub(
|
||||
"^[^[:alnum:]]+", "",
|
||||
names(long_ls)
|
||||
))),
|
||||
dplyr::bind_rows(long_ls)
|
||||
)
|
||||
|
||||
# Ensure unique new names based on supplied
|
||||
colnames(merged) <- make.names(
|
||||
c(
|
||||
names(single),
|
||||
instance.name,
|
||||
names(merged)[(NCOL(single) + 2):NCOL(merged)]
|
||||
),
|
||||
unique = TRUE
|
||||
)
|
||||
|
||||
merged
|
||||
}) |> dplyr::bind_rows()
|
||||
|
||||
rownames(out) <- NULL
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
|
||||
#' Matches pattern to vector based on match type
|
||||
#'
|
||||
#' @param data vector
|
||||
#' @param pattern pattern(s) to match. Character vector of length 1 or more.
|
||||
#' @param type type of match. can be one of "prefix","infix" or "suffix".
|
||||
#'
|
||||
#' @returns logical vector
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
#' c("id", "age", "weight_0", "weight_1") |> grepl_fix(pattern = c("_0", "_1"), type = "suffix")
|
||||
grepl_fix <- function(data, pattern, type = c("prefix", "infix", "suffix")) {
|
||||
type <- match.arg(type)
|
||||
|
||||
if (type == "prefix") {
|
||||
grepl(paste0("^(", paste(pattern, collapse = "|"), ")*"), data)
|
||||
} else if (type == "suffix") {
|
||||
grepl(paste0("*(", paste(pattern, collapse = "|"), ")$"), data)
|
||||
} else if (type == "infix") {
|
||||
grepl(paste0("*(", paste(pattern, collapse = "|"), ")*"), data)
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue