mirror of
https://github.com/agdamsbo/FreesearchR.git
synced 2025-09-12 18:09:39 +02:00
154 lines
4.8 KiB
R
154 lines
4.8 KiB
R
|
#' Alternative pivoting method for easily pivoting based on name pattern
|
||
|
#'
|
||
|
#' @description
|
||
|
#' This function requires and assumes a systematic naming of variables.
|
||
|
#' For now only supports one level pivoting. Adding more levels would require
|
||
|
#' an added "ignore" string pattern or similarly. Example 2.
|
||
|
#'
|
||
|
#'
|
||
|
#' @param data data
|
||
|
#' @param pattern pattern(s) to match. Character vector of length 1 or more.
|
||
|
#' @param type type of match. can be one of "prefix","infix" or "suffix".
|
||
|
#' @param id.col ID column. Will fill ID for all. Column name or numeric index.
|
||
|
#' Default is "1", first column.
|
||
|
#' @param instance.name
|
||
|
#'
|
||
|
#' @returns data.frame
|
||
|
#' @export
|
||
|
#'
|
||
|
#' @examples
|
||
|
#' data.frame(
|
||
|
#' 1:20, sample(70:80, 20, TRUE),
|
||
|
#' sample(70:100, 20, TRUE),
|
||
|
#' sample(70:100, 20, TRUE),
|
||
|
#' sample(170:200, 20, TRUE)
|
||
|
#' ) |>
|
||
|
#' setNames(c("id", "age", "weight_0", "weight_1", "height_1")) |>
|
||
|
#' wide2long(pattern = c("_0", "_1"), type = "suffix")
|
||
|
#' data.frame(
|
||
|
#' 1:20, sample(70:80, 20, TRUE),
|
||
|
#' sample(70:100, 20, TRUE),
|
||
|
#' sample(70:100, 20, TRUE),
|
||
|
#' sample(170:200, 20, TRUE)
|
||
|
#' ) |>
|
||
|
#' setNames(c("id", "age", "weight_0", "weight_a_1", "height_b_1")) |>
|
||
|
#' wide2long(pattern = c("_0", "_1"), type = "suffix")
|
||
|
#' # Optional filling of missing values by last observation carried forward
|
||
|
#' # Needed for mmrm analyses
|
||
|
#' long_missings |>
|
||
|
#' # Fills record ID assuming none are missing
|
||
|
#' tidyr::fill(record_id) |>
|
||
|
#' # Grouping by ID for the last step
|
||
|
#' dplyr::group_by(record_id) |>
|
||
|
#' # Filling missing data by ID
|
||
|
#' tidyr::fill(names(long_missings)[!names(long_missings) %in% new_names]) |>
|
||
|
#' # Remove grouping
|
||
|
#' dplyr::ungroup()
|
||
|
wide2long <- function(
|
||
|
data,
|
||
|
pattern,
|
||
|
type = c("prefix", "infix", "suffix"),
|
||
|
id.col = 1,
|
||
|
instance.name = "instance") {
|
||
|
type <- match.arg(type)
|
||
|
|
||
|
## Give the unique suffix names to use for identifying repeated measures
|
||
|
# suffixes <- c("_0", "_1")
|
||
|
|
||
|
## If no ID column is present, one is added
|
||
|
if (id.col == "none" | is.null(id.col)) {
|
||
|
data <- stats::setNames(
|
||
|
data.frame(seq_len(nrow(data)), data),
|
||
|
make.names(c("id", names(data)), unique = TRUE)
|
||
|
)
|
||
|
id.col <- 1
|
||
|
}
|
||
|
# browser()
|
||
|
## Relevant columns are determined based on suffixes
|
||
|
cols <- names(data)[grepl_fix(names(data), pattern = pattern, type = type)]
|
||
|
|
||
|
## New colnames are created by removing suffixes
|
||
|
new_names <- unique(gsub(paste(pattern, collapse = "|"), "", cols))
|
||
|
|
||
|
out <- split(data, seq_len(nrow(data))) |> # Splits dataset by row
|
||
|
# Starts data modifications for each subject
|
||
|
lapply(\(.x){
|
||
|
## Pivots data with repeated measures as determined by the defined suffixes
|
||
|
long_ls <- split.default(
|
||
|
# Subset only repeated data
|
||
|
.x[cols],
|
||
|
# ... and split by meassure
|
||
|
gsub(paste(new_names, collapse = "|"), "", cols)
|
||
|
) |>
|
||
|
# Sort data by order of given suffixes to ensure chronology
|
||
|
sort_by(pattern) |>
|
||
|
# New colnames are applied
|
||
|
lapply(\(.y){
|
||
|
setNames(
|
||
|
.y,
|
||
|
gsub(paste(pattern, collapse = "|"), "", names(.y))
|
||
|
)
|
||
|
})
|
||
|
|
||
|
# Subsets non-pivotted data (this is assumed to belong to same )
|
||
|
single <- .x[-match(cols, names(.x))]
|
||
|
|
||
|
# Extends with empty rows to get same dimensions as long data
|
||
|
single[(nrow(single) + 1):length(long_ls), ] <- NA
|
||
|
|
||
|
# Fills ID col
|
||
|
single[id.col] <- single[1, id.col]
|
||
|
|
||
|
# Everything is merged together
|
||
|
merged <- dplyr::bind_cols(
|
||
|
single,
|
||
|
# Instance names are defined as suffixes without leading non-characters
|
||
|
REDCapCAST::as_factor(data.frame(gsub(
|
||
|
"^[^[:alnum:]]+", "",
|
||
|
names(long_ls)
|
||
|
))),
|
||
|
dplyr::bind_rows(long_ls)
|
||
|
)
|
||
|
|
||
|
# Ensure unique new names based on supplied
|
||
|
colnames(merged) <- make.names(
|
||
|
c(
|
||
|
names(single),
|
||
|
instance.name,
|
||
|
names(merged)[(NCOL(single) + 2):NCOL(merged)]
|
||
|
),
|
||
|
unique = TRUE
|
||
|
)
|
||
|
|
||
|
merged
|
||
|
}) |> dplyr::bind_rows()
|
||
|
|
||
|
rownames(out) <- NULL
|
||
|
|
||
|
out
|
||
|
}
|
||
|
|
||
|
|
||
|
#' Matches pattern to vector based on match type
|
||
|
#'
|
||
|
#' @param data vector
|
||
|
#' @param pattern pattern(s) to match. Character vector of length 1 or more.
|
||
|
#' @param type type of match. can be one of "prefix","infix" or "suffix".
|
||
|
#'
|
||
|
#' @returns logical vector
|
||
|
#' @export
|
||
|
#'
|
||
|
#' @examples
|
||
|
#' c("id", "age", "weight_0", "weight_1") |> grepl_fix(pattern = c("_0", "_1"), type = "suffix")
|
||
|
grepl_fix <- function(data, pattern, type = c("prefix", "infix", "suffix")) {
|
||
|
type <- match.arg(type)
|
||
|
|
||
|
if (type == "prefix") {
|
||
|
grepl(paste0("^(", paste(pattern, collapse = "|"), ")*"), data)
|
||
|
} else if (type == "suffix") {
|
||
|
grepl(paste0("*(", paste(pattern, collapse = "|"), ")$"), data)
|
||
|
} else if (type == "infix") {
|
||
|
grepl(paste0("*(", paste(pattern, collapse = "|"), ")*"), data)
|
||
|
}
|
||
|
}
|