mirror of
https://github.com/agdamsbo/REDCapCAST.git
synced 2026-06-21 14:09:08 +02:00
trying to overcome error messagein github actions by moving R package to root.
This commit is contained in:
parent
58cabc7153
commit
582a481942
33 changed files with 1 additions and 1276 deletions
204
R/REDCap_split.r
Normal file
204
R/REDCap_split.r
Normal file
|
|
@ -0,0 +1,204 @@
|
|||
#' Split REDCap repeating instruments table into multiple tables
|
||||
#'
|
||||
#' This will take output from a REDCap export and split it into a base table
|
||||
#' and child tables for each repeating instrument. Metadata
|
||||
#' is used to determine which fields should be included in each resultant table.
|
||||
#'
|
||||
#' @param records Exported project records. May be a \code{data.frame},
|
||||
#' \code{response}, or \code{character} vector containing JSON from an API
|
||||
#' call.
|
||||
#' @param metadata Project metadata (the data dictionary). May be a
|
||||
#' \code{data.frame}, \code{response}, or \code{character} vector containing
|
||||
#' JSON from an API call.
|
||||
#' @param primary_table_name Name given to the list element for the primary
|
||||
#' output table (as described in \emph{README.md}). Ignored if
|
||||
#' \code{forms = 'all'}.
|
||||
#' @param forms Indicate whether to create separate tables for repeating
|
||||
#' instruments only or for all forms.
|
||||
#' @author Paul W. Egeler, M.S., GStat
|
||||
#' @examples
|
||||
#' \dontrun{
|
||||
#' # Using an API call -------------------------------------------------------
|
||||
#'
|
||||
#' library(RCurl)
|
||||
#'
|
||||
#' # Get the records
|
||||
#' records <- postForm(
|
||||
#' uri = api_url, # Supply your site-specific URI
|
||||
#' token = api_token, # Supply your own API token
|
||||
#' content = 'record',
|
||||
#' format = 'json',
|
||||
#' returnFormat = 'json'
|
||||
#' )
|
||||
#'
|
||||
#' # Get the metadata
|
||||
#' metadata <- postForm(
|
||||
#' uri = api_url, # Supply your site-specific URI
|
||||
#' token = api_token, # Supply your own API token
|
||||
#' content = 'metadata',
|
||||
#' format = 'json'
|
||||
#' )
|
||||
#'
|
||||
#' # Convert exported JSON strings into a list of data.frames
|
||||
#' REDCapRITS::REDCap_split(records, metadata)
|
||||
#'
|
||||
#' # Using a raw data export -------------------------------------------------
|
||||
#'
|
||||
#' # Get the records
|
||||
#' records <- read.csv("/path/to/data/ExampleProject_DATA_2018-06-03_1700.csv")
|
||||
#'
|
||||
#' # Get the metadata
|
||||
#' metadata <- read.csv("/path/to/data/ExampleProject_DataDictionary_2018-06-03.csv")
|
||||
#'
|
||||
#' # Split the tables
|
||||
#' REDCapRITS::REDCap_split(records, metadata)
|
||||
#'
|
||||
#' # In conjunction with the R export script ---------------------------------
|
||||
#'
|
||||
#' # You must set the working directory first since the REDCap data export script
|
||||
#' # contains relative file references.
|
||||
#' setwd("/path/to/data/")
|
||||
#'
|
||||
#' # Run the data export script supplied by REDCap.
|
||||
#' # This will create a data.frame of your records called 'data'
|
||||
#' source("ExampleProject_R_2018-06-03_1700.r")
|
||||
#'
|
||||
#' # Get the metadata
|
||||
#' metadata <- read.csv("ExampleProject_DataDictionary_2018-06-03.csv")
|
||||
#'
|
||||
#' # Split the tables
|
||||
#' REDCapRITS::REDCap_split(data, metadata)
|
||||
#' }
|
||||
#' @return A list of \code{"data.frame"}s. The number of tables will differ
|
||||
#' depending on the \code{forms} option selected.
|
||||
#' \itemize{
|
||||
#' \item \code{'repeating'}: one base table and one or more
|
||||
#' tables for each repeating instrument.
|
||||
#' \item \code{'all'}: a data.frame for each instrument, regardless of
|
||||
#' whether it is a repeating instrument or not.
|
||||
#' }
|
||||
#' @include process_user_input.r utils.r
|
||||
#' @export
|
||||
REDCap_split <- function(records,
|
||||
metadata,
|
||||
primary_table_name = "",
|
||||
forms = c("repeating", "all")
|
||||
) {
|
||||
|
||||
# Process user input
|
||||
records <- process_user_input(records)
|
||||
metadata <- as.data.frame(process_user_input(metadata)) # See issue #12
|
||||
|
||||
# Get the variable names in the dataset
|
||||
vars_in_data <- names(records)
|
||||
|
||||
# Match arg for forms
|
||||
forms <- match.arg(forms)
|
||||
|
||||
# Check to see if there were any repeating instruments
|
||||
if (forms == "repeating" && !"redcap_repeat_instrument" %in% vars_in_data) {
|
||||
stop("There are no repeating instruments in this dataset.")
|
||||
}
|
||||
|
||||
# Remove NAs from `redcap_repeat_instrument` (see issue #12)
|
||||
if(any(is.na(records$redcap_repeat_instrument))) {
|
||||
records$redcap_repeat_instrument <- ifelse(
|
||||
is.na(records$redcap_repeat_instrument),
|
||||
"",
|
||||
as.character(records$redcap_repeat_instrument)
|
||||
)
|
||||
}
|
||||
|
||||
# Standardize variable names for metadata
|
||||
names(metadata) <- metadata_names
|
||||
|
||||
# Make sure that no metadata columns are factors
|
||||
metadata <- rapply(metadata, as.character, classes = "factor", how = "replace")
|
||||
|
||||
# Find the fields and associated form
|
||||
fields <- match_fields_to_form(metadata, vars_in_data)
|
||||
|
||||
# Variables to be present in each output table
|
||||
universal_fields <- c(
|
||||
vars_in_data[1],
|
||||
grep(
|
||||
"^redcap_(?!(repeat)).*",
|
||||
vars_in_data,
|
||||
value = TRUE,
|
||||
perl = TRUE
|
||||
)
|
||||
)
|
||||
|
||||
if ("redcap_repeat_instrument" %in% vars_in_data) {
|
||||
# Variables to be at the beginning of each repeating instrument
|
||||
repeat_instrument_fields <- grep(
|
||||
"^redcap_repeat.*",
|
||||
vars_in_data,
|
||||
value = TRUE
|
||||
)
|
||||
|
||||
# Identify the subtables in the data
|
||||
subtables <- unique(records$redcap_repeat_instrument)
|
||||
subtables <- subtables[subtables != ""]
|
||||
|
||||
# Split the table based on instrument
|
||||
out <- split.data.frame(records, records$redcap_repeat_instrument)
|
||||
primary_table_index <- which(names(out) == "")
|
||||
|
||||
if (forms == "repeating" && primary_table_name %in% subtables) {
|
||||
warning("The label given to the primary table is already used by a repeating instrument. The primary table label will be left blank.")
|
||||
primary_table_name <- ""
|
||||
} else if (primary_table_name > "") {
|
||||
names(out)[[primary_table_index]] <- primary_table_name
|
||||
}
|
||||
|
||||
# Delete the variables that are not relevant
|
||||
for (i in names(out)) {
|
||||
|
||||
if (i == primary_table_name) {
|
||||
|
||||
out_fields <- which(
|
||||
vars_in_data %in% c(
|
||||
universal_fields,
|
||||
fields[!fields[,2] %in% subtables, 1]
|
||||
)
|
||||
)
|
||||
out[[primary_table_index]] <- out[[primary_table_index]][out_fields]
|
||||
|
||||
} else {
|
||||
|
||||
out_fields <- which(
|
||||
vars_in_data %in% c(
|
||||
universal_fields,
|
||||
repeat_instrument_fields,
|
||||
fields[fields[,2] == i, 1]
|
||||
)
|
||||
)
|
||||
out[[i]] <- out[[i]][out_fields]
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (forms == "all") {
|
||||
|
||||
out <- c(
|
||||
split_non_repeating_forms(
|
||||
out[[primary_table_index]],
|
||||
universal_fields,
|
||||
fields[!fields[,2] %in% subtables,]
|
||||
),
|
||||
out[-primary_table_index]
|
||||
)
|
||||
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
out <- split_non_repeating_forms(records, universal_fields, fields)
|
||||
|
||||
}
|
||||
|
||||
out
|
||||
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue