major overhaul with new functions. docs are lacking

2026-06-19 05:07:30 +02:00 · 2024-05-02 13:31:21 +02:00 · 2024-05-02 13:31:21 +02:00 · 04f5bec85c
commit 04f5bec85c
parent 1fd3911974
28 changed files with 874 additions and 81 deletions
--- a/R/doc2dd.R
+++ b/R/doc2dd.R
@ -0,0 +1,290 @@
+utils::globalVariables(c("calculations", "choices"))
+#' Doc table to data dictionary - EARLY, DOCS MISSING
+#'
+#' @description
+#' Works well with `project.aid::docx2list()`.
+#' Allows defining a database in a text document (see provided template) for
+#' an easier to use data base creation. This approach allows easier
+#' collaboration when defining the database. The generic case is a data frame
+#' with variable names as values in a column. This is a format like the REDCap
+#' data dictionary, but gives a few options for formatting.
+#'
+#' @param data tibble or data.frame with all variable names in one column
+#' @param instrument.name character vector length one. Instrument name.
+#' @param col.variables variable names column (default = 1), allows dplyr
+#' subsetting
+#' @param list.datetime.format formatting for date/time detection.
+#' See `case_match_regex_list()`
+#' @param col.description descriptions column, allows dplyr
+#' subsetting. If empty, variable names will be used.
+#' @param col.condition conditions for branching column, allows dplyr
+#' subsetting. See `char2cond()`.
+#' @param col.subheader sub-header column, allows dplyr subsetting.
+#' See `format_subheader()`.
+#' @param subheader.tag formatting tag. Default is "h2"
+#' @param condition.minor.sep condition split minor. See `char2cond()`.
+#' Default is ",".
+#' @param condition.major.sep condition split major. See `char2cond()`.
+#' Default is ";".
+#' @param col.calculation calculations column. Has to be written exact.
+#' Character vector.
+#' @param col.choices choices column. See `char2choice()`.
+#' @param choices.char.sep choices split. See `char2choice()`. Default is "/".
+#' @param missing.default value for missing fields. Default is NA.
+#'
+#' @return tibble or data.frame (same as data)
+#' @export
+#'
+#' @examples
+#' # data <- dd_inst
+#' # data |> doc2dd(instrument.name = "evt",
+#' # col.description = 3,
+#' # col.condition = 4,
+#' # col.subheader = 2,
+#' # col.calculation = 5,
+#' # col.choices = 6)
+doc2dd <- function(data,
+                   instrument.name,
+                   col.variables = 1,
+                   list.datetime.format = list(
+                     date_dmy = "_dat[eo]$",
+                     time_hh_mm_ss = "_ti[md]e?$"
+                   ),
+                   col.description = NULL,
+                   col.condition = NULL,
+                   col.subheader = NULL,
+                   subheader.tag = "h2",
+                   condition.minor.sep = ",",
+                   condition.major.sep = ";",
+                   col.calculation = NULL,
+                   col.choices = NULL,
+                   choices.char.sep = "/",
+                   missing.default = NA) {
+  data <- data |>
+    dplyr::mutate(dplyr::across(dplyr::everything(), ~ dplyr::na_if(.x, c(""))))
+
+
+  ## Defining the field name
+  out <- data |>
+    dplyr::mutate(
+      field_name = dplyr::pick(col.variables) |> unlist()
+    )
+
+  ## Defining the field label. Field name is used if no label is provided.
+  if (is_missing(col.description)) {
+    out <- out |>
+      dplyr::mutate(
+        field_label = field_name
+      )
+  } else {
+    out <- out |>
+      dplyr::mutate(
+        field_label = dplyr::pick(col.description) |> unlist()
+      )
+  }
+
+  ## Defining the sub-header
+  if (!is_missing(col.subheader)) {
+    out <- out |>
+      dplyr::mutate(
+        section_header = dplyr::pick(col.subheader) |>
+          unlist() |>
+          format_subheader(tag = subheader.tag)
+      )
+  }
+
+  ## Defining the choices
+  if (is_missing(col.choices)) {
+    out <- out |>
+      dplyr::mutate(
+        choices = missing.default
+      )
+  } else {
+    out <- out |>
+      dplyr::mutate(
+        choices = dplyr::pick(col.choices) |>
+          unlist() |>
+          char2choice(char.split = choices.char.sep)
+      )
+  }
+
+  ## Defining the calculations
+  if (is_missing(col.calculation)) {
+    out <- out |>
+      dplyr::mutate(
+        calculations = missing.default
+      )
+  } else {
+    out <- out |>
+      dplyr::mutate(
+        calculations = dplyr::pick(col.calculation) |>
+          unlist() |>
+          tolower() |>
+          (\(.x) gsub("’", "'", .x))()
+      )
+  }
+
+  ## Merging choices and calculations, defining field type and setting form name
+  out <- out |>
+    dplyr::mutate(
+      select_choices_or_calculations = dplyr::coalesce(calculations, choices),
+      field_type = dplyr::case_when(!is.na(choices) ~ "radio",
+        !is.na(calculations) ~ "calc",
+        .default = "text"
+      ),
+      form_name = instrument.name
+    )
+
+  ## Defining branching logic from conditions
+  if (is_missing(col.condition)) {
+    out <- out |>
+      dplyr::mutate(
+        branching_logic = missing.default
+      )
+  } else {
+    out <- out |>
+      dplyr::mutate(
+        branching_logic = dplyr::pick(col.condition) |>
+          unlist() |>
+          char2cond(minor.split = condition.minor.sep,
+                    major.split = condition.major.sep)
+      )
+  }
+
+  ## Detecting data/time formatting from systematic field names
+  if (is.null(list.datetime.format)) {
+    out <- out |>
+      dplyr::mutate(
+        text_validation_type_or_show_slider_number = missing.default
+      )
+  } else {
+    out <- out |>
+      dplyr::mutate(
+        text_validation_type_or_show_slider_number = case_match_regex_list(
+          field_name,
+          list.datetime.format
+        )
+      )
+  }
+
+  ## Selecting relevant columns
+  out <- out |>
+    dplyr::select(dplyr::any_of(names(REDCapCAST::redcapcast_meta)))
+
+  ## Merging and ordering columns for upload
+  out |>
+    list(REDCapCAST::redcapcast_meta |> dplyr::slice(0)) |>
+    dplyr::bind_rows() |>
+    dplyr::select(names(REDCapCAST::redcapcast_meta))
+}
+
+
+
+
+#' Simple function to generate REDCap choices from character vector
+#'
+#' @param data vector
+#' @param char.split splitting character(s)
+#' @param raw specific values. Can be used for options of same length.
+#' @param .default default value for missing. Default is NA.
+#'
+#' @return vector
+#' @export
+#'
+#' @examples
+#' char2choice(c("yes/no","  yep. / nope  ","",NA,"what"),.default=NA)
+char2choice <- function(data, char.split = "/", raw = NULL,.default=NA) {
+  ls <- strsplit(x = data, split = char.split)
+
+  ls |>
+    purrr::map(function(.x) {
+      if (is.null(raw)) {
+        raw <- seq_len(length(.x))
+      }
+      if (length(.x) == 0 | all(is.na(.x))) {
+        .default
+      } else {
+        paste(paste0(raw, ", ",trimws(.x)), collapse = " | ")
+      }
+    }) |>
+    purrr::list_c()
+}
+
+#' Simple function to generate REDCap branching logic from character vector
+#'
+#' @param data vector
+#' @param .default default value for missing. Default is NA.
+#' @param minor.split minor split
+#' @param major.split major split
+#' @param major.sep argument separation. Default is " or ".
+#'
+#' @return vector
+#' @export
+#'
+#' @examples
+#' #data <- dd_inst$betingelse
+#' #c("Extubation_novent, 2; Pacu_delay, 1") |> char2cond()
+char2cond <- function(data, minor.split = ",", major.split = ";", major.sep = " or ", .default = NA) {
+  strsplit(x = data, split = major.split) |>
+    purrr::map(function(.y) {
+      strsplit(x = .y, split = minor.split) |>
+        purrr::map(function(.x) {
+          if (length(.x) == 0 | all(is.na(.x))) {
+            .default
+          } else {
+            glue::glue("[{trimws(tolower(.x[1]))}]='{trimws(.x[2])}'")
+          }
+        }) |>
+        purrr::list_c() |>
+        glue::glue_collapse(sep = major.sep)
+    }) |>
+    purrr::list_c()
+}
+
+#' List-base regex case_when
+#'
+#' @description
+#' Mimics case_when for list of regex patterns and values. Used for date/time
+#' validation generation from name vector. Like case_when, the matches are in
+#' order of priority.
+#' Primarily used in REDCapCAST to do data type coding from systematic variable
+#' naming.
+#'
+#' @param data vector
+#' @param match.list list of case matches
+#' @param .default Default value for non-matches. Default is NA.
+#'
+#' @return vector
+#' @export
+#'
+#' @examples
+#' case_match_regex_list(
+#'   c("test_date", "test_time", "test_tida", "test_tid"),
+#'   list(date_dmy = "_dat[eo]$", time_hh_mm_ss = "_ti[md]e?$")
+#' )
+case_match_regex_list <- function(data, match.list, .default = NA) {
+  match.list |>
+    purrr::imap(function(.z, .i) {
+      dplyr::if_else(grepl(.z, data), .i, NA)
+    }) |>
+    (\(.x){
+      dplyr::coalesce(!!!.x)
+    })() |>
+    (\(.x){
+      dplyr::if_else(is.na(.x), .default, .x)
+    })()
+}
+
+#' Multi missing check
+#'
+#' @param data character vector
+#' @param nas character vector of strings considered as NA
+#'
+#' @return logical vector
+is_missing <- function(data,nas=c("", "NA")) {
+  if (is.null(data)) {
+    TRUE
+  } else {
+    is.na(data) | data %in% nas
+  }
+}
--- a/R/html_styling.R
+++ b/R/html_styling.R
@ -0,0 +1,68 @@
+#' Create two-column HTML table for data piping in REDCap instruments
+#'
+#' @param text descriptive text
+#' @param variable variable to pipe
+#'
+#' @return character vector
+#' @export
+#'
+#' @examples
+#' create_html_table(text = "Patient ID", variable = c("[cpr]"))
+#' create_html_table(text = paste("assessor", 1:2, sep = "_"), variable = c("[cpr]"))
+#' # create_html_table(text = c("CPR nummer","Word"), variable = c("[cpr][1]", "[cpr][2]", "[test]"))
+create_html_table <- function(text, variable) {
+  assertthat::assert_that(length(text)>1 & length(variable)==1 |
+                            length(text)==1 & length(variable)>1 |
+                            length(text)==length(variable),
+                          msg = "text and variable has to have same length, or one has to have length 1")
+
+  start <- '<table style="border-collapse: collapse; width: 100%;" border="0"> <tbody>'
+  end <- "</tbody> </table>"
+
+  # Extension would allow defining number of columns and specify styling
+  items <- purrr::map2(text, variable, function(.x, .y) {
+    glue::glue('<tr> <td style="width: 58%;"> <h5><span style="font-weight: normal;">{.x}<br /></span></h5> </td> <td style="width: 42%; text-align: left;"> <h5><span style="font-weight: bold;">{.y}</span></h5> </td> </tr>')
+  })
+
+  glue::glue(start, glue::glue_collapse(purrr::list_c(items)), end)
+}
+
+#' Simple html tag wrapping for REDCap text formatting
+#'
+#' @param data character vector
+#' @param tag character vector length 1
+#' @param extra character vector
+#'
+#' @return character vector
+#' @export
+#'
+#' @examples
+#' html_tag_wrap("Titel", tag = "div", extra = 'class="rich-text-field-label"')
+#' html_tag_wrap("Titel", tag = "h2")
+html_tag_wrap <- function(data, tag = "h2", extra = NULL) {
+  et <- ifelse(is.null(extra), "", paste0(" ", extra))
+  glue::glue("<{tag}{et}>{data}</{tag}>")
+}
+
+
+#' Sub-header formatting wrapper
+#'
+#' @param data character vector
+#' @param tag character vector length 1
+#'
+#' @return character vector
+#' @export
+#'
+#' @examples
+#' "Instrument header" |> format_subheader()
+format_subheader <- function(data, tag = "h2") {
+  dplyr::if_else(is.na(data) | data == "",
+                 NA,
+                 data |>
+                   html_tag_wrap(tag = tag) |>
+                   html_tag_wrap(
+                     tag = "div",
+                     extra = 'class="rich-text-field-label"'
+                   )
+  )
+}
--- a/R/process_user_input.r
+++ b/R/process_user_input.r
@ -1,7 +1,20 @@
+#' User input processing
+#'
+#' @param x input
+#'
+#' @return processed input
+#' @export
 process_user_input <- function(x) {
  UseMethod("process_user_input", x)
 }

+#' User input processing default
+#'
+#' @param x input
+#' @param ... ignored
+#'
+#' @return processed input
+#' @export
 process_user_input.default <- function(x, ...) {
  stop(
    deparse(substitute(x)),
@ -12,10 +25,25 @@ process_user_input.default <- function(x, ...) {
  )
 }

+
+#' User input processing data.frame
+#'
+#' @param x input
+#' @param ... ignored
+#'
+#' @return processed input
+#' @export
 process_user_input.data.frame <- function(x, ...) {
  x
 }

+#' User input processing character
+#'
+#' @param x input
+#' @param ... ignored
+#'
+#' @return processed input
+#' @export
 process_user_input.character <- function(x, ...) {
  if (!requireNamespace("jsonlite", quietly = TRUE)) {
    stop(
@ -32,6 +60,14 @@ process_user_input.character <- function(x, ...) {
  jsonlite::fromJSON(x)
 }

+
+#' User input processing response
+#'
+#' @param x input
+#' @param ... ignored
+#'
+#' @return processed input
+#' @export
 process_user_input.response <- function(x, ...) {
  process_user_input(rawToChar(x$content))
 }
--- a/R/shiny_cast.R
+++ b/R/shiny_cast.R
@ -34,3 +34,57 @@ shiny_cast <- function() {
  )
 }

+
+#' Helper to import files correctly
+#'
+#' @param filenames file names
+#'
+#' @return character vector
+#' @export
+#'
+#' @examples
+#' file_extension(list.files(here::here(""))[[2]])[[1]]
+#' file_extension(c("file.cd..ks","file"))
+file_extension <- function(filenames) {
+  sub(pattern = "^(.*\\.|[^.]+)(?=[^.]*)", replacement = "",
+      filenames,
+      perl = TRUE)
+}
+
+#' Flexible file import based on extension
+#'
+#' @param file file name
+#' @param consider.na character vector of strings to consider as NAs
+#'
+#' @return tibble
+#' @export
+#'
+#' @examples
+#' read_input("https://raw.githubusercontent.com/agdamsbo/cognitive.index.lookup/main/data/sample.csv")
+read_input <- function(file, consider.na = c("NA", '""', "")) {
+  ext <- file_extension(file)
+
+  tryCatch(
+    {
+      if (ext == "csv") {
+        df <- readr::read_csv(file = file, na = consider.na)
+      } else if (ext %in% c("xls", "xlsx")) {
+        df <- openxlsx2::read_xlsx(file = file, na.strings = consider.na)
+      } else if (ext == "dta") {
+        df <- haven::read_dta(file = file)
+      } else if (ext == "ods") {
+        df <- readODS::read_ods(file = file)
+      } else {
+        stop("Input file format has to be on of:
+             '.csv', '.xls', '.xlsx', '.dta' or '.ods'")
+      }
+    },
+    error = function(e) {
+      # return a safeError if a parsing error occurs
+      stop(shiny::safeError(e))
+    }
+  )
+
+  df
+}
+
--- a/R/utils.r
+++ b/R/utils.r
@ -497,55 +497,4 @@ is_repeated_longitudinal <- function(data, generics = c(



-#' Helper to import files correctly
-#'
-#' @param filenames file names
-#'
-#' @return character vector
-#' @export
-#'
-#' @examples
-#' file_extension(list.files(here::here(""))[[2]])[[1]]
-#' file_extension(c("file.cd..ks","file"))
-file_extension <- function(filenames) {
-  sub(pattern = "^(.*\\.|[^.]+)(?=[^.]*)", replacement = "",
-      filenames,
-      perl = TRUE)
-}

-#' Flexible file import based on extension
-#'
-#' @param file file name
-#' @param consider.na character vector of strings to consider as NAs
-#'
-#' @return tibble
-#' @export
-#'
-#' @examples
-#' read_input("https://raw.githubusercontent.com/agdamsbo/cognitive.index.lookup/main/data/sample.csv")
-read_input <- function(file, consider.na = c("NA", '""', "")) {
-  ext <- file_extension(file)
-
-  tryCatch(
-    {
-      if (ext == "csv") {
-        df <- readr::read_csv(file = file, na = consider.na)
-      } else if (ext %in% c("xls", "xlsx")) {
-        df <- openxlsx2::read_xlsx(file = file, na.strings = consider.na)
-      } else if (ext == "dta") {
-        df <- haven::read_dta(file = file)
-      } else if (ext == "ods") {
-        df <- readODS::read_ods(file = file)
-      } else {
-        stop("Input file format has to be on of:
-             '.csv', '.xls', '.xlsx', '.dta' or '.ods'")
-      }
-    },
-    error = function(e) {
-      # return a safeError if a parsing error occurs
-      stop(shiny::safeError(e))
-    }
-  )
-
-  df
-}