parsing of different form formats, move to bslib with new sidebar

2026-06-19 05:47:30 +02:00 · 2024-11-14 14:29:46 +01:00 · 2024-11-14 14:29:46 +01:00 · a576a580db
commit a576a580db
parent f26cf1c916
12 changed files with 664 additions and 122 deletions
--- a/R/parse_formats.R
+++ b/R/parse_formats.R
@ -0,0 +1,101 @@
+#' Parse input data with columns of priorities to columns of groups
+#'
+#' @description
+#' This handles transforming data from a typical Google form to the format
+#' compatible with `prioritized_grouping()`.
+#'
+#' @param data data.frame or tibble
+#' @param id id column. Numeric index or column name. Default is 1.
+#' @param prio.cols priority columns. Numeric indices or column names.
+#' @param sort.cols flag to sort `prio.cols` before ranks are assigned. Default=FALSE
+#'
+#' @return data.frame with a character `id` column followed by one integer
+#' column per group (sorted by name) holding the priority given to that group.
+#' @export
+#'
+parse_prio_form <- function(data, id = 1, prio.cols, sort.cols = FALSE) {
+  # Sort before resolving indices; sorting afterwards would have no effect
+  if (sort.cols) {
+    prio.cols <- sort(prio.cols)
+  }
+
+  if (is.character(prio.cols)) {
+    grp.index <- match(prio.cols, names(data))
+  } else {
+    grp.index <- prio.cols
+  }
+
+  # Rename priority columns to their rank (1, 2, ...) in `prio.cols` order
+  new.names <- names(data)
+  new.names[grp.index] <- seq_along(grp.index)
+  data <- setNames(data, new.names)
+
+  out <- split(data, seq_len(nrow(data))) |>
+    lapply(\(.x) {
+      # One-row frame per respondent: cells hold ranks, names are the groups
+      ranks <- c(as.character(.x[[id]]), colnames(.x)[grp.index])
+      setNames(
+        as.data.frame(matrix(ranks, nrow = 1)),
+        c("id", unname(unlist(.x[grp.index])))
+      )
+    }) |>
+    dplyr::bind_rows() |>
+    dplyr::mutate(dplyr::across(-1, as.integer))
+
+  # Sorting is not really needed, but a nice touch
+  out[c(names(out)[1], sort(names(out)[-1]))]
+}
+
+
+#' Parse input data from column of strings with prioritised group names
+#'
+#' @description
+#' This handles transforming data from a typical Microsoft form to the format
+#' compatible with `prioritized_grouping()`.
+#'
+#' @param data data.frame or tibble
+#' @param id id column. Numeric index or column name. Default is 1.
+#' @param string.col string column. Numeric index or column name.
+#' @param pattern regex pattern to use for splitting priorities string with
+#' `strsplit()`.
+#' Default is ";".
+#'
+#' @return data.frame with an `id` column followed by one integer column per
+#' group (sorted by name) holding the position of that group in the string.
+#' @export
+#'
+parse_string_form <- function(data, id = 1, string.col, pattern = NULL) {
+  if (is.null(pattern)) {
+    pattern <- ";"
+  }
+
+  if (length(string.col) != 1) {
+    stop("string.col is required, and has to have length 1")
+  }
+  if (is.character(string.col)) {
+    string.index <- match(string.col, names(data))
+  } else {
+    string.index <- string.col
+  }
+
+  # Cells with NAs are excluded.
+  # NAs happen if the priorities are not edited upon form submission, but a
+  # default order can not be guessed reliably if group naming is not ordered
+  # (like group N, group N+1...)
+  out <- data.frame(data[[id]], data[[string.index]]) |>
+    na.omit() |>
+    (\(.d) {
+      split(.d, seq_len(nrow(.d)))
+    })() |>
+    lapply(\(.x) {
+      grps <- unlist(strsplit(x = .x[[2]], split = pattern))
+      # Build from a list, not a matrix, so a non-numeric id cannot coerce
+      # the priorities to character; group names are kept verbatim.
+      ranks <- as.list(setNames(seq_along(grps), grps))
+      as.data.frame(c(list(id = .x[[1]]), ranks), check.names = FALSE)
+    }) |>
+    dplyr::bind_rows()
+
+  # Sorting is not really needed, but a nice touch
+  out[c(names(out)[1], sort(names(out)[-1]))]
+}
--- a/R/prioritized_grouping.R
+++ b/R/prioritized_grouping.R
@ -2,7 +2,7 @@ utils::globalVariables(c("group", "grp", "i", "j", "value"))

 #' Solve grouping based on priorities or costs.
 #'
-#' @param data data set in wide format. First column should bi ID, then one column
+#' @param data data set in wide format. First column should be ID, then one column
 #' for each group containing cost/priorities.
 #' @param cap_classes class capacity. Numeric vector length 1 or length=number
 #' of groups. If NULL equal group sizes are calculated. Default is NULL.
@ -39,6 +39,11 @@ prioritized_grouping <-
           subject identifiers")
    }

+    # Converts tibble to data.frame
+    if ("tbl_df" %in% class(data)){
+      data <- as.data.frame(data)
+    }
+
    ## This program very much trust the user to supply correctly formatted data
    cost <- t(data[, -1]) # Transpose converts to matrix
    colnames(cost) <- data[, 1]
@ -360,3 +365,4 @@ read_input <- function(file, consider.na = c("NA", '""', "")) {

  df
 }
+