major update and first official launch. CRAN is waiting.

2026-06-19 13:57:29 +02:00 · 2024-10-10 12:14:41 +02:00 · 2024-10-10 12:14:41 +02:00 · 3b035ab06f
commit 3b035ab06f
parent 464b842629
218 changed files with 1758 additions and 410523 deletions
--- a/app/rsconnect/shinyapps.io/agdamsbo/prioritized-grouping.dcf
+++ b/app/rsconnect/shinyapps.io/agdamsbo/prioritized-grouping.dcf
@ -0,0 +1,10 @@
+name: prioritized-grouping
+title:
+username: agdamsbo
+account: agdamsbo
+server: shinyapps.io
+hostUrl: https://api.shinyapps.io/v1
+appId: 12977223
+bundleId: 9205937
+url: https://agdamsbo.shinyapps.io/prioritized-grouping/
+version: 1
--- a/app/server.R
+++ b/app/server.R
@ -0,0 +1,436 @@
+
+
+########
+#### Current file: R//prioritized_grouping.R 
+########
+
+## Declares the bare names used in non-standard evaluation below (ompr model
+## indices/variables and dplyr/ggplot2 columns) so they are registered as
+## known global variables for this code.
+utils::globalVariables(c("group", "grp", "i", "j", "value"))
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+## Assign subjects to groups by minimising the summed cost (priority) of the
+## allocation, subject to a capacity limit per group.
+##
+## Arguments:
+##   data         data frame; the first column holds subject identifiers,
+##                every other column holds the subjects' cost for one group
+##                (lower cost = higher priority).
+##   cap_classes  NULL (equal capacities derived from the data), a single
+##                number (same capacity for every group) or one number per
+##                group. Capacities are inflated by `excess_space`.
+##   excess_space extra head room added to each group, in percent.
+##   pre_grouped  optional data frame with subject identifiers in the first
+##                column and the pre-assigned group in the second, given
+##                either as group name or as index among the group columns.
+##   seed         passed to set.seed().
+##
+## Returns a list of class "prioritized_groups_list" with the elements
+## all_grouped, evaluation, groupings, solution, capacity, excess,
+## pre_grouped, cost_scale, input and export.
+prioritized_grouping <-
+  function(data,
+           cap_classes = NULL,
+           excess_space = 20,
+           pre_grouped = NULL,
+           seed = 6293812) {
+    set.seed(seed = seed)
+    requireNamespace("ROI")
+    requireNamespace("ROI.plugin.symphony")
+
+    if (!is.data.frame(data)) {
+      stop("Supplied data has to be a data frame, with each row
+           are subjects and columns are groups, with the first column being
+           subject identifiers")
+    }
+
+    ## Builds the groups-by-subjects cost matrix (transpose converts to
+    ## matrix). `drop = FALSE` keeps the group name when there is only one
+    ## group column, and `[[` returns a plain vector for tibbles as well.
+    build_cost <- function(d) {
+      mat <- t(d[, -1, drop = FALSE])
+      colnames(mat) <- d[[1]]
+      mat
+    }
+
+    ## This program very much trusts the user to supply correctly formatted
+    ## data
+    cost <- build_cost(data)
+
+    nms_groups <- rownames(cost)
+    num_groups <- dim(cost)[1]
+    num_sub <- dim(cost)[2]
+
+    ## Adding the option to introduce a bit of head room to the groups by
+    ## making them a little bigger than the smallest possible size.
+    ## Default is to allow for an extra 20 % fill
+    excess <- 1 + (excess_space / 100)
+
+    # generous round up of capacities
+    if (is.null(cap_classes)) {
+      capacity <- rep(ceiling(excess * num_sub / num_groups), num_groups)
+    } else if (length(cap_classes) == 1) {
+      capacity <- ceiling(rep(cap_classes, num_groups) * excess)
+    } else if (length(cap_classes) == num_groups) {
+      capacity <- ceiling(cap_classes * excess)
+    } else {
+      stop("cap_classes has to be either length 1 or same as number of groups")
+    }
+    ## Naming by group lets consumers look capacities up by group name
+    ## instead of relying on positional order.
+    names(capacity) <- nms_groups
+
+    ## pre_grouped should be a data frame with an ID and a group column
+    with_pre_grouped <- !is.null(pre_grouped)
+    if (with_pre_grouped) {
+      pre_grouped <- as.data.frame(pre_grouped)
+
+      # Simple translation to allow pre_grouped to denote indices
+      pre_target <- pre_grouped[[2]]
+      if (is.numeric(pre_target)) {
+        pre_target <- nms_groups[pre_target]
+      } else {
+        pre_target <- as.character(pre_target)
+      }
+
+      # Splitting to list (one element per group, in column order) for
+      # later merging
+      pre <- split(
+        pre_grouped[[1]],
+        factor(pre_target, levels = nms_groups)
+      )
+      # Subtracting capacity numbers, to reflect already filled spots
+      capacity <- capacity - lengths(pre)
+      # Making sure pre_grouped are removed from main data set
+      data <- data[!data[[1]] %in% pre_grouped[[1]], ]
+
+      cost <- build_cost(data)
+
+      num_groups <- dim(cost)[1]
+      num_sub <- dim(cost)[2]
+    }
+
+    ## Simple NA handling. Better to handle NAs yourself!
+    cost[is.na(cost)] <- num_groups
+
+    i_m <- seq_len(num_groups)
+    j_m <- seq_len(num_sub)
+
+    m <- ompr::MIPModel() |>
+      ompr::add_variable(grp[i, j],
+        i = i_m,
+        j = j_m,
+        type = "binary"
+      ) |>
+      ## The first constraint says that group size should not exceed capacity
+      ompr::add_constraint(ompr::sum_expr(grp[i, j], j = j_m) <= capacity[i],
+        i = i_m
+      ) |>
+      ## The second constraint says each subject can only be in one group
+      ompr::add_constraint(ompr::sum_expr(grp[i, j], i = i_m) == 1, j = j_m) |>
+      ## The objective is set to minimize the cost of the grouping
+      ## Giving subjects the group with the highest possible ranking
+      ompr::set_objective(
+        ompr::sum_expr(
+          cost[i, j] * grp[i, j],
+          i = i_m,
+          j = j_m
+        ),
+        "min"
+      ) |>
+      ompr::solve_model(ompr.roi::with_ROI(solver = "symphony", verbosity = 1))
+
+    if (m$status == "error") {
+      stop("The algorithm is not able to solve the problem. Please adjust the
+           constraints by increasing group capacities and/or excess fill")
+    }
+
+    ## Getting groups
+    solution <- ompr::get_solution(m, grp[i, j]) |> dplyr::filter(value > 0)
+
+    grouped <- solution |> dplyr::select(i, j)
+
+    if (!is.null(rownames(cost))) {
+      grouped$i <- rownames(cost)[grouped$i]
+    }
+
+    if (!is.null(colnames(cost))) {
+      grouped$j <- colnames(cost)[grouped$j]
+    }
+
+    ## Splitting into groups based on groups
+    grouped_ls <- split(grouped$j, grouped$i)
+
+    ## Extracting subject cost for the final groups for evaluation
+    if (is.null(rownames(cost))) {
+      rownames(cost) <- seq_len(nrow(cost))
+    }
+
+    if (is.null(colnames(cost))) {
+      colnames(cost) <- seq_len(ncol(cost))
+    }
+
+    evaluated <- lapply(seq_along(grouped_ls), function(i) {
+      ndx <- match(names(grouped_ls)[i], rownames(cost))
+      cost[ndx, grouped_ls[[i]]]
+    })
+    names(evaluated) <- names(grouped_ls)
+
+    if (with_pre_grouped) {
+      ## Merge by group NAME: split() orders the solver groups alphabetically
+      ## and omits empty ones, whereas `pre` follows the column order, so a
+      ## positional merge could attach subjects to the wrong group.
+      grouped_all <- lapply(
+        stats::setNames(nms_groups, nms_groups),
+        function(g) c(grouped_ls[[g]], as.character(pre[[g]]))
+      )
+      ## Empty groups are left out, as they are without pre-grouping
+      grouped_all <- grouped_all[lengths(grouped_all) > 0]
+
+      out <- list(all_grouped = grouped_all)
+    } else {
+      out <- list(all_grouped = grouped_ls)
+    }
+
+    ## Two-column (ID, Group) character matrix of the final allocation
+    export <- do.call(rbind, lapply(seq_along(out[[1]]), function(i) {
+      cbind("ID" = out[[1]][[i]], "Group" = names(out[[1]])[i])
+    }))
+
+    out <- c(
+      out,
+      list(
+        evaluation = evaluated,
+        groupings = grouped_ls,
+        solution = solution,
+        capacity = capacity,
+        excess = excess,
+        pre_grouped = with_pre_grouped,
+        cost_scale = levels(factor(cost)),
+        input = data,
+        export = export
+      )
+    )
+
+    class(out) <- c("prioritized_groups_list", class(out))
+
+    return(out)
+  }
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+## Plot the result of prioritized_grouping().
+##
+## Arguments:
+##   data              object of class "prioritized_groups_list".
+##   columns           number of columns in the combined per-group plot,
+##                     passed to patchwork::wrap_plots(ncol = ).
+##   overall           if TRUE, draw one bar chart of the mean cost per group
+##                     instead of one cost distribution plot per group.
+##   viridis.option    viridis palette option.
+##   viridis.direction viridis palette direction.
+##
+## Returns a ggplot (overall = TRUE) or a patchwork of ggplots.
+grouping_plot <- function(data,
+                          columns = NULL,
+                          overall = FALSE,
+                          viridis.option="D",
+                          viridis.direction=-1) {
+  assertthat::assert_that(inherits(data, "prioritized_groups_list"))
+
+  ## Elements are fetched by name rather than by position, so the plot does
+  ## not depend on the order in which the result list is assembled.
+  dl <- data[["evaluation"]]
+  cost_scale <- unique(data[["cost_scale"]])
+  cap <- data[["capacity"]]
+  cnts_ls <- lapply(dl, function(i) {
+    factor(i, levels = cost_scale)
+  })
+
+  y_max <- max(lengths(dl))
+
+  if (overall) {
+    ds <- tibble::tibble(
+      group = seq_along(dl),
+      mean = round(Reduce(c, lapply(dl, mean)), 1)
+    )
+    out <- ds |>
+      ggplot2::ggplot(ggplot2::aes(x = group, y = mean, fill = mean)) +
+      ggplot2::geom_bar(stat = "identity") +
+      ggplot2::geom_hline(yintercept = 1) +
+      ggplot2::scale_fill_viridis_c(option=viridis.option,
+                                    direction = viridis.direction) +
+      ggplot2::guides(fill = "none") +
+      ggplot2::scale_x_continuous(name = "Groups", breaks = ds$group) +
+      ggplot2::ylab("Mean priority/cost") +
+      ggplot2::labs(
+        title = "Overall group-wise mean priority/cost of groupings",
+        subtitle = "Horizontal line marking the perfect mean=1 for reference"
+      )
+  } else {
+    out <- lapply(seq_along(dl), function(i) {
+      ttl <- names(dl)[i]
+      ns <- length(dl[[i]])
+      cnts <- cnts_ls[[i]]
+      ## The evaluation list is ordered alphabetically and skips empty
+      ## groups, so its positions need not line up with the capacity vector.
+      ## Use the group name whenever the capacities are named.
+      cap_i <- if (isTRUE(ttl %in% names(cap))) cap[[ttl]] else cap[[i]]
+      ggplot2::ggplot() +
+        ggplot2::geom_bar(ggplot2::aes(cnts, fill = cnts)) +
+        ggplot2::scale_x_discrete(
+          name = NULL,
+          breaks = cost_scale,
+          drop = FALSE
+        ) +
+        ggplot2::scale_y_continuous(name = NULL, limits = c(0, y_max)) +
+        ggplot2::scale_fill_manual(
+          values = viridisLite::viridis(length(cost_scale),
+                                        direction = viridis.direction,
+                                        option = viridis.option)
+        ) +
+        ggplot2::guides(fill = "none") +
+        ggplot2::labs(
+          title =
+            paste0(
+              ttl, " (fill=", round(ns / cap_i, 1), ";n=", ns, ";mean=",
+              round(mean(dl[[i]]), 1), ")"
+            )
+        )
+    }) |>
+      patchwork::wrap_plots(ncol = columns)
+  }
+
+  return(out)
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+## S3 plot method for "prioritized_groups_list" objects: hands the object and
+## any further arguments straight to grouping_plot().
+plot.prioritized_groups_list <- function(data, ...) {
+  grouping_plot(data = data, ...)
+}
+
+## Helper function for Shiny
+
+
+
+
+
+
+
+
+
+## Returns the extension (the text after the last dot) of each file name.
+## Names without any dot give an empty string.
+file_extension <- function(filenames) {
+  # Matches everything up to and including the last dot, or the whole name
+  # when it contains no dot; what remains is the extension.
+  up_to_extension <- "^(.*\\.|[^.]+)(?=[^.]*)"
+  sub(up_to_extension, "", filenames, perl = TRUE)
+}
+
+
+########
+#### Current file: app/server_raw.R 
+########
+
+library(shiny)
+## Shiny server logic: reads the uploaded cost table (and the optional table
+## of pre-assigned subjects), runs prioritized_grouping() on it and exposes
+## the result as tables, a plot and a CSV download.
+server <- function(input, output, session) {
+  # source("https://git.nikohuru.dk/au-phd/PhysicalActivityandStrokeOutcome/raw/branch/main/side%20projects/assignment.R")
+  # source(here::here("R/group_assign.R"))
+
+  ## Reads one uploaded file into a plain data frame, choosing the reader
+  ## from the file name extension. Stops for unsupported extensions.
+  read_upload <- function(path) {
+    ext <- file_extension(path)
+
+    if (ext == "csv") {
+      df <- utils::read.csv(path, na.strings = c("NA", '""', ""))
+    } else if (ext %in% c("xls", "xlsx")) {
+      df <- openxlsx::read.xlsx(path, na.strings = c("NA", '""', ""))
+    } else if (ext == "ods") {
+      # The upload path has to be passed here; `file` is not a variable in
+      # this scope and read_ods() takes the location as `path`.
+      df <- readODS::read_ods(path = path)
+    } else {
+      stop("Input file format has to be on of:
+             '.csv', '.xls', '.xlsx' or '.ods'")
+    }
+
+    # Downstream code indexes with `[, 1]`, which needs a plain data frame
+    as.data.frame(df)
+  }
+
+  dat <- reactive({
+    # input$file1 will be NULL initially; wait until a file is uploaded.
+    req(input$file1)
+
+    tryCatch(
+      read_upload(input$file1$datapath),
+      error = function(e) {
+        # return a safeError if a parsing error occurs
+        stop(safeError(e))
+      }
+    )
+  })
+
+  dat_pre <- reactive({
+    # The pre-assignment file is optional: NULL means "none uploaded"
+    if (is.null(input$file2$datapath)) {
+      return(NULL)
+    }
+
+    read_upload(input$file2$datapath)
+  })
+
+  groups <-
+    reactive({
+      grouped <- prioritized_grouping(
+        data = dat(),
+        excess_space = input$excess,
+        pre_grouped = dat_pre()
+      )
+      return(grouped)
+    })
+
+
+  # Translates the yes/no radio button into a logical flag
+  plot.overall <- reactive({
+    dplyr::case_match(input$overall.plot,
+                      "yes"~TRUE,
+                      "no"~FALSE,
+                      .default=NULL)
+  })
+
+  output$raw.data.tbl <- renderTable({
+    groups()$export
+  })
+
+  output$pre.groups <- renderTable({
+    dat_pre()
+  })
+
+  output$input <- renderTable({
+    dat()
+  })
+
+  output$groups.plt <- renderPlot({
+    grouping_plot(groups(),overall = plot.overall())
+  })
+
+  # Downloadable csv of selected dataset ----
+  output$downloadData <- downloadHandler(
+    filename = "prioritized_grouping.csv",
+
+    content = function(file) {
+      write.csv(groups()$export, file, row.names = FALSE)
+    }
+  )
+
+}
--- a/app/server_raw.R
+++ b/app/server_raw.R
@ -0,0 +1,105 @@
+library(shiny)
+## Shiny server logic: reads the uploaded cost table (and the optional table
+## of pre-assigned subjects), runs prioritized_grouping() on it and exposes
+## the result as tables, a plot and a CSV download.
+server <- function(input, output, session) {
+  # source("https://git.nikohuru.dk/au-phd/PhysicalActivityandStrokeOutcome/raw/branch/main/side%20projects/assignment.R")
+  # source(here::here("R/group_assign.R"))
+
+  ## Reads one uploaded file into a plain data frame, choosing the reader
+  ## from the file name extension. Stops for unsupported extensions.
+  read_upload <- function(path) {
+    ext <- file_extension(path)
+
+    if (ext == "csv") {
+      df <- utils::read.csv(path, na.strings = c("NA", '""', ""))
+    } else if (ext %in% c("xls", "xlsx")) {
+      df <- openxlsx::read.xlsx(path, na.strings = c("NA", '""', ""))
+    } else if (ext == "ods") {
+      # The upload path has to be passed here; `file` is not a variable in
+      # this scope and read_ods() takes the location as `path`.
+      df <- readODS::read_ods(path = path)
+    } else {
+      stop("Input file format has to be on of:
+             '.csv', '.xls', '.xlsx' or '.ods'")
+    }
+
+    # Downstream code indexes with `[, 1]`, which needs a plain data frame
+    as.data.frame(df)
+  }
+
+  dat <- reactive({
+    # input$file1 will be NULL initially; wait until a file is uploaded.
+    req(input$file1)
+
+    tryCatch(
+      read_upload(input$file1$datapath),
+      error = function(e) {
+        # return a safeError if a parsing error occurs
+        stop(safeError(e))
+      }
+    )
+  })
+
+  dat_pre <- reactive({
+    # The pre-assignment file is optional: NULL means "none uploaded"
+    if (is.null(input$file2$datapath)) {
+      return(NULL)
+    }
+
+    read_upload(input$file2$datapath)
+  })
+
+  groups <-
+    reactive({
+      grouped <- prioritized_grouping(
+        data = dat(),
+        excess_space = input$excess,
+        pre_grouped = dat_pre()
+      )
+      return(grouped)
+    })
+
+
+  # Translates the yes/no radio button into a logical flag
+  plot.overall <- reactive({
+    dplyr::case_match(input$overall.plot,
+                      "yes"~TRUE,
+                      "no"~FALSE,
+                      .default=NULL)
+  })
+
+  output$raw.data.tbl <- renderTable({
+    groups()$export
+  })
+
+  output$pre.groups <- renderTable({
+    dat_pre()
+  })
+
+  output$input <- renderTable({
+    dat()
+  })
+
+  output$groups.plt <- renderPlot({
+    grouping_plot(groups(),overall = plot.overall())
+  })
+
+  # Downloadable csv of selected dataset ----
+  output$downloadData <- downloadHandler(
+    filename = "prioritized_grouping.csv",
+
+    content = function(file) {
+      write.csv(groups()$export, file, row.names = FALSE)
+    }
+  )
+
+}
--- a/app/ui.R
+++ b/app/ui.R
@ -0,0 +1,131 @@
+library(shiny)
+## Shiny user interface: a sidebar with the excess-space setting, the two file
+## uploads, the plot-type switch and the download button, and a main panel
+## with tabs for the summary plot, the resulting allocation and the input data.
+ui <- fluidPage(
+  ## -----------------------------------------------------------------------------
+  ## Application title
+  ## -----------------------------------------------------------------------------
+
+  titlePanel("Group allocation based on individual subject prioritization.",
+             windowTitle = "Prioritized grouping calculator"),
+  h5(
+    "Please note this calculator is only meant as a proof of concept for educational purposes,
+     and the author will take no responsibility for the results of the calculator.
+     Uploaded data is not kept, but please, do not upload any sensitive data."
+  ),
+
+  ## -----------------------------------------------------------------------------
+  ## Side panel
+  ## -----------------------------------------------------------------------------
+
+
+  ## -----------------------------------------------------------------------------
+  ## Single entry
+  ## -----------------------------------------------------------------------------
+  sidebarLayout(
+    sidebarPanel(
+      numericInput(
+        inputId = "excess",
+        label = "Excess space (%)",
+        value = 20,
+        step = 5
+      ),
+      p("As default, the program will try to evenly distribute subjects in groups.
+        This factor will add more capacity to each group, for an overall lesser cost,
+        but more uneven group numbers. More adjustments can be performed with the source script."),
+      a(href='https://git.nikohuru.dk/au-phd/PhysicalActivityandStrokeOutcome/src/branch/main/apps/Assignment', "Source", target="_blank"),
+      ## -----------------------------------------------------------------------------
+      ## File upload
+      ## -----------------------------------------------------------------------------
+
+      # Input: Select a file ----
+
+      fileInput(
+        inputId = "file1",
+        label = "Choose main data file",
+        multiple = FALSE,
+        accept = c(
+          ".csv",".xls",".xlsx", ".ods"
+        )
+      ),
+      strong("Columns: ID, group1, group2, ... groupN."),
+      strong("NOTE: 0s will be interpreted as lowest score."),
+      p("Cells should contain cost/priorities.
+         Lowest score, for highest priority.
+        Non-ranked should contain a number (eg lowest score+1).
+         Will handle missings but try to avoid."),
+      shiny::radioButtons(
+        inputId = "overall.plot",
+        label = "Print overall mean grouping priorities/costs only?",
+        selected = "no",
+        choices = list(
+          "Yes" = "yes",
+          "No" = "no"
+        )
+      ),
+
+
+      # The server reads '.ods' for this input as well, so it is offered here
+      # just like for the main data file.
+      fileInput(
+        inputId = "file2",
+        label = "Choose data file for pre-assigned subjects",
+        multiple = FALSE,
+        accept = c(
+          ".csv",".xls",".xlsx", ".ods"
+        )
+      ),
+      h6("Columns: ID, group"),
+
+
+
+      ## -----------------------------------------------------------------------------
+      ## Download output
+      ## -----------------------------------------------------------------------------
+
+      # Horizontal line ----
+      tags$hr(),
+
+      h4("Download results"),
+
+      # Button
+      downloadButton("downloadData", "Download")
+    ),
+
+    mainPanel(tabsetPanel(
+      ## -----------------------------------------------------------------------------
+      ## Plot tab
+      ## -----------------------------------------------------------------------------
+
+      tabPanel(
+        "Summary",
+        h3("Grouping plot"),
+        # The legend names the labels actually printed in the plot titles
+        # (fill=, n=, mean=).
+        p("These plots are to summarise simple performance meassures for the assignment.
+          'fill' is group fill fraction, 'n' is group size and 'mean' is mean cost in group."),
+
+        plotOutput("groups.plt")
+
+      ),
+
+      tabPanel(
+        "Results",
+        h3("Raw Results"),
+        p("This is identical to the downloaded file (see panel on left)"),
+
+        htmlOutput("raw.data.tbl", container = span)
+
+      ),
+
+      tabPanel(
+        "Input data Results",
+        h3("Costs/prioritis overview"),
+
+
+        htmlOutput("input", container = span),
+
+        h3("Pre-assigned groups"),
+        p("Appears empty if none is uploaded."),
+
+        htmlOutput("pre.groups", container = span)
+
+      )
+
+    ))
+  )
+)