first commit

2026-06-19 13:57:29 +02:00 · 2024-01-11 09:43:23 +01:00 · 2024-01-11 09:43:23 +01:00 · 6333bcee61
commit 6333bcee61
208 changed files with 413695 additions and 0 deletions
--- a/R/group_assign.R
+++ b/R/group_assign.R
@ -0,0 +1,185 @@
+## Assign subjects to groups by minimising the summed cost of the assignment.
+##
+## Arguments:
+##   ds           Data frame. The first column holds subject identifiers, every
+##                following column holds the costs for one group (lowest cost =
+##                highest priority).
+##   cap_classes  Optional group capacities: a single number used for every
+##                group, or one number per group (in column order of `ds`).
+##                When NULL every group gets
+##                ceiling(excess * subjects / groups) places.
+##   excess_space Optional multiplier applied to the capacities. When NULL a
+##                factor of 1.2 (20 % head room) is used.
+##   pre_assign   Optional data frame or matrix with subject identifiers in
+##                column 1 and the group index (1..number of groups, in column
+##                order of `ds`) in column 2. These subjects are removed from
+##                the optimisation and their places are subtracted from the
+##                capacities.
+##
+## Returns a list with the elements `all_assigned`, `evaluation`, `assigned`,
+## `solution`, `capacity`, `excess`, `pre_assign`, `cost_scale`, `input` and
+## `export`. All per-group elements are in the column order of `ds` and also
+## contain groups that ended up empty, so they line up with `capacity`.
+group_assignment <-
+  function(ds,
+           cap_classes = NULL,
+           excess_space = NULL,
+           pre_assign = NULL) {
+    ## Fail loudly on missing packages; the solver plugin only has to be
+    ## loaded (not attached) to be available to ROI
+    for (pkg in c("ROI", "ROI.plugin.symphony", "ompr", "ompr.roi")) {
+      if (!requireNamespace(pkg, quietly = TRUE)) {
+        stop("Package '", pkg, "' is required but not installed",
+             call. = FALSE)
+      }
+    }
+
+    if (!is.data.frame(ds)) {
+      stop("Supplied data has to be a data frame, where rows are subjects ",
+           "and columns are groups, with the first column being ",
+           "subject identifiers")
+    }
+    ## Plain data frame, so that `ds[, 1]` is a vector for tibbles as well
+    ds <- as.data.frame(ds)
+
+    ## Groups in rows, subjects in columns. `drop = FALSE` keeps the group
+    ## name even when there is only a single group column.
+    ## This program very much trusts the user to supply correctly formatted data
+    build_cost <- function(x) {
+      mat <- t(x[, -1, drop = FALSE]) # Transpose converts to matrix
+      colnames(mat) <- x[, 1]
+      mat
+    }
+
+    cost <- build_cost(ds)
+    num_groups <- nrow(cost)
+    num_sub <- ncol(cost)
+
+    ## Option to introduce a bit of head room by making the groups a little
+    ## bigger than the smallest possible.
+    ## Default is to allow for an extra 20 % fill
+    excess <- if (is.null(excess_space)) 1.2 else excess_space
+
+    # generous round up of capacities
+    if (is.null(cap_classes)) {
+      capacity <- rep(ceiling(excess * num_sub / num_groups), num_groups)
+    } else if (length(cap_classes) == 1) {
+      capacity <- ceiling(rep(cap_classes, num_groups) * excess)
+    } else if (length(cap_classes) == num_groups) {
+      capacity <- ceiling(cap_classes * excess)
+    } else {
+      stop("cap_classes has to be either length 1 or same as number of groups")
+    }
+
+    with_pre_assign <- !is.null(pre_assign)
+    if (with_pre_assign) {
+      ## Accept both matrices and data frames
+      pre_assign <- as.data.frame(pre_assign)
+      # Splitting by group index; one (possibly empty) entry per group
+      pre <- split(
+        as.character(pre_assign[[1]]),
+        factor(pre_assign[[2]], levels = seq_len(num_groups))
+      )
+      # Subtracting already filled spots; an overfilled group simply has no
+      # places left instead of a negative (infeasible) capacity
+      capacity <- pmax(capacity - lengths(pre), 0)
+      # Making sure pre-assigned subjects are removed from main data set
+      ds <- ds[!ds[[1]] %in% pre_assign[[1]], , drop = FALSE]
+
+      cost <- build_cost(ds)
+      num_sub <- ncol(cost)
+    }
+
+    group_names <- rownames(cost)
+    names(capacity) <- group_names
+
+    ## Every subject needs exactly one place, so the model cannot be solved
+    ## when the summed capacity is too small
+    if (isTRUE(sum(capacity) < num_sub)) {
+      stop("Total group capacity (", sum(capacity), ") is smaller than the ",
+           "number of subjects to assign (", num_sub, "); ",
+           "increase cap_classes or excess_space", call. = FALSE)
+    }
+
+    ## Simple NA handling. Better to handle NAs yourself!
+    cost[is.na(cost)] <- num_groups
+
+    i_m <- seq_len(num_groups)
+    j_m <- seq_len(num_sub)
+
+    m <- ompr::MIPModel() |>
+      ompr::add_variable(grp[i, j],
+                         i = i_m,
+                         j = j_m,
+                         type = "binary") |>
+      ## The first constraint says that group size should not exceed capacity
+      ompr::add_constraint(ompr::sum_expr(grp[i, j], j = j_m) <= capacity[i],
+                           i = i_m) |>
+      ## The second constraint says each subject can only be in one group
+      ompr::add_constraint(ompr::sum_expr(grp[i, j], i = i_m) == 1,
+                           j = j_m) |>
+      ## The objective is set to minimize the cost of the assignments
+      ## Giving subjects the group with the highest possible ranking
+      ompr::set_objective(ompr::sum_expr(
+        cost[i, j] * grp[i, j],
+        i = i_m,
+        j = j_m
+      ),
+      "min") |>
+      ompr::solve_model(ompr.roi::with_ROI(solver = "symphony", verbosity = 1))
+
+    ## Getting assignments (rows of the solution with the variable switched on)
+    solution <- ompr::get_solution(m, grp[i, j])
+    solution <- solution[which(solution$value > 0), , drop = FALSE]
+    rownames(solution) <- NULL
+
+    ## Splitting into groups by group index, keeping the column order of `ds`
+    ## and keeping empty groups, so that positions match `capacity` and `pre`
+    sub_idx <- split(solution$j, factor(solution$i, levels = i_m))
+    assign_ls <- lapply(sub_idx, function(idx) colnames(cost)[idx])
+
+    ## Extracting subject cost for the final assignment for evaluation
+    eval <- lapply(i_m, function(g) cost[g, sub_idx[[g]]])
+
+    names(assign_ls) <- group_names
+    names(eval) <- group_names
+
+    if (with_pre_assign) {
+      ## `pre` is ordered by group index, exactly like `assign_ls`
+      out <- list(all_assigned = Map(c, assign_ls, unname(pre)))
+    } else {
+      out <- list(all_assigned = assign_ls)
+    }
+
+    ## Long format ID/Group table; built in one go so empty groups are harmless
+    export <- cbind(
+      "ID" = as.character(unlist(out[[1]], use.names = FALSE)),
+      "Group" = rep(names(out[[1]]), lengths(out[[1]]))
+    )
+
+    out <- append(out,
+                  list(evaluation = eval,
+                       assigned = assign_ls,
+                       solution = solution,
+                       capacity = capacity,
+                       excess = excess,
+                       pre_assign = with_pre_assign,
+                       cost_scale = levels(factor(cost)),
+                       input = ds,
+                       export = export))
+    out
+  }
+
+
+## Assignment performance overview
+## The function plots costs of assignment for each subject in every group,
+## one bar chart per group. Panel titles show the fill fraction
+## (group size / capacity), the mean cost (m) and the group size (n).
+##
+## Arguments:
+##   lst  List as returned by group_assignment(); the elements `evaluation`,
+##        `cost_scale` and `capacity` are used.
+##
+## Returns the combined plot produced by patchwork::wrap_plots().
+assignment_plot <- function(lst) {
+  for (pkg in c("ggplot2", "patchwork", "viridisLite")) {
+    if (!requireNamespace(pkg, quietly = TRUE)) {
+      stop("Package '", pkg, "' is required but not installed", call. = FALSE)
+    }
+  }
+
+  ## Elements are picked by name rather than by position in the list
+  dl <- lst[["evaluation"]]
+  cost_scale <- unique(lst[["cost_scale"]])
+  cap <- lst[["capacity"]]
+
+  ## Line capacities up with the plotted groups by name when possible.
+  ## NOTE(review): without matching names the capacities are taken by
+  ## position, which assumes both are in the same group order - confirm.
+  if (!is.null(names(cap)) && all(names(dl) %in% names(cap))) {
+    cap <- cap[names(dl)]
+  }
+
+  cnts_ls <- lapply(dl, function(costs) {
+    factor(costs, levels = cost_scale)
+  })
+
+  ## Named colours keep each cost level on the same colour in every panel,
+  ## also when a panel lacks some of the levels
+  fill_values <- stats::setNames(
+    viridisLite::viridis(length(cost_scale), direction = -1),
+    cost_scale
+  )
+
+  ## Common y axis across panels
+  y_max <- max(lengths(dl))
+
+  patchwork::wrap_plots(lapply(seq_along(dl), function(i) {
+    ttl <- names(dl)[i]
+    ns <- length(dl[[i]])
+    cnts <- cnts_ls[[i]]
+    ggplot2::ggplot() +
+      ggplot2::geom_bar(ggplot2::aes(cnts, fill = cnts)) +
+      ggplot2::scale_x_discrete(name = NULL, breaks = cost_scale, drop = FALSE) +
+      ggplot2::scale_y_continuous(name = NULL, limits = c(0, y_max)) +
+      ggplot2::scale_fill_manual(values = fill_values) +
+      ggplot2::guides(fill = "none") +
+      ggplot2::labs(title = paste0(ttl, " (fill=", round(ns / cap[[i]], 1), ";m=", round(mean(dl[[i]]), 1), ";n=", ns, ")"))
+  }))
+}
+
+
+## Helper function for Shiny
+## Strips everything up to and including the last dot of each file name, which
+## leaves the extension. Names without a dot give an empty string.
+file_extension <- function(filenames) {
+  up_to_last_dot <- "^(.*\\.|[^.]+)(?=[^.]*)"
+  sub(up_to_last_dot, "", filenames, perl = TRUE)
+}
+
+
+
--- a/R/server.R
+++ b/R/server.R
@ -0,0 +1,96 @@
+## Shiny server logic: reads the uploaded cost table (and the optional table of
+## pre-assigned subjects), runs group_assignment() on them and exposes the
+## result as tables, a summary plot and a downloadable csv file.
+server <- function(input, output, session) {
+  library(dplyr)
+  library(tidyr)
+  library(ROI)
+  library(ROI.plugin.symphony)
+  library(ompr)
+  library(ompr.roi)
+  library(magrittr)
+  library(ggplot2)
+  library(viridisLite)
+  library(patchwork)
+  library(openxlsx)
+  # source("https://git.nikohuru.dk/au-phd/PhysicalActivityandStrokeOutcome/raw/branch/main/side%20projects/assignment.R")
+  source(here::here("R/group_assign.R"))
+  
+  ## Reads one uploaded file; the loader is picked from the file name
+  ## extension, ignoring case so that e.g. "DATA.CSV" is accepted too.
+  read_upload <- function(path) {
+    ext <- tolower(file_extension(path))
+    
+    if (ext == "csv") {
+      read.csv(path, na.strings = c("NA", '""', ""))
+    } else if (ext %in% c("xls", "xlsx")) {
+      ## NOTE(review): openxlsx is documented for the xlsx format; reading a
+      ## legacy '.xls' file this way may fail - confirm or drop '.xls'.
+      openxlsx::read.xlsx(path, na.strings = c("NA", '""', ""))
+    } else {
+      stop("Input file format has to be either '.csv', '.xls' or '.xlsx'")
+    }
+  }
+  
+  dat <- shiny::reactive({
+    # input$file1 will be NULL initially; nothing downstream is evaluated
+    # before the user has selected and uploaded a file.
+    shiny::req(input$file1)
+    read_upload(input$file1$datapath)
+  })
+  
+  dat_pre <- shiny::reactive({
+    # The pre-assignment file is optional: NULL until one is uploaded
+    if (is.null(input$file2$datapath)) {
+      return(NULL)
+    }
+    read_upload(input$file2$datapath)
+  })
+  
+  ## Result of the optimisation; re-evaluated when any input changes
+  assigned <-
+    shiny::reactive({
+      group_assignment(
+        ds = dat(),
+        excess_space = input$ecxess,
+        pre_assign = dat_pre()
+      )
+    })
+  
+  
+  output$raw.data.tbl <- shiny::renderTable({
+    assigned()$export
+  })
+  
+  output$pre.assign <- shiny::renderTable({
+    dat_pre()
+  })
+  
+  output$input <- shiny::renderTable({
+    dat()
+  })
+  
+  output$assign.plt <- shiny::renderPlot({
+    assignment_plot(assigned())
+  })
+  
+  # Downloadable csv of selected dataset ----
+  output$downloadData <- shiny::downloadHandler(
+    filename = "group_assignment.csv",
+
+    content = function(file) {
+      write.csv(assigned()$export, file, row.names = FALSE)
+    }
+  )
+  
+}
--- a/R/ui.R
+++ b/R/ui.R
@ -0,0 +1,120 @@
+## Shiny user interface: a side panel with the excess-space setting, the two
+## file uploads and the download button, and a main panel with tabs for the
+## summary plot, the assignment table and the uploaded input data.
+ui <- shiny::fluidPage(
+  ## -----------------------------------------------------------------------------
+  ## Application title
+  ## -----------------------------------------------------------------------------
+  
+  shiny::titlePanel("Assign groups based on costs/priorities.",
+             windowTitle = "Group assignment calculator"),
+  shiny::h5(
+    "Please note this calculator is only meant as a proof of concept for educational purposes,
+     and the author will take no responsibility for the results of the calculator.
+     Uploaded data is not kept, but please, do not upload any sensitive data."
+  ),
+  
+  ## -----------------------------------------------------------------------------
+  ## Side panel
+  ## -----------------------------------------------------------------------------
+  
+  
+  ## -----------------------------------------------------------------------------
+  ## Single entry
+  ## -----------------------------------------------------------------------------
+  shiny::sidebarLayout(
+    shiny::sidebarPanel(
+      ## The input id (including its spelling) is what the server reads
+      shiny::numericInput(
+        inputId = "ecxess",
+        label = "Excess space",
+        value = 1,
+        step = .05
+      ),
+      shiny::p("As default, the program will try to evenly distribute subjects in groups. 
+        This factor will add more capacity to each group, for an overall lesser cost, 
+        but more uneven group numbers. More adjustments can be performed with the source script."),
+      shiny::a(href='https://git.nikohuru.dk/au-phd/PhysicalActivityandStrokeOutcome/src/branch/main/apps/Assignment', "Source", target="_blank"),
+      ## -----------------------------------------------------------------------------
+      ## File upload
+      ## -----------------------------------------------------------------------------
+      
+      # Input: Select a file ----
+      
+      shiny::fileInput(
+        inputId = "file1",
+        label = "Choose main data file",
+        multiple = FALSE,
+        accept = c(
+          ".csv",".xls",".xlsx"
+        )
+      ),
+      shiny::strong("Columns: ID, group1, group2, ... groupN."),
+      shiny::strong("NOTE: 0s will be interpreted as lowest score."),
+      shiny::p("Cells should contain cost/priorities.
+         Lowest score, for highest priority.
+        Non-ranked should contain a number (eg lowest score+1).
+         Will handle missings but try to avoid."),
+      
+      shiny::fileInput(
+        inputId = "file2",
+        label = "Choose data file for pre-assigned subjects",
+        multiple = FALSE,
+        accept = c(
+          ".csv",".xls",".xlsx"
+        )
+      ),
+      shiny::h6("Columns: ID, group"),
+      
+      
+      
+      ## -----------------------------------------------------------------------------
+      ## Download output
+      ## -----------------------------------------------------------------------------
+      
+      # Horizontal line ----
+      shiny::tags$hr(),
+      
+      shiny::h4("Download results"),
+      
+      # Button
+      shiny::downloadButton("downloadData", "Download")
+    ),
+    
+    shiny::mainPanel(shiny::tabsetPanel(
+      ## -----------------------------------------------------------------------------
+      ## Plot tab
+      ## -----------------------------------------------------------------------------
+      
+      shiny::tabPanel(
+        "Summary",
+        shiny::h3("Assignment plot"),
+        shiny::p("These plots are to summarise simple performance measures for the assignment. 
+          'fill' is group fill fraction, 'm' is mean cost in group and 'n' is group size."),
+        
+        shiny::plotOutput("assign.plt")
+        
+      ),
+      
+      shiny::tabPanel(
+        "Results",
+        shiny::h3("Raw Results"),
+        shiny::p("This is identical to the downloaded file (see panel on left)"),
+        
+        shiny::htmlOutput("raw.data.tbl", container = shiny::span)
+        
+      ),
+      
+      shiny::tabPanel(
+        "Input data Results",
+        shiny::h3("Costs/priorities overview"),
+        
+        
+        shiny::htmlOutput("input", container = shiny::span),
+        
+        shiny::h3("Pre-assigned groups"),
+        shiny::p("Appears empty if none is uploaded."),
+
+        shiny::htmlOutput("pre.assign", container = shiny::span)
+        
+      )
+      
+    ))
+  )
+)