first commit

This commit is contained in:
Andreas Gammelgaard Damsbo 2024-01-11 09:43:23 +01:00
commit 6333bcee61
208 changed files with 413695 additions and 0 deletions

185
R/group_assign.R Normal file
View file

@ -0,0 +1,185 @@
## Assign subjects to groups by minimising the summed cost of the assignment.
##
## The problem is written as a binary integer program with ompr and solved
## with the "symphony" solver through ROI.
##
## Arguments:
##   ds            Data frame. First column: subject identifiers. Every other
##                 column is one group and holds the subject's cost/priority
##                 for that group (the objective minimises these values).
##   cap_classes   NULL (capacity derived from an even split), one number
##                 (same capacity for every group) or one number per group.
##   excess_space  Multiplier applied to the capacities; 1.2 is used when NULL.
##   pre_assign    Optional data frame or matrix. Column 1: subject
##                 identifiers. Column 2: group number, i.e. the position of
##                 the group among the group columns of `ds`.
##
## Returns a list with the elements, in this order: all_assigned, evaluation,
## assigned, solution, capacity, excess, pre_assign, cost_scale, input, export.
## The per-group elements (all_assigned, evaluation, assigned) follow the
## column order of `ds` and also contain groups that received no subject, so
## they line up position by position with `capacity`.
group_assignment <-
  function(ds,
           cap_classes = NULL,
           excess_space = NULL,
           pre_assign = NULL) {
    if (!is.data.frame(ds)) {
      stop("Supplied data has to be a data frame, with each row
are subjects and columns are groups, with the first column being
subject identifiers")
    }

    ## Cost matrix with groups in rows and subjects in columns.
    ## drop = FALSE keeps a single group column from collapsing to a vector.
    build_cost <- function(d) {
      mat <- t(d[, -1, drop = FALSE])
      colnames(mat) <- d[[1]]
      mat
    }

    cost <- build_cost(ds)
    num_groups <- nrow(cost)
    num_sub <- ncol(cost)
    group_names <- rownames(cost)

    ## Head room for the groups; default is to allow for an extra 20 % fill
    excess <- if (is.null(excess_space)) 1.2 else excess_space

    ## Generous round up of capacities
    if (is.null(cap_classes)) {
      capacity <- rep(ceiling(excess * num_sub / num_groups), num_groups)
    } else if (length(cap_classes) == 1) {
      capacity <- ceiling(rep(cap_classes, num_groups) * excess)
    } else if (length(cap_classes) == num_groups) {
      capacity <- ceiling(cap_classes * excess)
    } else {
      stop("cap_classes has to be either length 1 or same as number of groups")
    }

    with_pre_assign <- !is.null(pre_assign)
    pre <- NULL
    if (with_pre_assign) {
      ## A matrix is accepted as well; as.data.frame makes [[ ]] pick columns
      pre_assign <- as.data.frame(pre_assign, stringsAsFactors = FALSE)
      pre_group <- factor(pre_assign[[2]], levels = seq_len(num_groups))
      usable <- !is.na(pre_group)
      if (!all(usable)) {
        ## Rows without a valid group number are not treated as pre-assigned,
        ## otherwise those subjects would disappear from the result entirely
        warning(
          sum(!usable),
          " pre-assigned row(s) have no valid group number and are ignored",
          call. = FALSE
        )
      }
      pre_ids <- as.character(pre_assign[[1]])[usable]
      ## One entry per group (empty ones included), in group-number order
      pre <- split(pre_ids, pre_group[usable])
      ## Already filled spots are not available to the solver
      capacity <- capacity - lengths(pre)
      names(pre) <- group_names
      ## Pre-assigned subjects are left out of the optimisation
      ds <- ds[!(ds[[1]] %in% pre_ids), , drop = FALSE]
      cost <- build_cost(ds)
      num_sub <- ncol(cost)
    }

    ## Fail with a readable message instead of handing the solver a model
    ## that cannot have a solution
    if (anyNA(capacity)) {
      stop("Group capacities could not be computed; check cap_classes and excess_space")
    }
    if (any(capacity < 0) || sum(capacity) < num_sub) {
      stop("Group capacities are too small for the number of subjects; increase cap_classes or excess_space")
    }

    ## requireNamespace() + stop() fails loudly, unlike require(); loading the
    ## namespaces is enough because every call below is namespace-qualified
    needed <- c("ompr", "ompr.roi", "ROI", "ROI.plugin.symphony")
    available <- vapply(needed, requireNamespace, logical(1), quietly = TRUE)
    if (!all(available)) {
      stop(
        "Missing required package(s): ",
        paste(needed[!available], collapse = ", "),
        call. = FALSE
      )
    }

    ## Simple NA handling. Better to handle NAs yourself!
    cost[is.na(cost)] <- num_groups

    i_m <- seq_len(num_groups)
    j_m <- seq_len(num_sub)

    m <- ompr::MIPModel() |>
      ompr::add_variable(grp[i, j], i = i_m, j = j_m, type = "binary") |>
      ## Group size must not exceed capacity
      ompr::add_constraint(
        ompr::sum_expr(grp[i, j], j = j_m) <= capacity[i],
        i = i_m
      ) |>
      ## Each subject is placed in exactly one group
      ompr::add_constraint(
        ompr::sum_expr(grp[i, j], i = i_m) == 1,
        j = j_m
      ) |>
      ## Minimise the total cost of the chosen assignments
      ompr::set_objective(
        ompr::sum_expr(cost[i, j] * grp[i, j], i = i_m, j = j_m),
        "min"
      ) |>
      ompr::solve_model(ompr.roi::with_ROI(solver = "symphony", verbosity = 1))

    ## Keep the selected (group, subject) pairs. The variables are binary, so
    ## 0.5 separates 0 from 1 even if the solver returns values with noise.
    solution <- ompr::get_solution(m, grp[i, j])
    solution <- solution[which(solution$value > 0.5), , drop = FALSE]
    rownames(solution) <- NULL

    ## Subject column indices per group; every group is kept, in `ds` order
    member_idx <- split(
      solution$j,
      factor(solution$i, levels = seq_len(num_groups))
    )
    names(member_idx) <- group_names
    assign_ls <- lapply(member_idx, function(idx) colnames(cost)[idx])

    ## Cost each subject ended up with, per group, for evaluation
    eval <- lapply(
      seq_along(member_idx),
      function(g) cost[g, member_idx[[g]]]
    )
    names(eval) <- group_names

    ## `pre` and `assign_ls` share order and names, so a positional merge
    ## puts every pre-assigned subject in its own group
    all_assigned <- if (with_pre_assign) Map(c, assign_ls, pre) else assign_ls

    ## Two-column character matrix (ID, Group), one row per subject
    export <- cbind(
      "ID" = unlist(all_assigned, use.names = FALSE),
      "Group" = rep(names(all_assigned), lengths(all_assigned))
    )

    list(
      all_assigned = all_assigned,
      evaluation = eval,
      assigned = assign_ls,
      solution = solution,
      capacity = capacity,
      excess = excess,
      pre_assign = with_pre_assign,
      cost_scale = levels(factor(cost)),
      input = ds,
      export = export
    )
  }
## Assessment performance overview
## Plots, for every group, how many of its subjects were assigned at each
## cost level.
##
## Arguments:
##   lst  List as returned by group_assignment(). The elements `evaluation`,
##        `capacity` and `cost_scale` are used; they are looked up by name,
##        falling back to positions 2, 5 and 8 when the list is unnamed.
##
## Returns the result of patchwork::wrap_plots() with one bar chart per group.
## Each title shows the group name, the fill fraction (subjects / capacity),
## the mean cost (m) and the number of subjects (n).
assignment_plot <- function(lst) {
  ## Fail loudly if a plotting dependency is missing (require() only warns)
  for (pkg in c("ggplot2", "patchwork", "viridisLite")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop("Package '", pkg, "' is required for assignment_plot()",
        call. = FALSE
      )
    }
  }

  element <- function(name, position) {
    if (is.null(lst[[name]])) lst[[position]] else lst[[name]]
  }
  dl <- element("evaluation", 2)
  cap <- element("capacity", 5)
  cost_scale <- unique(element("cost_scale", 8))

  ## Shared y axis limit so the panels are comparable
  y_max <- max(lengths(dl))

  panels <- lapply(seq_along(dl), function(i) {
    costs <- dl[[i]]
    ns <- length(costs)
    ## Fixed levels keep unused cost values on the axis of every panel
    cnts <- factor(costs, levels = cost_scale)

    ggplot2::ggplot() +
      ggplot2::geom_bar(ggplot2::aes(cnts, fill = cnts)) +
      ggplot2::scale_x_discrete(name = NULL, breaks = cost_scale, drop = FALSE) +
      ggplot2::scale_y_continuous(name = NULL, limits = c(0, y_max)) +
      ggplot2::scale_fill_manual(
        values = viridisLite::viridis(length(cost_scale), direction = -1)
      ) +
      ## "none" replaces the deprecated `fill = FALSE`
      ggplot2::guides(fill = "none") +
      ggplot2::labs(title = paste0(
        names(dl)[i],
        " (fill=", round(ns / cap[[i]], 1),
        ";m=", round(mean(costs), 1),
        ";n=", ns, ")"
      ))
  })

  patchwork::wrap_plots(panels)
}
## Helper function for Shiny
## Returns the file name extension (text after the last dot, without the dot)
## for each element of `filenames`; "" when the file name has no dot.
## Only the file name itself is inspected, so dots in directory names do not
## leak into the result.
file_extension <- function(filenames) {
  sub(
    pattern = "^(.*\\.|[^.]+)(?=[^.]*)",
    replacement = "",
    basename(as.character(filenames)),
    perl = TRUE
  )
}

96
R/server.R Normal file
View file

@ -0,0 +1,96 @@
## Shiny server function.
##
## Reads the uploaded cost table (`file1`) and the optional table of
## pre-assigned subjects (`file2`), runs group_assignment() on them and
## exposes the result as tables, a plot and a downloadable csv file.
server <- function(input, output, session) {
  library(dplyr)
  library(tidyr)
  library(ROI)
  library(ROI.plugin.symphony)
  library(ompr)
  library(ompr.roi)
  library(magrittr)
  library(ggplot2)
  library(viridisLite)
  library(patchwork)
  library(openxlsx)
  # source("https://git.nikohuru.dk/au-phd/PhysicalActivityandStrokeOutcome/raw/branch/main/side%20projects/assignment.R")
  source(here::here("R/group_assign.R"))

  # Reads one uploaded file into a data frame. The reader is chosen from the
  # file name extension, compared case-insensitively so ".CSV" is accepted.
  read_upload <- function(path) {
    ext <- tolower(file_extension(path))
    if (ext == "csv") {
      read.csv(path, na.strings = c("NA", '""', ""))
    } else if (ext %in% c("xls", "xlsx")) {
      # NOTE(review): openxlsx is documented for .xlsx files; confirm whether
      # legacy .xls uploads can actually be read here.
      openxlsx::read.xlsx(path, na.strings = c("NA", '""', ""))
    } else {
      stop("Input file format has to be either '.csv', '.xls' or '.xlsx'")
    }
  }

  # Main data. input$file1 is NULL until the user has uploaded a file, so
  # everything depending on it waits at req().
  dat <- shiny::reactive({
    shiny::req(input$file1)
    read_upload(input$file1$datapath)
  })

  # Pre-assigned subjects; optional, NULL when nothing has been uploaded.
  dat_pre <- shiny::reactive({
    if (is.null(input$file2$datapath)) {
      return(NULL)
    }
    read_upload(input$file2$datapath)
  })

  # Result list from group_assignment() for the current inputs.
  assignment <- shiny::reactive({
    excess <- input$ecxess
    # An emptied numeric box yields NA; wait for a number instead of
    # passing NA on to the solver.
    shiny::req(is.null(excess) || !is.na(excess))
    group_assignment(
      ds = dat(),
      excess_space = excess,
      pre_assign = dat_pre()
    )
  })

  output$raw.data.tbl <- shiny::renderTable({
    assignment()$export
  })

  output$pre.assign <- shiny::renderTable({
    dat_pre()
  })

  output$input <- shiny::renderTable({
    dat()
  })

  output$assign.plt <- shiny::renderPlot({
    assignment_plot(assignment())
  })

  # Downloadable csv of the assignment ----
  output$downloadData <- shiny::downloadHandler(
    filename = "group_assignment.csv",
    content = function(file) {
      write.csv(assignment()$export, file, row.names = FALSE)
    }
  )
}

120
R/ui.R Normal file
View file

@ -0,0 +1,120 @@
## Shiny user interface.
##
## Side panel: excess-space factor, upload of the cost table (`file1`) and of
## the optional pre-assignment table (`file2`), and the download button.
## Main panel: three tabs showing the summary plot, the resulting assignment
## and the uploaded input data.
ui <- shiny::fluidPage(
  ## ---------------------------------------------------------------------------
  ## Application title
  ## ---------------------------------------------------------------------------
  shiny::titlePanel("Assign groups based on costs/priorities.",
    windowTitle = "Group assignment calculator"
  ),
  shiny::h5(
    "Please note this calculator is only meant as a proof of concept for educational purposes,
and the author will take no responsibility for the results of the calculator.
Uploaded data is not kept, but please, do not upload any sensitive data."
  ),
  ## ---------------------------------------------------------------------------
  ## Side panel
  ## ---------------------------------------------------------------------------
  shiny::sidebarLayout(
    shiny::sidebarPanel(
      ## The input id is read by the server under the same spelling
      shiny::numericInput(
        inputId = "ecxess",
        label = "Excess space",
        value = 1,
        step = .05
      ),
      shiny::p("As default, the program will try to evenly distribute subjects in groups.
This factor will add more capacity to each group, for an overall lesser cost,
but more uneven group numbers. More adjustments can be performed with the source script."),
      shiny::a(href='https://git.nikohuru.dk/au-phd/PhysicalActivityandStrokeOutcome/src/branch/main/apps/Assignment', "Source", target="_blank"),
      ## -----------------------------------------------------------------------
      ## File upload
      ## -----------------------------------------------------------------------
      # Input: Select a file ----
      shiny::fileInput(
        inputId = "file1",
        label = "Choose main data file",
        multiple = FALSE,
        accept = c(
          ".csv", ".xls", ".xlsx"
        )
      ),
      shiny::strong("Columns: ID, group1, group2, ... groupN."),
      shiny::strong("NOTE: 0s will be interpreted as lowest score."),
      shiny::p("Cells should contain cost/priorities.
Lowest score, for highest priority.
Non-ranked should contain a number (eg lowest score+1).
Will handle missings but try to avoid."),
      shiny::fileInput(
        inputId = "file2",
        label = "Choose data file for pre-assigned subjects",
        multiple = FALSE,
        accept = c(
          ".csv", ".xls", ".xlsx"
        )
      ),
      shiny::h6("Columns: ID, group"),
      ## -----------------------------------------------------------------------
      ## Download output
      ## -----------------------------------------------------------------------
      # Horizontal line ----
      shiny::tags$hr(),
      shiny::h4("Download results"),
      # Button
      shiny::downloadButton("downloadData", "Download")
    ),
    shiny::mainPanel(shiny::tabsetPanel(
      ## -----------------------------------------------------------------------
      ## Plot tab
      ## -----------------------------------------------------------------------
      shiny::tabPanel(
        "Summary",
        shiny::h3("Assignment plot"),
        ## Wording follows the labels printed in the plot titles
        shiny::p("These plots are to summarise simple performance measures for the assignment.
'fill' is group fill fraction and 'm' is mean cost in group."),
        shiny::plotOutput("assign.plt")
      ),
      shiny::tabPanel(
        "Results",
        shiny::h3("Raw Results"),
        shiny::p("This is identical to the downloaded file (see panel on left)"),
        shiny::htmlOutput("raw.data.tbl", container = shiny::span)
      ),
      shiny::tabPanel(
        "Input data Results",
        shiny::h3("Costs/priorities overview"),
        shiny::htmlOutput("input", container = shiny::span),
        shiny::h3("Pre-assigned groups"),
        shiny::p("Appears empty if none is uploaded."),
        shiny::htmlOutput("pre.assign", container = shiny::span)
      )
    ))
  )
)