mirror of
https://github.com/agdamsbo/prioritized.grouping.git
synced 2026-06-19 13:57:29 +02:00
first commit
This commit is contained in:
commit
6333bcee61
208 changed files with 413695 additions and 0 deletions
185
R/group_assign.R
Normal file
185
R/group_assign.R
Normal file
|
|
@ -0,0 +1,185 @@
|
|||
#' Assign subjects to groups by minimising the total assignment cost
#'
#' Builds a binary assignment model (one variable per group/subject pair) and
#' solves it with the "symphony" solver through ompr / ROI. Each subject is
#' placed in exactly one group, no group exceeds its capacity, and the sum of
#' the costs of the chosen group/subject pairs is minimised.
#'
#' @param ds Data frame. First column holds subject identifiers, every other
#'   column is one group and holds the cost of placing the subject in it
#'   (lower cost is preferred, as the objective is minimised).
#' @param cap_classes Optional capacities: `NULL` (even split of subjects),
#'   a single number (same capacity for all groups) or one number per group.
#'   Capacities are multiplied by the excess factor and rounded up.
#' @param excess_space Multiplicative head room applied to capacities.
#'   Defaults to 1.2 when `NULL`.
#' @param pre_assign Optional data frame or matrix with subject identifiers in
#'   the first column and the group index (1..number of groups, i.e. the
#'   position of the group column in `ds`) in the second column. These
#'   subjects are removed from `ds` and their spots subtracted from capacity.
#' @return A list with elements, in this order: `all_assigned`, `evaluation`,
#'   `assigned`, `solution`, `capacity`, `excess`, `pre_assign`, `cost_scale`,
#'   `input`, `export`. Group lists are ordered like the group columns of
#'   `ds` and contain an (empty) entry for groups that received nobody.
group_assignment <-
  function(ds,
           cap_classes = NULL,
           excess_space = NULL,
           pre_assign = NULL) {
    if (!is.data.frame(ds)) {
      stop("Supplied data has to be a data frame, with each row
are subjects and columns are groups, with the first column being
subject identifiers")
    }

    ## This program very much trusts the user to supply correctly formatted
    ## data. Transposing converts to a groups x subjects matrix;
    ## `drop = FALSE` and `[[` keep this working for single-group data and
    ## for tibbles.
    cost <- t(ds[, -1, drop = FALSE])
    colnames(cost) <- ds[[1]]

    num_groups <- nrow(cost)
    num_sub <- ncol(cost)

    ## Head room factor for the group capacities; default allows 20 % extra.
    excess <- if (is.null(excess_space)) 1.2 else excess_space

    ## Generous round up of capacities
    if (is.null(cap_classes)) {
      capacity <- rep(ceiling(excess * num_sub / num_groups), num_groups)
    } else if (length(cap_classes) == 1) {
      capacity <- ceiling(rep(cap_classes, num_groups) * excess)
    } else if (length(cap_classes) == num_groups) {
      capacity <- ceiling(cap_classes * excess)
    } else {
      stop("cap_classes has to be either length 1 or same as number of groups")
    }

    ## pre_assign should be a data.frame or matrix with an ID and an
    ## assignment column
    with_pre_assign <- !is.null(pre_assign)
    if (with_pre_assign) {
      ## Uniform column access for matrices, data frames and tibbles
      pre_assign <- as.data.frame(pre_assign)

      ## One entry per group (in column order), for later merging
      pre <- split(
        pre_assign[[1]],
        factor(pre_assign[[2]], levels = seq_len(num_groups))
      )

      ## Subtract already filled spots; never let the remaining capacity go
      ## negative, which would make the model infeasible.
      capacity <- pmax(capacity - lengths(pre), 0)

      ## Make sure pre-assigned subjects are removed from the main data set
      ds <- ds[!ds[[1]] %in% pre_assign[[1]], , drop = FALSE]

      cost <- t(ds[, -1, drop = FALSE])
      colnames(cost) <- ds[[1]]

      num_groups <- nrow(cost)
      num_sub <- ncol(cost)
    }

    ## Simple NA handling. Better to handle NAs yourself!
    cost[is.na(cost)] <- num_groups

    ## Guarantee dimnames so indices can always be translated to labels
    if (is.null(rownames(cost))) {
      rownames(cost) <- seq_len(num_groups)
    }
    if (is.null(colnames(cost))) {
      colnames(cost) <- seq_len(num_sub)
    }

    ## Fail early and clearly if the optimisation stack is unavailable.
    ## Loading the ROI.plugin.symphony namespace is expected to register the
    ## solver with ROI.
    needed <- c("ROI", "ROI.plugin.symphony", "ompr", "ompr.roi")
    available <- vapply(needed, requireNamespace, logical(1), quietly = TRUE)
    if (!all(available)) {
      stop(
        "Missing required package(s): ",
        paste(needed[!available], collapse = ", "),
        call. = FALSE
      )
    }

    i_m <- seq_len(num_groups)
    j_m <- seq_len(num_sub)

    model <- ompr::MIPModel() |>
      ompr::add_variable(grp[i, j],
        i = i_m,
        j = j_m,
        type = "binary"
      ) |>
      ## The first constraint says that group size should not exceed capacity
      ompr::add_constraint(
        ompr::sum_expr(grp[i, j], j = j_m) <= capacity[i],
        i = i_m
      ) |>
      ## The second constraint says each subject can only be in one group
      ompr::add_constraint(
        ompr::sum_expr(grp[i, j], i = i_m) == 1,
        j = j_m
      ) |>
      ## The objective is set to minimize the cost of the assignments,
      ## giving subjects the group with the highest possible ranking
      ompr::set_objective(
        ompr::sum_expr(cost[i, j] * grp[i, j], i = i_m, j = j_m),
        "min"
      ) |>
      ompr::solve_model(
        ompr.roi::with_ROI(solver = "symphony", verbosity = 1)
      )

    ## Getting assignments: keep only the selected group/subject pairs
    solution <- ompr::get_solution(model, grp[i, j])
    solution <- solution[solution$value > 0, , drop = FALSE]
    rownames(solution) <- NULL

    pairs <- solution[, c("i", "j"), drop = FALSE]
    pairs$i <- rownames(cost)[pairs$i]
    pairs$j <- colnames(cost)[pairs$j]

    ## Splitting into groups based on assignment. The explicit levels keep
    ## the groups in column order and retain groups without new members, so
    ## the list lines up with `pre` and `capacity`.
    assign_ls <- split(pairs$j, factor(pairs$i, levels = rownames(cost)))

    ## Extracting subject cost for the final assignment for evaluation
    evaluation <- lapply(seq_along(assign_ls), function(g) {
      cost[names(assign_ls)[g], assign_ls[[g]]]
    })
    names(evaluation) <- names(assign_ls)

    if (with_pre_assign) {
      ## Both lists are in column order, so relabelling `pre` is safe
      names(pre) <- names(assign_ls)
      all_assigned <- mapply(c, assign_ls, pre, SIMPLIFY = FALSE)
    } else {
      all_assigned <- assign_ls
    }

    ## Two-column character matrix (ID, Group); built in one go so that
    ## empty groups do not break the row binding.
    export <- cbind(
      "ID" = as.character(unlist(all_assigned, use.names = FALSE)),
      "Group" = rep(names(all_assigned), lengths(all_assigned))
    )

    list(
      all_assigned = all_assigned,
      evaluation = evaluation,
      assigned = assign_ls,
      solution = solution,
      capacity = capacity,
      excess = excess,
      pre_assign = with_pre_assign,
      cost_scale = levels(factor(cost)),
      input = ds,
      export = export
    )
  }
|
||||
|
||||
|
||||
## Assignment performance overview
#' Plot the cost of assignment for each subject in every group
#'
#' Draws one bar chart per group showing how many assigned subjects got each
#' cost level, and combines the charts with patchwork. The panel title shows
#' the fill fraction (assigned / capacity), the mean cost and the number of
#' assigned subjects.
#'
#' @param lst List as returned by `group_assignment()`. The elements
#'   `evaluation`, `cost_scale` and `capacity` are used (looked up by name,
#'   falling back to positions 2, 8 and 5 for unnamed lists); `input`, when
#'   present, is used to line capacities up with group names.
#' @return The combined plot object produced by `patchwork::wrap_plots()`.
assignment_plot <- function(lst) {
  ## Prefer named access; positional fallback keeps older/unnamed lists working
  pick <- function(name, pos) {
    if (!is.null(names(lst)) && name %in% names(lst)) {
      lst[[name]]
    } else {
      lst[[pos]]
    }
  }

  dl <- pick("evaluation", 2)
  cost_scale <- unique(pick("cost_scale", 8))
  cap <- pick("capacity", 5)

  ## Capacity is stored in the order of the group columns of the input data,
  ## which need not be the order of `dl`; resolve the position by group name.
  input <- if ("input" %in% names(lst)) lst[["input"]] else NULL
  group_order <- if (is.data.frame(input)) names(input)[-1] else NULL
  cap_index <- function(i) {
    pos <- match(names(dl)[i], group_order)
    if (length(pos) != 1 || is.na(pos)) i else pos
  }

  ## Costs as factors on a common scale so all panels share the same x axis
  cnts_ls <- lapply(dl, function(costs) {
    factor(costs, levels = cost_scale)
  })

  y_max <- max(lengths(dl))

  patchwork::wrap_plots(lapply(seq_along(dl), function(i) {
    ttl <- names(dl)[i]
    ns <- length(dl[[i]])
    cnts <- cnts_ls[[i]]
    ggplot2::ggplot() +
      ggplot2::geom_bar(ggplot2::aes(cnts, fill = cnts)) +
      ggplot2::scale_x_discrete(
        name = NULL,
        breaks = cost_scale,
        drop = FALSE
      ) +
      ggplot2::scale_y_continuous(name = NULL, limits = c(0, y_max)) +
      ggplot2::scale_fill_manual(
        values = viridisLite::viridis(length(cost_scale), direction = -1)
      ) +
      ## "none" replaces the deprecated `fill = FALSE`
      ggplot2::guides(fill = "none") +
      ggplot2::labs(
        title = paste0(
          ttl,
          " (fill=", round(ns / cap[[cap_index(i)]], 1),
          ";m=", round(mean(dl[[i]]), 1),
          ";n=", ns, ")"
        )
      )
  }))
}
|
||||
|
||||
|
||||
## Helper function for Shiny
#' Extract the extension of one or more file names
#'
#' Returns the text after the last dot of the file name, or an empty string
#' when the file name contains no dot. Only the final path component is
#' inspected, so dots in directory names do not leak into the result.
#' The case of the extension is returned unchanged.
#'
#' @param filenames Character vector of file names or paths.
#' @return Character vector of the same length with the extensions.
file_extension <- function(filenames) {
  sub(
    pattern = "^(.*\\.|[^.]+)(?=[^.]*)",
    replacement = "",
    basename(filenames),
    perl = TRUE
  )
}
|
||||
|
||||
|
||||
|
||||
96
R/server.R
Normal file
96
R/server.R
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
# Shiny server function: reads the uploaded cost table (and optional
# pre-assignment table), runs the group assignment and exposes the result as
# tables, a plot and a downloadable csv file.
server <- function(input, output, session) {
  library(dplyr)
  library(tidyr)
  library(ROI)
  library(ROI.plugin.symphony)
  library(ompr)
  library(ompr.roi)
  library(magrittr)
  library(ggplot2)
  library(viridisLite)
  library(patchwork)
  library(openxlsx)
  # source("https://git.nikohuru.dk/au-phd/PhysicalActivityandStrokeOutcome/raw/branch/main/side%20projects/assignment.R")
  source(here::here("R/group_assign.R"))

  # Read an uploaded file, choosing the reader from the file name extension.
  # The extension is lower-cased so that e.g. "DATA.CSV" is accepted too.
  # NOTE(review): openxlsx is documented for .xlsx files; reading legacy
  # .xls files through read.xlsx() may fail - confirm.
  read_upload <- function(path) {
    ext <- tolower(file_extension(path))

    if (ext == "csv") {
      read.csv(path, na.strings = c("NA", '""', ""))
    } else if (ext %in% c("xls", "xlsx")) {
      openxlsx::read.xlsx(path, na.strings = c("NA", '""', ""))
    } else {
      stop("Input file format has to be either '.csv', '.xls' or '.xlsx'")
    }
  }

  # Main data. input$file1 is NULL until the user has uploaded a file, so
  # req() holds back everything downstream until then.
  dat <- shiny::reactive({
    shiny::req(input$file1)
    read_upload(input$file1$datapath)
  })

  # Optional pre-assigned subjects; NULL when no file has been uploaded.
  dat_pre <- shiny::reactive({
    if (is.null(input$file2$datapath)) {
      NULL
    } else {
      read_upload(input$file2$datapath)
    }
  })

  # Assignment result. The input id "ecxess" is spelled as in the UI
  # definition and must stay in sync with it.
  assignment <- shiny::reactive({
    group_assignment(
      ds = dat(),
      excess_space = input$ecxess,
      pre_assign = dat_pre()
    )
  })

  output$raw.data.tbl <- shiny::renderTable({
    assignment()$export
  })

  output$pre.assign <- shiny::renderTable({
    dat_pre()
  })

  output$input <- shiny::renderTable({
    dat()
  })

  output$assign.plt <- shiny::renderPlot({
    assignment_plot(assignment())
  })

  # Downloadable csv of the assignment ----
  output$downloadData <- shiny::downloadHandler(
    filename = "group_assignment.csv",
    content = function(file) {
      write.csv(assignment()$export, file, row.names = FALSE)
    }
  )
}
|
||||
120
R/ui.R
Normal file
120
R/ui.R
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
# Shiny UI definition: a sidebar with the excess-space setting, the two file
# uploads and the download button, and a main panel with tabs for the summary
# plot, the resulting assignment and the uploaded input data.
# All shiny functions are namespace-qualified so the definition does not
# depend on shiny being attached.
ui <- shiny::fluidPage(
  ## ---------------------------------------------------------------------------
  ## Application title
  ## ---------------------------------------------------------------------------

  shiny::titlePanel("Assign groups based on costs/priorities.",
    windowTitle = "Group assignment calculator"
  ),
  shiny::h5(
    "Please note this calculator is only meant as a proof of concept for educational purposes,
    and the author will take no responsibility for the results of the calculator.
    Uploaded data is not kept, but please, do not upload any sensitive data."
  ),

  ## ---------------------------------------------------------------------------
  ## Side panel
  ## ---------------------------------------------------------------------------

  shiny::sidebarLayout(
    shiny::sidebarPanel(
      ## -----------------------------------------------------------------------
      ## Single entry
      ## -----------------------------------------------------------------------

      # The id "ecxess" is read under the same spelling in the server function
      shiny::numericInput(
        inputId = "ecxess",
        label = "Excess space",
        value = 1,
        step = .05
      ),
      shiny::p("As default, the program will try to evenly distribute subjects in groups.
        This factor will add more capacity to each group, for an overall lesser cost,
        but more uneven group numbers. More adjustments can be performed with the source script."),
      shiny::a(href = "https://git.nikohuru.dk/au-phd/PhysicalActivityandStrokeOutcome/src/branch/main/apps/Assignment", "Source", target = "_blank"),

      ## -----------------------------------------------------------------------
      ## File upload
      ## -----------------------------------------------------------------------

      # Input: Select a file ----
      shiny::fileInput(
        inputId = "file1",
        label = "Choose main data file",
        multiple = FALSE,
        accept = c(
          ".csv", ".xls", ".xlsx"
        )
      ),
      shiny::strong("Columns: ID, group1, group2, ... groupN."),
      shiny::strong("NOTE: 0s will be interpreted as lowest score."),
      shiny::p("Cells should contain cost/priorities.
        Lowest score, for highest priority.
        Non-ranked should contain a number (eg lowest score+1).
        Will handle missings but try to avoid."),
      shiny::fileInput(
        inputId = "file2",
        label = "Choose data file for pre-assigned subjects",
        multiple = FALSE,
        accept = c(
          ".csv", ".xls", ".xlsx"
        )
      ),
      shiny::h6("Columns: ID, group"),

      ## -----------------------------------------------------------------------
      ## Download output
      ## -----------------------------------------------------------------------

      # Horizontal line ----
      shiny::tags$hr(),
      shiny::h4("Download results"),

      # Button
      shiny::downloadButton("downloadData", "Download")
    ),
    shiny::mainPanel(shiny::tabsetPanel(
      ## -----------------------------------------------------------------------
      ## Plot tab
      ## -----------------------------------------------------------------------

      shiny::tabPanel(
        "Summary",
        shiny::h3("Assignment plot"),
        shiny::p("These plots are to summarise simple performance measures for the assignment.
          'fill' is group fill fraction and 'm' is mean cost in group."),
        shiny::plotOutput("assign.plt")
      ),

      ## -----------------------------------------------------------------------
      ## Results tab
      ## -----------------------------------------------------------------------

      shiny::tabPanel(
        "Results",
        shiny::h3("Raw Results"),
        shiny::p("This is identical to the downloaded file (see panel on left)"),
        shiny::htmlOutput("raw.data.tbl", container = shiny::span)
      ),

      ## -----------------------------------------------------------------------
      ## Input data tab
      ## -----------------------------------------------------------------------

      shiny::tabPanel(
        "Input data Results",
        shiny::h3("Costs/priorities overview"),
        shiny::htmlOutput("input", container = shiny::span),
        shiny::h3("Pre-assigned groups"),
        shiny::p("Appears empty if none is uploaded."),
        shiny::htmlOutput("pre.assign", container = shiny::span)
      )
    ))
  )
)
|
||||
Loading…
Add table
Add a link
Reference in a new issue