major update and first official launch. CRAN is waiting.
Some checks are pending
pkgdown.yaml / pkgdown (push) Waiting to run

This commit is contained in:
Andreas Gammelgaard Damsbo 2024-10-10 12:14:41 +02:00
commit 3b035ab06f
218 changed files with 1758 additions and 410523 deletions

View file

@ -1,197 +0,0 @@
#' Solve group assignment based on
#'
#' @param ds data set
#' @param cap_classes class capacity. Numeric vector length 1 or length=number
#' of groups. If NULL equal group sizes are calculated. Default is NULL.
#' @param excess_space allowed excess group fill. Default is 1.2.
#' @param pre_assign Pre assigned data set. Optional.
#' @param set_solver choose the desired solver (originally "symphony").
#'
#' @return
#' @export
#'
#' @examples
#' read.csv(here::here("data/assign_sample.csv")) |> group_assignment()
group_assignment <-
function(ds,
cap_classes = NULL,
excess_space = 1.2,
pre_assign = NULL,
set_solver="symphony") {
require(ROI)
require(ROI.plugin.symphony)
if (!is.data.frame(ds)){
stop("Supplied data has to be a data frame, with each row
are subjects and columns are groups, with the first column being
subject identifiers")}
## This program very much trust the user to supply correctly formatted data
cost <- t(ds[,-1]) #Transpose converts to matrix
colnames(cost) <- ds[,1]
num_groups <- dim(cost)[1]
num_sub <- dim(cost)[2]
## Adding the option to introduce a bit of head room to the classes by
## the groups to a little bigger than the smallest possible
## Default is to allow for an extra 20 % fill
excess <- excess_space
# generous round up of capacities
if (is.null(cap_classes)) {
capacity <- rep(ceiling(excess*num_sub/num_groups), num_groups)
# } else if (!is.numeric(cap_classes)) {
# stop("cap_classes has to be numeric")
} else if (length(cap_classes)==1){
capacity <- ceiling(rep(cap_classes,num_groups)*excess)
} else if (length(cap_classes)==num_groups){
capacity <- ceiling(cap_classes*excess)
} else {
stop("cap_classes has to be either length 1 or same as number of groups")
}
## This test should be a little more elegant
## pre_assign should be a data.frame or matrix with an ID and assignment column
with_pre_assign <- FALSE
if (!is.null(pre_assign)){
# Setting flag for later and export list
with_pre_assign <- TRUE
# Splitting to list for later merging
pre <- split(pre_assign[,1],factor(pre_assign[,2],levels = seq_len(num_groups)))
# Subtracting capacity numbers, to reflect already filled spots
capacity <- capacity-lengths(pre)
# Making sure pre_assigned are removed from main data set
ds <- ds[!ds[[1]] %in% pre_assign[[1]],]
cost <- t(ds[,-1])
colnames(cost) <- ds[,1]
num_groups <- dim(cost)[1]
num_sub <- dim(cost)[2]
}
## Simple NA handling. Better to handle NAs yourself!
cost[is.na(cost)] <- num_groups
i_m <- seq_len(num_groups)
j_m <- seq_len(num_sub)
m <- ompr::MIPModel() |>
ompr::add_variable(grp[i, j],
i = i_m,
j = j_m,
type = "binary") |>
## The first constraint says that group size should not exceed capacity
ompr::add_constraint(ompr::sum_expr(grp[i, j], j = j_m) <= capacity[i],
i = i_m) |>
## The second constraint says each subject can only be in one group
ompr::add_constraint(ompr::sum_expr(grp[i, j], i = i_m) == 1, j = j_m) |>
## The objective is set to minimize the cost of the assignments
## Giving subjects the group with the highest possible ranking
ompr::set_objective(ompr::sum_expr(
cost[i, j] * grp[i, j],
i = i_m,
j = j_m
),
"min") |>
# ompr::solve_model(ompr.roi::with_ROI(solver = "symphony", verbosity = 1))
ompr::solve_model(ompr.roi::with_ROI(solver = set_solver, verbosity = 1))
## Getting assignments
solution <- ompr::get_solution(m, grp[i, j]) |> dplyr::filter(value > 0)
assign <- solution |> dplyr::select(i,j)
if (!is.null(rownames(cost))){
assign$i <- rownames(cost)[assign$i]
}
if (!is.null(colnames(cost))){
assign$j <- colnames(cost)[assign$j]
}
## Splitting into groups based on assignment
assign_ls <- split(assign$j,assign$i)
## Extracting subject cost for the final assignment for evaluation
if (is.null(rownames(cost))){
rownames(cost) <- seq_len(nrow(cost))
}
if (is.null(colnames(cost))){
colnames(cost) <- seq_len(ncol(cost))
}
eval <- lapply(seq_len(length(assign_ls)),function(i){
ndx <- match(names(assign_ls)[i],rownames(cost))
cost[ndx,assign_ls[[i]]]
})
names(eval) <- names(assign_ls)
if (with_pre_assign){
names(pre) <- names(assign_ls)
assign_all <- mapply(c, assign_ls, pre, SIMPLIFY=FALSE)
out <- list(all_assigned=assign_all)
} else {
out <- list(all_assigned=assign_ls)
}
export <- do.call(rbind,lapply(seq_along(out[[1]]),function(i){
cbind("ID"=out[[1]][[i]],"Group"=names(out[[1]])[i])
}))
out <- append(out,
list(evaluation=eval,
assigned=assign_ls,
solution = solution,
capacity = capacity,
excess = excess,
pre_assign = with_pre_assign,
cost_scale = levels(factor(cost)),
input=ds,
export=export))
# exists("excess")
return(out)
}
## Assessment performance overview
## The function plots costs of assignment for each subject in every group
assignment_plot <- function(lst){
dl <- lst[[2]]
cost_scale <- unique(lst[[8]])
cap <- lst[[5]]
cnts_ls <- lapply(dl,function(i){
factor(i,levels=cost_scale)
})
require(ggplot2)
require(patchwork)
require(viridisLite)
y_max <- max(lengths(dl))
wrap_plots(lapply(seq_along(dl),function(i){
ttl <- names(dl)[i]
ns <- length(dl[[i]])
cnts <- cnts_ls[[i]]
ggplot2::ggplot() + ggplot2::geom_bar(ggplot2::aes(cnts,fill=cnts)) +
ggplot2::scale_x_discrete(name = NULL, breaks=cost_scale, drop=FALSE) +
ggplot2::scale_y_continuous(name = NULL, limits = c(0,y_max)) +
ggplot2::scale_fill_manual(values = viridisLite::viridis(length(cost_scale), direction = -1)) +
ggplot2::guides(fill=FALSE) +
ggplot2::labs(title=paste0(ttl," (fill=",round(ns/cap[[i]],1),";m=",round(mean(dl[[i]]),1),";n=",ns ,")"))
}))
}
## Helper function for Shiny
file_extension <- function(filenames) {
sub(pattern = "^(.*\\.|[^.]+)(?=[^.]*)", replacement = "", filenames, perl = TRUE)
}

319
R/prioritized_grouping.R Normal file
View file

@ -0,0 +1,319 @@
utils::globalVariables(c("group", "grp", "i", "j", "value"))
#' Solve grouping based on priorities or costs.
#'
#' @param data data set in wide format. First column should bi ID, then one column
#' for each group containing cost/priorities.
#' @param cap_classes class capacity. Numeric vector length 1 or length=number
#' of groups. If NULL equal group sizes are calculated. Default is NULL.
#' @param excess_space allowed excess group fill in percentage. Default is 20.
#' Supplied group capacities will be enlarged by this factors and rounded up.
#' @param pre_grouped Pre grouped data set. Optional. Should contain two
#' columns, 'id' and 'group', with 'group' containing the given group index.
#' @param seed specify a seed value. For complex problems.
#'
#' @return list of custom class 'prioritized_groups_list'
#' @export
#'
#' @examples
#' prioritized_grouping(
#' data=read.csv(here::here("data/prioritized_sample.csv")),
#' pre_grouped=read.csv(here::here("data/pre_grouped.csv"))) |> plot()
prioritized_grouping <-
function(data,
cap_classes = NULL,
excess_space = 20,
pre_grouped = NULL,
seed = 6293812) {
set.seed(seed = seed)
# browser()
requireNamespace("ROI")
requireNamespace("ROI.plugin.symphony")
if (!is.data.frame(data)) {
stop("Supplied data has to be a data frame, with each row
are subjects and columns are groups, with the first column being
subject identifiers")
}
## This program very much trust the user to supply correctly formatted data
cost <- t(data[, -1]) # Transpose converts to matrix
colnames(cost) <- data[, 1]
nms_groups <- rownames(cost)
num_groups <- dim(cost)[1]
num_sub <- dim(cost)[2]
## Adding the option to introduce a bit of head room to the classes by
## the groups to a little bigger than the smallest possible
## Default is to allow for an extra 20 % fill
excess <- 1 + (excess_space / 100)
# generous round up of capacities
if (is.null(cap_classes)) {
capacity <- rep(ceiling(excess * num_sub / num_groups), num_groups)
# } else if (!is.numeric(cap_classes)) {
# stop("cap_classes has to be numeric")
} else if (length(cap_classes) == 1) {
capacity <- ceiling(rep(cap_classes, num_groups) * excess)
} else if (length(cap_classes) == num_groups) {
capacity <- ceiling(cap_classes * excess)
} else {
stop("cap_classes has to be either length 1 or same as number of groups")
}
## This test should be a little more elegant
## pre_grouped should be a data.frame or matrix with an ID and group column
with_pre_grouped <- FALSE
if (!is.null(pre_grouped)) {
# Setting flag for later and export list
with_pre_grouped <- TRUE
# Simple translation to allow pre_grouped to denote indices
if (is.numeric(pre_grouped[, 2])){
pre_grouped$pre.groups <- nms_groups[pre_grouped[, 2]]
} else {
pre_grouped$pre.groups <- as.character(pre_grouped[, 2])
}
# Splitting to list for later merging
pre <- split(
pre_grouped[, 1],
factor(pre_grouped[, 3], levels = nms_groups)
)
# Subtracting capacity numbers, to reflect already filled spots
capacity <- capacity - lengths(pre)
# Making sure pre_grouped are removed from main data set
data <- data[!data[[1]] %in% pre_grouped[[1]], ]
cost <- t(data[, -1])
colnames(cost) <- data[, 1]
num_groups <- dim(cost)[1]
num_sub <- dim(cost)[2]
}
## Simple NA handling. Better to handle NAs yourself!
cost[is.na(cost)] <- num_groups
i_m <- seq_len(num_groups)
j_m <- seq_len(num_sub)
m <- ompr::MIPModel() |>
ompr::add_variable(grp[i, j],
i = i_m,
j = j_m,
type = "binary"
) |>
## The first constraint says that group size should not exceed capacity
ompr::add_constraint(ompr::sum_expr(grp[i, j], j = j_m) <= capacity[i],
i = i_m
) |>
## The second constraint says each subject can only be in one group
ompr::add_constraint(ompr::sum_expr(grp[i, j], i = i_m) == 1, j = j_m) |>
## The objective is set to minimize the cost of the grouping
## Giving subjects the group with the highest possible ranking
ompr::set_objective(
ompr::sum_expr(
cost[i, j] * grp[i, j],
i = i_m,
j = j_m
),
"min"
) |>
ompr::solve_model(ompr.roi::with_ROI(solver = "symphony", verbosity = 1))
if (m$status == "error") {
stop("The algorithm is not able to solve the problem. Please adjust the
constraints by increasing group capacities and/or excess fill")
}
## Getting groups
solution <- ompr::get_solution(m, grp[i, j]) |> dplyr::filter(value > 0)
grouped <- solution |> dplyr::select(i, j)
if (!is.null(rownames(cost))) {
grouped$i <- rownames(cost)[grouped$i]
}
if (!is.null(colnames(cost))) {
grouped$j <- colnames(cost)[grouped$j]
}
## Splitting into groups based on groups
grouped_ls <- split(grouped$j, grouped$i)
## Extracting subject cost for the final groups for evaluation
if (is.null(rownames(cost))) {
rownames(cost) <- seq_len(nrow(cost))
}
if (is.null(colnames(cost))) {
colnames(cost) <- seq_len(ncol(cost))
}
evaluated <- lapply(seq_len(length(grouped_ls)), function(i) {
ndx <- match(names(grouped_ls)[i], rownames(cost))
cost[ndx, grouped_ls[[i]]]
})
names(evaluated) <- names(grouped_ls)
if (with_pre_grouped) {
names(pre) <- names(grouped_ls)
grouped_all <- mapply(c, grouped_ls, pre, SIMPLIFY = FALSE)
out <- list(all_grouped = grouped_all)
} else {
out <- list(all_grouped = grouped_ls)
}
export <- do.call(rbind, lapply(seq_along(out[[1]]), function(i) {
cbind("ID" = out[[1]][[i]], "Group" = names(out[[1]])[i])
}))
out <- c(
out,
list(
evaluation = evaluated,
groupings = grouped_ls,
solution = solution,
capacity = capacity,
excess = excess,
pre_grouped = with_pre_grouped,
cost_scale = levels(factor(cost)),
input = data,
export = export
)
)
# exists("excess")
class(out) <- c("prioritized_groups_list", class(out))
return(out)
}
#' Assessment performance overview
#' @description
#' The function plots costs of grouping for each subject in every group.
#' Performance measures printed are fill: fraction of filling relative to the
#' capacity specified; mean: mean priority/cost in group; n: number of subjects
#' in the group.
#'
#' @param data A "prioritized_groups_list" class list from
#' 'prioritized_grouping()'
#' @param columns number of columns in plot
#' @param overall logical to only print overall groups mean priority/cost
#' @param viridis.option option value passed on to 'viridisLite::viridis'.
#' Default="D".
#' @param viridis.direction direction value passed on to 'viridisLite::viridis'.
#' Default=-1.
#'
#' @return ggplot2 list object
#' @export
#'
#' @examples
#' #read.csv(here::here("data/prioritized_sample.csv")) |>
#' # prioritized_grouping(cap_classes = sample(4:12, 17, TRUE)) |>
#' # grouping_plot()
grouping_plot <- function(data,
columns = NULL,
overall = FALSE,
viridis.option="D",
viridis.direction=-1) {
assertthat::assert_that("prioritized_groups_list" %in% class(data))
dl <- data[[2]]
cost_scale <- unique(data[[8]])
cap <- data[[5]]
cnts_ls <- lapply(dl, function(i) {
factor(i, levels = cost_scale)
})
y_max <- max(lengths(dl))
if (overall) {
ds <- tibble::tibble(
group = seq_along(dl),
mean = round(Reduce(c, lapply(dl, mean)), 1)
)
out <- ds |>
ggplot2::ggplot(ggplot2::aes(x = group, y = mean, fill = mean)) +
ggplot2::geom_bar(stat = "identity") +
ggplot2::geom_hline(yintercept = 1) +
ggplot2::scale_fill_viridis_c(option=viridis.option,
direction = viridis.direction) +
ggplot2::guides(fill = "none") +
ggplot2::scale_x_continuous(name = "Groups", breaks = ds$group) +
ggplot2::ylab("Mean priority/cost") +
ggplot2::labs(
title = "Overall group-wise mean priority/cost of groupings",
subtitle = "Horizontal line marking the perfect mean=1 for reference"
)
} else {
out <- lapply(seq_along(dl), function(i) {
ttl <- names(dl)[i]
ns <- length(dl[[i]])
cnts <- cnts_ls[[i]]
ggplot2::ggplot() +
ggplot2::geom_bar(ggplot2::aes(cnts, fill = cnts)) +
ggplot2::scale_x_discrete(
name = NULL,
breaks = cost_scale,
drop = FALSE
) +
ggplot2::scale_y_continuous(name = NULL, limits = c(0, y_max)) +
ggplot2::scale_fill_manual(
values = viridisLite::viridis(length(cost_scale),
direction = viridis.direction,
option = viridis.option)
) +
ggplot2::guides(fill = "none") +
ggplot2::labs(
title =
paste0(
ttl, " (fill=", round(ns / cap[[i]], 1), ";n=", ns, ";mean=",
round(mean(dl[[i]]), 1), ")"
)
)
}) |>
patchwork::wrap_plots(ncol = columns)
}
return(out)
}
#' Plot extension for easy groupings plot
#'
#' @param data data of class 'prioritized_groups_list'
#' @param ... passed on to 'grouping_plot()'
#'
#' @return ggplot2 list object
#' @export
#'
#' @examples
#' read.csv(here::here("data/prioritized_sample.csv")) |>
#' prioritized_grouping() |>
#' plot(overall = TRUE, viridis.option="D",viridis.direction=-1)
plot.prioritized_groups_list <- function(data, ...) {
grouping_plot(data, ...)
}
## Helper function for Shiny
#' Title
#'
#' @param filenames character vector of file name. Splits at the last '.'.
#'
#' @return character vector of extension
#' @export
#'
#' @examples
#' file_extension("data/prioritized_sample.csv")
file_extension <- function(filenames) {
sub(
pattern = "^(.*\\.|[^.]+)(?=[^.]*)",
replacement = "",
filenames, perl = TRUE
)
}

View file

@ -1,94 +0,0 @@
library(shiny)
server <- function(input, output, session) {
# source("https://git.nikohuru.dk/au-phd/PhysicalActivityandStrokeOutcome/raw/branch/main/side%20projects/assignment.R")
source(here::here("R/group_assign.R"))
dat <- reactive({
# input$file1 will be NULL initially. After the user selects
# and uploads a file, head of that data file by default,
# or all rows if selected, will be shown.
req(input$file1)
# Make laoding dependent of file name extension (file_ext())
ext <- file_extension(input$file1$datapath)
tryCatch(
{
if (ext == "csv") {
df <- read.csv(input$file1$datapath,na.strings = c("NA", '""',""))
} else if (ext %in% c("xls", "xlsx")) {
df <- openxlsx::read.xlsx(input$file1$datapath,na.strings = c("NA", '""',""))
} else {
stop("Input file format has to be either '.csv', '.xls' or '.xlsx'")
}
},
error = function(e) {
# return a safeError if a parsing error occurs
stop(safeError(e))
}
)
return(df)
})
dat_pre <- reactive({
# req(input$file2)
# Make laoding dependent of file name extension (file_ext())
if (!is.null(input$file2$datapath)){
ext <- file_extension(input$file2$datapath)
if (ext == "csv") {
df <- read.csv(input$file2$datapath,na.strings = c("NA", '""',""))
} else if (ext %in% c("xls", "xlsx")) {
df <- openxlsx::read.xlsx(input$file2$datapath,na.strings = c("NA", '""',""))
} else {
stop("Input file format has to be either '.csv', '.xls' or '.xlsx'")
}
return(df)
} else {
return(NULL)
}
})
assign <-
reactive({
assigned <- group_assignment(
ds = dat(),
excess_space = input$ecxess,
pre_assign = dat_pre()
)
return(assigned)
})
output$raw.data.tbl <- renderTable({
assign()$export
})
output$pre.assign <- renderTable({
dat_pre()
})
output$input <- renderTable({
dat()
})
output$assign.plt <- renderPlot({
assignment_plot(assign())
})
# Downloadable csv of selected dataset ----
output$downloadData <- downloadHandler(
filename = "group_assignment.csv",
content = function(file) {
write.csv(assign()$export, file, row.names = FALSE)
}
)
}

121
R/ui.R
View file

@ -1,121 +0,0 @@
library(shiny)
ui <- fluidPage(
## -----------------------------------------------------------------------------
## Application title
## -----------------------------------------------------------------------------
titlePanel("Assign groups based on costs/priorities.",
windowTitle = "Group assignment calculator"),
h5(
"Please note this calculator is only meant as a proof of concept for educational purposes,
and the author will take no responsibility for the results of the calculator.
Uploaded data is not kept, but please, do not upload any sensitive data."
),
## -----------------------------------------------------------------------------
## Side panel
## -----------------------------------------------------------------------------
## -----------------------------------------------------------------------------
## Single entry
## -----------------------------------------------------------------------------
sidebarLayout(
sidebarPanel(
numericInput(
inputId = "ecxess",
label = "Excess space",
value = 1,
step = .05
),
p("As default, the program will try to evenly distribute subjects in groups.
This factor will add more capacity to each group, for an overall lesser cost,
but more uneven group numbers. More adjustments can be performed with the source script."),
a(href='https://git.nikohuru.dk/au-phd/PhysicalActivityandStrokeOutcome/src/branch/main/apps/Assignment', "Source", target="_blank"),
## -----------------------------------------------------------------------------
## File upload
## -----------------------------------------------------------------------------
# Input: Select a file ----
fileInput(
inputId = "file1",
label = "Choose main data file",
multiple = FALSE,
accept = c(
".csv",".xls",".xlsx"
)
),
strong("Columns: ID, group1, group2, ... groupN."),
strong("NOTE: 0s will be interpreted as lowest score."),
p("Cells should contain cost/priorities.
Lowest score, for highest priority.
Non-ranked should contain a number (eg lowest score+1).
Will handle missings but try to avoid."),
fileInput(
inputId = "file2",
label = "Choose data file for pre-assigned subjects",
multiple = FALSE,
accept = c(
".csv",".xls",".xlsx"
)
),
h6("Columns: ID, group"),
## -----------------------------------------------------------------------------
## Download output
## -----------------------------------------------------------------------------
# Horizontal line ----
tags$hr(),
h4("Download results"),
# Button
downloadButton("downloadData", "Download")
),
mainPanel(tabsetPanel(
## -----------------------------------------------------------------------------
## Plot tab
## -----------------------------------------------------------------------------
tabPanel(
"Summary",
h3("Assignment plot"),
p("These plots are to summarise simple performance meassures for the assignment.
'f' is group fill fraction and 'm' is mean cost in group."),
plotOutput("assign.plt")
),
tabPanel(
"Results",
h3("Raw Results"),
p("This is identical to the downloaded file (see panel on left)"),
htmlOutput("raw.data.tbl", container = span)
),
tabPanel(
"Input data Results",
h3("Costs/prioritis overview"),
htmlOutput("input", container = span),
h3("Pre-assigned groups"),
p("Appears empty if none is uploaded."),
htmlOutput("pre.assign", container = span)
)
))
)
)