major update and first official launch. CRAN is waiting.
Some checks are pending
pkgdown.yaml / pkgdown (push) Waiting to run

This commit is contained in:
Andreas Gammelgaard Damsbo 2024-10-10 12:14:41 +02:00
commit 3b035ab06f
218 changed files with 1758 additions and 410523 deletions

View file

@ -0,0 +1,10 @@
name: prioritized-grouping
title:
username: agdamsbo
account: agdamsbo
server: shinyapps.io
hostUrl: https://api.shinyapps.io/v1
appId: 12977223
bundleId: 9205937
url: https://agdamsbo.shinyapps.io/prioritized-grouping/
version: 1

436
app/server.R Normal file
View file

@ -0,0 +1,436 @@
########
#### Current file: R//prioritized_grouping.R
########
utils::globalVariables(c("group", "grp", "i", "j", "value"))
prioritized_grouping <-
function(data,
cap_classes = NULL,
excess_space = 20,
pre_grouped = NULL,
seed = 6293812) {
set.seed(seed = seed)
# browser()
requireNamespace("ROI")
requireNamespace("ROI.plugin.symphony")
if (!is.data.frame(data)) {
stop("Supplied data has to be a data frame, with each row
are subjects and columns are groups, with the first column being
subject identifiers")
}
## This program very much trust the user to supply correctly formatted data
cost <- t(data[, -1]) # Transpose converts to matrix
colnames(cost) <- data[, 1]
nms_groups <- rownames(cost)
num_groups <- dim(cost)[1]
num_sub <- dim(cost)[2]
## Adding the option to introduce a bit of head room to the classes by
## the groups to a little bigger than the smallest possible
## Default is to allow for an extra 20 % fill
excess <- 1 + (excess_space / 100)
# generous round up of capacities
if (is.null(cap_classes)) {
capacity <- rep(ceiling(excess * num_sub / num_groups), num_groups)
# } else if (!is.numeric(cap_classes)) {
# stop("cap_classes has to be numeric")
} else if (length(cap_classes) == 1) {
capacity <- ceiling(rep(cap_classes, num_groups) * excess)
} else if (length(cap_classes) == num_groups) {
capacity <- ceiling(cap_classes * excess)
} else {
stop("cap_classes has to be either length 1 or same as number of groups")
}
## This test should be a little more elegant
## pre_grouped should be a data.frame or matrix with an ID and group column
with_pre_grouped <- FALSE
if (!is.null(pre_grouped)) {
# Setting flag for later and export list
with_pre_grouped <- TRUE
# Simple translation to allow pre_grouped to denote indices
if (is.numeric(pre_grouped[, 2])){
pre_grouped$pre.groups <- nms_groups[pre_grouped[, 2]]
} else {
pre_grouped$pre.groups <- as.character(pre_grouped[, 2])
}
# Splitting to list for later merging
pre <- split(
pre_grouped[, 1],
factor(pre_grouped[, 3], levels = nms_groups)
)
# Subtracting capacity numbers, to reflect already filled spots
capacity <- capacity - lengths(pre)
# Making sure pre_grouped are removed from main data set
data <- data[!data[[1]] %in% pre_grouped[[1]], ]
cost <- t(data[, -1])
colnames(cost) <- data[, 1]
num_groups <- dim(cost)[1]
num_sub <- dim(cost)[2]
}
## Simple NA handling. Better to handle NAs yourself!
cost[is.na(cost)] <- num_groups
i_m <- seq_len(num_groups)
j_m <- seq_len(num_sub)
m <- ompr::MIPModel() |>
ompr::add_variable(grp[i, j],
i = i_m,
j = j_m,
type = "binary"
) |>
## The first constraint says that group size should not exceed capacity
ompr::add_constraint(ompr::sum_expr(grp[i, j], j = j_m) <= capacity[i],
i = i_m
) |>
## The second constraint says each subject can only be in one group
ompr::add_constraint(ompr::sum_expr(grp[i, j], i = i_m) == 1, j = j_m) |>
## The objective is set to minimize the cost of the grouping
## Giving subjects the group with the highest possible ranking
ompr::set_objective(
ompr::sum_expr(
cost[i, j] * grp[i, j],
i = i_m,
j = j_m
),
"min"
) |>
ompr::solve_model(ompr.roi::with_ROI(solver = "symphony", verbosity = 1))
if (m$status == "error") {
stop("The algorithm is not able to solve the problem. Please adjust the
constraints by increasing group capacities and/or excess fill")
}
## Getting groups
solution <- ompr::get_solution(m, grp[i, j]) |> dplyr::filter(value > 0)
grouped <- solution |> dplyr::select(i, j)
if (!is.null(rownames(cost))) {
grouped$i <- rownames(cost)[grouped$i]
}
if (!is.null(colnames(cost))) {
grouped$j <- colnames(cost)[grouped$j]
}
## Splitting into groups based on groups
grouped_ls <- split(grouped$j, grouped$i)
## Extracting subject cost for the final groups for evaluation
if (is.null(rownames(cost))) {
rownames(cost) <- seq_len(nrow(cost))
}
if (is.null(colnames(cost))) {
colnames(cost) <- seq_len(ncol(cost))
}
evaluated <- lapply(seq_len(length(grouped_ls)), function(i) {
ndx <- match(names(grouped_ls)[i], rownames(cost))
cost[ndx, grouped_ls[[i]]]
})
names(evaluated) <- names(grouped_ls)
if (with_pre_grouped) {
names(pre) <- names(grouped_ls)
grouped_all <- mapply(c, grouped_ls, pre, SIMPLIFY = FALSE)
out <- list(all_grouped = grouped_all)
} else {
out <- list(all_grouped = grouped_ls)
}
export <- do.call(rbind, lapply(seq_along(out[[1]]), function(i) {
cbind("ID" = out[[1]][[i]], "Group" = names(out[[1]])[i])
}))
out <- c(
out,
list(
evaluation = evaluated,
groupings = grouped_ls,
solution = solution,
capacity = capacity,
excess = excess,
pre_grouped = with_pre_grouped,
cost_scale = levels(factor(cost)),
input = data,
export = export
)
)
# exists("excess")
class(out) <- c("prioritized_groups_list", class(out))
return(out)
}
grouping_plot <- function(data,
columns = NULL,
overall = FALSE,
viridis.option="D",
viridis.direction=-1) {
assertthat::assert_that("prioritized_groups_list" %in% class(data))
dl <- data[[2]]
cost_scale <- unique(data[[8]])
cap <- data[[5]]
cnts_ls <- lapply(dl, function(i) {
factor(i, levels = cost_scale)
})
y_max <- max(lengths(dl))
if (overall) {
ds <- tibble::tibble(
group = seq_along(dl),
mean = round(Reduce(c, lapply(dl, mean)), 1)
)
out <- ds |>
ggplot2::ggplot(ggplot2::aes(x = group, y = mean, fill = mean)) +
ggplot2::geom_bar(stat = "identity") +
ggplot2::geom_hline(yintercept = 1) +
ggplot2::scale_fill_viridis_c(option=viridis.option,
direction = viridis.direction) +
ggplot2::guides(fill = "none") +
ggplot2::scale_x_continuous(name = "Groups", breaks = ds$group) +
ggplot2::ylab("Mean priority/cost") +
ggplot2::labs(
title = "Overall group-wise mean priority/cost of groupings",
subtitle = "Horizontal line marking the perfect mean=1 for reference"
)
} else {
out <- lapply(seq_along(dl), function(i) {
ttl <- names(dl)[i]
ns <- length(dl[[i]])
cnts <- cnts_ls[[i]]
ggplot2::ggplot() +
ggplot2::geom_bar(ggplot2::aes(cnts, fill = cnts)) +
ggplot2::scale_x_discrete(
name = NULL,
breaks = cost_scale,
drop = FALSE
) +
ggplot2::scale_y_continuous(name = NULL, limits = c(0, y_max)) +
ggplot2::scale_fill_manual(
values = viridisLite::viridis(length(cost_scale),
direction = viridis.direction,
option = viridis.option)
) +
ggplot2::guides(fill = "none") +
ggplot2::labs(
title =
paste0(
ttl, " (fill=", round(ns / cap[[i]], 1), ";n=", ns, ";mean=",
round(mean(dl[[i]]), 1), ")"
)
)
}) |>
patchwork::wrap_plots(ncol = columns)
}
return(out)
}
plot.prioritized_groups_list <- function(data, ...) {
grouping_plot(data, ...)
}
## Helper function for Shiny
file_extension <- function(filenames) {
sub(
pattern = "^(.*\\.|[^.]+)(?=[^.]*)",
replacement = "",
filenames, perl = TRUE
)
}
########
#### Current file: app/server_raw.R
########
library(shiny)
server <- function(input, output, session) {
# source("https://git.nikohuru.dk/au-phd/PhysicalActivityandStrokeOutcome/raw/branch/main/side%20projects/assignment.R")
# source(here::here("R/group_assign.R"))
dat <- reactive({
# input$file1 will be NULL initially. After the user selects
# and uploads a file, head of that data file by default,
# or all rows if selected, will be shown.
req(input$file1)
# Make laoding dependent of file name extension (file_ext())
ext <- file_extension(input$file1$datapath)
tryCatch(
{
if (ext == "csv") {
df <- utils::read.csv(input$file1$datapath,na.strings = c("NA", '""',""))
} else if (ext %in% c("xls", "xlsx")) {
df <- openxlsx::read.xlsx(input$file1$datapath,na.strings = c("NA", '""',""))
} else if (ext == "ods") {
df <- readODS::read_ods(file = file)
} else {
stop("Input file format has to be on of:
'.csv', '.xls', '.xlsx' or '.ods'")
}
},
error = function(e) {
# return a safeError if a parsing error occurs
stop(safeError(e))
}
)
return(df)
})
dat_pre <- reactive({
# req(input$file2)
# Make laoding dependent of file name extension (file_ext())
if (!is.null(input$file2$datapath)){
ext <- file_extension(input$file2$datapath)
if (ext == "csv") {
df <- utils::read.csv(input$file2$datapath,na.strings = c("NA", '""',""))
} else if (ext %in% c("xls", "xlsx")) {
df <- openxlsx::read.xlsx(input$file2$datapath,na.strings = c("NA", '""',""))
} else if (ext == "ods") {
df <- readODS::read_ods(file = file)
} else {
stop("Input file format has to be on of:
'.csv', '.xls', '.xlsx' or '.ods'")
}
return(df)
} else {
return(NULL)
}
})
groups <-
reactive({
grouped <- prioritized_grouping(
data = dat(),
excess_space = input$excess,
pre_grouped = dat_pre()
)
return(grouped)
})
plot.overall <- reactive({
dplyr::case_match(input$overall.plot,
"yes"~TRUE,
"no"~FALSE,
.default=NULL)
})
output$raw.data.tbl <- renderTable({
groups()$export
})
output$pre.groups <- renderTable({
dat_pre()
})
output$input <- renderTable({
dat()
})
output$groups.plt <- renderPlot({
grouping_plot(groups(),overall = plot.overall())
})
# Downloadable csv of selected dataset ----
output$downloadData <- downloadHandler(
filename = "prioritized_grouping.csv",
content = function(file) {
write.csv(groups()$export, file, row.names = FALSE)
}
)
}

105
app/server_raw.R Normal file
View file

@ -0,0 +1,105 @@
library(shiny)
server <- function(input, output, session) {
# source("https://git.nikohuru.dk/au-phd/PhysicalActivityandStrokeOutcome/raw/branch/main/side%20projects/assignment.R")
# source(here::here("R/group_assign.R"))
dat <- reactive({
# input$file1 will be NULL initially. After the user selects
# and uploads a file, head of that data file by default,
# or all rows if selected, will be shown.
req(input$file1)
# Make laoding dependent of file name extension (file_ext())
ext <- file_extension(input$file1$datapath)
tryCatch(
{
if (ext == "csv") {
df <- utils::read.csv(input$file1$datapath,na.strings = c("NA", '""',""))
} else if (ext %in% c("xls", "xlsx")) {
df <- openxlsx::read.xlsx(input$file1$datapath,na.strings = c("NA", '""',""))
} else if (ext == "ods") {
df <- readODS::read_ods(file = file)
} else {
stop("Input file format has to be on of:
'.csv', '.xls', '.xlsx' or '.ods'")
}
},
error = function(e) {
# return a safeError if a parsing error occurs
stop(safeError(e))
}
)
return(df)
})
dat_pre <- reactive({
# req(input$file2)
# Make laoding dependent of file name extension (file_ext())
if (!is.null(input$file2$datapath)){
ext <- file_extension(input$file2$datapath)
if (ext == "csv") {
df <- utils::read.csv(input$file2$datapath,na.strings = c("NA", '""',""))
} else if (ext %in% c("xls", "xlsx")) {
df <- openxlsx::read.xlsx(input$file2$datapath,na.strings = c("NA", '""',""))
} else if (ext == "ods") {
df <- readODS::read_ods(file = file)
} else {
stop("Input file format has to be on of:
'.csv', '.xls', '.xlsx' or '.ods'")
}
return(df)
} else {
return(NULL)
}
})
groups <-
reactive({
grouped <- prioritized_grouping(
data = dat(),
excess_space = input$excess,
pre_grouped = dat_pre()
)
return(grouped)
})
plot.overall <- reactive({
dplyr::case_match(input$overall.plot,
"yes"~TRUE,
"no"~FALSE,
.default=NULL)
})
output$raw.data.tbl <- renderTable({
groups()$export
})
output$pre.groups <- renderTable({
dat_pre()
})
output$input <- renderTable({
dat()
})
output$groups.plt <- renderPlot({
grouping_plot(groups(),overall = plot.overall())
})
# Downloadable csv of selected dataset ----
output$downloadData <- downloadHandler(
filename = "prioritized_grouping.csv",
content = function(file) {
write.csv(groups()$export, file, row.names = FALSE)
}
)
}

131
app/ui.R Normal file
View file

@ -0,0 +1,131 @@
library(shiny)
ui <- fluidPage(
## -----------------------------------------------------------------------------
## Application title
## -----------------------------------------------------------------------------
titlePanel("Group allocation based on individual subject prioritization.",
windowTitle = "Prioritized grouping calculator"),
h5(
"Please note this calculator is only meant as a proof of concept for educational purposes,
and the author will take no responsibility for the results of the calculator.
Uploaded data is not kept, but please, do not upload any sensitive data."
),
## -----------------------------------------------------------------------------
## Side panel
## -----------------------------------------------------------------------------
## -----------------------------------------------------------------------------
## Single entry
## -----------------------------------------------------------------------------
sidebarLayout(
sidebarPanel(
numericInput(
inputId = "excess",
label = "Excess space (%)",
value = 20,
step = 5
),
p("As default, the program will try to evenly distribute subjects in groups.
This factor will add more capacity to each group, for an overall lesser cost,
but more uneven group numbers. More adjustments can be performed with the source script."),
a(href='https://git.nikohuru.dk/au-phd/PhysicalActivityandStrokeOutcome/src/branch/main/apps/Assignment', "Source", target="_blank"),
## -----------------------------------------------------------------------------
## File upload
## -----------------------------------------------------------------------------
# Input: Select a file ----
fileInput(
inputId = "file1",
label = "Choose main data file",
multiple = FALSE,
accept = c(
".csv",".xls",".xlsx", ".ods"
)
),
strong("Columns: ID, group1, group2, ... groupN."),
strong("NOTE: 0s will be interpreted as lowest score."),
p("Cells should contain cost/priorities.
Lowest score, for highest priority.
Non-ranked should contain a number (eg lowest score+1).
Will handle missings but try to avoid."),
shiny::radioButtons(
inputId = "overall.plot",
label = "Print overall mean grouping priorities/costs only?",
selected = "no",
choices = list(
"Yes" = "yes",
"No" = "no"
)
),
fileInput(
inputId = "file2",
label = "Choose data file for pre-assigned subjects",
multiple = FALSE,
accept = c(
".csv",".xls",".xlsx"
)
),
h6("Columns: ID, group"),
## -----------------------------------------------------------------------------
## Download output
## -----------------------------------------------------------------------------
# Horizontal line ----
tags$hr(),
h4("Download results"),
# Button
downloadButton("downloadData", "Download")
),
mainPanel(tabsetPanel(
## -----------------------------------------------------------------------------
## Plot tab
## -----------------------------------------------------------------------------
tabPanel(
"Summary",
h3("Grouping plot"),
p("These plots are to summarise simple performance meassures for the assignment.
'f' is group fill fraction and 'm' is mean cost in group."),
plotOutput("groups.plt")
),
tabPanel(
"Results",
h3("Raw Results"),
p("This is identical to the downloaded file (see panel on left)"),
htmlOutput("raw.data.tbl", container = span)
),
tabPanel(
"Input data Results",
h3("Costs/prioritis overview"),
htmlOutput("input", container = span),
h3("Pre-assigned groups"),
p("Appears empty if none is uploaded."),
htmlOutput("pre.groups", container = span)
)
))
)
)