prioritized.grouping/app/server.R

657 lines
14 KiB
R
Raw Normal View History

########
#### Current file: R//parse_formats.R
########
parse_prio_form <- function(data, id = 1, prio.cols,sort.cols=FALSE) {
if (is.character(prio.cols)) {
grp.index <- match(prio.cols, names(data))
} else {
grp.index <- prio.cols
}
if (sort.cols){
prio.cols <- sort(prio.cols)
}
new.names <- names(data)
new.names[grp.index] <- seq_along(grp.index)
data <- setNames(data, new.names)
out <- split(data, seq_len(nrow(data))) |>
lapply(\(.x){
# browser()
out <- as.data.frame(matrix(c(as.character(.x[[id]]), colnames(.x)[grp.index]), nrow = 1))
setNames(out, c(
"id",
# names(.x[id]),
unname(unlist(.x[grp.index]))
))
}) |>
dplyr::bind_rows() |>
dplyr::mutate(dplyr::across(-1, as.integer))
# Sorting is not really needed, but a nice touch
out[c(names(out)[1], sort(names(out)[-1]))]
}
parse_string_form <- function(data, id = 1, string.col,pattern=NULL) {
if (is.null(pattern)){
pattern <- ";"
}
if (length(string.col) != 1) {
stop("string.col is required, and has to have length 1")
}
if (is.character(string.col)) {
string.index <- match(string.col, names(data))
} else {
string.index <- string.col
}
# Cells with NAs are excluded.
# NAs happen if the priorities are not edited upon form submission, but a
# default order can not be guessed reliably if group naming is not ordered
# (like group N, group N+1...)
out <- data.frame(data[[id]], data[[string.index]]) |>
na.omit() |>
(\(.d){
split(.d, seq_len(nrow(.d)))
})() |>
lapply(\(.x){
grps <- unlist(strsplit(x=.x[[2]],split=pattern))
out <- as.data.frame(matrix(c(.x[[1]], seq_along(grps)), nrow = 1))
setNames(
out,
c("id", grps)
)
}) |>
dplyr::bind_rows()
# Sorting is not really needed, but a nice touch
out[c(names(out)[1], sort(names(out)[-1]))]
}
########
#### Current file: R//prioritized_grouping.R
########
utils::globalVariables(c("group", "grp", "i", "j", "value"))
2024-10-10 13:25:09 +02:00
prioritized_grouping <-
function(data,
cap_classes = NULL,
excess_space = 20,
pre_grouped = NULL,
seed = 6293812) {
set.seed(seed = seed)
# browser()
requireNamespace("ROI")
requireNamespace("ROI.plugin.symphony")
if (!is.data.frame(data)) {
stop("Supplied data has to be a data frame, with each row
are subjects and columns are groups, with the first column being
subject identifiers")
}
# Converts tibble to data.frame
if ("tbl_df" %in% class(data)){
data <- as.data.frame(data)
}
## This program very much trust the user to supply correctly formatted data
cost <- t(data[, -1]) # Transpose converts to matrix
colnames(cost) <- data[, 1]
nms_groups <- rownames(cost)
num_groups <- dim(cost)[1]
num_sub <- dim(cost)[2]
## Adding the option to introduce a bit of head room to the classes by
## the groups to a little bigger than the smallest possible
## Default is to allow for an extra 20 % fill
excess <- 1 + (excess_space / 100)
# generous round up of capacities
if (is.null(cap_classes)) {
capacity <- rep(ceiling(excess * num_sub / num_groups), num_groups)
# } else if (!is.numeric(cap_classes)) {
# stop("cap_classes has to be numeric")
} else if (length(cap_classes) == 1) {
capacity <- ceiling(rep(cap_classes, num_groups) * excess)
} else if (length(cap_classes) == num_groups) {
capacity <- ceiling(cap_classes * excess)
} else {
stop("cap_classes has to be either length 1 or same as number of groups")
}
## This test should be a little more elegant
## pre_grouped should be a data.frame or matrix with an ID and group column
with_pre_grouped <- FALSE
if (!is.null(pre_grouped)) {
# Setting flag for later and export list
with_pre_grouped <- TRUE
# Simple translation to allow pre_grouped to denote indices
if (is.numeric(pre_grouped[, 2])){
pre_grouped$pre.groups <- nms_groups[pre_grouped[, 2]]
} else {
pre_grouped$pre.groups <- as.character(pre_grouped[, 2])
}
# Splitting to list for later merging
pre <- split(
pre_grouped[, 1],
factor(pre_grouped[, 3], levels = nms_groups)
)
# Subtracting capacity numbers, to reflect already filled spots
capacity <- capacity - lengths(pre)
# Making sure pre_grouped are removed from main data set
data <- data[!data[[1]] %in% pre_grouped[[1]], ]
cost <- t(data[, -1])
colnames(cost) <- data[, 1]
num_groups <- dim(cost)[1]
num_sub <- dim(cost)[2]
}
## Simple NA handling. Better to handle NAs yourself!
cost[is.na(cost)] <- num_groups
i_m <- seq_len(num_groups)
j_m <- seq_len(num_sub)
m <- ompr::MIPModel() |>
ompr::add_variable(grp[i, j],
i = i_m,
j = j_m,
type = "binary"
) |>
## The first constraint says that group size should not exceed capacity
ompr::add_constraint(ompr::sum_expr(grp[i, j], j = j_m) <= capacity[i],
i = i_m
) |>
## The second constraint says each subject can only be in one group
ompr::add_constraint(ompr::sum_expr(grp[i, j], i = i_m) == 1, j = j_m) |>
## The objective is set to minimize the cost of the grouping
## Giving subjects the group with the highest possible ranking
ompr::set_objective(
ompr::sum_expr(
cost[i, j] * grp[i, j],
i = i_m,
j = j_m
),
"min"
) |>
ompr::solve_model(ompr.roi::with_ROI(solver = "symphony", verbosity = 1))
if (m$status == "error") {
stop("The algorithm is not able to solve the problem. Please adjust the
constraints by increasing group capacities and/or excess fill")
}
## Getting groups
solution <- ompr::get_solution(m, grp[i, j]) |> dplyr::filter(value > 0)
grouped <- solution |> dplyr::select(i, j)
if (!is.null(rownames(cost))) {
grouped$i <- rownames(cost)[grouped$i]
}
if (!is.null(colnames(cost))) {
grouped$j <- colnames(cost)[grouped$j]
}
## Splitting into groups based on groups
grouped_ls <- split(grouped$j, grouped$i)
## Extracting subject cost for the final groups for evaluation
if (is.null(rownames(cost))) {
rownames(cost) <- seq_len(nrow(cost))
}
if (is.null(colnames(cost))) {
colnames(cost) <- seq_len(ncol(cost))
}
evaluated <- lapply(seq_len(length(grouped_ls)), function(i) {
ndx <- match(names(grouped_ls)[i], rownames(cost))
cost[ndx, grouped_ls[[i]]]
})
names(evaluated) <- names(grouped_ls)
if (with_pre_grouped) {
names(pre) <- names(grouped_ls)
grouped_all <- mapply(c, grouped_ls, pre, SIMPLIFY = FALSE)
out <- list(all_grouped = grouped_all)
} else {
out <- list(all_grouped = grouped_ls)
}
export <- do.call(rbind, lapply(seq_along(out[[1]]), function(i) {
cbind("ID" = out[[1]][[i]], "Group" = names(out[[1]])[i])
}))
out <- c(
out,
list(
evaluation = evaluated,
groupings = grouped_ls,
solution = solution,
capacity = capacity,
excess = excess,
pre_grouped = with_pre_grouped,
cost_scale = levels(factor(cost)),
input = data,
export = export
)
)
# exists("excess")
class(out) <- c("prioritized_groups_list", class(out))
2024-10-10 13:25:09 +02:00
invisible(out)
}
2024-10-10 13:25:09 +02:00
grouping_plot <- function(data,
2024-10-10 13:25:09 +02:00
columns = 4,
overall = FALSE,
viridis.option="D",
viridis.direction=-1) {
assertthat::assert_that("prioritized_groups_list" %in% class(data))
dl <- data[[2]]
cost_scale <- unique(data[[8]])
cap <- data[[5]]
cnts_ls <- lapply(dl, function(i) {
factor(i, levels = cost_scale)
})
y_max <- max(lengths(dl))
if (overall) {
ds <- tibble::tibble(
group = seq_along(dl),
mean = round(Reduce(c, lapply(dl, mean)), 1)
)
out <- ds |>
ggplot2::ggplot(ggplot2::aes(x = group, y = mean, fill = mean)) +
ggplot2::geom_bar(stat = "identity") +
ggplot2::geom_hline(yintercept = 1) +
ggplot2::scale_fill_viridis_c(option=viridis.option,
direction = viridis.direction) +
ggplot2::guides(fill = "none") +
ggplot2::scale_x_continuous(name = "Groups", breaks = ds$group) +
ggplot2::ylab("Mean priority/cost") +
ggplot2::labs(
title = "Overall group-wise mean priority/cost of groupings",
subtitle = "Horizontal line marking the perfect mean=1 for reference"
)
} else {
out <- lapply(seq_along(dl), function(i) {
ttl <- names(dl)[i]
ns <- length(dl[[i]])
cnts <- cnts_ls[[i]]
ggplot2::ggplot() +
ggplot2::geom_bar(ggplot2::aes(cnts, fill = cnts)) +
ggplot2::scale_x_discrete(
name = NULL,
breaks = cost_scale,
drop = FALSE
) +
ggplot2::scale_y_continuous(name = NULL, limits = c(0, y_max)) +
ggplot2::scale_fill_manual(
values = viridisLite::viridis(length(cost_scale),
direction = viridis.direction,
option = viridis.option)
) +
ggplot2::guides(fill = "none") +
ggplot2::labs(
title =
paste0(
ttl, " (fill=", round(ns / cap[[i]], 1), ";n=", ns, ";mean=",
round(mean(dl[[i]]), 1), ")"
)
)
}) |>
patchwork::wrap_plots(ncol = columns)
}
return(out)
}
2024-10-10 13:25:09 +02:00
plot.prioritized_groups_list <- function(x, ...) {
grouping_plot(x, ...)
}
## Helper function for Shiny
file_extension <- function(filenames) {
sub(
pattern = "^(.*\\.|[^.]+)(?=[^.]*)",
replacement = "",
filenames, perl = TRUE
)
}
2024-10-10 13:22:22 +02:00
read_input <- function(file, consider.na = c("NA", '""', "")) {
ext <- file_extension(file)
tryCatch(
{
if (ext == "csv") {
df <- utils::read.csv(file = file, na = consider.na)
} else if (ext %in% c("xls", "xlsx")) {
df <- openxlsx2::read_xlsx(file = file, na.strings = consider.na)
} else if (ext == "ods") {
df <- readODS::read_ods(file = file)
} else {
stop("Input file format has to be on of:
'.csv', '.xls', '.xlsx', '.dta' or '.ods'")
}
},
error = function(e) {
# return a safeError if a parsing error occurs
stop(shiny::safeError(e))
}
)
df
}
########
#### Current file: app/server_raw.R
########
library(shiny)
server <- function(input, output, session) {
# source("https://git.nikohuru.dk/au-phd/PhysicalActivityandStrokeOutcome/raw/branch/main/side%20projects/assignment.R")
# source(here::here("R/group_assign.R"))
v <- shiny::reactiveValues(
ds = NULL,
pre = NULL
)
dat <- reactive({
# input$file1 will be NULL initially. After the user selects
# and uploads a file, head of that data file by default,
# or all rows if selected, will be shown.
req(input$file1)
# Make laoding dependent of file name extension (file_ext())
out <- read_input(input$file1$datapath)
v$ds <- "loaded"
return(out)
})
dat_parsed <- reactive({
req(input$file1)
if (input$input_type == "default") {
out <- dat()
} else if (input$input_type == "prio") {
req(input$id_var_prio)
req(input$prio_vars)
out <- parse_prio_form(
data = dat(),
id = input$id_var_prio,
prio.cols = input$prio_vars
)
} else if (input$input_type == "string") {
req(input$id_var_string)
req(input$string_var)
out <- parse_string_form(
data = dat(),
id = input$id_var_string,
string.col = input$string_var,
pattern = input$string_split
)
}
return(out)
})
output$id_var_prio <- shiny::renderUI({
selectInput(
inputId = "id_var_prio",
selected = 1,
label = "ID column",
choices = colnames(dat()),
multiple = FALSE
)
})
output$id_var_string <- shiny::renderUI({
selectInput(
inputId = "id_var_string",
selected = 1,
label = "ID column",
choices = colnames(dat()),
multiple = FALSE
)
})
output$prio_vars <- shiny::renderUI({
selectizeInput(
inputId = "prio_vars",
selected = NULL,
label = "Priority columns (select from first to lowest)",
choices = colnames(dat())[-match(input$id_var_prio, colnames(dat()))],
multiple = TRUE
)
})
output$string_var <- shiny::renderUI({
selectizeInput(
inputId = "string_var",
selected = NULL,
label = "Column of strings",
choices = colnames(dat())[-match(input$id_var_string, colnames(dat()))],
multiple = FALSE
)
})
dat_pre <- reactive({
# req(input$file2)
# Make laoding dependent of file name extension (file_ext())
if (!is.null(input$file2$datapath)) {
out <- read_input(input$file2$datapath)
} else {
out <- NULL
}
v$pre <- "loaded"
return(out)
})
groups <-
reactive({
grouped <- prioritized_grouping(
data = dat_parsed(),
excess_space = input$excess,
pre_grouped = dat_pre()
)
return(grouped)
})
plot.overall <- reactive({
dplyr::case_match(input$overall.plot,
"yes" ~ TRUE,
"no" ~ FALSE,
.default = NULL
)
})
output$raw.data.tbl <- renderTable({
groups()$export
})
output$pre.groups <- renderTable({
dat_pre()
})
output$input <- renderTable({
dat()
})
output$input_parsed <- renderTable({
dat_parsed()
})
output$groups.plt <- renderPlot({
grouping_plot(groups(), overall = plot.overall())
})
output$uploaded <- shiny::reactive({
if (is.null(v$ds)) {
"no"
} else {
"yes"
}
})
output$pre_assigned <- shiny::reactive({
if (is.null(v$pre)) {
"no"
} else {
"yes"
}
})
shiny::outputOptions(output, "uploaded", suspendWhenHidden = FALSE)
shiny::outputOptions(output, "pre_assigned", suspendWhenHidden = FALSE)
# Downloadable csv of selected dataset ----
output$downloadData <- downloadHandler(
filename = "prioritized_grouping.ods",
content = function(file) {
readODS::write_ods(as.data.frame(groups()$export), file)
}
)
}