feat: new distribution plotting for categorical (incl dichotomous) variables

This commit is contained in:
Andreas Gammelgaard Damsbo 2025-10-31 11:39:12 +01:00
parent ebc8c65628
commit ae9aa2e6f5
No known key found for this signature in database
4 changed files with 174 additions and 6 deletions

View file

@ -343,6 +343,12 @@ data_visuals_server <- function(id,
ter = input$tertiary
)
## If the dictionary holds additional arguments to pass to the
## plotting function, these are included
if (!is.null(rv$plot.params()[["fun.args"]])){
parameters <- modifyList(parameters,rv$plot.params()[["fun.args"]])
}
shiny::withProgress(message = i18n$t("Drawing the plot. Hold tight for a moment.."), {
rv$plot <- rlang::exec(
create_plot,
@ -506,7 +512,7 @@ supported_plots <- function() {
fun = "plot_violin",
descr = i18n$t("Violin plot"),
note = i18n$t("A modern alternative to the classic boxplot to visualise data distribution"),
primary.type = c("datatime", "continuous", "dichotomous", "categorical"),
primary.type = c("datatime", "continuous"),
secondary.type = c("dichotomous", "categorical"),
secondary.multi = FALSE,
secondary.extra = "none",
@ -544,7 +550,7 @@ supported_plots <- function() {
fun = "plot_box",
descr = i18n$t("Box plot"),
note = i18n$t("A classic way to plot data distribution by groups"),
primary.type = c("datatime", "continuous", "dichotomous", "categorical"),
primary.type = c("datatime", "continuous"),
secondary.type = c("dichotomous", "categorical"),
secondary.multi = FALSE,
tertiary.type = c("dichotomous", "categorical"),
@ -560,6 +566,28 @@ supported_plots <- function() {
secondary.max = 4,
tertiary.type = c("dichotomous"),
secondary.extra = NULL
),
plot_bar_rel = list(
fun = "plot_bar",
fun.args =list(style="fill"),
descr = i18n$t("Stacked relative barplot"),
note = i18n$t("Create relative stacked barplots to show the distribution of categorical levels"),
primary.type = c("dichotomous", "categorical"),
secondary.type = c("dichotomous", "categorical"),
secondary.multi = FALSE,
tertiary.type = c("dichotomous", "categorical"),
secondary.extra = NULL
),
plot_bar_abs = list(
fun = "plot_bar",
fun.args =list(style="dodge"),
descr = i18n$t("Side-by-side barplot"),
note = i18n$t("Create side-by-side barplot to show the distribution of categorical levels"),
primary.type = c("dichotomous", "categorical"),
secondary.type = c("dichotomous", "categorical"),
secondary.multi = FALSE,
tertiary.type = c("dichotomous", "categorical"),
secondary.extra = "none"
)
)
}

118
R/plot_bar.R Normal file
View file

@ -0,0 +1,118 @@
plot_bar <- function(data, pri, sec, ter = NULL, style = c("stack", "dodge", "fill"), max_level = 30, ...) {
style <- match.arg(style)
if (!is.null(ter)) {
ds <- split(data, data[ter])
} else {
ds <- list(data)
}
out <- lapply(ds, \(.ds){
plot_bar_single(
data = .ds,
pri = pri,
sec = sec,
style = style,
max_level = max_level
)
})
wrap_plot_list(out, title = glue::glue(i18n$t("Grouped by {get_label(data,ter)}")), ...)
}
#' Single vertical barplot
#'
#' @param style barplot style passed to geom_bar position argument.
#' One of c("stack", "dodge", "fill")
#'
#' @name data-plots
#'
#' @returns ggplot object
#' @export
#'
#' @examples
#' mtcars |>
#' dplyr::mutate(cyl = factor(cyl), am = factor(am)) |>
#' plot_bar_single(pri = "cyl", sec = "am", style = "fill")
#'
#' mtcars |>
#' dplyr::mutate(cyl = factor(cyl), am = factor(am)) |>
#' plot_bar_single(pri = "cyl", style = "stack")
plot_bar_single <- function(data, pri, sec = NULL, style = c("stack", "dodge", "fill"), max_level = 30) {
style <- match.arg(style)
if (identical(sec, "none")) {
sec <- NULL
}
p_data <- as.data.frame(table(data[c(pri, sec)])) |>
dplyr::mutate(dplyr::across(tidyselect::any_of(c(pri, sec)), forcats::as_factor),
p = Freq / NROW(data)
)
if (nrow(p_data) > max_level) {
# browser()
p_data <- sort_by(
p_data,
p_data[["Freq"]],
decreasing = TRUE
) |>
head(max_level)
# if (is.null(sec)){
# p_data <- sort_by(
# p_data,
# p_data[["Freq"]],
# decreasing=TRUE) |>
# head(max_level)
# } else {
# split(p_data,p_data[[sec]]) |>
# lapply(\(.x){
# # browser()
# sort_by(
# .x,
# .x[["Freq"]],
# decreasing=TRUE) |>
# head(max_level)
# }) |> dplyr::bind_rows()
# }
}
## Shortens long level names
p_data[[pri]] <- forcats::as_factor(unique_short(as.character(p_data[[pri]]), max = 20))
if (!is.null(sec)) {
fill <- sec
} else {
fill <- pri
}
p <- ggplot2::ggplot(
p_data,
ggplot2::aes(
x = .data[[pri]],
y = p,
fill = .data[[fill]]
)
) +
ggplot2::geom_bar(position = style, stat = "identity") +
ggplot2::scale_y_continuous(labels = scales::percent) +
ggplot2::ylab("Percentage") +
ggplot2::xlab(get_label(data,pri))
## To handle large number of levels and long level names
if (nrow(p_data) > 10 | any(nchar(as.character(p_data[[pri]])) > 6)) {
p <- p +
ggplot2::guides(fill = "none") +
ggplot2::theme(
axis.text.x = ggplot2::element_text(
angle = 90,
vjust = 1, hjust = 1
))+
ggplot2::theme(
axis.text.x = ggplot2::element_text(vjust = 0.5)
)
}
p
}

View file

@ -39,8 +39,6 @@ plot_box <- function(data, pri, sec, ter = NULL,...) {
}
#' Create nice box-plots
#'
#' @name data-plots

View file

@ -1,11 +1,13 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data_plots.R, R/plot_box.R, R/plot_hbar.R,
% R/plot_ridge.R, R/plot_sankey.R, R/plot_scatter.R, R/plot_violin.R
% Please edit documentation in R/data_plots.R, R/plot_bar.R, R/plot_box.R,
% R/plot_hbar.R, R/plot_ridge.R, R/plot_sankey.R, R/plot_scatter.R,
% R/plot_violin.R
\name{data-plots}
\alias{data-plots}
\alias{data_visuals_ui}
\alias{data_visuals_server}
\alias{create_plot}
\alias{plot_bar_single}
\alias{plot_box}
\alias{plot_box_single}
\alias{plot_hbars}
@ -22,6 +24,14 @@ data_visuals_server(id, data, ...)
create_plot(data, type, pri, sec, ter = NULL, ...)
plot_bar_single(
data,
pri,
sec = NULL,
style = c("stack", "dodge", "fill"),
max_level = 30
)
plot_box(data, pri, sec, ter = NULL, ...)
plot_box_single(data, pri, sec = NULL, seed = 2103)
@ -60,6 +70,9 @@ plot_violin(data, pri, sec, ter = NULL)
\item{sec}{secondary variable}
\item{ter}{tertiary variable}
\item{style}{barplot style passed to geom_bar position argument.
One of c("stack", "dodge", "fill")}
}
\value{
Shiny ui module
@ -68,6 +81,8 @@ shiny server module
ggplot2 object
ggplot object
ggplot2 object
ggplot object
@ -89,6 +104,8 @@ Data correlations evaluation module
Wrapper to create plot based on provided type
Single vertical barplot
Beautiful box plot(s)
Create nice box-plots
@ -107,6 +124,13 @@ Beatiful violin plot
}
\examples{
create_plot(mtcars, "plot_violin", "mpg", "cyl") |> attributes()
mtcars |>
dplyr::mutate(cyl = factor(cyl), am = factor(am)) |>
plot_bar_single(pri = "cyl", sec = "am", style = "fill")
mtcars |>
dplyr::mutate(cyl = factor(cyl), am = factor(am)) |>
plot_bar_single(pri = "cyl", style = "stack")
mtcars |> plot_box(pri = "mpg", sec = "gear")
mtcars |> plot_box(pri = "mpg", sec="cyl")
mtcars |>