feat: new distribution plotting for categorical (incl dichotomous) variables

This commit is contained in:
Andreas Gammelgaard Damsbo 2025-10-31 11:39:12 +01:00
parent ebc8c65628
commit ae9aa2e6f5
No known key found for this signature in database
4 changed files with 174 additions and 6 deletions

View file

@ -343,6 +343,12 @@ data_visuals_server <- function(id,
ter = input$tertiary ter = input$tertiary
) )
## If the dictionary holds additional arguments to pass to the
## plotting function, these are included
if (!is.null(rv$plot.params()[["fun.args"]])){
parameters <- modifyList(parameters,rv$plot.params()[["fun.args"]])
}
shiny::withProgress(message = i18n$t("Drawing the plot. Hold tight for a moment.."), { shiny::withProgress(message = i18n$t("Drawing the plot. Hold tight for a moment.."), {
rv$plot <- rlang::exec( rv$plot <- rlang::exec(
create_plot, create_plot,
@ -506,7 +512,7 @@ supported_plots <- function() {
fun = "plot_violin", fun = "plot_violin",
descr = i18n$t("Violin plot"), descr = i18n$t("Violin plot"),
note = i18n$t("A modern alternative to the classic boxplot to visualise data distribution"), note = i18n$t("A modern alternative to the classic boxplot to visualise data distribution"),
primary.type = c("datatime", "continuous", "dichotomous", "categorical"), primary.type = c("datatime", "continuous"),
secondary.type = c("dichotomous", "categorical"), secondary.type = c("dichotomous", "categorical"),
secondary.multi = FALSE, secondary.multi = FALSE,
secondary.extra = "none", secondary.extra = "none",
@ -544,7 +550,7 @@ supported_plots <- function() {
fun = "plot_box", fun = "plot_box",
descr = i18n$t("Box plot"), descr = i18n$t("Box plot"),
note = i18n$t("A classic way to plot data distribution by groups"), note = i18n$t("A classic way to plot data distribution by groups"),
primary.type = c("datatime", "continuous", "dichotomous", "categorical"), primary.type = c("datatime", "continuous"),
secondary.type = c("dichotomous", "categorical"), secondary.type = c("dichotomous", "categorical"),
secondary.multi = FALSE, secondary.multi = FALSE,
tertiary.type = c("dichotomous", "categorical"), tertiary.type = c("dichotomous", "categorical"),
@ -560,6 +566,28 @@ supported_plots <- function() {
secondary.max = 4, secondary.max = 4,
tertiary.type = c("dichotomous"), tertiary.type = c("dichotomous"),
secondary.extra = NULL secondary.extra = NULL
),
plot_bar_rel = list(
fun = "plot_bar",
fun.args =list(style="fill"),
descr = i18n$t("Stacked relative barplot"),
note = i18n$t("Create relative stacked barplots to show the distribution of categorical levels"),
primary.type = c("dichotomous", "categorical"),
secondary.type = c("dichotomous", "categorical"),
secondary.multi = FALSE,
tertiary.type = c("dichotomous", "categorical"),
secondary.extra = NULL
),
plot_bar_abs = list(
fun = "plot_bar",
fun.args =list(style="dodge"),
descr = i18n$t("Side-by-side barplot"),
note = i18n$t("Create side-by-side barplot to show the distribution of categorical levels"),
primary.type = c("dichotomous", "categorical"),
secondary.type = c("dichotomous", "categorical"),
secondary.multi = FALSE,
tertiary.type = c("dichotomous", "categorical"),
secondary.extra = "none"
) )
) )
} }

118
R/plot_bar.R Normal file
View file

@ -0,0 +1,118 @@
plot_bar <- function(data, pri, sec, ter = NULL, style = c("stack", "dodge", "fill"), max_level = 30, ...) {
style <- match.arg(style)
if (!is.null(ter)) {
ds <- split(data, data[ter])
} else {
ds <- list(data)
}
out <- lapply(ds, \(.ds){
plot_bar_single(
data = .ds,
pri = pri,
sec = sec,
style = style,
max_level = max_level
)
})
wrap_plot_list(out, title = glue::glue(i18n$t("Grouped by {get_label(data,ter)}")), ...)
}
#' Single vertical barplot
#'
#' @param style barplot style passed to geom_bar position argument.
#' One of c("stack", "dodge", "fill")
#'
#' @name data-plots
#'
#' @returns ggplot object
#' @export
#'
#' @examples
#' mtcars |>
#' dplyr::mutate(cyl = factor(cyl), am = factor(am)) |>
#' plot_bar_single(pri = "cyl", sec = "am", style = "fill")
#'
#' mtcars |>
#' dplyr::mutate(cyl = factor(cyl), am = factor(am)) |>
#' plot_bar_single(pri = "cyl", style = "stack")
plot_bar_single <- function(data, pri, sec = NULL, style = c("stack", "dodge", "fill"), max_level = 30) {
style <- match.arg(style)
if (identical(sec, "none")) {
sec <- NULL
}
p_data <- as.data.frame(table(data[c(pri, sec)])) |>
dplyr::mutate(dplyr::across(tidyselect::any_of(c(pri, sec)), forcats::as_factor),
p = Freq / NROW(data)
)
if (nrow(p_data) > max_level) {
# browser()
p_data <- sort_by(
p_data,
p_data[["Freq"]],
decreasing = TRUE
) |>
head(max_level)
# if (is.null(sec)){
# p_data <- sort_by(
# p_data,
# p_data[["Freq"]],
# decreasing=TRUE) |>
# head(max_level)
# } else {
# split(p_data,p_data[[sec]]) |>
# lapply(\(.x){
# # browser()
# sort_by(
# .x,
# .x[["Freq"]],
# decreasing=TRUE) |>
# head(max_level)
# }) |> dplyr::bind_rows()
# }
}
## Shortens long level names
p_data[[pri]] <- forcats::as_factor(unique_short(as.character(p_data[[pri]]), max = 20))
if (!is.null(sec)) {
fill <- sec
} else {
fill <- pri
}
p <- ggplot2::ggplot(
p_data,
ggplot2::aes(
x = .data[[pri]],
y = p,
fill = .data[[fill]]
)
) +
ggplot2::geom_bar(position = style, stat = "identity") +
ggplot2::scale_y_continuous(labels = scales::percent) +
ggplot2::ylab("Percentage") +
ggplot2::xlab(get_label(data,pri))
## To handle large number of levels and long level names
if (nrow(p_data) > 10 | any(nchar(as.character(p_data[[pri]])) > 6)) {
p <- p +
ggplot2::guides(fill = "none") +
ggplot2::theme(
axis.text.x = ggplot2::element_text(
angle = 90,
vjust = 1, hjust = 1
))+
ggplot2::theme(
axis.text.x = ggplot2::element_text(vjust = 0.5)
)
}
p
}

View file

@ -39,8 +39,6 @@ plot_box <- function(data, pri, sec, ter = NULL,...) {
} }
#' Create nice box-plots #' Create nice box-plots
#' #'
#' @name data-plots #' @name data-plots

View file

@ -1,11 +1,13 @@
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data_plots.R, R/plot_box.R, R/plot_hbar.R, % Please edit documentation in R/data_plots.R, R/plot_bar.R, R/plot_box.R,
% R/plot_ridge.R, R/plot_sankey.R, R/plot_scatter.R, R/plot_violin.R % R/plot_hbar.R, R/plot_ridge.R, R/plot_sankey.R, R/plot_scatter.R,
% R/plot_violin.R
\name{data-plots} \name{data-plots}
\alias{data-plots} \alias{data-plots}
\alias{data_visuals_ui} \alias{data_visuals_ui}
\alias{data_visuals_server} \alias{data_visuals_server}
\alias{create_plot} \alias{create_plot}
\alias{plot_bar_single}
\alias{plot_box} \alias{plot_box}
\alias{plot_box_single} \alias{plot_box_single}
\alias{plot_hbars} \alias{plot_hbars}
@ -22,6 +24,14 @@ data_visuals_server(id, data, ...)
create_plot(data, type, pri, sec, ter = NULL, ...) create_plot(data, type, pri, sec, ter = NULL, ...)
plot_bar_single(
data,
pri,
sec = NULL,
style = c("stack", "dodge", "fill"),
max_level = 30
)
plot_box(data, pri, sec, ter = NULL, ...) plot_box(data, pri, sec, ter = NULL, ...)
plot_box_single(data, pri, sec = NULL, seed = 2103) plot_box_single(data, pri, sec = NULL, seed = 2103)
@ -60,6 +70,9 @@ plot_violin(data, pri, sec, ter = NULL)
\item{sec}{secondary variable} \item{sec}{secondary variable}
\item{ter}{tertiary variable} \item{ter}{tertiary variable}
\item{style}{barplot style passed to geom_bar position argument.
One of c("stack", "dodge", "fill")}
} }
\value{ \value{
Shiny ui module Shiny ui module
@ -68,6 +81,8 @@ shiny server module
ggplot2 object ggplot2 object
ggplot object
ggplot2 object ggplot2 object
ggplot object ggplot object
@ -89,6 +104,8 @@ Data correlations evaluation module
Wrapper to create plot based on provided type Wrapper to create plot based on provided type
Single vertical barplot
Beautiful box plot(s) Beautiful box plot(s)
Create nice box-plots Create nice box-plots
@ -107,6 +124,13 @@ Beatiful violin plot
} }
\examples{ \examples{
create_plot(mtcars, "plot_violin", "mpg", "cyl") |> attributes() create_plot(mtcars, "plot_violin", "mpg", "cyl") |> attributes()
mtcars |>
dplyr::mutate(cyl = factor(cyl), am = factor(am)) |>
plot_bar_single(pri = "cyl", sec = "am", style = "fill")
mtcars |>
dplyr::mutate(cyl = factor(cyl), am = factor(am)) |>
plot_bar_single(pri = "cyl", style = "stack")
mtcars |> plot_box(pri = "mpg", sec = "gear") mtcars |> plot_box(pri = "mpg", sec = "gear")
mtcars |> plot_box(pri = "mpg", sec="cyl") mtcars |> plot_box(pri = "mpg", sec="cyl")
mtcars |> mtcars |>