From 48f4d99429206c1d8f0bc43590d14df90b001c3c Mon Sep 17 00:00:00 2001 From: Andreas Gammelgaard Damsbo Date: Thu, 30 Oct 2025 14:49:38 +0100 Subject: [PATCH] feat: new string split function to allow splitting string into rows or columns. select from allowed delimiters. --- R/separate_string.R | 289 +++++++++++++++++++++++++++++++++++ R/ui_elements.R | 20 ++- examples/string_split_demo.R | 54 +++++++ man/detect_delimiter.Rd | 22 +++ man/separate_string.Rd | 23 +++ 5 files changed, 404 insertions(+), 4 deletions(-) create mode 100644 R/separate_string.R create mode 100644 examples/string_split_demo.R create mode 100644 man/detect_delimiter.Rd create mode 100644 man/separate_string.Rd diff --git a/R/separate_string.R b/R/separate_string.R new file mode 100644 index 00000000..d319272d --- /dev/null +++ b/R/separate_string.R @@ -0,0 +1,289 @@ +string_split_ui <- function(id) { + ns <- NS(id) + tagList( + shiny::fluidRow( + column( + width = 4, + shiny::uiOutput(outputId = ns("variable")) + ), + column( + width = 4, + shiny::uiOutput(outputId = ns("delim")) + ), + column( + width = 4, + shiny::uiOutput(outputId = ns("direction")) + )#, + # column( + # width = 3, + # actionButton( + # inputId = ns("split"), + # label = tagList(phosphoricons::ph("scissors"), i18n$t("Split the variable")), + # class = "btn-outline-primary float-end" + # ) + # ) + ), + shiny::fluidRow( + column( + width = 3, + shiny::h4(i18n$t("Original data")), + shiny::tableOutput(outputId = ns("orig_data")) + ), + column(width=1), + column( + width = 8, + shiny::h4(i18n$t("Preview of result")), + shiny::tableOutput(outputId = ns("new_data")) + ) + ), + actionButton( + inputId = ns("create"), + label = tagList(phosphoricons::ph("pencil"), i18n$t("Save new data")), + class = "btn-outline-primary float-end" + ), + tags$div(class = "clearfix") + ) +} + + + +string_split_server <- function(id, data_r = reactive(NULL)) { + moduleServer( + id, + function(input, output, session) { + rv <- reactiveValues(data = NULL, temp = NULL, out = NULL) + + ns <- session$ns + + bindEvent(observe({ + data <- data_r() + rv$data <- data + # browser() + vars_num <- vapply(data, \(.x){ + is.character(.x) + }, logical(1)) + vars_num <- names(vars_num)[vars_num] + + output$variable <- shiny::renderUI( + columnSelectInput( + inputId = ns("variable"), + data = data, + label = i18n$t("Variable to split:"), + width = "100%", + col_subset = vars_num, + selected = if (isTruthy(input$variable)) input$variable else vars_num[1] + ) + ) + + # shinyWidgets::updateVirtualSelect( + # inputId = "variable", + # choices = vars_num, + # selected = if (isTruthy(input$variable)) input$variable else vars_num[1] + # ) + }), data_r(), input$hidden) + + output$delim <- shiny::renderUI({ + req(rv$data) + req(input$variable) + # browser() + + data <- rv$data |> + dplyr::select(tidyselect::all_of(input$variable)) + + delimiters <- Reduce(c, unique(sapply(data[[1]], detect_delimiter))) + + # shiny::textInput(inputId = ns("delim"), label = i18n$t("Text or character to split string by")) + shiny::selectInput( + inputId = ns("delim"), label = i18n$t("Select delimiter"), + choices = setNames( + delimiters, + glue::glue("'{delimiters}'") + ),selected = 1 + ) + }) + + + output$direction <- shiny::renderUI({ + vectorSelectInput( + inputId = ns("direction"), + label = i18n$t("Direction:"), + choices = setNames( + c( + "wide", + "long" + ), + c( + i18n$t("Split string to multiple columns. Keep number of rows."), + i18n$t("Split string to multiple observations (rows) in the same column. Also ads id and instance columns") + ) + ), + selected = "wide", + width = "100%" + ) + }) + + observeEvent( + list( + input$variable, + input$delim, + input$direction + ), + { + req(rv$data) + req(input$variable) + req(input$delim) + req(input$direction) + + data <- rv$data |> + dplyr::select(tidyselect::all_of(input$variable)) + # browser() + rv$temp <- separate_string( + data = data, + col = input$variable, + delim = input$delim, + direction = input$direction + ) + } + ) + + shiny::observeEvent(input$split, { + show_data(rv$temp, title = i18n$t("Browse data preview"), type = "modal") + }) + + ## Toastui would not render the original data, so the solution was to go + ## with native table rendering, which works, but doesn't please the eye + + output$orig_data <- shiny::renderTable({ + req(data_r()) + req(input$variable) + data <- data_r()|> + dplyr::select(tidyselect::all_of(input$variable)) + # browser() + head(data , 10) + }) + + output$new_data <- shiny::renderTable({ + shiny::req(rv$temp) + data <- rv$temp + head(data, 10) + # toastui::datagrid( + # data = head(data, 100), + # colwidths = "guess", + # theme = "default", + # bodyHeight = "auto", pagination = 10 + # ) + }) + + data_split_r <- reactive({ + req(rv$temp) + + data <- rv$data + + separate_string( + data = data, + col = input$variable, + delim = input$delim, + direction = input$direction + ) + }) + + data_returned_r <- observeEvent(input$create, { + rv$data <- data_split_r() + }) + + return(reactive(rv$data)) + } + ) +} + + +modal_string_split <- function(id, + title = i18n$t("Split character string"), + easyClose = TRUE, + size = "xl", + footer = NULL) { + ns <- NS(id) + showModal(modalDialog( + title = tagList(title, datamods:::button_close_modal()), + string_split_ui(id), + tags$div( + style = "display: none;", + textInput(inputId = ns("hidden"), label = NULL, value = datamods:::genId()) + ), + easyClose = easyClose, + size = size, + footer = footer + )) +} + + +### Helpers + +#' Separate string wide or long +#' +#' @param data data +#' @param col column +#' @param delim delimiter +#' @param direction target direction +#' +#' @returns data.frame +#' @export +#' +separate_string <- function(data, col, delim, direction = c("wide", "long")) { + direction <- match.arg(direction) + + if (direction == "long") { + out <- data |> + dplyr::mutate(id_str_split = dplyr::row_number()) |> + dplyr::group_by_at("id_str_split") |> + tidyr::separate_longer_delim(cols = tidyselect::all_of(col), delim = delim) |> + dplyr::mutate(instance_str_split = dplyr::row_number()) |> + # add_instance(by="id") + dplyr::ungroup() |> + dplyr::mutate(dplyr::across(tidyselect::matches(col), trimws)) + } else if (direction == "wide") { + ## Experiment of wide to long + + out <- data |> + tidyr::separate_wider_delim( + cols = tidyselect::all_of(col), + delim = delim, + names_sep = "_", + too_few = "align_start" + ) |> + dplyr::mutate(dplyr::across(tidyselect::starts_with(col), trimws)) + } + + out +} + + + +#' Detect delimiters in string based on allowed delimiters +#' +#' @description +#' Accepts any repeat of delimiters and includes surrounding whitespace +#' +#' +#' @param text character vector +#' @param delimiters allowed delimiters +#' +#' @returns character vector +#' @export +#' +#' @examples +#' sapply(c("Walk - run", "Sel__Re", "what;now"), detect_delimiter) +detect_delimiter <- function(data, delimiters = c("_", "-", ";", "\n",",")) { + # Create patterns for each delimiter with potential surrounding whitespace + patterns <- paste0("\\s*\\", delimiters, "+\\s*") + + # Check each pattern + out <- sapply(patterns, \(.x){ + if (grepl(.x, data)) { + # Extract the actual matched delimiter with whitespace + regmatches(data, regexpr(.x, data)) + } + }) + + Reduce(c, out) +} diff --git a/R/ui_elements.R b/R/ui_elements.R index f216beee..3d23d3a2 100644 --- a/R/ui_elements.R +++ b/R/ui_elements.R @@ -99,7 +99,7 @@ ui_elements <- function(selection) { import_globalenv_ui( id = "env", title = NULL, - packages = c("NHANES", "stRoke", "datasets") + packages = c("NHANES", "stRoke", "datasets", "MASS") ) ), # shiny::conditionalPanel( @@ -268,7 +268,7 @@ ui_elements <- function(selection) { shiny::tags$br(), shiny::fluidRow( shiny::column( - width = 4, + width = 3, shiny::actionButton( inputId = "modal_update", label = i18n$t("Reorder factor levels"), @@ -280,7 +280,7 @@ ui_elements <- function(selection) { shiny::tags$br() ), shiny::column( - width = 4, + width = 3, shiny::actionButton( inputId = "modal_cut", label = i18n$t("New factor"), @@ -292,7 +292,19 @@ ui_elements <- function(selection) { shiny::tags$br() ), shiny::column( - width = 4, + width = 3, + shiny::actionButton( + inputId = "modal_string", + label = i18n$t("Split text"), + width = "100%" + ), + shiny::tags$br(), + shiny::helpText(i18n$t("Split a text column by a custom delimiter.")), + shiny::tags$br(), + shiny::tags$br() + ), + shiny::column( + width = 3, shiny::actionButton( inputId = "modal_column", label = i18n$t("New variable"), diff --git a/examples/string_split_demo.R b/examples/string_split_demo.R new file mode 100644 index 00000000..dd70f5a1 --- /dev/null +++ b/examples/string_split_demo.R @@ -0,0 +1,54 @@ +library(shiny) +library(reactable) + +ui <- fluidPage( + theme = bslib::bs_theme(version = 5L, preset = "bootstrap"), + shinyWidgets::html_dependency_winbox(), + tags$h2("Split string"), + fluidRow( + column( + width = 4, + actionButton("modal", "Or click here to open a modal to create a column") + ), + column( + width = 8, + reactableOutput(outputId = "table"), + verbatimTextOutput("code") + ) + ) +) + +server <- function(input, output, session) { + rv <- reactiveValues( + data = MASS::Cars93[, c(1, 3, 4, 5, 6, 10)], + out = NULL + ) + + # modal window mode + observeEvent(input$modal, modal_string_split("modal")) + + rv$out <- create_column_server( + id = "modal", + data_r = reactive(rv$data) + ) + + + # Show result + output$table <- renderReactable({ + data <- req(rv$data) + reactable( + data = data, + bordered = TRUE, + compact = TRUE, + striped = TRUE + ) + }) + + output$code <- renderPrint({ + attr(rv$data, "code") + }) +} + +if (interactive()) { + shinyApp(ui, server) +} diff --git a/man/detect_delimiter.Rd b/man/detect_delimiter.Rd new file mode 100644 index 00000000..2c5f69b7 --- /dev/null +++ b/man/detect_delimiter.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/separate_string.R +\name{detect_delimiter} +\alias{detect_delimiter} +\title{Detect delimiters in string based on allowed delimiters} +\usage{ +detect_delimiter(data, delimiters = c("_", "-", ";", "\\n", ",")) +} +\arguments{ +\item{delimiters}{allowed delimiters} + +\item{text}{character vector} +} +\value{ +character vector +} +\description{ +Accepts any repeat of delimiters and includes surrounding whitespace +} +\examples{ +sapply(c("Walk - run", "Sel__Re", "what;now"), detect_delimiter) +} diff --git a/man/separate_string.Rd b/man/separate_string.Rd new file mode 100644 index 00000000..9c9b6f24 --- /dev/null +++ b/man/separate_string.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/separate_string.R +\name{separate_string} +\alias{separate_string} +\title{Separate string wide or long} +\usage{ +separate_string(data, col, delim, direction = c("wide", "long")) +} +\arguments{ +\item{data}{data} + +\item{col}{column} + +\item{delim}{delimiter} + +\item{direction}{target direction} +} +\value{ +data.frame +} +\description{ +Separate string wide or long +}