# Extract data from Stata file for data dictionary Extract data from Stata file for data dictionary ## Usage ``` r ds2dd_detailed( data, add.auto.id = FALSE, date.format = "dmy", form.name = NULL, form.sep = NULL, form.prefix = TRUE, field.type = NULL, field.label = NULL, field.label.attr = "label", field.validation = NULL, metadata = names(REDCapCAST::redcapcast_meta), convert.logicals = FALSE ) ``` ## Arguments - data: data frame - add.auto.id: flag to add id column - date.format: date format, character string. ymd/dmy/mdy. Default is dmy. - form.name: manually specify form name(s). Vector of length 1 or ncol(data). Default is NULL and "data" is used. - form.sep: If the supplied dataset has form names as suffix or prefix to the column/variable names, the separator can be specified. If supplied, the form.name is ignored. Default is NULL. - form.prefix: Flag to set if form is prefix (TRUE) or suffix (FALSE) to the column names. Assumes all columns have pre- or suffix if specified. - field.type: manually specify field type(s). Vector of length 1 or ncol(data). Default is NULL and "text" is used for everything but factors, which will get "radio". - field.label: manually specify field label(s). Vector of length 1 or ncol(data). Default is NULL and colnames(data) is used or attribute \`field.label.attr\` for haven_labelled data set (imported .dta file with \`haven::read_dta()\`). - field.label.attr: attribute name for named labels for haven_labelled data set (imported .dta file with \`haven::read_dta()\`). Default is "label". - field.validation: manually specify field validation(s). Vector of length 1 or ncol(data). Default is NULL and \`levels()\` are used for factors or attribute \`factor.labels.attr\` for haven_labelled data set (imported .dta file with \`haven::read_dta()\`). - metadata: REDCap metadata headings. Default is names(REDCapCAST::redcapcast_meta). - convert.logicals: convert logicals to factor. Default is FALSE. 
## Value list of length 2 (elements \`data\` and \`meta\`) ## Details This function is a natural development of the ds2dd() function. It assumes that the first column is the ID column; no checks are performed. Always inspect the data dictionary before upload. Ensure that the data set is formatted with as much information as possible. \`field.type\` can be supplied manually (see Arguments). ## Examples ``` r ## Basic parsing with default options requireNamespace("REDCapCAST") redcapcast_data |> dplyr::select(-dplyr::starts_with("redcap_")) |> ds2dd_detailed() #> $data #> # A tibble: 25 × 24 #> record_id cpr inclusion inclusion_time dob age age_integer sex #> #> 1 1 12034… 2023-03-13 12:38:49 1940-03-12 83.0 83 fema… #> 2 2 01023… 2023-03-01 10:38:57 1934-02-01 89.1 89 male #> 3 2 NA NA NA NA NA NA NA #> 4 2 NA NA NA NA NA NA NA #> 5 3 23015… 2022-03-08 12:01:07 1956-01-23 66.1 66 male #> 6 3 NA NA NA NA NA NA NA #> 7 3 NA NA NA NA NA NA NA #> 8 3 NA NA NA NA NA NA NA #> 9 3 NA NA NA NA NA NA NA #> 10 3 NA NA NA NA NA NA NA #> # ℹ 15 more rows #> # ℹ 16 more variables: cohabitation , hypertension , diabetes , #> # region , baseline_data_start_complete , mrs_assessed , #> # mrs_date , mrs_score , mrs_complete , con_mrs , #> # con_calc , consensus_complete , event_datetime , #> # event_age , event_type , new_event_complete #> #> $meta #> # A tibble: 24 × 18 #> field_name form_name section_header field_type field_label #> #> 1 record_id data NA text record_id #> 2 cpr data NA text cpr #> 3 inclusion data NA text inclusion #> 4 inclusion_time data NA text inclusion_time #> 5 dob data NA text dob #> 6 age data NA text age #> 7 age_integer data NA text age_integer #> 8 sex data NA text sex #> 9 cohabitation data NA text cohabitation #> 10 hypertension data NA text hypertension #> # ℹ 14 more rows #> # ℹ 13 more variables: select_choices_or_calculations , field_note , #> # text_validation_type_or_show_slider_number , #> # text_validation_min , text_validation_max , identifier , #> # branching_logic , required_field , custom_alignment 
, #> # question_number , matrix_group_name , matrix_ranking , #> # field_annotation #> #> attr(,"class") #> [1] "REDCapCAST" "list" ## Adding a record_id field iris |> ds2dd_detailed(add.auto.id = TRUE) #> $data #> # A tibble: 150 × 6 #> record_id sepallength sepalwidth petallength petalwidth species #> #> 1 1 5.1 3.5 1.4 0.2 1 #> 2 2 4.9 3 1.4 0.2 1 #> 3 3 4.7 3.2 1.3 0.2 1 #> 4 4 4.6 3.1 1.5 0.2 1 #> 5 5 5 3.6 1.4 0.2 1 #> 6 6 5.4 3.9 1.7 0.4 1 #> 7 7 4.6 3.4 1.4 0.3 1 #> 8 8 5 3.4 1.5 0.2 1 #> 9 9 4.4 2.9 1.4 0.2 1 #> 10 10 4.9 3.1 1.5 0.1 1 #> # ℹ 140 more rows #> #> $meta #> # A tibble: 6 × 18 #> field_name form_name section_header field_type field_label #> #> 1 record_id data NA text record_id #> 2 sepallength data NA text Sepal.Length #> 3 sepalwidth data NA text Sepal.Width #> 4 petallength data NA text Petal.Length #> 5 petalwidth data NA text Petal.Width #> 6 species data NA radio Species #> # ℹ 13 more variables: select_choices_or_calculations , field_note , #> # text_validation_type_or_show_slider_number , #> # text_validation_min , text_validation_max , identifier , #> # branching_logic , required_field , custom_alignment , #> # question_number , matrix_group_name , matrix_ranking , #> # field_annotation #> #> attr(,"class") #> [1] "REDCapCAST" "list" ## Passing form name information to function iris |> ds2dd_detailed( add.auto.id = TRUE, form.name = sample(c("b", "c"), size = 6, replace = TRUE, prob = rep(.5, 2)) ) |> purrr::pluck("meta") #> # A tibble: 6 × 18 #> field_name form_name section_header field_type field_label #> #> 1 record_id c NA text record_id #> 2 sepallength b NA text Sepal.Length #> 3 sepalwidth c NA text Sepal.Width #> 4 petallength b NA text Petal.Length #> 5 petalwidth c NA text Petal.Width #> 6 species b NA radio Species #> # ℹ 13 more variables: select_choices_or_calculations , field_note , #> # text_validation_type_or_show_slider_number , #> # text_validation_min , text_validation_max , identifier , #> # branching_logic , 
required_field , custom_alignment , #> # question_number , matrix_group_name , matrix_ranking , #> # field_annotation mtcars |> dplyr::mutate(unknown = NA) |> numchar2fct() |> ds2dd_detailed(add.auto.id = TRUE) #> $data #> # A tibble: 32 × 13 #> record_id mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 1 21 2 160 110 3.9 2.62 16.5 1 2 2 4 #> 2 2 21 2 160 110 3.9 2.88 17.0 1 2 2 4 #> 3 3 22.8 1 108 93 3.85 2.32 18.6 2 2 2 1 #> 4 4 21.4 2 258 110 3.08 3.22 19.4 2 1 1 1 #> 5 5 18.7 3 360 175 3.15 3.44 17.0 1 1 1 2 #> 6 6 18.1 2 225 105 2.76 3.46 20.2 2 1 1 1 #> 7 7 14.3 3 360 245 3.21 3.57 15.8 1 1 1 4 #> 8 8 24.4 1 147. 62 3.69 3.19 20 2 1 2 2 #> 9 9 22.8 1 141. 95 3.92 3.15 22.9 2 1 2 2 #> 10 10 19.2 2 168. 123 3.92 3.44 18.3 2 1 2 4 #> # ℹ 22 more rows #> # ℹ 1 more variable: unknown #> #> $meta #> # A tibble: 13 × 18 #> field_name form_name section_header field_type field_label #> #> 1 record_id data NA text record_id #> 2 mpg data NA text mpg #> 3 cyl data NA radio cyl #> 4 disp data NA text disp #> 5 hp data NA text hp #> 6 drat data NA text drat #> 7 wt data NA text wt #> 8 qsec data NA text qsec #> 9 vs data NA radio vs #> 10 am data NA radio am #> 11 gear data NA radio gear #> 12 carb data NA radio carb #> 13 unknown data NA truefalse unknown #> # ℹ 13 more variables: select_choices_or_calculations , field_note , #> # text_validation_type_or_show_slider_number , #> # text_validation_min , text_validation_max , identifier , #> # branching_logic , required_field , custom_alignment , #> # question_number , matrix_group_name , matrix_ranking , #> # field_annotation #> #> attr(,"class") #> [1] "REDCapCAST" "list" ## Using column name suffix to carry form name data <- iris |> ds2dd_detailed(add.auto.id = TRUE) |> purrr::pluck("data") names(data) <- glue::glue("{sample(x = c('a','b'),size = length(names(data)), replace=TRUE,prob = rep(x=.5,2))}__{names(data)}") data |> ds2dd_detailed(form.sep = "__") #> $data #> # A tibble: 150 × 6 #> record_id sepallength 
sepalwidth petallength petalwidth species #> #> 1 1 5.1 3.5 1.4 0.2 1 #> 2 2 4.9 3 1.4 0.2 1 #> 3 3 4.7 3.2 1.3 0.2 1 #> 4 4 4.6 3.1 1.5 0.2 1 #> 5 5 5 3.6 1.4 0.2 1 #> 6 6 5.4 3.9 1.7 0.4 1 #> 7 7 4.6 3.4 1.4 0.3 1 #> 8 8 5 3.4 1.5 0.2 1 #> 9 9 4.4 2.9 1.4 0.2 1 #> 10 10 4.9 3.1 1.5 0.1 1 #> # ℹ 140 more rows #> #> $meta #> # A tibble: 6 × 18 #> field_name form_name section_header field_type field_label #> #> 1 record_id b NA text record_id #> 2 sepallength a NA text sepallength #> 3 sepalwidth a NA text sepalwidth #> 4 petallength a NA text petallength #> 5 petalwidth a NA text petalwidth #> 6 species a NA text species #> # ℹ 13 more variables: select_choices_or_calculations , field_note , #> # text_validation_type_or_show_slider_number , #> # text_validation_min , text_validation_max , identifier , #> # branching_logic , required_field , custom_alignment , #> # question_number , matrix_group_name , matrix_ranking , #> # field_annotation #> #> attr(,"class") #> [1] "REDCapCAST" "list" ```