Skip to content

Commit

Permalink
added step to reclassify first_learned data as self-taught
Browse files Browse the repository at this point in the history
  • Loading branch information
CHCRowley committed Jan 2, 2024
1 parent 711e839 commit 45fb6c1
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 5 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ export(apply_skip_logic)
export(break_q_names)
export(calculate_freqs)
export(check_skip_logic)
export(clean_data)
export(clean_departments)
export(clean_first_learned)
export(clean_workplace)
export(compare_models)
export(create_filtered_pages)
Expand Down
46 changes: 46 additions & 0 deletions R/data_cleaning.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,25 @@ rename_cols <- function(data) {
return(data)
}

#' @title Clean data
#'
#' @description Run the department, workplace and first_learned
#' recategorisation steps on the dataset, in that order
#'
#' @param data cleaned CARS dataset
#'
#' @return CARS dataset
#' @export

clean_data <- function(data) {

  # Each step takes the dataset and hands back the updated dataset,
  # so the steps are chained by reassigning the result.
  data <- clean_departments(data)
  data <- clean_workplace(data)
  data <- clean_first_learned(data)

  return(data)

}


#' @title Clean department data
#'
#' @description add NHS to department list and merge departments where needed.
Expand Down Expand Up @@ -207,3 +226,30 @@ clean_workplace <- function(data) {
return(data)

}

#' @title Clean first learned data
#'
#' @description Reclassify free text first_learned responses as
#' "Self-taught" when they contain a term commonly used to describe
#' self-directed learning (e.g. "self", "hobby", "spare time"). Matching is
#' case-insensitive and on substrings. Missing responses and responses that
#' contain none of the terms are left unchanged.
#'
#' @param data cleaned CARS dataset; the first_learned column is read and
#' updated
#'
#' @return CARS dataset with matching first_learned responses replaced by
#' "Self-taught"
#' @export

clean_first_learned <- function(data) {

  self_taught_terms <- c("self",
                         "hobby",
                         "personal",
                         "independ",
                         "home",
                         "for fun",
                         "free time",
                         "spare time",
                         "childhood")

  # One alternation pattern: a response matches if it contains any term.
  pattern <- paste(self_taught_terms, collapse = "|")

  responses <- data$first_learned

  # grepl() returns FALSE (never NA) for missing responses, so the mask is
  # safe to use as a subscript; tolower() makes the match case-insensitive.
  is_self_taught <- grepl(pattern, tolower(responses))

  if (is.factor(responses)) {
    if (any(is_self_taught)) {
      # Assigning a value that is not an existing level turns it into NA
      # (with a warning), so make sure the level exists before assigning.
      levels(responses) <- union(levels(responses), "Self-taught")
      responses[is_self_taught] <- "Self-taught"
    }
  } else {
    responses[is_self_taught] <- "Self-taught"
  }

  data$first_learned <- responses

  return(data)

}
1 change: 1 addition & 0 deletions R/frequency-tables.R
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ summarise_where_learned_code <- function(data){
"Education",
"Previous private sector employment",
"Previous public sector employment",
"Self-taught",
"Other")

data <- data %>%
Expand Down
3 changes: 1 addition & 2 deletions main.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ library(magrittr)
data <- CARS::get_tidy_data_file("2023_data.csv") %>%
CARS::rename_cols() %>%
CARS::apply_skip_logic() %>%
CARS::clean_workplace() %>%
CARS::clean_departments() %>%
CARS::clean_data() %>%
CARS::derive_vars()

CARS::create_filtered_pages(data, type = "departments")
Expand Down
17 changes: 17 additions & 0 deletions man/clean_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/clean_first_learned.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 6 additions & 3 deletions tests/testthat/test-summarise_where_learned_code.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,22 @@ dummy_data <- data.frame(
"Sometimes",
"Regularly",
"All the time"),
each=18),
each = 21),

other_coding_experience = rep(c(
NA,
"Yes",
"No"),
times = 6,
each = 6),
each = 7),

first_learned = rep(c(
NA,
"Current employment",
"Education",
"Previous private sector employment",
"Previous public sector employment",
"Self-taught",
"Other"),
times = 18)

Expand All @@ -46,15 +47,17 @@ test_that("summarise_where_learned_code output is as expected", {
"Education",
"Previous private sector employment",
"Previous public sector employment",
"Self-taught",
"Other"),
levels = c(
"Current employment",
"Education",
"Previous private sector employment",
"Previous public sector employment",
"Self-taught",
"Other")),

n = c(19/47, rep(7/47, times=4))
n = c(24/64, rep(8/64, times=5))

)

Expand Down

0 comments on commit 45fb6c1

Please sign in to comment.