Skip to content

Commit

Permalink
Merge branch 'main' into pub
Browse files Browse the repository at this point in the history
  • Loading branch information
ldavies99 committed Feb 5, 2024
2 parents 33563eb + 8262305 commit 16166ad
Show file tree
Hide file tree
Showing 86 changed files with 3,468 additions and 1,059 deletions.
3 changes: 2 additions & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@
^quarto/main$
^R/test\.R$
^main\.R$
^.*.quarto
^.*.quarto
^data-raw$
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ temp/
*.csv
*.rda
exploratory_scripts/
docs/
quarto/qa/summary_qa.html
quarto/summary_qa_files/
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,5 @@ LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
Config/testthat/edition: 3
Depends:
R (>= 2.10)
8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@ export(apply_skip_logic)
export(break_q_names)
export(calculate_freqs)
export(check_skip_logic)
export(clean_data)
export(clean_departments)
export(clean_first_learned)
export(clean_workplace)
export(compare_models)
export(create_filtered_pages)
export(create_y_lab)
Expand Down Expand Up @@ -43,6 +46,11 @@ export(w2_enforce_streaming)
export(w2_rename_cols)
export(w3_enforce_streaming)
export(w3_rename_cols)
export(w4_check_skip_logic)
export(w4_clean_departments)
export(w4_enforce_skip_logic)
export(w4_enforce_streaming)
export(w4_rename_cols)
export(wrap_outputs)
importFrom(dplyr,across)
importFrom(dplyr,all_of)
Expand Down
156 changes: 124 additions & 32 deletions R/data_cleaning.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

rename_cols <- function(data) {
if (ncol(data) != 112) {
stop("Unexpected input: incorrect number of columns. Please use the 2022 CARS dataset.")
stop("Unexpected input: incorrect number of columns. Please use the 2023 CARS dataset.")
}

colnames(data)[c(1, 7:ncol(data))] <- c(
Expand All @@ -23,6 +23,7 @@ rename_cols <- function(data) {
"CS_grade",
"department",
"other_department_name",
"prof_DE",
"prof_DS",
"prof_DDAT",
"prof_GAD",
Expand All @@ -34,6 +35,14 @@ rename_cols <- function(data) {
"prof_CS_none",
"prof_CS_other",
"ONS_directorate",
"pay_band",
"NHS_band",
"NJC_grade",
"primary_work_country",
"England_NHS_organisation",
"Scotland_NHS_organisation",
"Wales_NHS_organisation",
"Northern_Ireland_NHS_organisation",
"highest_qualification",
"qual_1_subject",
"qual_1_level",
Expand All @@ -46,46 +55,35 @@ rename_cols <- function(data) {
"qual_3_learn_code",
"code_freq",
"management",
"ops_analysis",
"ops_cleaning",
"ops_linking",
"ops_transfer_migration",
"ops_vis",
"ops_machine_learning",
"ops_modelling",
"ops_QA",
"ops_other",
"ops_other_name",
"knowledge_R",
"access_matlab",
"access_python",
"access_R",
"knowledge_SQL",
"access_SQL",
"knowledge_SAS",
"access_SAS",
"knowledge_VBA",
"access_SPSS",
"access_SQL",
"access_stata",
"access_VBA",
"access_open_source_other",
"access_licensed_other",
"access_other_specified",
"knowledge_matlab",
"knowledge_python",
"access_python",
"knowledge_R",
"knowledge_SAS",
"knowledge_SPSS",
"access_SPSS",
"knowledge_SQL",
"knowledge_stata",
"access_stata",
"knowledge_JS",
"access_JS",
"knowledge_java",
"access_java",
"knowledge_C",
"access_C",
"knowledge_matlab",
"access_matlab",
"knowledge_access_other",
"knowledge_VBA",
"knowledge_licensed_other",
"knowledge_open_source_other",
"knowledge_other_specified",
"knowledge_git",
"access_git",
"other_coding_experience",
"coding_ability_change",
"prev_coding_experience",
"first_learned",
"coding_ability_change",
"heard_of_RAP",
"have_RAP_champ",
"know_RAP_champ",
"strategy_knowledge",
"RAP_confident",
Expand All @@ -102,11 +100,13 @@ rename_cols <- function(data) {
"prac_review",
"prac_functions",
"prac_unit_test",
"prac_other_automated",
"prac_package",
"prac_dir_structure",
"prac_style",
"prac_automated_QA",
"prac_AQUA_book",
"prac_development_QA",
"prac_proportionate_QA",
"doc_comments",
"doc_functions",
"doc_readme",
Expand All @@ -129,6 +129,25 @@ rename_cols <- function(data) {
return(data)
}

#' @title Clean data
#'
#' @description Recategorise the department, workplace and first_learned data
#' by applying clean_departments(), clean_workplace() and
#' clean_first_learned(), in that order.
#'
#' @param data cleaned CARS dataset
#'
#' @return CARS dataset with the three cleaning steps applied
#' @export

clean_data <- function(data) {

  # Each step receives the output of the previous one
  departments_cleaned <- clean_departments(data)
  workplace_cleaned <- clean_workplace(departments_cleaned)

  clean_first_learned(workplace_cleaned)

}


#' @title Clean department data
#'
#' @description add NHS to department list and merge departments where needed.
Expand All @@ -140,13 +159,29 @@ rename_cols <- function(data) {

clean_departments <- function(data) {

data$department[grepl("forest research", tolower(data$other_department_name))] <- "Forestry Commission"
data$department[data$department == "Foreign, Commonwealth & Development Office (excl. agencies)"] <- "Foreign, Commonwealth and Development Office (excl. agencies)"

data$department[data$workplace == "NHS"] <- "NHS"

data$department[data$other_department_name == "Office for National Statistics"] <- "Office for National Statistics"

data$department[data$other_department_name == "Data Science Campus"] <- "Office for National Statistics"

data$department[data$other_department_name == "Welsh Revenue Authority"] <- "Welsh Government"

data$department[data$other_department_name == "Equality Hub, Cabinet Office"] <- "Cabinet Office (excl. agencies)"

data$department[data$other_department_name == "Natural England"] <- "Natural England"

data$department[data$other_department_name == "Department for Communities"] <- "Northern Ireland Executive"

data$department[data$other_department_name == "Department of Education Northern Ireland"] <- "Northern Ireland Executive"

defra_orgs <- c(
"Department for Environment, Food and Rural Affairs (excl. agencies)",
"Forestry Commission",
"Forest Research",
"Forestry England",
"Animal and Plant Health Agency",
"Centre for Environment, Fisheries and Aquaculture Science",
"Rural Payments Agency",
Expand All @@ -161,3 +196,60 @@ clean_departments <- function(data) {

}

#' @title Clean workplace data
#'
#' @description Reclassify workplace responses that name a specific
#' organisation (for example "MOD" or "HMRC") into the
#' "Civil service, including devolved administrations" category.
#'
#' @details Matching is exact and case-sensitive. Values that are not in the
#' list of recognised responses, including missing values, are left unchanged.
#'
#' @param data cleaned CARS dataset (must contain a workplace column)
#'
#' @return CARS dataset with the workplace column recoded
#' @export

clean_workplace <- function(data) {

  civil_service <- "Civil service, including devolved administrations"

  # Responses to recode. Both capitalisations of "Office for Students" are
  # listed because the comparison is case-sensitive.
  civil_service_responses <- c(
    "MOD",
    "HMRC",
    "The Pensions Regulator",
    "Scottish Funding Council",
    "Office for Students",
    "Office for students",
    "OfS",
    "Dstl"
  )

  # %in% returns FALSE (never NA) for missing values, so NAs are not touched
  data$workplace[data$workplace %in% civil_service_responses] <- civil_service

  return(data)

}

#' @title Clean first learned data
#'
#' @description Recode first_learned responses as "Self-taught" when the
#' lower-cased response contains any of a fixed set of common terms
#' (e.g. "self", "hobby", "spare time").
#'
#' @param data cleaned CARS dataset
#'
#' @return CARS dataset
#' @export

clean_first_learned <- function(data) {

  # Terms whose presence anywhere in a response marks it as self-taught
  self_taught_terms <- c(
    "self",
    "hobby",
    "personal",
    "independ",
    "home",
    "for fun",
    "free time",
    "spare time",
    "childhood"
  )

  # Single regular expression matching any one of the terms
  self_taught_pattern <- paste(self_taught_terms, collapse = "|")

  # Lower-case the responses so the match ignores capitalisation
  responses_lower <- tolower(data$first_learned)
  is_self_taught <- stringr::str_detect(responses_lower, self_taught_pattern)

  data$first_learned[is_self_taught] <- "Self-taught"

  return(data)

}
36 changes: 30 additions & 6 deletions R/derive_vars.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
derive_vars <- function(data) {
data <- data %>%
derive_language_status() %>%
derive_rap_score()
derive_rap_score() %>%
derive_rap_champ_status()

return(data)
}
Expand All @@ -20,7 +21,7 @@ derive_vars <- function(data) {

#' @title Derive language status
#'
#' @description Derve the status of each programmming language as "access" (access only), "knowledge" (knowledge only), "both" or "neither".
#' @description Derive the status of each programming language as "access" (access only), "knowledge" (knowledge only), "both" or "neither".
#'
#' @param data tidied CARS wave 3 data (data.frame).
#'
Expand Down Expand Up @@ -85,7 +86,7 @@ derive_basic_rap_scores <- function(data) {
"prac_open_source_own",
"prac_version_control",
"prac_review",
"prac_AQUA_book",
"prac_proportionate_QA",
"doc_comments",
"doc_readme")

Expand All @@ -102,7 +103,7 @@ derive_basic_rap_scores <- function(data) {
"open_code_score",
"version_control_score",
"peer_review_score",
"AQUA_book_score",
"proportionate_QA_score",
"doc_score")

high_vals <- c("Regularly", "All the time")
Expand All @@ -115,8 +116,8 @@ derive_basic_rap_scores <- function(data) {
.x %in% high_vals ~ 1,
TRUE ~ 0),
.names = "{.col}_score")) %>%
mutate(doc_score = as.integer(.data$doc_comments_score & .data$doc_readme_score)) %>%
select(-c(.data$doc_comments_score, .data$doc_readme_score)) %>%
mutate(doc_score = as.integer(doc_comments_score & doc_readme_score)) %>%
select(-c(doc_comments_score, doc_readme_score)) %>%
rename_with(~ score_col_names[which(paste0(prac_cols, "_score") == .x)],
.cols = paste0(prac_cols,
"_score")) %>%
Expand Down Expand Up @@ -181,3 +182,26 @@ derive_advanced_rap_scores <- function(data) {
return(data)

}


#' @title Derive RAP Champion status
#'
#' @description Derive RAP Champion status column from existing variables and add to the dataframe.
#'
#' @details RAP_champ_status is built from have_RAP_champ and know_RAP_champ.
#' Rows that match none of the listed combinations (including rows where the
#' relevant answers are missing) get NA, because case_when() is given no
#' default.
#'
#' @param data a data frame containing cleaned CARS wave 5 data
#'
#' @return dataframe containing the additional RAP Champion status column
#'
#' @importFrom dplyr mutate case_when
derive_rap_champ_status <- function(data) {

  data <- data %>%
    mutate(RAP_champ_status = case_when(
      have_RAP_champ == "Yes" & know_RAP_champ == "Yes, and I am a RAP Champion" ~ "Yes, and I am a RAP Champion",
      have_RAP_champ == "Yes" & know_RAP_champ == "Yes" ~ "Yes, and I know who the RAP Champion is",
      have_RAP_champ == "Yes" & know_RAP_champ == "No" ~ "Yes, but I don't know who the RAP Champion is",
      have_RAP_champ == "No" ~ "No",
      have_RAP_champ == "Don't know" ~ "I don't know"
    ))

  # Return explicitly: a function whose last expression is an assignment
  # returns its value invisibly, so a direct call would print nothing.
  return(data)

}


Loading

0 comments on commit 16166ad

Please sign in to comment.