Merge branch 'main' into pub

best-practice-and-impact · Feb 5, 2024 · 16166ad · 16166ad
2 parents 33563eb + 8262305
commit 16166ad
Show file tree

Hide file tree

Showing 86 changed files with 3,468 additions and 1,059 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -7,4 +7,5 @@
 ^quarto/main$
 ^R/test\.R$
 ^main\.R$
-^.*.quarto
+^.*.quarto
+^data-raw$
diff --git a/.gitignore b/.gitignore
@@ -7,3 +7,6 @@ temp/
 *.csv
 *.rda
 exploratory_scripts/
+docs/
+quarto/qa/summary_qa.html
+quarto/summary_qa_files/
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -33,3 +33,5 @@ LazyData: true
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.3
 Config/testthat/edition: 3
+Depends: 
+    R (>= 2.10)
diff --git a/NAMESPACE b/NAMESPACE
@@ -5,7 +5,10 @@ export(apply_skip_logic)
 export(break_q_names)
 export(calculate_freqs)
 export(check_skip_logic)
+export(clean_data)
 export(clean_departments)
+export(clean_first_learned)
+export(clean_workplace)
 export(compare_models)
 export(create_filtered_pages)
 export(create_y_lab)
@@ -43,6 +46,11 @@ export(w2_enforce_streaming)
 export(w2_rename_cols)
 export(w3_enforce_streaming)
 export(w3_rename_cols)
+export(w4_check_skip_logic)
+export(w4_clean_departments)
+export(w4_enforce_skip_logic)
+export(w4_enforce_streaming)
+export(w4_rename_cols)
 export(wrap_outputs)
 importFrom(dplyr,across)
 importFrom(dplyr,all_of)

diff --git a/R/data_cleaning.R b/R/data_cleaning.R
@@ -11,7 +11,7 @@
 
 rename_cols <- function(data) {
   if (ncol(data) != 112) {
-    stop("Unexpected input: incorrect number of columns. Please use the 2022 CARS dataset.")
+    stop("Unexpected input: incorrect number of columns. Please use the 2023 CARS dataset.")
   }
 
   colnames(data)[c(1, 7:ncol(data))] <- c(
@@ -23,6 +23,7 @@ rename_cols <- function(data) {
     "CS_grade",
     "department",
     "other_department_name",
+    "prof_DE",
     "prof_DS",
     "prof_DDAT",
     "prof_GAD",
@@ -34,6 +35,14 @@ rename_cols <- function(data) {
     "prof_CS_none",
     "prof_CS_other",
     "ONS_directorate",
+    "pay_band",
+    "NHS_band",
+    "NJC_grade",
+    "primary_work_country",
+    "England_NHS_organisation",
+    "Scotland_NHS_organisation",
+    "Wales_NHS_organisation",
+    "Northern_Ireland_NHS_organisation",
     "highest_qualification",
     "qual_1_subject",
     "qual_1_level",
@@ -46,46 +55,35 @@ rename_cols <- function(data) {
     "qual_3_learn_code",
     "code_freq",
     "management",
-    "ops_analysis",
-    "ops_cleaning",
-    "ops_linking",
-    "ops_transfer_migration",
-    "ops_vis",
-    "ops_machine_learning",
-    "ops_modelling",
-    "ops_QA",
-    "ops_other",
-    "ops_other_name",
-    "knowledge_R",
+    "access_matlab",
+    "access_python",
     "access_R",
-    "knowledge_SQL",
-    "access_SQL",
-    "knowledge_SAS",
     "access_SAS",
-    "knowledge_VBA",
+    "access_SPSS",
+    "access_SQL",
+    "access_stata",
     "access_VBA",
+    "access_open_source_other",
+    "access_licensed_other",
+    "access_other_specified",
+    "knowledge_matlab",
     "knowledge_python",
-    "access_python",
+    "knowledge_R",
+    "knowledge_SAS",
     "knowledge_SPSS",
-    "access_SPSS",
+    "knowledge_SQL",
     "knowledge_stata",
-    "access_stata",
-    "knowledge_JS",
-    "access_JS",
-    "knowledge_java",
-    "access_java",
-    "knowledge_C",
-    "access_C",
-    "knowledge_matlab",
-    "access_matlab",
-    "knowledge_access_other",
+    "knowledge_VBA",
+    "knowledge_licensed_other",
+    "knowledge_open_source_other",
+    "knowledge_other_specified",
     "knowledge_git",
     "access_git",
     "other_coding_experience",
-    "coding_ability_change",
-    "prev_coding_experience",
     "first_learned",
+    "coding_ability_change",
     "heard_of_RAP",
+    "have_RAP_champ",
     "know_RAP_champ",
     "strategy_knowledge",
     "RAP_confident",
@@ -102,11 +100,13 @@ rename_cols <- function(data) {
     "prac_review",
     "prac_functions",
     "prac_unit_test",
+    "prac_other_automated",
     "prac_package",
     "prac_dir_structure",
     "prac_style",
     "prac_automated_QA",
-    "prac_AQUA_book",
+    "prac_development_QA",
+    "prac_proportionate_QA",
     "doc_comments",
     "doc_functions",
     "doc_readme",
@@ -129,6 +129,25 @@ rename_cols <- function(data) {
   return(data)
 }
 
+#' @title Clean data
+#'
+#' @description Recategorise department, workplace and first_learned data
+#'
+#' @param data cleaned CARS dataset
+#'
+#' @return CARS dataset
+#' @export
+
+clean_data <- function(data){
+
+ data %>%
+   clean_departments() %>%
+   clean_workplace() %>%
+   clean_first_learned()
+
+}
+
+
 #' @title Clean department data
 #'
 #' @description add NHS to department list and merge departments where needed.
@@ -140,13 +159,29 @@ rename_cols <- function(data) {
 
 clean_departments <- function(data) {
 
-  data$department[grepl("forest research", tolower(data$other_department_name))] <- "Forestry Commission"
+  data$department[data$department == "Foreign, Commonwealth & Development Office (excl. agencies)"] <- "Foreign, Commonwealth and Development Office (excl. agencies)"
 
   data$department[data$workplace == "NHS"] <- "NHS"
 
+  data$department[data$other_department_name == "Office for National Statistics"] <- "Office for National Statistics"
+
+  data$department[data$other_department_name == "Data Science Campus"] <- "Office for National Statistics"
+
+  data$department[data$other_department_name == "Welsh Revenue Authority"] <- "Welsh Government"
+
+  data$department[data$other_department_name == "Equality Hub, Cabinet Office"] <- "Cabinet Office (excl. agencies)"
+
+  data$department[data$other_department_name == "Natural England"] <- "Natural England"
+
+  data$department[data$other_department_name == "Department for Communities"] <- "Northern Ireland Executive"
+
+  data$department[data$other_department_name == "Department of Education Northern Ireland"] <- "Northern Ireland Executive"
+
   defra_orgs <- c(
     "Department for Environment, Food and Rural Affairs (excl. agencies)",
     "Forestry Commission",
+    "Forest Research",
+    "Forestry England",
     "Animal and Plant Health Agency",
     "Centre for Environment, Fisheries and Aquaculture Science",
     "Rural Payments Agency",
@@ -161,3 +196,60 @@ clean_departments <- function(data) {
 
 }
 
+#' @title Clean workplace data
+#'
+#' @description reclassify 'other' text responses into CS/NHS
+#'
+#' @param data cleaned CARS dataset
+#'
+#' @return CARS dataset
+#' @export
+
+clean_workplace <- function(data) {
+
+  data$workplace[data$workplace == "MOD"] <- "Civil service, including devolved administrations"
+
+  data$workplace[data$workplace == "HMRC"] <- "Civil service, including devolved administrations"
+
+  data$workplace[data$workplace == "The Pensions Regulator"] <- "Civil service, including devolved administrations"
+
+  data$workplace[data$workplace == "Scottish Funding Council"] <- "Civil service, including devolved administrations"
+
+  data$workplace[data$workplace == "Office for Students"] <- "Civil service, including devolved administrations"
+
+  data$workplace[data$workplace == "Office for students"] <- "Civil service, including devolved administrations"
+
+  data$workplace[data$workplace == "OfS"] <- "Civil service, including devolved administrations"
+
+  data$workplace[data$workplace == "Dstl"] <- "Civil service, including devolved administrations"
+
+  return(data)
+
+}
+
+#' @title Clean first learned data
+#'
+#' @description reclassify 'other' free text responses into self-taught based on common terms used
+#'
+#' @param data cleaned CARS dataset
+#'
+#' @return CARS dataset
+#' @export
+
+clean_first_learned <- function(data) {
+
+  matches <- c("self",
+               "hobby",
+               "personal",
+               "independ",
+               "home",
+               "for fun",
+               "free time",
+               "spare time",
+               "childhood")
+
+  data$first_learned[stringr::str_detect(tolower(data$first_learned), stringr::str_c(matches, collapse = "|"))] <- "Self-taught"
+
+  return(data)
+
+}
diff --git a/R/derive_vars.R b/R/derive_vars.R
@@ -11,7 +11,8 @@
 derive_vars <- function(data) {
   data <- data %>%
     derive_language_status() %>%
-    derive_rap_score()
+    derive_rap_score() %>%
+    derive_rap_champ_status()
 
   return(data)
 }
@@ -20,7 +21,7 @@ derive_vars <- function(data) {
 
 #' @title Derive language status
 #'
-#' @description Derve the status of each programmming language as "access" (access only), "knowledge" (knowledge only), "both" or "neither".
+#' @description Derive the status of each programming language as "access" (access only), "knowledge" (knowledge only), "both" or "neither".
 #'
 #' @param data tidied CARS wave 3 data (data.frame).
 #'
@@ -85,7 +86,7 @@ derive_basic_rap_scores <- function(data) {
                         "prac_open_source_own",
                         "prac_version_control",
                         "prac_review",
-                        "prac_AQUA_book",
+                        "prac_proportionate_QA",
                         "doc_comments",
                         "doc_readme")
 
@@ -102,7 +103,7 @@ derive_basic_rap_scores <- function(data) {
                        "open_code_score",
                        "version_control_score",
                        "peer_review_score",
-                       "AQUA_book_score",
+                       "proportionate_QA_score",
                        "doc_score")
 
   high_vals <- c("Regularly", "All the time")
@@ -115,8 +116,8 @@ derive_basic_rap_scores <- function(data) {
                               .x %in% high_vals ~ 1,
                               TRUE ~ 0),
                   .names = "{.col}_score")) %>%
-    mutate(doc_score = as.integer(.data$doc_comments_score & .data$doc_readme_score)) %>%
-    select(-c(.data$doc_comments_score, .data$doc_readme_score)) %>%
+    mutate(doc_score = as.integer(doc_comments_score & doc_readme_score)) %>%
+    select(-c(doc_comments_score, doc_readme_score)) %>%
     rename_with(~ score_col_names[which(paste0(prac_cols, "_score") == .x)],
                 .cols = paste0(prac_cols,
                                "_score")) %>%
@@ -181,3 +182,26 @@ derive_advanced_rap_scores <- function(data) {
   return(data)
 
 }
+
+
+#' @title Derive RAP Champion status
+#'
+#' @description Derive RAP Champion status column from existing variables and add to the dataframe.
+#'
+#' @param data a data frame containing cleaned CARS wave 5 data
+#'
+#' @return dataframe containing the additional RAP Champion status column
+#'
+#' @importFrom dplyr mutate case_when
+derive_rap_champ_status <- function(data){
+
+  data <- data %>%
+          mutate(RAP_champ_status = case_when(have_RAP_champ == "Yes" & know_RAP_champ == "Yes, and I am a RAP Champion" ~ "Yes, and I am a RAP Champion",
+                                              have_RAP_champ == "Yes" & know_RAP_champ == "Yes" ~ "Yes, and I know who the RAP Champion is",
+                                              have_RAP_champ == "Yes" & know_RAP_champ == "No" ~ "Yes, but I don't know who the RAP Champion is",
+                                              have_RAP_champ == "No" ~ "No",
+                                              have_RAP_champ == "Don't know" ~ "I don't know"))
+
+}
+
+