Department names (#68)

* Updated department name cleaning for wave5
* Added data engineers to prof list
* Added docs/ to gitignore
* Update clean_departments test

---------

Co-authored-by: ldavies99 <[email protected]>
best-practice-and-impact · Dec 1, 2023 · d672e73 · d672e73
1 parent 27f4950
commit d672e73
Show file tree

Hide file tree

Showing 4 changed files with 18 additions and 8 deletions.
diff --git a/.gitignore b/.gitignore
@@ -7,3 +7,4 @@ temp/
 *.csv
 *.rda
 exploratory_scripts/
+docs/
diff --git a/R/data_cleaning.R b/R/data_cleaning.R
@@ -140,13 +140,15 @@ rename_cols <- function(data) {
 
 clean_departments <- function(data) {
 
-  data$department[grepl("forest research", tolower(data$other_department_name))] <- "Forestry Commission"
+  data$department[data$department == "Foreign, Commonwealth & Development Office (excl. agencies)"] <- "Foreign, Commonwealth and Development Office (excl. agencies)"
 
   data$department[data$workplace == "NHS"] <- "NHS"
 
   defra_orgs <- c(
     "Department for Environment, Food and Rural Affairs (excl. agencies)",
     "Forestry Commission",
+    "Forest Research",
+    "Forestry England",
     "Animal and Plant Health Agency",
     "Centre for Environment, Fisheries and Aquaculture Science",
     "Rural Payments Agency",

diff --git a/R/render.R b/R/render.R
@@ -41,6 +41,7 @@ create_filtered_pages <- function(data, type = c("professions", "departments"),
 
   if (type == "professions") {
     prof_cols <- c(
+      "prof_DE",
       "prof_DS",
       "prof_DDAT",
       "prof_GAD",
@@ -52,6 +53,7 @@ create_filtered_pages <- function(data, type = c("professions", "departments"),
     )
 
     prof_names <- c(
+      "government data engineers",
       "government data scientists",
       "digital and data profession (DDAT)",
       "government actuary's department (GAD)",
@@ -63,6 +65,7 @@ create_filtered_pages <- function(data, type = c("professions", "departments"),
     )
 
     filenames <- c(
+      "data-engineers.qmd",
       "data-scientists.qmd",
       "digital-and-data.qmd",
       "government-actuarys-department.qmd",

diff --git a/tests/testthat/test-clean_departments.R b/tests/testthat/test-clean_departments.R
@@ -3,32 +3,36 @@ test_that("clean_departments output is as expected", {
 
   dummy_data <- data.frame(department = c(NA,
                                           "test",
-                                          "test",
+                                          "Foreign, Commonwealth & Development Office (excl. agencies)",
                                           "Department for Environment, Food and Rural Affairs (excl. agencies)",
+                                          "Forestry Commission",
+                                          "Forest Research",
+                                          "Forestry England",
                                           "Animal and Plant Health Agency",
                                           "Centre for Environment, Fisheries and Aquaculture Science",
                                           "Rural Payments Agency",
                                           "Environment Agency",
                                           "Marine Management Organisation",
                                           "Natural England"),
-                           other_department_name = c(NA, "Forest research", rep("test", 8)),
-                           workplace = c(NA, "test", "NHS", rep("test", 7)))
+                           workplace = c(NA, "NHS", rep("test", 11)))
 
   got <- clean_departments(dummy_data)
 
   expected <- data.frame(department = c(NA,
-                                        "Forestry Commission",
                                         "NHS",
+                                        "Foreign, Commonwealth and Development Office (excl. agencies)",
                                         "Department for Environment, Food and Rural Affairs (excl. agencies)",
+                                        "Forestry Commission",
+                                        "Forest Research",
+                                        "Forestry England",
                                         "Animal and Plant Health Agency",
                                         "Centre for Environment, Fisheries and Aquaculture Science",
                                         "Rural Payments Agency",
                                         "Environment Agency",
                                         "Marine Management Organisation",
                                         "Natural England"),
-                         other_department_name = c(NA, "Forest research", rep("test", 8)),
-                         workplace = c(NA, "test", "NHS", rep("test", 7)),
-                         defra = c(FALSE, TRUE, FALSE, rep(TRUE, 7)))
+                         workplace = c(NA, "NHS", rep("test", 11)),
+                         defra = c(rep(FALSE, 3), rep(TRUE, 10)))
 
   expect_equal(got, expected)