From 1e6691b0df401ee412c1762d078c1281d0031baa Mon Sep 17 00:00:00 2001
From: Luke Davies <117165341+ldavies99@users.noreply.github.com>
Date: Mon, 8 Jan 2024 08:39:08 +0000
Subject: [PATCH] Fix summarise coding tools (#75)

* Change levels to match new survey responses and reorder the levels

* Rearrange programming languages

* Fix rap comp table
---
 R/frequency-tables.R                          | 13 ++-
 tests/testthat/test-summarise_coding_tools.R  | 82 +++++++++----------
 .../test-summarise_languages_by_prof.R        |  6 +-
 tests/testthat/test-summarise_rap_comp.R      |  6 +-
 4 files changed, 55 insertions(+), 52 deletions(-)

diff --git a/R/frequency-tables.R b/R/frequency-tables.R
index 0e12296..d926a4f 100644
--- a/R/frequency-tables.R
+++ b/R/frequency-tables.R
@@ -122,7 +122,11 @@ summarise_coding_tools <- function(data, type = list("knowledge", "access"), pro
                  "access_SPSS", "knowledge_stata", "access_stata",
                  "knowledge_matlab", "access_matlab")
 
-  levels <- c("Yes", "Don't know", "No")
+  if (type == "knowledge") {
+    levels <- c("Yes", "No", "Not required for my work")
+  } else {
+    levels <- c("Yes", "No", "Don't know")
+  }
 
   labels <- c("R", "SQL", "SAS", "VBA", "Python", "SPSS", "Stata", "Matlab")
 
@@ -130,7 +134,8 @@ summarise_coding_tools <- function(data, type = list("knowledge", "access"), pro
 
   questions <- questions[grepl(paste0(type, "_"), questions)]
 
-  frequencies <- calculate_freqs(data, questions, levels, labels, prop = prop)
+  frequencies <- calculate_freqs(data, questions, levels, labels, prop = prop) %>%
+    dplyr::arrange(match(name, c("Python", "R", "SQL", "Matlab", "SAS", "SPSS", "Stata", "VBA")))
 
   return(frequencies)
 }
@@ -424,7 +429,7 @@ summarise_rap_comp <- function(data) {
               "Team open source code",
               "Version control",
               "Peer review",
-              "Development QA",
+              "Proportionate QA",
               "Documentation",
               "Functions",
               "Unit testing",
@@ -438,7 +443,7 @@ summarise_rap_comp <- function(data) {
                  "open_code_score",
                  "version_control_score",
                  "peer_review_score",
-                 "development_QA_score",
+                 "proportionate_QA_score",
                  "doc_score",
                  "function_score",
                  "unit_test_score",
diff --git a/tests/testthat/test-summarise_coding_tools.R b/tests/testthat/test-summarise_coding_tools.R
index ac12fab..4288e4b 100644
--- a/tests/testthat/test-summarise_coding_tools.R
+++ b/tests/testthat/test-summarise_coding_tools.R
@@ -1,22 +1,22 @@
 # Coding tools frequency tables (access or knowledge)
 
 dummy_data <- data.frame(
-  knowledge_R = c("Yes", rep("No", 2), rep("Don't know", 3)),
-  access_R = c(rep("Yes", 2), "No", rep("Don't know", 3)),
-  knowledge_SQL = c(rep("Yes", 3), rep("No", 2), "Don't know"),
-  access_SQL = c("Yes", rep("No", 3), rep("Don't know", 2)),
-  knowledge_SAS = c(rep("Yes", 2), rep("No", 3), "Don't know"),
-  access_SAS = c(rep("Yes", 3), "No", rep("Don't know", 2)),
-  knowledge_VBA = c("Yes", rep("No", 2), rep("Don't know", 3)),
-  access_VBA = c(rep("Yes", 2), "No", rep("Don't know", 3)),
-  knowledge_python = c(rep("Yes", 3), rep("No", 2), "Don't know"),
-  access_python = c("Yes", rep("No", 3), rep("Don't know", 2)),
-  knowledge_SPSS = c(rep("Yes", 2), rep("No", 3), "Don't know"),
-  access_SPSS = c(rep("Yes", 3), "No", rep("Don't know", 2)),
-  knowledge_stata = c("Yes", rep("No", 2), rep("Don't know", 3)),
-  access_stata = c(rep("Yes", 2), "No", rep("Don't know", 3)),
-  knowledge_matlab = c(rep("Yes", 3), rep("No", 2), "Don't know"),
-  access_matlab = c("Yes", rep("No", 5), rep("Don't know", 0)) # Used to check zero counts aren't missing
+  knowledge_R = c("Yes", "No", "Not required for my work"),
+  access_R = c("Yes", "No", "Don't know"),
+  knowledge_SQL = c("Yes", "No", "Not required for my work"),
+  access_SQL = c("Yes", "No", "Don't know"),
+  knowledge_SAS = c("Yes", "No", "Not required for my work"),
+  access_SAS = c("Yes", "No", "Don't know"),
+  knowledge_VBA = c("Yes", "No", "Not required for my work"),
+  access_VBA = c("Yes", "No", "Don't know"),
+  knowledge_python = c("Yes", "No", "Not required for my work"),
+  access_python = c("Yes", "No", "Don't know"),
+  knowledge_SPSS = c("Yes", "No", "Not required for my work"),
+  access_SPSS = c("Yes", "No", "Don't know"),
+  knowledge_stata = c("Yes", "No", "Not required for my work"),
+  access_stata = c("Yes", "No", "Don't know"),
+  knowledge_matlab = c("Yes", "No", "No"),
+  access_matlab = c("Yes", "No", "No") # Used to check zero counts aren't missing
 )
 
 test_that("summarise_coding_tools missing data is handled correctly", {
@@ -33,19 +33,18 @@ test_that("summarise_coding_tools knowledge output is as expected", {
 
   got_knowledge <- summarise_coding_tools(dummy_data, "knowledge")
 
-  expected_knowledge <- data.frame("name" = rep(c("Matlab",
-                                                  "Python",
-                                                  "R",
-                                                  "SAS",
-                                                  "SPSS",
-                                                  "SQL",
-                                                  "Stata",
-                                                  "VBA"), each=3),
-                                   "value" = factor(rep(c("Yes", "Don't know", "No"), 8),
-                                                    levels = c("Yes", "Don't know", "No")),
-                                   "n" = c(1/2, 1/6, 1/3, 1/2, 1/6, 1/3, 1/6, 1/2,
-                                           1/3, 1/3, 1/6, 1/2, 1/3, 1/6, 1/2, 1/2,
-                                           1/6, 1/3, 1/6, 1/2, 1/3, 1/6, 1/2, 1/3))
+  expected_knowledge <- data.frame(name = rep(c("Python",
+                                                "R",
+                                                "SQL",
+                                                "Matlab",
+                                                "SAS",
+                                                "SPSS",
+                                                "Stata",
+                                                "VBA"), each=3),
+                                   value = factor(rep(c("Yes", "No", "Not required for my work"), 8),
+                                                  levels = c("Yes", "No", "Not required for my work")),
+                                   n = c(rep(1/3, times=9), 1/3, 2/3, 0, rep(1/3, times=12))
+  )
 
   expect_equal(got_knowledge, expected_knowledge)
 
@@ -55,19 +54,18 @@ test_that("summarise_coding_tools access output is as expected", {
 
   got_access <- summarise_coding_tools(dummy_data, "access")
 
-  expected_access <- data.frame("name" = rep(c("Matlab",
-                                               "Python",
-                                               "R",
-                                               "SAS",
-                                               "SPSS",
-                                               "SQL",
-                                               "Stata",
-                                               "VBA"), each=3),
-                                "value" = factor(rep(c("Yes", "Don't know", "No"), 8),
-                                                 levels = c("Yes", "Don't know", "No")),
-                                "n" = c(1/6, 0, 5/6, 1/6, 1/3, 1/2, 1/3, 1/2,
-                                        1/6, 1/2, 1/3, 1/6, 1/2, 1/3, 1/6, 1/6,
-                                        1/3, 1/2, 1/3, 1/2, 1/6, 1/3, 1/2, 1/6))
+  expected_access <- data.frame(name = rep(c("Python",
+                                             "R",
+                                             "SQL",
+                                             "Matlab",
+                                             "SAS",
+                                             "SPSS",
+                                             "Stata",
+                                             "VBA"), each=3),
+                                value = factor(rep(c("Yes", "No", "Don't know"), 8),
+                                               levels = c("Yes", "No", "Don't know")),
+                                n = c(rep(1/3, times=9), 1/3, 2/3, 0, rep(1/3, times=12))
+  )
 
   expect_equal(got_access, expected_access)
 
diff --git a/tests/testthat/test-summarise_languages_by_prof.R b/tests/testthat/test-summarise_languages_by_prof.R
index 74b0045..41393c4 100644
--- a/tests/testthat/test-summarise_languages_by_prof.R
+++ b/tests/testthat/test-summarise_languages_by_prof.R
@@ -4,7 +4,7 @@ knowledge_response <- rep(c(
   NA,
   "Yes",
   "No",
-  "Don't know"),
+  "Not required for my work"),
   each = 3, times = 6)
 
 prof_response <- rep(c(
@@ -61,12 +61,12 @@ test_that("summarise_languages_by_prof output is as expected", {
   expected <- data.frame(
 
     lang = rep(c(
-      "Matlab",
       "Python",
       "R",
+      "SQL",
+      "Matlab",
       "SAS",
       "SPSS",
-      "SQL",
       "Stata",
       "VBA"),
       times = 9),
diff --git a/tests/testthat/test-summarise_rap_comp.R b/tests/testthat/test-summarise_rap_comp.R
index f77bde3..5355423 100644
--- a/tests/testthat/test-summarise_rap_comp.R
+++ b/tests/testthat/test-summarise_rap_comp.R
@@ -13,7 +13,7 @@ dummy_data <- data.frame(
   open_code_score = rep(c(NA, 1, 0), times = 5),
   version_control_score = rep(c(NA, 1, 0), times = 5),
   peer_review_score = rep(c(NA, 1, 0), times = 5),
-  development_QA_score = rep(c(NA, 1, 0), times = 5),
+  proportionate_QA_score = rep(c(NA, 1, 0), times = 5),
   doc_score = rep(c(NA, 1, 0), times = 5),
   basic_rap_score = rep(c(NA, 1, 0), times = 5),
   function_score = rep(c(NA, 1, 0), times = 5),
@@ -48,7 +48,7 @@ test_that("summarise_rap_comp output is as expected", {
       "Team open source code",
       "Version control",
       "Peer review",
-      "Development QA",
+      "Proportionate QA",
       "Documentation",
       "Functions",
       "Unit testing",
@@ -62,7 +62,7 @@ test_that("summarise_rap_comp output is as expected", {
          "Team open source code",
          "Version control",
          "Peer review",
-         "Development QA",
+         "Proportionate QA",
          "Documentation",
          "Functions",
          "Unit testing",