From 1e6691b0df401ee412c1762d078c1281d0031baa Mon Sep 17 00:00:00 2001 From: Luke Davies <117165341+ldavies99@users.noreply.github.com> Date: Mon, 8 Jan 2024 08:39:08 +0000 Subject: [PATCH] Fix summarise coding tools (#75) * Change levels to match new survey responses and reorder the levels * Rearrange programming languages * Fix rap comp table --- R/frequency-tables.R | 13 ++- tests/testthat/test-summarise_coding_tools.R | 82 +++++++++---------- .../test-summarise_languages_by_prof.R | 6 +- tests/testthat/test-summarise_rap_comp.R | 6 +- 4 files changed, 55 insertions(+), 52 deletions(-) diff --git a/R/frequency-tables.R b/R/frequency-tables.R index 0e12296..d926a4f 100644 --- a/R/frequency-tables.R +++ b/R/frequency-tables.R @@ -122,7 +122,11 @@ summarise_coding_tools <- function(data, type = list("knowledge", "access"), pro "access_SPSS", "knowledge_stata", "access_stata", "knowledge_matlab", "access_matlab") - levels <- c("Yes", "Don't know", "No") + if (type == "knowledge") { + levels <- c("Yes", "No", "Not required for my work") + } else { + levels <- c("Yes", "No", "Don't know") + } labels <- c("R", "SQL", "SAS", "VBA", "Python", "SPSS", "Stata", "Matlab") @@ -130,7 +134,8 @@ summarise_coding_tools <- function(data, type = list("knowledge", "access"), pro questions <- questions[grepl(paste0(type, "_"), questions)] - frequencies <- calculate_freqs(data, questions, levels, labels, prop = prop) + frequencies <- calculate_freqs(data, questions, levels, labels, prop = prop) %>% + dplyr::arrange(match(name, c("Python", "R", "SQL", "Matlab", "SAS", "SPSS", "Stata", "VBA"))) return(frequencies) } @@ -424,7 +429,7 @@ summarise_rap_comp <- function(data) { "Team open source code", "Version control", "Peer review", - "Development QA", + "Proportionate QA", "Documentation", "Functions", "Unit testing", @@ -438,7 +443,7 @@ summarise_rap_comp <- function(data) { "open_code_score", "version_control_score", "peer_review_score", - "development_QA_score", + 
"proportionate_QA_score", "doc_score", "function_score", "unit_test_score", diff --git a/tests/testthat/test-summarise_coding_tools.R b/tests/testthat/test-summarise_coding_tools.R index ac12fab..4288e4b 100644 --- a/tests/testthat/test-summarise_coding_tools.R +++ b/tests/testthat/test-summarise_coding_tools.R @@ -1,22 +1,22 @@ # Coding tools frequency tables (access or knowledge) dummy_data <- data.frame( - knowledge_R = c("Yes", rep("No", 2), rep("Don't know", 3)), - access_R = c(rep("Yes", 2), "No", rep("Don't know", 3)), - knowledge_SQL = c(rep("Yes", 3), rep("No", 2), "Don't know"), - access_SQL = c("Yes", rep("No", 3), rep("Don't know", 2)), - knowledge_SAS = c(rep("Yes", 2), rep("No", 3), "Don't know"), - access_SAS = c(rep("Yes", 3), "No", rep("Don't know", 2)), - knowledge_VBA = c("Yes", rep("No", 2), rep("Don't know", 3)), - access_VBA = c(rep("Yes", 2), "No", rep("Don't know", 3)), - knowledge_python = c(rep("Yes", 3), rep("No", 2), "Don't know"), - access_python = c("Yes", rep("No", 3), rep("Don't know", 2)), - knowledge_SPSS = c(rep("Yes", 2), rep("No", 3), "Don't know"), - access_SPSS = c(rep("Yes", 3), "No", rep("Don't know", 2)), - knowledge_stata = c("Yes", rep("No", 2), rep("Don't know", 3)), - access_stata = c(rep("Yes", 2), "No", rep("Don't know", 3)), - knowledge_matlab = c(rep("Yes", 3), rep("No", 2), "Don't know"), - access_matlab = c("Yes", rep("No", 5), rep("Don't know", 0)) # Used to check zero counts aren't missing + knowledge_R = c("Yes", "No", "Not required for my work"), + access_R = c("Yes", "No", "Don't know"), + knowledge_SQL = c("Yes", "No", "Not required for my work"), + access_SQL = c("Yes", "No", "Don't know"), + knowledge_SAS = c("Yes", "No", "Not required for my work"), + access_SAS = c("Yes", "No", "Don't know"), + knowledge_VBA = c("Yes", "No", "Not required for my work"), + access_VBA = c("Yes", "No", "Don't know"), + knowledge_python = c("Yes", "No", "Not required for my work"), + access_python = c("Yes", "No", "Don't 
know"), + knowledge_SPSS = c("Yes", "No", "Not required for my work"), + access_SPSS = c("Yes", "No", "Don't know"), + knowledge_stata = c("Yes", "No", "Not required for my work"), + access_stata = c("Yes", "No", "Don't know"), + knowledge_matlab = c("Yes", "No", "No"), + access_matlab = c("Yes", "No", "No") # Used to check zero counts aren't missing ) test_that("summarise_coding_tools missing data is handled correctly", { @@ -33,19 +33,18 @@ test_that("summarise_coding_tools knowledge output is as expected", { got_knowledge <- summarise_coding_tools(dummy_data, "knowledge") - expected_knowledge <- data.frame("name" = rep(c("Matlab", - "Python", - "R", - "SAS", - "SPSS", - "SQL", - "Stata", - "VBA"), each=3), - "value" = factor(rep(c("Yes", "Don't know", "No"), 8), - levels = c("Yes", "Don't know", "No")), - "n" = c(1/2, 1/6, 1/3, 1/2, 1/6, 1/3, 1/6, 1/2, - 1/3, 1/3, 1/6, 1/2, 1/3, 1/6, 1/2, 1/2, - 1/6, 1/3, 1/6, 1/2, 1/3, 1/6, 1/2, 1/3)) + expected_knowledge <- data.frame(name = rep(c("Python", + "R", + "SQL", + "Matlab", + "SAS", + "SPSS", + "Stata", + "VBA"), each=3), + value = factor(rep(c("Yes", "No", "Not required for my work"), 8), + levels = c("Yes", "No", "Not required for my work")), + n = c(rep(1/3, times=9), 1/3, 2/3, 0, rep(1/3, times=12)) + ) expect_equal(got_knowledge, expected_knowledge) @@ -55,19 +54,18 @@ test_that("summarise_coding_tools access output is as expected", { got_access <- summarise_coding_tools(dummy_data, "access") - expected_access <- data.frame("name" = rep(c("Matlab", - "Python", - "R", - "SAS", - "SPSS", - "SQL", - "Stata", - "VBA"), each=3), - "value" = factor(rep(c("Yes", "Don't know", "No"), 8), - levels = c("Yes", "Don't know", "No")), - "n" = c(1/6, 0, 5/6, 1/6, 1/3, 1/2, 1/3, 1/2, - 1/6, 1/2, 1/3, 1/6, 1/2, 1/3, 1/6, 1/6, - 1/3, 1/2, 1/3, 1/2, 1/6, 1/3, 1/2, 1/6)) + expected_access <- data.frame(name = rep(c("Python", + "R", + "SQL", + "Matlab", + "SAS", + "SPSS", + "Stata", + "VBA"), each=3), + value = factor(rep(c("Yes", 
"No", "Don't know"), 8), + levels = c("Yes", "No", "Don't know")), + n = c(rep(1/3, times=9), 1/3, 2/3, 0, rep(1/3, times=12)) + ) expect_equal(got_access, expected_access) diff --git a/tests/testthat/test-summarise_languages_by_prof.R b/tests/testthat/test-summarise_languages_by_prof.R index 74b0045..41393c4 100644 --- a/tests/testthat/test-summarise_languages_by_prof.R +++ b/tests/testthat/test-summarise_languages_by_prof.R @@ -4,7 +4,7 @@ knowledge_response <- rep(c( NA, "Yes", "No", - "Don't know"), + "Not required for my work"), each = 3, times = 6) prof_response <- rep(c( @@ -61,12 +61,12 @@ test_that("summarise_languages_by_prof output is as expected", { expected <- data.frame( lang = rep(c( - "Matlab", "Python", "R", + "SQL", + "Matlab", "SAS", "SPSS", - "SQL", "Stata", "VBA"), times = 9), diff --git a/tests/testthat/test-summarise_rap_comp.R b/tests/testthat/test-summarise_rap_comp.R index f77bde3..5355423 100644 --- a/tests/testthat/test-summarise_rap_comp.R +++ b/tests/testthat/test-summarise_rap_comp.R @@ -13,7 +13,7 @@ dummy_data <- data.frame( open_code_score = rep(c(NA, 1, 0), times = 5), version_control_score = rep(c(NA, 1, 0), times = 5), peer_review_score = rep(c(NA, 1, 0), times = 5), - development_QA_score = rep(c(NA, 1, 0), times = 5), + proportionate_QA_score = rep(c(NA, 1, 0), times = 5), doc_score = rep(c(NA, 1, 0), times = 5), basic_rap_score = rep(c(NA, 1, 0), times = 5), function_score = rep(c(NA, 1, 0), times = 5), @@ -48,7 +48,7 @@ test_that("summarise_rap_comp output is as expected", { "Team open source code", "Version control", "Peer review", - "Development QA", + "Proportionate QA", "Documentation", "Functions", "Unit testing", @@ -62,7 +62,7 @@ test_that("summarise_rap_comp output is as expected", { "Team open source code", "Version control", "Peer review", - "Development QA", + "Proportionate QA", "Documentation", "Functions", "Unit testing",