From e0552af6820fb0e8faaea60540338ac5d53c92e9 Mon Sep 17 00:00:00 2001 From: Shane Orr Date: Tue, 26 Sep 2023 17:38:30 -0400 Subject: [PATCH] create pkgdown site --- .Rbuildignore | 2 + .github/.gitignore | 1 + .github/workflows/pkgdown.yaml | 48 ++ DESCRIPTION | 2 +- _pkgdown.yml | 12 +- docs/date_to_sy_worked_example.R | 26 - docs/date_to_sy_worked_example.Rmd | 52 -- docs/date_to_sy_worked_example.html | 408 --------------- docs/factorizing_a_dataset.R | 83 --- docs/factorizing_a_dataset.Rmd | 161 ------ docs/factorizing_a_dataset.html | 657 ------------------------ docs/tntp-style-plots.R | 163 ------ docs/tntp-style-plots.Rmd | 214 -------- docs/tntp-style-plots.html | 440 ---------------- docs/tntpr-introduction.R | 53 -- docs/tntpr-introduction.Rmd | 146 ------ docs/tntpr-introduction.html | 501 ------------------ vignettes/date_to_sy_worked_example.Rmd | 7 +- vignettes/factorizing_a_dataset.Rmd | 7 +- vignettes/tntp-style-plots.Rmd | 4 +- vignettes/tntpr-introduction.Rmd | 4 +- vignettes/visualization-cookbook.Rmd | 2 +- 22 files changed, 70 insertions(+), 2923 deletions(-) create mode 100644 .github/.gitignore create mode 100644 .github/workflows/pkgdown.yaml delete mode 100644 docs/date_to_sy_worked_example.R delete mode 100644 docs/date_to_sy_worked_example.Rmd delete mode 100644 docs/date_to_sy_worked_example.html delete mode 100644 docs/factorizing_a_dataset.R delete mode 100644 docs/factorizing_a_dataset.Rmd delete mode 100644 docs/factorizing_a_dataset.html delete mode 100644 docs/tntp-style-plots.R delete mode 100644 docs/tntp-style-plots.Rmd delete mode 100644 docs/tntp-style-plots.html delete mode 100644 docs/tntpr-introduction.R delete mode 100644 docs/tntpr-introduction.Rmd delete mode 100644 docs/tntpr-introduction.html diff --git a/.Rbuildignore b/.Rbuildignore index 8e2b9c6..db29b71 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -8,3 +8,5 @@ ^docs$ ^pkgdown$ ^README\.Rmd$ +^\.github$ +^vignettes/tntp-style-plots\.rmd$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000..2d19fc7 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 0000000..ed7650c --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,48 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + release: + types: [published] + workflow_dispatch: + +name: pkgdown + +jobs: + pkgdown: + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. + needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.4.1 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/DESCRIPTION b/DESCRIPTION index 0fce48c..e3491e2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,7 +10,7 @@ Description: An internal TNTP R package that includes functions and package to be a one-stop shop for R tools by analysts and other data users at TNTP. License: CC BY 4.0 -URL: https://github.com/tntp/tntpr +URL: https://github.com/tntp/tntpr, https://tntp.github.io/tntpr/ Depends: R (>= 3.2) Imports: diff --git a/_pkgdown.yml b/_pkgdown.yml index 8072c00..ba86cc9 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,4 +1,14 @@ -url: 'https://tntp.github.io/tntpr/' +url: https://tntp.github.io/tntpr/ + +articles: +- title: Articles + navbar: ~ + contents: + - tntpr-introduction + - visualization-cookbook + - date_to_sy_worked_example + - factorizing_a_dataset + - tntp-style-plots template: bootstrap: 5 diff --git a/docs/date_to_sy_worked_example.R b/docs/date_to_sy_worked_example.R deleted file mode 100644 index 218c0e2..0000000 --- a/docs/date_to_sy_worked_example.R +++ /dev/null @@ -1,26 +0,0 @@ -## ----setup,echo=FALSE, include = FALSE---------------------------------------- -library(knitr) -knitr::opts_chunk$set(error = TRUE) -knitr::opts_chunk$set(out.width = "750px", dpi = 300) -knitr::opts_chunk$set(dev = "png", fig.width = 8, fig.height = 4.8889, dpi = 300) - -## ----load_packages, include = FALSE------------------------------------------- -library(pacman) -if (!require("tntpr")) install_github("tntp/tntpr") -library(tntpr) -p_load(tidyverse, janitor, lubridate) - -## ----echo=FALSE--------------------------------------------------------------- -set.seed(1) - -appl_dat <- tibble( - student_id = 1:100, - test_date = sample(seq(as.Date("2010/08/01"), as.Date("2020/01/01"), by = "day"), 100) -) - -appl_dat - -## ----------------------------------------------------------------------------- -appl_dat %>% - mutate(hire_date_sy = date_to_sy(test_date, last_day_of_sy = ymd("2018-06-01"))) - diff --git a/docs/date_to_sy_worked_example.Rmd b/docs/date_to_sy_worked_example.Rmd deleted file mode 100644 index b160c26..0000000 --- a/docs/date_to_sy_worked_example.Rmd +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: 'Worked Example: `tntpr::date_to_sy`' -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{date_to_sy} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - - - - - - -```{r setup,echo=FALSE, include = FALSE} -library(knitr) -knitr::opts_chunk$set(error = TRUE) -knitr::opts_chunk$set(out.width = "750px", dpi = 300) -knitr::opts_chunk$set(dev = "png", fig.width = 8, fig.height = 4.8889, dpi = 300) -``` - -```{r load_packages, include = FALSE} -library(pacman) -if (!require("tntpr")) install_github("tntp/tntpr") -library(tntpr) -p_load(tidyverse, janitor, lubridate) -``` - -- `date_to_sy`: Checks to see if a date is past the user-specified cutoff point for delineating school years, then maps to the appropriate year. - -Say you have a test date and you want to create a new variable that tells you the school year the test was taken. - - -```{r, echo=FALSE} -set.seed(1) - -appl_dat <- tibble( - student_id = 1:100, - test_date = sample(seq(as.Date("2010/08/01"), as.Date("2020/01/01"), by = "day"), 100) -) - -appl_dat -``` - -Historically, I would have used a long, error-prone `case_when` mutation, but the `tntpr::date_to_sy` function is much easier. - -The function takes two arguments, `date_var` and `last_day_of_sy` (year doesn't matter) and returns a character string with the school year in the form '(year) - (year)'. - -```{r} -appl_dat %>% - mutate(hire_date_sy = date_to_sy(test_date, last_day_of_sy = ymd("2018-06-01"))) -``` diff --git a/docs/date_to_sy_worked_example.html b/docs/date_to_sy_worked_example.html deleted file mode 100644 index 54dae77..0000000 --- a/docs/date_to_sy_worked_example.html +++ /dev/null @@ -1,408 +0,0 @@ - - - - - - - - - - - - - - -Worked Example: tntpr::date_to_sy - - - - - - - - - - - - - - - - - - - - - - - - - - -

Worked Example: -tntpr::date_to_sy

- - - - - - - - -

Say you have a test date and you want to create a new variable that -tells you the school year the test was taken.

-
## # A tibble: 100 × 2
-##    student_id test_date 
-##         <int> <date>    
-##  1          1 2013-05-13
-##  2          2 2012-06-09
-##  3          3 2016-07-16
-##  4          4 2013-02-15
-##  5          5 2014-10-11
-##  6          6 2011-11-14
-##  7          7 2017-01-02
-##  8          8 2011-04-27
-##  9          9 2013-11-23
-## 10         10 2019-10-31
-## # ℹ 90 more rows
-

Historically, I would have used a long, error-prone -case_when mutation, but the tntpr::date_to_sy -function is much easier.

-

The function takes two arguments, date_var and -last_day_of_sy (year doesn’t matter) and returns a -character string with the school year in the form ‘(year) - (year)’.

-
appl_dat %>%
-  mutate(hire_date_sy = date_to_sy(test_date, last_day_of_sy = ymd("2018-06-01")))
-
## # A tibble: 100 × 3
-##    student_id test_date  hire_date_sy
-##         <int> <date>     <chr>       
-##  1          1 2013-05-13 2012 - 2013 
-##  2          2 2012-06-09 2012 - 2013 
-##  3          3 2016-07-16 2016 - 2017 
-##  4          4 2013-02-15 2012 - 2013 
-##  5          5 2014-10-11 2014 - 2015 
-##  6          6 2011-11-14 2011 - 2012 
-##  7          7 2017-01-02 2016 - 2017 
-##  8          8 2011-04-27 2010 - 2011 
-##  9          9 2013-11-23 2013 - 2014 
-## 10         10 2019-10-31 2019 - 2020 
-## # ℹ 90 more rows
- - - - - - - - - - - diff --git a/docs/factorizing_a_dataset.R b/docs/factorizing_a_dataset.R deleted file mode 100644 index 025bffe..0000000 --- a/docs/factorizing_a_dataset.R +++ /dev/null @@ -1,83 +0,0 @@ -## ----setup,echo=FALSE, include = FALSE---------------------------------------- -library(knitr) -knitr::opts_chunk$set(error = TRUE) -knitr::opts_chunk$set(out.width = "750px", dpi = 300) -knitr::opts_chunk$set(dev = "png", fig.width = 8, fig.height = 4.8889, dpi = 300) -knitr::opts_chunk$set(fig.path = "introduction_files/") - -## ----load_packages, include = FALSE------------------------------------------- -library(pacman) -if (!require("tntpr")) install_github("tntp/tntpr") -library(tntpr) -p_load(tidyverse, janitor, praise) - -## ----example_data, echo=FALSE, include=FALSE---------------------------------- -qtype_1 <- c("Strongly Disagree", "Disagree", "Somewhat disagree", "Somewhat agree", "Agree", "Strongly agree") -qtype_2 <- c("Strongly Disagree", "Disagree", "Neither agree nor disagree", "Agree", "Strongly agree") -qtype_3 <- c("No", "Yes") - -survey_dat <- tibble( - response_id = 1:100, - years_of_experience = round(runif(100) * 10, digits = 0), - q1 = sample(qtype_1, 100, replace = TRUE), - q2 = sample(qtype_1 %>% str_remove("Strongly disagree"), 100, replace = TRUE), - q3 = sample(qtype_1, 100, replace = TRUE), - q4 = replicate(100, praise()), - q5 = sample(qtype_2, 100, replace = TRUE), - q6 = sample(qtype_2, 100, replace = TRUE), - q7 = replicate(100, praise("${Exclamation}! ${EXCLAMATION}!-${EXCLAMATION}! This is just ${adjective}!")), - q8 = sample(qtype_3, 100, replace = TRUE), - q9 = sample(qtype_3, 100, replace = TRUE) -) - -## ----------------------------------------------------------------------------- -survey_dat %>% - glimpse() - -## ----------------------------------------------------------------------------- -survey_dat %>% - map(unique) %>% - map(length) - -## ----------------------------------------------------------------------------- -survey_dat %>% - select(q1, q2, q3, q5, q6, q8, q9) %>% - map(unique) - -## ----------------------------------------------------------------------------- -survey_dat$q1 %>% unique() - -## ----------------------------------------------------------------------------- -survey_dat <- survey_dat %>% - factorize_df(lvls = c("Strongly Disagree", "Disagree", "Somewhat disagree", "Somewhat agree", "Agree", "Strongly agree")) - -survey_dat %>% - glimpse() - -survey_dat %>% - map(levels) - -## ----------------------------------------------------------------------------- -survey_dat$q5 %>% unique() - -## ----------------------------------------------------------------------------- -survey_dat <- survey_dat %>% - factorize_df(lvls = c("Strongly Disagree", "Disagree", "Neither agree nor disagree", "Agree", "Strongly agree")) - -## ----------------------------------------------------------------------------- -survey_dat$q8 %>% unique() - -## ----------------------------------------------------------------------------- -survey_dat <- survey_dat %>% - factorize_df(lvls = c("No", "Yes")) - -## ----------------------------------------------------------------------------- -survey_dat %>% - select(q1, q2, q3, q5, q6, q8, q9) %>% - map(is.factor) - -## ----------------------------------------------------------------------------- -survey_dat %>% - select(q1, q2, q3, q5, q6, q8, q9) %>% - map(levels) - diff --git a/docs/factorizing_a_dataset.Rmd b/docs/factorizing_a_dataset.Rmd deleted file mode 100644 index e56f55d..0000000 --- a/docs/factorizing_a_dataset.Rmd +++ /dev/null @@ -1,161 +0,0 @@ ---- -title: "Factorizing a survey dataset" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{factorizing-a-dataset} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - - - - - - -```{r setup,echo=FALSE, include = FALSE} -library(knitr) -knitr::opts_chunk$set(error = TRUE) -knitr::opts_chunk$set(out.width = "750px", dpi = 300) -knitr::opts_chunk$set(dev = "png", fig.width = 8, fig.height = 4.8889, dpi = 300) -knitr::opts_chunk$set(fig.path = "introduction_files/") -``` - -```{r load_packages, include = FALSE} -library(pacman) -if (!require("tntpr")) install_github("tntp/tntpr") -library(tntpr) -p_load(tidyverse, janitor, praise) -``` - -```{r example_data, echo=FALSE, include=FALSE} -qtype_1 <- c("Strongly Disagree", "Disagree", "Somewhat disagree", "Somewhat agree", "Agree", "Strongly agree") -qtype_2 <- c("Strongly Disagree", "Disagree", "Neither agree nor disagree", "Agree", "Strongly agree") -qtype_3 <- c("No", "Yes") - -survey_dat <- tibble( - response_id = 1:100, - years_of_experience = round(runif(100) * 10, digits = 0), - q1 = sample(qtype_1, 100, replace = TRUE), - q2 = sample(qtype_1 %>% str_remove("Strongly disagree"), 100, replace = TRUE), - q3 = sample(qtype_1, 100, replace = TRUE), - q4 = replicate(100, praise()), - q5 = sample(qtype_2, 100, replace = TRUE), - q6 = sample(qtype_2, 100, replace = TRUE), - q7 = replicate(100, praise("${Exclamation}! ${EXCLAMATION}!-${EXCLAMATION}! This is just ${adjective}!")), - q8 = sample(qtype_3, 100, replace = TRUE), - q9 = sample(qtype_3, 100, replace = TRUE) -) -``` - -## Worked Example - -Imagine you conduct a survey and are about to analyze the data. One of the first steps in the data cleaning process is to "factorize" the dataset. R uses factors to handle categorical variables, variables that have a fixed and known set of possible values. Usually this is one of the most manual, error-prone parts of the data-cleaning process. - -Below is a workflow I use that might be helpful for others. I use a couple functions that may be new to you. - -- `glimpse`: This function makes it possible to see every column in a data frame. It shows columns run down the page and data runs across. - -- `map`: The purrr version of the apply functions. Transforms the input by applying a function to each element and returning a vector the same length as the input. In this example, I use the map function with a data frame as an arguement. In this case, the inputs are the variable vectors and map applies the function across these variable vectors. - -- `factorize_df`: examines each column in a data.frame; when it finds a column composed solely of the values provided to the `lvls` argument it updates them to be factor variables, with levels in the order provided. - -### Step 1: Identify which variables should be factors. - -Admittedly, this part usually takes some knowledge of the dataset and/or exploratory perusing of the dataset. Let's first use `glimpse` to take a look at the dataset. - -```{r} -survey_dat %>% - glimpse() -``` - -First thing I notice is all the question variables are character vectors. Since I know some of the questions where multiple-choice questions, it's likely some of these should be converted to factors. - -One thing you could try is seeing how many unique values each variable has. - -```{r} -survey_dat %>% - map(unique) %>% - map(length) -``` - -Here I notice `response_id`, `q4`, and `q7` have 50+ unique values and should likely not be transformed to factors. Knowing the dataset, I think `response_id` should probably be a character data type but will not do that in this example. Finally, since each `q1`, `q2`, `q3`, `q5`, `q6`, `q8`, and `q9` are "variables that have a fixed and known set of possible values" they are likely candidates to be factors. - -Next, I'll see what the unique values are for these variables. - -```{r} -survey_dat %>% - select(q1, q2, q3, q5, q6, q8, q9) %>% - map(unique) -``` - -Aha! These do indeed look like factors. - -### Step 2: Transform to factors with appropriate level ordering. - -Here a lot of people would `mutate` their dataset with a series of factor(., levels = c(...)) mutations. This works, but let's use `factorize_df` to help us do this faster with less typing. - -First, let's see what the "levels" of `q1` are. - -```{r} -survey_dat$q1 %>% unique() -``` - -Okay, looks like a 6-pt likert agreement scale. Let's pass these levels through the `factorize_df` function (in order) as the lvls arguement. - -```{r} -survey_dat <- survey_dat %>% - factorize_df(lvls = c("Strongly Disagree", "Disagree", "Somewhat disagree", "Somewhat agree", "Agree", "Strongly agree")) - -survey_dat %>% - glimpse() - -survey_dat %>% - map(levels) -``` - -Okay, that doesn't look too impressive but notice that the function: - a) searched each column in the dataset and tested whether the unique values that were a subset of the values in the lvls arguement, - b) identified `q1`, `q2`, and `q3` as having fitting that criteria, and - c) transformed those and only those three columns to factors with the appropriate levels - -Next I can move onto the next factor `q5`. - -```{r} -survey_dat$q5 %>% unique() -``` - -```{r} -survey_dat <- survey_dat %>% - factorize_df(lvls = c("Strongly Disagree", "Disagree", "Neither agree nor disagree", "Agree", "Strongly agree")) -``` - -And finally `q8`. - -```{r} -survey_dat$q8 %>% unique() -``` - -```{r} -survey_dat <- survey_dat %>% - factorize_df(lvls = c("No", "Yes")) -``` - -### Step 3: Check if factorizing worked like you intended. - -Okay we said we needed to change `q1`, `q2`, `q3`, `q5`, `q6`, `q8`, and `q9` to factors. Let's verify this worked. - -```{r} -survey_dat %>% - select(q1, q2, q3, q5, q6, q8, q9) %>% - map(is.factor) -``` - -They're all factors... - -```{r} -survey_dat %>% - select(q1, q2, q3, q5, q6, q8, q9) %>% - map(levels) -``` - -... with the correct levels and level order. And voila, a 'factorized' dataset. diff --git a/docs/factorizing_a_dataset.html b/docs/factorizing_a_dataset.html deleted file mode 100644 index c5951a9..0000000 --- a/docs/factorizing_a_dataset.html +++ /dev/null @@ -1,657 +0,0 @@ - - - - - - - - - - - - - - -Factorizing a survey dataset - - - - - - - - - - - - - - - - - - - - - - - - - - -

Factorizing a survey dataset

- - - - - - - -
-

Worked Example

-

Imagine you conduct a survey and are about to analyze the data. One -of the first steps in the data cleaning process is to “factorize” the -dataset. R uses factors to handle categorical variables, variables that -have a fixed and known set of possible values. Usually this is one of -the most manual, error-prone parts of the data-cleaning process.

-

Below is a workflow I use that might be helpful for others. I use a -couple functions that may be new to you.

- -
-

Step 1: Identify which variables should be factors.

-

Admittedly, this part usually takes some knowledge of the dataset -and/or exploratory perusing of the dataset. Let’s first use -glimpse to take a look at the dataset.

-
survey_dat %>%
-  glimpse()
-
## Rows: 100
-## Columns: 11
-## $ response_id         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,…
-## $ years_of_experience <dbl> 10, 5, 5, 2, 8, 5, 5, 2, 2, 6, 6, 1, 0, 6, 9, 6, 6…
-## $ q1                  <chr> "Disagree", "Somewhat agree", "Disagree", "Disagre…
-## $ q2                  <chr> "Agree", "Strongly agree", "Strongly Disagree", "A…
-## $ q3                  <chr> "Somewhat disagree", "Somewhat disagree", "Somewha…
-## $ q4                  <chr> "You are fantabulous!", "You are exquisite!", "You…
-## $ q5                  <chr> "Strongly Disagree", "Neither agree nor disagree",…
-## $ q6                  <chr> "Strongly Disagree", "Strongly Disagree", "Agree",…
-## $ q7                  <chr> "Huzzah! AYE!-HMM! This is just riveting!", "Hoora…
-## $ q8                  <chr> "No", "Yes", "No", "No", "Yes", "No", "Yes", "Yes"…
-## $ q9                  <chr> "Yes", "No", "No", "Yes", "Yes", "No", "No", "No",…
-

First thing I notice is all the question variables are character -vectors. Since I know some of the questions where multiple-choice -questions, it’s likely some of these should be converted to factors.

-

One thing you could try is seeing how many unique values each -variable has.

-
survey_dat %>%
-  map(unique) %>%
-  map(length)
-
## $response_id
-## [1] 100
-## 
-## $years_of_experience
-## [1] 11
-## 
-## $q1
-## [1] 6
-## 
-## $q2
-## [1] 6
-## 
-## $q3
-## [1] 6
-## 
-## $q4
-## [1] 62
-## 
-## $q5
-## [1] 5
-## 
-## $q6
-## [1] 5
-## 
-## $q7
-## [1] 100
-## 
-## $q8
-## [1] 2
-## 
-## $q9
-## [1] 2
-

Here I notice response_id, q4, and -q7 have 50+ unique values and should likely not be -transformed to factors. Knowing the dataset, I think -response_id should probably be a character data type but -will not do that in this example. Finally, since each q1, -q2, q3, q5, q6, -q8, and q9 are “variables that have a fixed -and known set of possible values” they are likely candidates to be -factors.

-

Next, I’ll see what the unique values are for these variables.

-
survey_dat %>%
-  select(q1, q2, q3, q5, q6, q8, q9) %>%
-  map(unique)
-
## $q1
-## [1] "Disagree"          "Somewhat agree"    "Agree"            
-## [4] "Strongly Disagree" "Strongly agree"    "Somewhat disagree"
-## 
-## $q2
-## [1] "Agree"             "Strongly agree"    "Strongly Disagree"
-## [4] "Somewhat agree"    "Disagree"          "Somewhat disagree"
-## 
-## $q3
-## [1] "Somewhat disagree" "Somewhat agree"    "Strongly agree"   
-## [4] "Strongly Disagree" "Disagree"          "Agree"            
-## 
-## $q5
-## [1] "Strongly Disagree"          "Neither agree nor disagree"
-## [3] "Disagree"                   "Strongly agree"            
-## [5] "Agree"                     
-## 
-## $q6
-## [1] "Strongly Disagree"          "Agree"                     
-## [3] "Strongly agree"             "Neither agree nor disagree"
-## [5] "Disagree"                  
-## 
-## $q8
-## [1] "No"  "Yes"
-## 
-## $q9
-## [1] "Yes" "No"
-

Aha! These do indeed look like factors.

-
-
-

Step 2: Transform to factors with appropriate level ordering.

-

Here a lot of people would mutate their dataset with a -series of factor(., levels = c(…)) mutations. This works, but let’s use -factorize_df to help us do this faster with less -typing.

-

First, let’s see what the “levels” of q1 are.

-
survey_dat$q1 %>% unique()
-
## [1] "Disagree"          "Somewhat agree"    "Agree"            
-## [4] "Strongly Disagree" "Strongly agree"    "Somewhat disagree"
-

Okay, looks like a 6-pt likert agreement scale. Let’s pass these -levels through the factorize_df function (in order) as the -lvls arguement.

-
survey_dat <- survey_dat %>%
-  factorize_df(lvls = c("Strongly Disagree", "Disagree", "Somewhat disagree", "Somewhat agree", "Agree", "Strongly agree"))
-
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
-## `.name_repair` is omitted as of tibble 2.0.0.
-## ℹ Using compatibility `.name_repair`.
-## ℹ The deprecated feature was likely used in the tntpr package.
-##   Please report the issue to the authors.
-## This warning is displayed once every 8 hours.
-## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
-## generated.
-
## Transformed these columns: 
-##  *  q1, 
-## *  q2, 
-## *  q3
-
survey_dat %>%
-  glimpse()
-
## Rows: 100
-## Columns: 11
-## $ response_id         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,…
-## $ years_of_experience <dbl> 10, 5, 5, 2, 8, 5, 5, 2, 2, 6, 6, 1, 0, 6, 9, 6, 6…
-## $ q1                  <fct> Disagree, Somewhat agree, Disagree, Disagree, Some…
-## $ q2                  <fct> Agree, Strongly agree, Strongly Disagree, Agree, S…
-## $ q3                  <fct> Somewhat disagree, Somewhat disagree, Somewhat agr…
-## $ q4                  <chr> "You are fantabulous!", "You are exquisite!", "You…
-## $ q5                  <chr> "Strongly Disagree", "Neither agree nor disagree",…
-## $ q6                  <chr> "Strongly Disagree", "Strongly Disagree", "Agree",…
-## $ q7                  <chr> "Huzzah! AYE!-HMM! This is just riveting!", "Hoora…
-## $ q8                  <chr> "No", "Yes", "No", "No", "Yes", "No", "Yes", "Yes"…
-## $ q9                  <chr> "Yes", "No", "No", "Yes", "Yes", "No", "No", "No",…
-
survey_dat %>%
-  map(levels)
-
## $response_id
-## NULL
-## 
-## $years_of_experience
-## NULL
-## 
-## $q1
-## [1] "Strongly Disagree" "Disagree"          "Somewhat disagree"
-## [4] "Somewhat agree"    "Agree"             "Strongly agree"   
-## 
-## $q2
-## [1] "Strongly Disagree" "Disagree"          "Somewhat disagree"
-## [4] "Somewhat agree"    "Agree"             "Strongly agree"   
-## 
-## $q3
-## [1] "Strongly Disagree" "Disagree"          "Somewhat disagree"
-## [4] "Somewhat agree"    "Agree"             "Strongly agree"   
-## 
-## $q4
-## NULL
-## 
-## $q5
-## NULL
-## 
-## $q6
-## NULL
-## 
-## $q7
-## NULL
-## 
-## $q8
-## NULL
-## 
-## $q9
-## NULL
-

Okay, that doesn’t look too impressive but notice that the function: -a) searched each column in the dataset and tested whether the unique -values that were a subset of the values in the lvls arguement, b) -identified q1, q2, and q3 as -having fitting that criteria, and c) transformed those and only those -three columns to factors with the appropriate levels

-

Next I can move onto the next factor q5.

-
survey_dat$q5 %>% unique()
-
## [1] "Strongly Disagree"          "Neither agree nor disagree"
-## [3] "Disagree"                   "Strongly agree"            
-## [5] "Agree"
-
survey_dat <- survey_dat %>%
-  factorize_df(lvls = c("Strongly Disagree", "Disagree", "Neither agree nor disagree", "Agree", "Strongly agree"))
-
## Transformed these columns: 
-##  *  q5, 
-## *  q6
-

And finally q8.

-
survey_dat$q8 %>% unique()
-
## [1] "No"  "Yes"
-
survey_dat <- survey_dat %>%
-  factorize_df(lvls = c("No", "Yes"))
-
## Transformed these columns: 
-##  *  q8, 
-## *  q9
-
-
-

Step 3: Check if factorizing worked like you intended.

-

Okay we said we needed to change q1, q2, -q3, q5, q6, q8, and -q9 to factors. Let’s verify this worked.

-
survey_dat %>%
-  select(q1, q2, q3, q5, q6, q8, q9) %>%
-  map(is.factor)
-
## $q1
-## [1] TRUE
-## 
-## $q2
-## [1] TRUE
-## 
-## $q3
-## [1] TRUE
-## 
-## $q5
-## [1] TRUE
-## 
-## $q6
-## [1] TRUE
-## 
-## $q8
-## [1] TRUE
-## 
-## $q9
-## [1] TRUE
-

They’re all factors…

-
survey_dat %>%
-  select(q1, q2, q3, q5, q6, q8, q9) %>%
-  map(levels)
-
## $q1
-## [1] "Strongly Disagree" "Disagree"          "Somewhat disagree"
-## [4] "Somewhat agree"    "Agree"             "Strongly agree"   
-## 
-## $q2
-## [1] "Strongly Disagree" "Disagree"          "Somewhat disagree"
-## [4] "Somewhat agree"    "Agree"             "Strongly agree"   
-## 
-## $q3
-## [1] "Strongly Disagree" "Disagree"          "Somewhat disagree"
-## [4] "Somewhat agree"    "Agree"             "Strongly agree"   
-## 
-## $q5
-## [1] "Strongly Disagree"          "Disagree"                  
-## [3] "Neither agree nor disagree" "Agree"                     
-## [5] "Strongly agree"            
-## 
-## $q6
-## [1] "Strongly Disagree"          "Disagree"                  
-## [3] "Neither agree nor disagree" "Agree"                     
-## [5] "Strongly agree"            
-## 
-## $q8
-## [1] "No"  "Yes"
-## 
-## $q9
-## [1] "No"  "Yes"
-

… with the correct levels and level order. And voila, a ‘factorized’ -dataset.

-
-
- - - - - - - - - - - diff --git a/docs/tntp-style-plots.R b/docs/tntp-style-plots.R deleted file mode 100644 index f3fde07..0000000 --- a/docs/tntp-style-plots.R +++ /dev/null @@ -1,163 +0,0 @@ -## ----include = FALSE---------------------------------------------------------- -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) - -## ----setup, include=FALSE----------------------------------------------------- -library(tntpr) - -# Load packages - first pacman, installing if necessary, then others -if (!require("pacman")) install.packages("pacman") -library(pacman) -p_load(devtools, tidyverse, knitr) -if (!require("patchwork")) devtools::install_github("thomasp85/patchwork") - -## ----knitr_options, include = FALSE------------------------------------------- -knitr::opts_chunk$set(collapse = TRUE, comment = "#>") -knitr::opts_chunk$set(error = TRUE) -knitr::opts_chunk$set(out.width = "750px", dpi = 300) -knitr::opts_chunk$set(dev = "png", fig.width = 8, fig.height = 4.8889, dpi = 300) - -## ----sample_datasets, include=FALSE------------------------------------------- -performance_data <- data.frame( - teacher_experience = c(rep("0-3 years ", 5), rep("4-6 years", 4), rep("7+ years", 4)), - y1_teacher_performance = sample(0:100, size = 13, rep = TRUE), - y2_teacher_performance = sample(0:100, size = 13, rep = TRUE) -) %>% - mutate(y1_performance_quartile = ntile(x = y1_teacher_performance, n = 4)) - -survey_question <- data.frame( - question = c(rep("To what extent do you \nagree with ...", 100)), - answer = sample(1:5, size = 100, rep = TRUE) -) - -## ----theme_tntp, echo=FALSE, warning=FALSE------------------------------------ -ex_plot_default <- performance_data %>% - ggplot(aes(factor(teacher_experience))) + - geom_bar() + - labs( - title = "default", - subtitle = "Subtitle", - x = "x label", - y = "y label", - caption = "caption" - ) - -ex_plot_theme_tntp <- performance_data %>% - ggplot(aes(factor(teacher_experience))) + - geom_bar() + - labs( - title = "theme_tntp()", - subtitle = "Subtitle", - x = "x label", - y = "y label", - caption = "caption" - ) + - theme_tntp() - -ex_plot_theme_tntp_2018 <- performance_data %>% - ggplot(aes(factor(teacher_experience))) + - geom_bar() + - labs( - title = "theme_tntp_2018()", - subtitle = "Subtitle", - x = "x label", - y = "y label", - caption = "caption" - ) + - theme_tntp_2018() - -## ----echo=FALSE, warning=FALSE, fig.width=7----------------------------------- -ex_plot_default - -## ----echo=FALSE, warning=FALSE, fig.width=7----------------------------------- -ex_plot_theme_tntp - -## ----echo=FALSE, warning=FALSE, fig.width=7----------------------------------- -ex_plot_theme_tntp_2018 - -## ----plot_color_palette, include = FALSE-------------------------------------- -# This function is for displaying color palettes -plot_color_palette <- function(dat, title = "") { - dat %>% - as.data.frame() %>% - rownames_to_column() %>% - set_names(c("labels", "hex")) %>% - mutate(row = row_number()) %>% - ggplot(aes(x = row, y = 1, fill = hex)) + - geom_bar(stat = "identity") + - geom_text(aes(label = paste0(labels, " (", hex, ")")), position = position_stack(vjust = 0.5), color = "white") + - scale_fill_identity() + - coord_flip() + - labs( - title = title, - x = NULL, - y = NULL - ) + - theme_tntp_2018() + - theme( - axis.text = element_blank(), - panel.grid = element_blank() - ) -} - -## ----------------------------------------------------------------------------- -palette_tntp("dark_blue") - -## ----fig.height= 12, fig.width=5, warning=FALSE, echo = FALSE----------------- -# A one-off function to plot color palettes, see code above if curious -plot_color_palette(tntpr::colors_tntp, title = "palette_tntp colors") - -## ----fig.height= 12, fig.width=7, warning=FALSE, echo = FALSE----------------- -palettes <- c("default", "likert_4pt", "likert_5pt", "likert_6pt", "likert_orange_to_green_4pt", "likert_orange_to_green_5pt", "likert_orange_to_green_6pt") - -palette_plots <- map(palettes, palette_tntp_scales) %>% - map2(palettes, plot_color_palette) - -palette_plots[[1]] + (palette_plots[[2]] + palette_plots[[3]] + palette_plots[[4]]) + palette_plots[[5]] + palette_plots[[6]] + palette_plots[[7]] + plot_layout(ncol = 1) - -## ----fig.width=7, fig.length=5, warning=FALSE, echo=TRUE---------------------- -performance_data %>% - ggplot(aes(factor(teacher_experience), fill = factor(y1_performance_quartile))) + - geom_bar(position = position_fill()) + - labs( - title = "Title", - subtitle = "Subtitle", - x = "x label", - y = "y label", - fill = "fill", - caption = "caption" - ) + - theme_tntp_2018() + - scale_fill_tntp() - -## ----fig.width=7, fig.length=5, warning=FALSE, echo=TRUE---------------------- -performance_data %>% - ggplot(aes(x = y1_teacher_performance, y = y2_teacher_performance, color = factor(teacher_experience))) + - geom_point(size = 2) + - labs( - title = "Title", - subtitle = "Subtitle", - x = "x label", - y = "y label", - fill = "fill", - caption = "caption" - ) + - theme_tntp_2018() + - scale_color_tntp() - -## ----fig.width=7, fig.length=5, warning=FALSE, echo = TRUE-------------------- -survey_question %>% - ggplot(aes(factor(question), fill = factor(answer))) + - geom_bar(position = position_fill()) + - labs( - title = "Title", - subtitle = "Subtitle", - x = "x label", - y = "y label", - caption = "caption" - ) + - theme_tntp_2018() + - scale_fill_tntp(palette = "likert_5pt") - diff --git a/docs/tntp-style-plots.Rmd b/docs/tntp-style-plots.Rmd deleted file mode 100644 index 552b7c6..0000000 --- a/docs/tntp-style-plots.Rmd +++ /dev/null @@ -1,214 +0,0 @@ ---- -title: "tntp-style-plots" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{tntp-style-plots} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -```{r setup, include=FALSE} -library(tntpr) - -# Load packages - first pacman, installing if necessary, then others -if (!require("pacman")) install.packages("pacman") -library(pacman) -p_load(devtools, tidyverse, knitr) -if (!require("patchwork")) devtools::install_github("thomasp85/patchwork") -``` - -```{r knitr_options, include = FALSE} -knitr::opts_chunk$set(collapse = TRUE, comment = "#>") -knitr::opts_chunk$set(error = TRUE) -knitr::opts_chunk$set(out.width = "750px", dpi = 300) -knitr::opts_chunk$set(dev = "png", fig.width = 8, fig.height = 4.8889, dpi = 300) -``` - -```{r sample_datasets, include=FALSE} -performance_data <- data.frame( - teacher_experience = c(rep("0-3 years ", 5), rep("4-6 years", 4), rep("7+ years", 4)), - y1_teacher_performance = sample(0:100, size = 13, rep = TRUE), - y2_teacher_performance = sample(0:100, size = 13, rep = TRUE) -) %>% - mutate(y1_performance_quartile = ntile(x = y1_teacher_performance, n = 4)) - -survey_question <- data.frame( - question = c(rep("To what extent do you \nagree with ...", 100)), - answer = sample(1:5, size = 100, rep = TRUE) -) -``` - - -### ggplot Themes - -tntpr now has two ggplot2 themes; the classic `theme_tntp()`, and the updated `theme_tntp_2018()`. - -```{r theme_tntp, echo=FALSE, warning=FALSE} -ex_plot_default <- performance_data %>% - ggplot(aes(factor(teacher_experience))) + - geom_bar() + - labs( - title = "default", - subtitle = "Subtitle", - x = "x label", - y = "y label", - caption = "caption" - ) - -ex_plot_theme_tntp <- performance_data %>% - ggplot(aes(factor(teacher_experience))) + - geom_bar() + - labs( - title = "theme_tntp()", - subtitle = "Subtitle", - x = "x label", - y = "y label", - caption = "caption" - ) + - theme_tntp() - -ex_plot_theme_tntp_2018 <- performance_data %>% - ggplot(aes(factor(teacher_experience))) + - geom_bar() + - labs( - title = "theme_tntp_2018()", - subtitle = "Subtitle", - x = "x label", - y = "y label", - caption = "caption" - ) + - theme_tntp_2018() -``` - -```{r, echo=FALSE, warning=FALSE, fig.width=7} -ex_plot_default -``` - -```{r, echo=FALSE, warning=FALSE, fig.width=7} -ex_plot_theme_tntp -``` - -```{r, echo=FALSE, warning=FALSE, fig.width=7} -ex_plot_theme_tntp_2018 -``` - -### palette_tntp() gives you access to TNTP-style colors: - -```{r plot_color_palette, include = FALSE} -# This function is for displaying color palettes -plot_color_palette <- function(dat, title = "") { - dat %>% - as.data.frame() %>% - rownames_to_column() %>% - set_names(c("labels", "hex")) %>% - mutate(row = row_number()) %>% - ggplot(aes(x = row, y = 1, fill = hex)) + - geom_bar(stat = "identity") + - geom_text(aes(label = paste0(labels, " (", hex, ")")), position = position_stack(vjust = 0.5), color = "white") + - scale_fill_identity() + - coord_flip() + - labs( - title = title, - x = NULL, - y = NULL - ) + - theme_tntp_2018() + - theme( - axis.text = element_blank(), - panel.grid = element_blank() - ) -} -``` - -You can still use palette_tntp the way you used to... - -```{r} -palette_tntp("dark_blue") -``` - -... but now you have a larger selection of colors. - -```{r, fig.height= 12, fig.width=5, warning=FALSE, echo = FALSE} -# A one-off function to plot color palettes, see code above if curious -plot_color_palette(tntpr::colors_tntp, title = "palette_tntp colors") -``` - -The `palette_tntp_scales()` function provides access to 5 TNTP color scales: - --- `"default"` (colors in the PPT and Word template), - --- `"likert_4pt"`,`"likert_5pt"`, `"likert_6pt"`,and - --- `"likert_orange_to_green_4pt"`, `"likert_orange_to_green_5pt"`, `"likert_orange_to_green_6pt"` - --- `colors_tntp_classic` (original `palette_tntp` colors from when this package was created years ago). - - -```{r, fig.height= 12, fig.width=7, warning=FALSE, echo = FALSE} -palettes <- c("default", "likert_4pt", "likert_5pt", "likert_6pt", "likert_orange_to_green_4pt", "likert_orange_to_green_5pt", "likert_orange_to_green_6pt") - -palette_plots <- map(palettes, palette_tntp_scales) %>% - map2(palettes, plot_color_palette) - -palette_plots[[1]] + (palette_plots[[2]] + palette_plots[[3]] + palette_plots[[4]]) + palette_plots[[5]] + palette_plots[[6]] + palette_plots[[7]] + plot_layout(ncol = 1) -``` - -### `scale_fill_tntp()` and `scale_color_tntp()` - -Supply TNTP-palette scales for filling and coloring. - -```{r, fig.width=7, fig.length=5, warning=FALSE, echo=TRUE} -performance_data %>% - ggplot(aes(factor(teacher_experience), fill = factor(y1_performance_quartile))) + - geom_bar(position = position_fill()) + - labs( - title = "Title", - subtitle = "Subtitle", - x = "x label", - y = "y label", - fill = "fill", - caption = "caption" - ) + - theme_tntp_2018() + - scale_fill_tntp() -``` - -```{r, fig.width=7, fig.length=5, warning=FALSE, echo=TRUE} -performance_data %>% - ggplot(aes(x = y1_teacher_performance, y = y2_teacher_performance, color = factor(teacher_experience))) + - geom_point(size = 2) + - labs( - title = "Title", - subtitle = "Subtitle", - x = "x label", - y = "y label", - fill = "fill", - caption = "caption" - ) + - theme_tntp_2018() + - scale_color_tntp() -``` - - -You can specify which color palette you want to use. -```{r, fig.width=7, fig.length=5, warning=FALSE, echo = TRUE} -survey_question %>% - ggplot(aes(factor(question), fill = factor(answer))) + - geom_bar(position = position_fill()) + - labs( - title = "Title", - subtitle = "Subtitle", - x = "x label", - y = "y label", - caption = "caption" - ) + - theme_tntp_2018() + - scale_fill_tntp(palette = "likert_5pt") -``` diff --git a/docs/tntp-style-plots.html b/docs/tntp-style-plots.html deleted file mode 100644 index 6e92881..0000000 --- a/docs/tntp-style-plots.html +++ /dev/null @@ -1,440 +0,0 @@ - - - - - - - - - - - - - - -tntp-style-plots - - - - - - - - - - - - - - - - - - - - - - - - - - -

tntp-style-plots

- - - -
-

ggplot Themes

-

tntpr now has two ggplot2 themes; the classic -theme_tntp(), and the updated -theme_tntp_2018().

-

-

-

-
-
-

palette_tntp() gives you access to TNTP-style colors:

-

You can still use palette_tntp the way you used to…

-
palette_tntp("dark_blue")
-#> [1] "#00355F"
-

… but now you have a larger selection of colors.

-

-

The palette_tntp_scales() function provides access to 5 -TNTP color scales:

-

"default" (colors in the PPT and Word template),

-

"likert_4pt","likert_5pt", -"likert_6pt",and

-

"likert_orange_to_green_4pt", -"likert_orange_to_green_5pt", -"likert_orange_to_green_6pt"

-

colors_tntp_classic (original -palette_tntp colors from when this package was created -years ago).

-
#> Error in `map()`:
-#> ℹ In index: 1.
-#> Caused by error in `match.arg()`:
-#> ! 'arg' should be one of "tntp_palette", "likert_4pt", "likert_5pt", "likert_6pt", "likert_orange_to_green_4pt", "likert_orange_to_green_5pt", "likert_orange_to_green_6pt"
-#> Error in eval(expr, envir, enclos): object 'palette_plots' not found
-
-
-

scale_fill_tntp() and -scale_color_tntp()

-

Supply TNTP-palette scales for filling and coloring.

-
performance_data %>%
-  ggplot(aes(factor(teacher_experience), fill = factor(y1_performance_quartile))) +
-  geom_bar(position = position_fill()) +
-  labs(
-    title = "Title",
-    subtitle = "Subtitle",
-    x = "x label",
-    y = "y label",
-    fill = "fill",
-    caption = "caption"
-  ) +
-  theme_tntp_2018() +
-  scale_fill_tntp()
-

-
performance_data %>%
-  ggplot(aes(x = y1_teacher_performance, y = y2_teacher_performance, color = factor(teacher_experience))) +
-  geom_point(size = 2) +
-  labs(
-    title = "Title",
-    subtitle = "Subtitle",
-    x = "x label",
-    y = "y label",
-    fill = "fill",
-    caption = "caption"
-  ) +
-  theme_tntp_2018() +
-  scale_color_tntp()
-

-

You can specify which color palette you want to use.

-
survey_question %>%
-  ggplot(aes(factor(question), fill = factor(answer))) +
-  geom_bar(position = position_fill()) +
-  labs(
-    title = "Title",
-    subtitle = "Subtitle",
-    x = "x label",
-    y = "y label",
-    caption = "caption"
-  ) +
-  theme_tntp_2018() +
-  scale_fill_tntp(palette = "likert_5pt")
-

-
- - - - - - - - - - - diff --git a/docs/tntpr-introduction.R b/docs/tntpr-introduction.R deleted file mode 100644 index 0f59c00..0000000 --- a/docs/tntpr-introduction.R +++ /dev/null @@ -1,53 +0,0 @@ -## ----include = FALSE---------------------------------------------------------- -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) - -## ----setup, include=FALSE----------------------------------------------------- -library(tntpr) -library(tidyverse) - -## ----include = FALSE---------------------------------------------------------- -library(tntpr) -library(magrittr) -library(ggplot2) -library(tidyr) -library(dplyr) - -## ----colors------------------------------------------------------------------- -palette_tntp("dark_blue", "orange", "light_gray") - -## ----------------------------------------------------------------------------- -palette_tntp_scales(palette = "likert_5pt") - -## ----scale_fill_tntp, fig.width=7, fig.align='center', warning=FALSE---------- -data.frame( - question = "To what extent do you agree...", - response = c( - rep("Strongly disagree", 3), - rep("Disagree", 4), - rep("Somewhat disagree", 3), - rep("Somewhat agree", 4), - rep("Agree", 10), - rep("Strongly agree", 2) - ) -) %>% - mutate(response = response %>% factor(levels = rev(c( - "Strongly disagree", - "Disagree", - "Somewhat disagree", - "Somewhat agree", - "Agree", - "Strongly agree" - )))) %>% - ggplot(aes(question, fill = response)) + - geom_bar(position = position_fill()) + - theme_tntp_2018(axis_text = "Y", grid = FALSE) + - labs( - x = NULL, y = NULL, - fill = "Response" - ) + - coord_flip() + - scale_fill_tntp(palette = "likert_6pt") - diff --git a/docs/tntpr-introduction.Rmd b/docs/tntpr-introduction.Rmd deleted file mode 100644 index 645fe8a..0000000 --- a/docs/tntpr-introduction.Rmd +++ /dev/null @@ -1,146 +0,0 @@ ---- -title: "tntpr-introduction" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{tntpr-introduction} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -```{r setup, include=FALSE} -library(tntpr) -library(tidyverse) -``` - -## About - -The `tntpr` package makes data science at TNTP easier and more accurate by -supplying tools that are needed for common TNTP analyses. - -## Package summary - -Some of the highlights of the package include: - -- A TNTP-themed RMarkdown template, for starting a new analysis with a shell that can already generate a TNTP-themed .docx report -- Functions for initializing a new repository or project folder with TNTP-standard directories and documentation -- TNTP-specific ggplot2 themes and color palettes -- Survey analysis tools -- Wrappers for quickly making typical TNTP-style charts (e.g., bar chart of the distribution of one variable, grouped by a second) -- Education-specific data management functions (e.g., `date_to_SY()` to convert continuous hire dates into school years using a specified cutoff date), and a built-in fake student achievement dataset to play with called `wisc`. - -## Installing the package - -This package is not on CRAN, and probably will not ever be. You'll need to install this package from its GitHub repository. You can add this to the top of your analysis script: - -```{r, include = FALSE} -library(tntpr) -library(magrittr) -library(ggplot2) -library(tidyr) -library(dplyr) -``` - -Once installed, you can update the package with `update_tntpr()`. - -## Usage - -### Reporting templates - -Start your analysis with a good-looking .docx file as output, right off the bat. -Right now we have just a single TNTP template, "Data Memo", but it can be -adapted and improved and if we have other common needs (a different set of -headings?) those can easily be separate templates. - -**To access templates once you've installed the tntpr package:** go to `File` -> `New File` -> `R Markdown` -> `From Template`. You'll see a choice "Data Memo" from the tntpr -package. Just specify the document's file name and the directory you want it -in (probably a subfolder of a Bitbucket repository) and you're off! - -A file `tntp-style-file.docx` will be copied into that directory; leave it there. -That provides the TNTP .docx stylings when you re-knit your R Markdown document. - -### Setting up repositories and subfolders -This saves time getting started with a new analysis and encourages use of common -file storage conventions and documentation that make work more browsable and transparent. - -**Directory conventions** - -TNTP's Bitbucket directory structure is a single repository per client or region, with -subdirectories in the repository corresponding to specific analysis projects. For example: - -> - -> - -> - -> - -> - -> - ... - -**Usage** - -After creating a new, empty repo in Bitbucket and cloning it, run `setup_repo` to -initialize it. This will create a subfolder as well, in which you'll conduct an analysis project. - -If the repository already exists, and you just want to begin a new analysis project, -create the new subdirectory with `setup_subdirectory`. - -Both functions take the same arguments, used to setup the project subfolder and its README: - - *subfolder*: what the name of the subdirectory should be, e.g., "xyz_instructional_audit" - - *proj_name*: the full name of the analysis project, e.g., "XYZ Public Schools Equity Study". Appears in the README. - - *analyst_name*: the analyst(s) working on this project. Appears in the README. - -`setup_repo` will also add a `.Rproj` RProject file, `.gitignore` file, and create a README.Md file for the main repository. - -### TNTP colors - -You can access the official TNTP-branded colors using `palette_tntp()`. This will -return a vector with hex code for our colors: -```{r colors} -palette_tntp("dark_blue", "orange", "light_gray") -``` - -Or you can select a specific TNTP palette (`"default"`, `"colors_tntp_classic"`, `"likert_4pt"`, -`"likert_5pt"`, or `"likert_6pt"`) with `palette_tntp_scales` and return a vector with hex -codes for that TNTP palette. - -```{r} -palette_tntp_scales(palette = "likert_5pt") -``` - -You can use these scale palettes as fill or color aesthetics in ggplot with `scale_fill_tntp` and `scale_color_tntp`. - -```{r scale_fill_tntp, fig.width=7, fig.align='center', warning=FALSE} -data.frame( - question = "To what extent do you agree...", - response = c( - rep("Strongly disagree", 3), - rep("Disagree", 4), - rep("Somewhat disagree", 3), - rep("Somewhat agree", 4), - rep("Agree", 10), - rep("Strongly agree", 2) - ) -) %>% - mutate(response = response %>% factor(levels = rev(c( - "Strongly disagree", - "Disagree", - "Somewhat disagree", - "Somewhat agree", - "Agree", - "Strongly agree" - )))) %>% - ggplot(aes(question, fill = response)) + - geom_bar(position = position_fill()) + - theme_tntp_2018(axis_text = "Y", grid = FALSE) + - labs( - x = NULL, y = NULL, - fill = "Response" - ) + - coord_flip() + - scale_fill_tntp(palette = "likert_6pt") -``` diff --git a/docs/tntpr-introduction.html b/docs/tntpr-introduction.html deleted file mode 100644 index 034de59..0000000 --- a/docs/tntpr-introduction.html +++ /dev/null @@ -1,501 +0,0 @@ - - - - - - - - - - - - - - -tntpr-introduction - - - - - - - - - - - - - - - - - - - - - - - - - - -

tntpr-introduction

- - - -
-

About

-

The tntpr package makes data science at TNTP easier and -more accurate by supplying tools that are needed for common TNTP -analyses.

-
-
-

Package summary

-

Some of the highlights of the package include:

-
    -
  • A TNTP-themed RMarkdown template, for starting a new analysis with a -shell that can already generate a TNTP-themed .docx report
  • -
  • Functions for initializing a new repository or project folder with -TNTP-standard directories and documentation
  • -
  • TNTP-specific ggplot2 themes and color palettes
  • -
  • Survey analysis tools
  • -
  • Wrappers for quickly making typical TNTP-style charts (e.g., bar -chart of the distribution of one variable, grouped by a second)
  • -
  • Education-specific data management functions (e.g., -date_to_SY() to convert continuous hire dates into school -years using a specified cutoff date), and a built-in fake student -achievement dataset to play with called wisc.
  • -
-
-
-

Installing the package

-

This package is not on CRAN, and probably will not ever be. You’ll -need to install this package from its GitHub repository. You can add -this to the top of your analysis script:

-

Once installed, you can update the package with -update_tntpr().

-
-
-

Usage

-
-

Reporting templates

-

Start your analysis with a good-looking .docx file as output, right -off the bat. Right now we have just a single TNTP template, “Data Memo”, -but it can be adapted and improved and if we have other common needs (a -different set of headings?) those can easily be separate templates.

-

To access templates once you’ve installed the tntpr -package: go to File -> New File --> R Markdown -> From Template. You’ll -see a choice “Data Memo” from the tntpr package. Just specify the -document’s file name and the directory you want it in (probably a -subfolder of a Bitbucket repository) and you’re off!

-

A file tntp-style-file.docx will be copied into that -directory; leave it there. That provides the TNTP .docx stylings when -you re-knit your R Markdown document.

-
-
-

Setting up repositories and subfolders

-

This saves time getting started with a new analysis and encourages -use of common file storage conventions and documentation that make work -more browsable and transparent.

-

Directory conventions

-

TNTP’s Bitbucket directory structure is a single repository per -client or region, with subdirectories in the repository corresponding to -specific analysis projects. For example:

-
-
    -
  • <City #1> -
      -
    • <Project #1 name>
    • -
    • <Project #2 name>
    • -
    • <Project #3 name>
    • -
  • -
  • <City or Region #2> -
      -
    • -
  • -
-
-

Usage

-

After creating a new, empty repo in Bitbucket and cloning it, run -setup_repo to initialize it. This will create a subfolder -as well, in which you’ll conduct an analysis project.

-

If the repository already exists, and you just want to begin a new -analysis project, create the new subdirectory with -setup_subdirectory.

-

Both functions take the same arguments, used to setup the project -subfolder and its README:
-- subfolder: what the name of the subdirectory should be, e.g., -“xyz_instructional_audit”
-- proj_name: the full name of the analysis project, e.g., “XYZ -Public Schools Equity Study”. Appears in the README.
-- analyst_name: the analyst(s) working on this project. Appears -in the README.

-

setup_repo will also add a .Rproj RProject -file, .gitignore file, and create a README.Md file for the -main repository.

-
-
-

TNTP colors

-

You can access the official TNTP-branded colors using -palette_tntp(). This will return a vector with hex code for -our colors:

-
palette_tntp("dark_blue", "orange", "light_gray")
-#> [1] "#00355F" "#EA8835" "#C1C2C4"
-

Or you can select a specific TNTP palette ("default", -"colors_tntp_classic", "likert_4pt", -"likert_5pt", or "likert_6pt") with -palette_tntp_scales and return a vector with hex codes for -that TNTP palette.

-
palette_tntp_scales(palette = "likert_5pt")
-#>   dark_blue medium_blue  light_grey    orange_3    orange_4 
-#>   "#00355F"   "#00A4C7"   "#C1C2C4"   "#EA8835"   "#BC5A07"
-

You can use these scale palettes as fill or color aesthetics in -ggplot with scale_fill_tntp and -scale_color_tntp.

-
data.frame(
-  question = "To what extent do you agree...",
-  response = c(
-    rep("Strongly disagree", 3),
-    rep("Disagree", 4),
-    rep("Somewhat disagree", 3),
-    rep("Somewhat agree", 4),
-    rep("Agree", 10),
-    rep("Strongly agree", 2)
-  )
-) %>%
-  mutate(response = response %>% factor(levels = rev(c(
-    "Strongly disagree",
-    "Disagree",
-    "Somewhat disagree",
-    "Somewhat agree",
-    "Agree",
-    "Strongly agree"
-  )))) %>%
-  ggplot(aes(question, fill = response)) +
-  geom_bar(position = position_fill()) +
-  theme_tntp_2018(axis_text = "Y", grid = FALSE) +
-  labs(
-    x = NULL, y = NULL,
-    fill = "Response"
-  ) +
-  coord_flip() +
-  scale_fill_tntp(palette = "likert_6pt")
-

-
-
- - - - - - - - - - - diff --git a/vignettes/date_to_sy_worked_example.Rmd b/vignettes/date_to_sy_worked_example.Rmd index b160c26..6106be1 100644 --- a/vignettes/date_to_sy_worked_example.Rmd +++ b/vignettes/date_to_sy_worked_example.Rmd @@ -2,16 +2,11 @@ title: 'Worked Example: `tntpr::date_to_sy`' output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{date_to_sy} + %\VignetteIndexEntry{Worked Example: `tntpr::date_to_sy`} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- - - - - - ```{r setup,echo=FALSE, include = FALSE} library(knitr) knitr::opts_chunk$set(error = TRUE) diff --git a/vignettes/factorizing_a_dataset.Rmd b/vignettes/factorizing_a_dataset.Rmd index e56f55d..bc77cf3 100644 --- a/vignettes/factorizing_a_dataset.Rmd +++ b/vignettes/factorizing_a_dataset.Rmd @@ -2,16 +2,11 @@ title: "Factorizing a survey dataset" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{factorizing-a-dataset} + %\VignetteIndexEntry{Factorizing a survey dataset} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- - - - - - ```{r setup,echo=FALSE, include = FALSE} library(knitr) knitr::opts_chunk$set(error = TRUE) diff --git a/vignettes/tntp-style-plots.Rmd b/vignettes/tntp-style-plots.Rmd index 552b7c6..6c802b3 100644 --- a/vignettes/tntp-style-plots.Rmd +++ b/vignettes/tntp-style-plots.Rmd @@ -1,8 +1,8 @@ --- -title: "tntp-style-plots" +title: "Old - TNTP style plots" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{tntp-style-plots} + %\VignetteIndexEntry{Old - TNTP style plots} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- diff --git a/vignettes/tntpr-introduction.Rmd b/vignettes/tntpr-introduction.Rmd index 645fe8a..4675342 100644 --- a/vignettes/tntpr-introduction.Rmd +++ b/vignettes/tntpr-introduction.Rmd @@ -1,8 +1,8 @@ --- -title: "tntpr-introduction" +title: "Introduction to Package" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{tntpr-introduction} + %\VignetteIndexEntry{Introduction to Package} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- diff --git a/vignettes/visualization-cookbook.Rmd b/vignettes/visualization-cookbook.Rmd index 3036c28..d38067f 100644 --- a/vignettes/visualization-cookbook.Rmd +++ b/vignettes/visualization-cookbook.Rmd @@ -2,7 +2,7 @@ title: "TNTP Visualization Cookbook" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{visualization-cookbook} + %\VignetteIndexEntry{TNTP Visualization Cookbook} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} ---