From 68654c0e0b82862389c7ba5e661d75321e67126b Mon Sep 17 00:00:00 2001 From: Brendan Cullen Date: Thu, 8 Aug 2024 14:35:08 -0700 Subject: [PATCH] add hints, remove a redundant exercise --- slides/warm-up.Rmd | 110 +++++++++++++++++------------------- slides/warm-up.html | 132 ++++++++++++++++++++------------------------ 2 files changed, 111 insertions(+), 131 deletions(-) diff --git a/slides/warm-up.Rmd b/slides/warm-up.Rmd index 9514abc..12ab6b4 100644 --- a/slides/warm-up.Rmd +++ b/slides/warm-up.Rmd @@ -106,15 +106,23 @@ knitr::include_graphics("images/welcome/campsite_warmup.png") ## `r emo::ji("rocket")` Warm-up - -```{r label, echo = FALSE, out.width="60%"} +.pull-left[ +```{r label, echo = FALSE} knitr::include_graphics("images/welcome/your-turn-example.png") ``` -* __Working together__ with your neighbors is encouraged. +] + +.pull-right[ + +* __Work together__ with your neighbors + +* There are often several different ways of getting to the right answer. * After 1-2 minutes, we'll go over the answer together. And then move on to the next question. +] + --- class: inverse, center, middle @@ -160,7 +168,9 @@ class: your-turn # Your Turn 1 -Read in the `seattle_pets` data and explore it. Can you recreate output that looks like this? +**Read in the `seattle_pets` data and explore it. Can you recreate output that looks like this?** + +`r emo::ji("bulb")` Hint: What function from dplyr gives you a quick glimpse of your data? ```{r read-in-outbreaks, include = FALSE} seattle_pets <- readr::read_csv("data/warm-up/seattle_pets.csv") @@ -192,7 +202,10 @@ class: your-turn # Your Turn 2 -How many different species are represented in `seattle_pets`? How many pets of each species are there? +**How many different species are represented in `seattle_pets`? How many pets of each species are there?** + + +`r emo::ji("bulb")` Hint: What function from dplyr lets you count the unique values of one or more variables? --- @@ -200,7 +213,8 @@ How many different species are represented in `seattle_pets`? How many pets of e .pull-left[ ```{r eval = FALSE} -seattle_pets |> count(species, sort = TRUE) +seattle_pets |> + count(species, sort = TRUE) ``` or... @@ -224,65 +238,41 @@ class: your-turn # Your Turn 3 -What is the most popular pet name in this data set? *Hint* Look up the help documentation for `slice_max()` from dplyr. +**What is the most popular pet name in this data set?** + +`r emo::ji("bulb")` Hint: Look up the help documentation for `slice_max()` from dplyr. --- # Solution 3 +.pull-left[ + ```{r eval = FALSE} seattle_pets |> count(animal_name) |> - slice_max(n) |> + slice_max(order_by = n) |> pull(animal_name) ``` +or... -```{r echo = FALSE, message = FALSE} +```{r eval=FALSE} seattle_pets |> count(animal_name) |> - slice_max(n) |> + filter(n == max(n)) |> pull(animal_name) ``` ---- -class: your-turn - -# Your Turn 4 - -How many different primary dog breeds are there? - ---- - -# Solution 4 - -.pull-left[ - -```{r eval=FALSE} -seattle_pets |> - filter(species == "Dog") |> - distinct(primary_breed) |> - nrow() -``` - -or... - -```{r eval = FALSE} -seattle_pets |> - filter(species == "Dog") |> - pull(primary_breed) |> - n_distinct() -``` - ] .pull-right[ -```{r echo=FALSE} -seattle_pets |> - filter(species == "Dog") |> - pull(primary_breed) |> - n_distinct() +```{r echo = FALSE, message = FALSE} +seattle_pets |> + count(animal_name) |> + slice_max(order_by = n) |> + pull(animal_name) ``` ] @@ -290,44 +280,46 @@ seattle_pets |> --- class: your-turn -# Your Turn 5 +# Your Turn 4 + +**What are the top 10 most popular primary dog breeds?** -Let's narrow this down -- what are the top 10 most popular dog breeds? +`r emo::ji("bulb")` Hint: Try using `count()` and `slice_max()` again in your solution -- which argument to `slice_max()` specifies the number of rows to return? --- -# Solution 5 +# Solution 4 ```{r eval=FALSE} seattle_pets |> filter(species == "Dog") |> count(primary_breed) |> - slice_max(n, n = 10) + slice_max(order_by = n, n = 10) ``` ```{r echo=FALSE} seattle_pets |> filter(species == "Dog") |> count(primary_breed) |> - slice_max(n, n = 10) + slice_max(order_by = n, n = 10) ``` --- class: your-turn -# Your Turn 6 -- last one! +# Your Turn 5 -- last one! -Visualize the top 10 dog breeds in a bar chart. +**Visualize the top 10 dog breeds, re-creating the plot below.** .pull-left[ -**Hint**: Start with your code from the previous exercise: +`r emo::ji("bulb")` Hint: Start with your code from the previous exercise: ```{r eval=FALSE} seattle_pets |> filter(species == "Dog") |> count(primary_breed) |> - slice_max(n, n = 10) |> + slice_max(order_by = n, n = 10) |> ____ # add code here ``` @@ -338,7 +330,7 @@ seattle_pets |> seattle_pets |> filter(species == "Dog") |> count(primary_breed) |> - slice_max(n, n = 10) |> + slice_max(order_by = n, n = 10) |> ggplot(aes(primary_breed, n)) + geom_col() + coord_flip() @@ -348,7 +340,7 @@ seattle_pets |> --- -# Solution 6 +# Solution 5 .pull-left[ @@ -356,7 +348,7 @@ seattle_pets |> seattle_pets |> filter(species == "Dog") |> count(primary_breed) |> - slice_max(n, n = 10) |> + slice_max(order_by = n, n = 10) |> ggplot(aes(primary_breed, n)) + geom_col() + coord_flip() @@ -369,7 +361,7 @@ seattle_pets |> seattle_pets |> filter(species == "Dog") |> count(primary_breed) |> - slice_max(n, n = 10) |> + slice_max(order_by = n, n = 10) |> ggplot(aes(primary_breed, n)) + geom_col() + coord_flip() @@ -397,7 +389,7 @@ We would need to handle **factors** (categorical variables). seattle_pets |> filter(species == "Dog") |> count(primary_breed) |> - slice_max(n, n = 20) |> + slice_max(n, n = 10) |> ggplot(aes(fct_reorder(primary_breed, n), n)) + geom_col() + coord_flip() + @@ -417,7 +409,7 @@ We would need to handle **dates**. .pull-right[ -```{r echo=FALSE, out.width = "90%"} +```{r echo=FALSE, message = FALSE, warning = FALSE, out.width = "90%"} seattle_pets |> mutate( license_issue_date = mdy(license_issue_date), diff --git a/slides/warm-up.html b/slides/warm-up.html index 6bd8fc0..7d177e2 100644 --- a/slides/warm-up.html +++ b/slides/warm-up.html @@ -62,13 +62,21 @@ ## 🚀 Warm-up +.pull-left[ +<img src="images/welcome/your-turn-example.png" width="577" /> + +] -<img src="images/welcome/your-turn-example.png" width="60%" /> +.pull-right[ -* __Working together__ with your neighbors is encouraged. +* __Work together__ with your neighbors + +* There are often several different ways of getting to the right answer. * After 1-2 minutes, we'll go over the answer together. And then move on to the next question. +] + --- class: inverse, center, middle @@ -106,7 +114,9 @@ # Your Turn 1 -Read in the `seattle_pets` data and explore it. Can you recreate output that looks like this? +**Read in the `seattle_pets` data and explore it. Can you recreate output that looks like this?** + +💡 Hint: What function from dplyr gives you a quick glimpse of your data? @@ -114,13 +124,13 @@ ``` ## Rows: 43,683 ## Columns: 7 -## $ license_issue_date <chr> "December 18 2015", "June 14 2016", "August 04 2016", "February 13 2019", "August 10 2019", "November … -## $ license_number <chr> "S107948", "S116503", "S119301", "962273", "S133113", "8002549", "S112835", "S131986", "S112996", "S14… -## $ animal_name <chr> "Zen", "Misty", "Lyra", "Veronica", "Spider", "Maxx", "Diamond", "Nacho", "Pixel", "C.C.", "Gemma", "O… -## $ species <chr> "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat… -## $ primary_breed <chr> "Domestic Longhair", "Siberian", "Mix", "Domestic Longhair", "LaPerm", "American Shorthair", "Domestic… -## $ secondary_breed <chr> "Mix", NA, NA, NA, NA, NA, "Mix", "Mix", "Mix", "Mix", "American Shorthair", "Mix", NA, NA, "Mix", NA,… -## $ zip_code <dbl> 98117, 98117, 98121, 98107, 98115, 98125, 98103, 98126, 98112, 98117, 98126, 98199, 98115, 98116, 9813… +## $ license_issue_date <chr> "December 18 2015", "June 14 2016", "August 04 2016… +## $ license_number <chr> "S107948", "S116503", "S119301", "962273", "S133113… +## $ animal_name <chr> "Zen", "Misty", "Lyra", "Veronica", "Spider", "Maxx… +## $ species <chr> "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "C… +## $ primary_breed <chr> "Domestic Longhair", "Siberian", "Mix", "Domestic L… +## $ secondary_breed <chr> "Mix", NA, NA, NA, NA, NA, "Mix", "Mix", "Mix", "Mi… +## $ zip_code <dbl> 98117, 98117, 98121, 98107, 98115, 98125, 98103, 98… ``` @@ -141,13 +151,13 @@ ``` ## Rows: 43,683 ## Columns: 7 -## $ license_issue_date <chr> "December 18 2015", "June 14 2016", "August 04 2016", "February 13 2019", "August 10 2019", "November … -## $ license_number <chr> "S107948", "S116503", "S119301", "962273", "S133113", "8002549", "S112835", "S131986", "S112996", "S14… -## $ animal_name <chr> "Zen", "Misty", "Lyra", "Veronica", "Spider", "Maxx", "Diamond", "Nacho", "Pixel", "C.C.", "Gemma", "O… -## $ species <chr> "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat… -## $ primary_breed <chr> "Domestic Longhair", "Siberian", "Mix", "Domestic Longhair", "LaPerm", "American Shorthair", "Domestic… -## $ secondary_breed <chr> "Mix", NA, NA, NA, NA, NA, "Mix", "Mix", "Mix", "Mix", "American Shorthair", "Mix", NA, NA, "Mix", NA,… -## $ zip_code <dbl> 98117, 98117, 98121, 98107, 98115, 98125, 98103, 98126, 98112, 98117, 98126, 98199, 98115, 98116, 9813… +## $ license_issue_date <chr> "December 18 2015", "June 14 2016", "August 04 2016… +## $ license_number <chr> "S107948", "S116503", "S119301", "962273", "S133113… +## $ animal_name <chr> "Zen", "Misty", "Lyra", "Veronica", "Spider", "Maxx… +## $ species <chr> "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "C… +## $ primary_breed <chr> "Domestic Longhair", "Siberian", "Mix", "Domestic L… +## $ secondary_breed <chr> "Mix", NA, NA, NA, NA, NA, "Mix", "Mix", "Mix", "Mi… +## $ zip_code <dbl> 98117, 98117, 98121, 98107, 98115, 98125, 98103, 98… ``` --- @@ -155,7 +165,10 @@ # Your Turn 2 -How many different species are represented in `seattle_pets`? How many pets of each species are there? +**How many different species are represented in `seattle_pets`? How many pets of each species are there?** + + +💡 Hint: What function from dplyr lets you count the unique values of one or more variables? --- @@ -164,7 +177,8 @@ .pull-left[ ```r -seattle_pets |> count(species, sort = TRUE) +seattle_pets |> + count(species, sort = TRUE) ``` or... @@ -196,55 +210,32 @@ # Your Turn 3 -What is the most popular pet name in this data set? *Hint* Look up the help documentation for `slice_max()` from dplyr. - ---- - -# Solution 3 - - -```r -seattle_pets |> - count(animal_name) |> - slice_max(n) |> - pull(animal_name) -``` - - - -``` -## [1] "Luna" -``` - ---- -class: your-turn +**What is the most popular pet name in this data set?** -# Your Turn 4 - -How many different primary dog breeds are there? +💡 Hint: Look up the help documentation for `slice_max()` from dplyr. --- -# Solution 4 +# Solution 3 .pull-left[ ```r seattle_pets |> - filter(species == "Dog") |> - distinct(primary_breed) |> - nrow() + count(animal_name) |> + slice_max(order_by = n) |> + pull(animal_name) ``` or... ```r -seattle_pets |> - filter(species == "Dog") |> - pull(primary_breed) |> - n_distinct() +seattle_pets |> + count(animal_name) |> + filter(n == max(n)) |> + pull(animal_name) ``` ] @@ -253,7 +244,7 @@ ``` -## [1] 274 +## [1] "Luna" ``` ] @@ -261,20 +252,22 @@ --- class: your-turn -# Your Turn 5 +# Your Turn 4 + +**What are the top 10 most popular primary dog breeds?** -Let's narrow this down -- what are the top 10 most popular dog breeds? +💡 Hint: Try using `count()` and `slice_max()` again in your solution -- which argument to `slice_max()` specifies the number of rows to return? --- -# Solution 5 +# Solution 4 ```r seattle_pets |> filter(species == "Dog") |> count(primary_breed) |> - slice_max(n, n = 10) + slice_max(order_by = n, n = 10) ``` @@ -297,33 +290,33 @@ --- class: your-turn -# Your Turn 6 -- last one! +# Your Turn 5 -- last one! -Visualize the top 10 dog breeds in a bar chart. +**Visualize the top 10 dog breeds, re-creating the plot below.** .pull-left[ -**Hint**: Start with your code from the previous exercise: +💡 Hint: Start with your code from the previous exercise: ```r seattle_pets |> filter(species == "Dog") |> count(primary_breed) |> - slice_max(n, n = 10) |> + slice_max(order_by = n, n = 10) |> ____ # add code here ``` ] .pull-right[ -<img src="warm-up_files/figure-html/unnamed-chunk-16-1.png" width="80%" /> +<img src="warm-up_files/figure-html/unnamed-chunk-14-1.png" width="80%" /> ] --- -# Solution 6 +# Solution 5 .pull-left[ @@ -332,7 +325,7 @@ seattle_pets |> filter(species == "Dog") |> count(primary_breed) |> - slice_max(n, n = 10) |> + slice_max(order_by = n, n = 10) |> ggplot(aes(primary_breed, n)) + geom_col() + coord_flip() @@ -341,7 +334,7 @@ ] .pull-right[ -<img src="warm-up_files/figure-html/unnamed-chunk-18-1.png" width="80%" /> +<img src="warm-up_files/figure-html/unnamed-chunk-16-1.png" width="80%" /> ] @@ -361,7 +354,7 @@ ] .pull-right[ -<img src="warm-up_files/figure-html/unnamed-chunk-19-1.png" width="90%" /> +<img src="warm-up_files/figure-html/unnamed-chunk-17-1.png" width="90%" /> ] @@ -376,12 +369,7 @@ .pull-right[ - -``` -## `summarise()` has grouped output by 'month'. You can override using the `.groups` argument. -``` - -<img src="warm-up_files/figure-html/unnamed-chunk-20-1.png" width="90%" /> +<img src="warm-up_files/figure-html/unnamed-chunk-18-1.png" width="90%" /> ]