-
Notifications
You must be signed in to change notification settings - Fork 0
/
3.2 Basic Data Wrangling.R
39 lines (27 loc) · 1.31 KB
/
3.2 Basic Data Wrangling.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
## Basic Data Wrangling
# "dplyr" is a package that provides intuitive functions for working with
# tables. Install it only when it is missing, so that re-running this script
# does not download the package again every time.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
library(dslabs)
data("murders")

# Add a column to the data table using "mutate": here, "rate" is "total"
# divided by "population", scaled to a rate per 100,000.
murders <- mutate(murders, rate = total / population * 100000)
head(murders)

# Filter the data by subsetting rows using "filter".
filter(murders, rate <= 0.71)

# Subset the data by selecting specific columns using "select".
new_table <- select(murders, state, region, rate)

# The pipe operator (%>%) lets us perform a series of operations by sending
# the result of one function to the next function.
# This chain is the piped form of the select + filter steps above, so it must
# use the same 0.71 threshold as the non-piped filter call.
murders %>%
  select(state, region, rate) %>%
  filter(rate <= 0.71)
## Creating Data Frames
# A data frame is a data structure that organizes data into a two-dimensional
# table of rows and columns, much like a spreadsheet.
grades <- data.frame(
  names = c("John", "Juan", "Jean", "Yao"),
  exam_1 = c(95, 80, 90, 85),
  exam_2 = c(90, 85, 85, 90),
  stringsAsFactors = FALSE
)
grades

# Be warned: before R 4.0.0, data.frame() converted character columns into
# factors by default. Passing "stringsAsFactors = FALSE" (as above) keeps
# them as character vectors on every R version; the check below confirms it.
class(grades[["names"]])