forked from scunning1975/mixtape_learnr
-
Notifications
You must be signed in to change notification settings - Fork 26
/
Panel_Data.Rmd
150 lines (119 loc) · 5.24 KB
/
Panel_Data.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
---
title: "Causal Inference: <br> *The Mixtape*"
subtitle: "<it>Panel Data</it>"
output:
  learnr::tutorial:
    css: css/style.css
    highlight: "kate"
runtime: shiny_prerendered
---
## Welcome
This is material for the **Panel Data** chapter in Scott Cunningham's book, [Causal Inference: The Mixtape.](https://mixtape.scunning.com/)
### Packages needed
The first thing you need to do is install a few packages to make sure everything runs:
```{r, eval = FALSE}
# One-time installation of the packages this tutorial relies on.
# estimatr supplies lm_robust(), which the regressions below call.
install.packages(c(
  "tidyverse", "cli", "haven", "rmarkdown", "learnr", "stargazer",
  "estimatr"
))
# This Chapter Only
install.packages("plm")
```
### Load
```{r load, warning=FALSE, message=FALSE}
library(learnr)
library(haven)
library(tidyverse)
library(stargazer)
library(estimatr) # provides lm_robust()
# This Chapter Only
library(plm)
# Give each exercise up to 600 seconds (10 minutes) of run time
# before learnr stops it
options(tutorial.exercise.timelimit = 600)
# read_data function
# Builds the download URL for a file in the mixtape GitHub repository
# and reads it with haven::read_dta().
#   df: file name inside the repository, e.g. "sasp_panel.dta"
# Returns whatever haven::read_dta() returns for that file.
read_data <- function(df) {
  url <- paste0("https://raw.github.com/scunning1975/mixtape/master/", df)
  haven::read_dta(url)
}
```
## Survey of Adult Service Providers
```{r sasp, exercise=TRUE, echo=FALSE}
# Load the panel, drop every row that contains a missing value, then
# sort the rows by id and session.
sasp <- read_data("sasp_panel.dta") %>%
  na.omit() %>%
  arrange(id, session)

# Balance the panel with plm's "shared.individuals" rule.
balanced_sasp <- sasp %>%
  make.pbalanced(balance.type = "shared.individuals")
# Demean Data
# Within transformation: for each variable used in the regressions,
# subtract the mean of that same variable within each id and store the
# result as demean_<variable>. Driving the transformation from a single
# vector of names means no variable can be centred on another
# variable's mean.
demean_vars <- c(
  "lnw", "age", "asq", "bmi", "hispanic", "black", "other", "asian",
  "schooling", "cohab", "married", "divorced", "separated", "age_cl",
  "unsafe", "llength", "reg", "asq_cl", "appearance_cl",
  "provider_second", "asian_cl", "black_cl", "hispanic_cl",
  "othrace_cl", "hot", "massage_cl"
)
balanced_sasp <- balanced_sasp %>%
  mutate(
    across(
      all_of(demean_vars),
      # ave(.x, id) returns the per-id mean, aligned row by row with .x
      ~ .x - ave(.x, id),
      .names = "demean_{.col}"
    )
  )
#-- POLS
# Pooled OLS: regress log wage on the provider, client and session
# covariates using every row of the balanced panel, with lm_robust()'s
# default standard errors.
cli::cli_h1("Pooled OLS")
ols <- lm_robust(
  lnw ~ age + asq + bmi + hispanic + black + other + asian + schooling +
    cohab + married + divorced + separated +
    age_cl + unsafe + llength + reg + asq_cl + appearance_cl +
    provider_second + asian_cl + black_cl + hispanic_cl +
    othrace_cl + hot + massage_cl,
  data = balanced_sasp
)
summary(ols)
#-- FE
# Fixed-effects (within) estimator: same specification as the pooled
# model, with the id fixed effects absorbed by lm_robust().
cli::cli_h1("Fixed Effects")
# Named fe_formula so it does not mask stats::formula().
fe_formula <- as.formula("lnw ~ age + asq + bmi + hispanic + black + other + asian + schooling +
cohab + married + divorced + separated +
age_cl + unsafe + llength + reg + asq_cl + appearance_cl +
provider_second + asian_cl + black_cl + hispanic_cl +
othrace_cl + hot + massage_cl")
model_fe <- lm_robust(
  formula = fe_formula,
  data = balanced_sasp,
  # Full argument name; `fixed_effect` only worked via partial matching.
  fixed_effects = ~id,
  se_type = "stata"
)
summary(model_fe)
#-- Demean OLS
# Regress the demeaned log wage on the demeaned covariates (the
# demean_* columns added to balanced_sasp above) and print the summary.
cli::cli_h1("Demeaned OLS")
# The formula is built from a string; every term is a demean_* column.
dm_formula <- as.formula("demean_lnw ~ demean_age + demean_asq + demean_bmi +
demean_hispanic + demean_black + demean_other +
demean_asian + demean_schooling + demean_cohab +
demean_married + demean_divorced + demean_separated +
demean_age_cl + demean_unsafe + demean_llength + demean_reg +
demean_asq_cl + demean_appearance_cl +
demean_provider_second + demean_asian_cl + demean_black_cl +
demean_hispanic_cl + demean_othrace_cl +
demean_hot + demean_massage_cl")
# Standard errors are clustered on id, using the "stata" se_type.
ols_demean <- lm_robust(formula = dm_formula,
data = balanced_sasp, clusters = id,
se_type = "stata")
summary(ols_demean)
```
#### QUESTIONS
- Interpret the effect of the natural log of session length on the natural log of the hourly wage. What economic theory might explain this relationship? (HINT: Consider the role that a supplier's fixed versus variable costs may play in the hourly wage.)
- Becker described discrimination as "taste-based": social interactions with people of another race are factored into marginal cost. Given that these differences persist, what does this imply about the effect that competition is having on discrimination?
- Hamermesh and Biddle suggest that beauty is valued on the market. What are some reasons why client beauty has no effect once we use the within estimators?
- What other interesting results did you find in this analysis? Which ones surprised you and which ones were intuitive and why?