Missing data imputation and regression fitting
Mind map
Goal
- impute missing data
- fit regression
library(tidyverse)
library(AER)
library(naniar)
library(mice)
Steps
Step 01. Load the data and generate missing values
# Load the Mroz data, keep the analysis variables, and normalise column types.
# NOTE(review): the path is absolute and machine-specific -- confirm it exists
# wherever this script is run.
wages_data <- read_csv("/Users/zero/myrepo/jixingBlogdown/data/Mroz.csv") %>%
  select(
    wage, educ, fatheduc, motheduc, inlf, hours,
    kidslt6, kidsge6, age, huswage,
    mtr, unem, city, exper
  ) %>%
  # across() replaces the superseded mutate_at() scoped verb.
  mutate(
    across(
      c(
        kidslt6, kidsge6, hours, educ, age, wage, huswage, mtr,
        motheduc, fatheduc, unem, exper
      ),
      as.numeric
    )
  ) %>%
  mutate(across(c(city, inlf), as.character)) %>%
  # Wages that are missing after the numeric conversion are set to 0.
  mutate(wage = replace_na(wage, 0))
# Missingness pattern of the cleaned data, before any values are removed.
vis_miss(wages_data)

# Generate missing values with mice::ampute(); the amputed data set is the
# `amp` element of the returned object.
# NOTE(review): no seed is set, so the generated missingness presumably
# differs from run to run -- confirm whether reproducibility is needed.
wages_mis <- ampute(wages_data)[["amp"]]

# Missingness pattern after amputation.
vis_miss(wages_mis)
Step 02. impute data
# Impute the amputed data in parallel: 10 imputations, 20 iterations each.
# NOTE(review): cl.type = "FORK" presumably relies on process forking and may
# not be available on Windows -- confirm for the target platform.
imp_wages <- mice::parlmice(
  data = wages_mis,
  m = 10,
  maxit = 20,
  cl.type = "FORK"
)

# Extract the completed data sets in "long" form; the code below relies on the
# `.imp` column to tell the imputations apart.
imp_wages_df <- mice::complete(imp_wages, action = "long")
#' Inverse hyperbolic sine transform
#'
#' Computes log(x + sqrt(x^2 + 1)) element-wise. Unlike log(), it is defined
#' at zero and for negative values.
#'
#' Implemented with base R's asinh(), which is the same function
#' mathematically. The literal formula loses all precision for large negative
#' x (x + sqrt(x^2 + 1) cancels to 0, giving -Inf) and overflows to Inf once
#' x^2 exceeds the double range.
#'
#' @param x A numeric vector.
#' @return A numeric vector the same length as `x`.
ihs <- function(x) {
  asinh(x)
}
# Add the model variables to every imputed data set: the inverse hyperbolic
# sine of wage and the square of experience.
# Both transforms are element-wise, so grouping by `.imp` is not needed here;
# the result is deliberately left ungrouped. as_tibble() keeps the result a
# tibble.
imp_wages_df <- imp_wages_df %>%
  as_tibble() %>%
  mutate(
    ihs_wage = ihs(wage),
    exper2 = exper^2
  )
Step 03. nest
# Collapse the long data into one row per imputation, with that imputation's
# rows stored as a tibble in the `data` list-column.
# Note: this overwrites `imp_wages`, which held the imputation object before.
imp_wages <- nest(group_by(imp_wages_df, .imp))

# Print the nested tibble.
imp_wages
## # A tibble: 10 x 2
## .imp data
## <int> <list>
## 1 1 <tibble [753 × 17]>
## 2 2 <tibble [753 × 17]>
## 3 3 <tibble [753 × 17]>
## 4 4 <tibble [753 × 17]>
## 5 5 <tibble [753 × 17]>
## 6 6 <tibble [753 × 17]>
## 7 7 <tibble [753 × 17]>
## 8 8 <tibble [753 × 17]>
## 9 9 <tibble [753 × 17]>
## 10 10 <tibble [753 × 17]>
Step 04. fit
# Fit the same linear model to each imputed data set. The fitted `lm` objects
# are stored in the `lin_reg` list-column, one per row of `imp_wages`.
imp_wages_reg <- imp_wages %>%
  mutate(
    lin_reg = map(
      data,
      ~ lm(
        ihs_wage ~ educ + inlf + hours +
          kidslt6 + kidsge6 + age + huswage +
          mtr + unem + city + exper + exper2,
        data = .
      )
    )
  )
Step 05. pool
# Pool the per-imputation linear models into a single set of estimates.
pool_lin_reg <- pool(imp_wages_reg[["lin_reg"]])
Step 06. summary
# Print the pooled coefficient table (estimate, std.error, statistic, df,
# p.value).
pool_lin_reg %>%
  summary()
## estimate std.error statistic df p.value
## (Intercept) 1.012423e+00 3.461251e-01 2.92501944 400.7382 3.557091e-03
## educ 4.539668e-02 8.424241e-03 5.38881542 656.8772 9.735809e-08
## inlf 1.866331e+00 5.583397e-02 33.42644968 231.0175 0.000000e+00
## hours -6.421963e-05 3.382794e-05 -1.89841948 135.1251 5.805680e-02
## kidslt6 -5.544361e-03 3.795262e-02 -0.14608637 609.2085 8.838958e-01
## kidsge6 -5.237696e-03 1.509400e-02 -0.34700510 301.2112 7.286930e-01
## age 2.382734e-04 2.760765e-03 0.08630701 549.0705 9.312473e-01
## huswage -2.568072e-02 6.643815e-03 -3.86535757 319.0680 1.213720e-04
## mtr -2.089116e+00 3.418482e-01 -6.11123791 434.8243 1.650737e-09
## unem 1.802800e-03 5.595457e-03 0.32218990 573.9496 7.474062e-01
## city 1.423946e-02 3.801650e-02 0.37456010 432.9497 7.081025e-01
## exper 1.279662e-02 6.353684e-03 2.01404701 579.9830 4.439128e-02
## exper2 -1.819858e-04 1.987354e-04 -0.91571864 691.7068 3.601336e-01