*** Code for Roodman Worms post #1
*** requires public Worms and Worms at Work data sets (doi.org/10.7910/DVN/28038, doi.org/10.7910/DVN/ZNSY5O) and "outreg" Stata module by John Luke Gallup
*
* Layout of this script:
*   1. Build a school-level data set (attendance roster + school variables + 1998 pupil questionnaire)
*   2. Replicate (revised) Table I and run balance tests across the three groups (wgrp)
*   3. Compare school-recorded and researcher-recorded pre-treatment attendance
*   4. Re-estimate Miguel & Kremer Table 9, col. 1, with and without school-recorded attendance
*   5. Baird et al. 2016 wage-earnings regression
*
* NOTE: one statement per line is required. Stata's "*" and "//" comments run to end of line,
* and "///" continues a command onto the next line, so this file must not be re-flowed.

cd "C:\Users\David\Dropbox\Documents\Work\Library\Worms\Miguel & Kremer 2004\data"

* load attendance data
use namelist if visit==981, clear
collapse (count) np=pupid (mean) wgrp sex elg98 stdgap yrbirth, by(sch98v1)
rename sch98v1 schid

* add school-level variables
merge 1:1 schid using schoolvar, nogen keepusing(mk96_s distlake pup_pop latr_pup z_inf98 pop1_3km_updated pop1_36k_updated popT_3km_updated popT_36k_updated) // school data
replace mk96_s = mk96_s * 0.4357/0.8318 // Normalize 1996 mock tests to be in units of individual std dev, equivalent to 1998, 1999

* add pupil questionnaire data
* (preserve/restore: the questionnaire is collapsed to school level in a scratch copy, saved to a
*  temporary file, and then merged back onto the school-level data built above)
preserve
use pupq if pupdate_98_1!="" | schid_98_2<., clear // pupil questionnaire, for those with 1998 data
ren schid_98_2 schid
gen preatt_98 = 1-absdays_98_6/20 // pre-program school attendance based on # days absent in previous four weeks
gen byte Ilivestock_98 = cows_98_23 | goats_98_24 | sheep_98_25 | pigs_98_26 if !missing(cows_98_23, goats_98_24, sheep_98_25, pigs_98_26) // Household Has Livestock?
gen byte Isoften_98 = 3.fallsick_98_37 // Child Sick Often?
gen byte Iclean_98 = 1.clean_98_15 // Child clean?
collapse preatt_98 havelatr_98_33 Ilivestock_98 waz_98 bloodst_98_58 Isoften_98 malaria_98_48 Iclean_98 pigs_98_26 cows_98_23 sheep_98_25 goats_98_24 (count) np38 = pupid, by(schid)
tempfile tempfile
save "`tempfile'"
restore
merge 1:1 schid using "`tempfile'", nogen

* Replicate (revised) Table I--compare to Appendix I of PSDP-REP_ 2014-11.pdf at doi.org/10.7910/DVN/28038
* Three panels of outcome variables, each with its own weight: np, np38, or 1 (unweighted).
outreg, clear(TableI)
est clear
forvalues panel=1/3 {
	local varlist: word `panel' of "sex - yrbirth" "preatt_98 - Iclean_98" "pup_pop - pop1_36k_updated"
	local weight : word `panel' of np np38 1
	foreach var of varlist `varlist' {
		// NOTE(review): "table ..., c()" looks like the pre-Stata 17 syntax of -table-; confirm the
		// Stata version this is run under (newer releases may need version control for this line).
		table wgrp [aw=`weight'], c(mean `var')
		regress `var' ib3.wgrp [aw=`weight']
		outreg, keep(1.wgrp) se noautosumm ctitle("","Group 1 - Group 3") rtitle("`var'") sdec(2) starlevels(10 5 1) nodisplay
		outreg, keep(2.wgrp) se noautosumm ctitle("","Group 2 - Group 3") rtitle("`var'") sdec(2) starlevels(10 5 1) nodisplay merge
		outreg, replay(TableI) append store(TableI) nodisplay
		qui regress `var' ib3.wgrp // for use by suest, don't cluster errors, and avoid weights since they differ by outcome var
		est store `var'
	}
}
outreg, replay(TableI) // replicated Table I

* SUR-based test for balance, with small-sample adjustment
suest *
test 1.wgrp 2.wgrp
* "confirmed with formal tests that these differences...are indeed unexpected if the groups were statistically the same"
di "F(" r(df) "," e(N)-1 ") = " r(chi2)/r(df) * (e(N)-r(df))/e(N) "; p = " Ftail(r(df), e(N)-1, r(chi2)/r(df) * (e(N)-r(df))/e(N))

* OLS/F test for balance
replace distlake = 3.75 if schid==133 // Roodman calculation for this missing data point
xi i.wgrp, noomit
* covariates shared by all six balance regressions below (latr_pup is added separately)
local balvars sex elg98 stdgap yrbirth preatt_98 havelatr_98_33 Ilivestock_98 waz_98 bloodst_98_58 Isoften_98 malaria_98_48 Iclean_98 pup_pop z_inf98 distlake mk96_s popT_3km_updated pop1_3km_updated popT_36k_updated pop1_36k_updated
* Do with and without latrines/pupil because of missing data for 5 obs
* "confirmed with formal tests that these differences...are indeed unexpected if the groups were statistically the same"
reg _Iwgrp_1 latr_pup `balvars' if wgrp!=2
reg _Iwgrp_1 `balvars' if wgrp!=2
reg _Iwgrp_2 latr_pup `balvars' if wgrp!=1
reg _Iwgrp_2 `balvars' if wgrp!=1
* this form is most relevant for Baird et al. 2016, which defines treatment = groups 1 & 2
reg _Iwgrp_3 latr_pup `balvars'
reg _Iwgrp_3 `balvars'

* Although included above, rerun school-reported attendance regression to show imbalance
regress preatt_98 ib3.wgrp [aw=np38] // "when Worms compares groups 1 and 2 to 3, it does not find especially significant differences"
test 1.wgrp = 2.wgrp // "the distance from group 1 to 2...is large enough to be statistically significant"
* Only the group-1 indicator enters here, so the omitted category is groups 2 and 3 pooled.
* (Previously this line repeated the ib3.wgrp regression above and so did not test the quoted claim.)
regress preatt_98 1.wgrp [aw=np38] // "...as is that from group 1 to 2 and 3 averaged together"

* Association between school- and researcher-recorded pre-treatment attendance, at school level
use namelist if inlist(visit, 981, 982), clear // first two surprise visits in 1998, presumably first quarter, pre-treatment
collapse prs, by(pupid)
merge m:1 pupid using pupq, keep(match)
gen preatt_98 = (20-absdays_98_6)/20 // school-reported attendance rate over last 4 weeks
collapse prs preatt_98 (count) np=pupid, by(schid_98_2)
regress prs preatt_98 [aw=np] // "Each 1% increase in a school's self-reported attendance...predicted a 3% increase in researcher-recorded attendance"
ren schid_98_2 schid
merge m:1 schid using schoolvar, nogen
regress prs i.wgrp [aw=np] // "those more-accurate numbers suggest little imbalance across the three groups"

* Miguel & Kremer, Table 9, col. 1, as updated (PSDP-REP_ 2014-11.pdf at doi.org/10.7910/DVN/28038) with and without school-recorded attendance
use namelist if visit>981 & elg98<. & (std98v1>=0 & std98v1<=8 | std98v1==55), clear
replace schid = sch98v1
merge m:1 schid using schoolvar, nogen
merge m:1 pupid using pupq , nogen keep(master match)
* NOTE(review): m:m merge kept as in the original so results are unchanged; see the author's note on duplicates.
merge m:m pupid using comply , nogen keep(master match) // note: duplicates in namelist.dta & comply.dta duplicate geometrically in merge
gen byte Y98 = visit>980 & visit<990
replace mk96_s = mk96 * 0.4357/0.8318 // Normalize and adjust mock scores to individual units
gen byte yr = 1 + (visit > 992)
gen byte t_any = yr >= wgrp
gen p1 = cond(std98v1==5 | std98v1==6, z9899_56, cond(std98v1==7 | std98v1==8, z9899_78, z9899_34)) // standard-specific zonal infection rate. note: assigns z9899_34 to those with missing or other grades
collapse prs t_any elg98 p1 mk96_s Y98 sap? std98v1 Isem? schid absdays_98_6 (sum) obs, by(pupid yr)
regress prs t_any elg98 p1 mk96_s c.Y98##(sap?) i.std98v1 Isem? [aw=obs], cluster(schid)
* "controlling for school-recorded attendance hardly perturbs the widely cited impact estimates for researcher-recorded attendance"
gen preatt_98 = 1 - absdays_98_6/20 // school-recorded attendance, early 1998
* same specification restricted to observations with non-missing preatt_98, first without and then with preatt_98 as a control
regress prs t_any elg98 p1 mk96_s c.Y98##(sap?) i.std98v1 Isem? [aw=obs] if preatt_98<., cluster(schid)
regress prs t_any preatt_98 elg98 p1 mk96_s c.Y98##(sap?) i.std98v1 Isem? [aw=obs] if preatt_98<., cluster(schid)

* "The regression that anchors GiveWell's cost-effectiveness analysis, which puts the impact of 2.4 extra years of deworming on later wage earnings at 31% (p = 0.002)---also estimates that being in the cost-sharing treatment arm for a year cut wage earnings by 14%"
cd "C:\Users\David\Dropbox\Documents\Work\Library\Worms\Baird et al 2016\data"
use Baird-etal-QJE-2016_data_primary, clear
regress ln_emp_salary_total treatment /// // Baird et al., Table IV, Panel B, row 1, col. 1, exponentiated coefficients for interpretation
	cost_sharing saturation_dm demeaned_popT_6k ///
	zoneidI2-zoneidI8 pup_pop wave2 month_interviewI2-month_interviewI12 ///
	std98_base_I2 std98_base_I3 std98_base_I4 std98_base_I5 std98_base_I6 female_baseline avgtest96 [aw=weight], cluster(psdpsch98) eform(ExpCoef)
test _b[treatment]/2.41 = -_b[cost_sharing]/.8 // "The hypothesis that the two implied rates of impact are equal...fits the data"