This repository was archived by the owner on Oct 10, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path04_create_v01_caces.do
More file actions
195 lines (137 loc) · 4.23 KB
/
04_create_v01_caces.do
File metadata and controls
195 lines (137 loc) · 4.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
* ---- Session housekeeping -------------------------------------------
#delimit cr
capture log close
capture clear all
set more off

* ---- Project locations (every path ends with a separator so file ----
* ---- names can be appended directly) --------------------------------
global log_directory "C:\Users\wodtke\Desktop\projects\nhood_mediation_toxins\programs\_LOGS\"
global crosswalk_directory "C:\Users\wodtke\Desktop\projects\nhood_mediation_toxins\data\crosswalks\"
global data_directory "C:\Users\wodtke\Desktop\projects\nhood_mediation_toxins\data\caces\"
global savedata_directory "C:\Users\wodtke\Desktop\projects\nhood_mediation_toxins\data\caces\"

* ---- Open the run log, overwriting any log from a previous run ------
log using "${log_directory}04_create_v01_caces.log", replace
/******************************************************************
PROGRAM NAME: 04_create_v01_caces.do
AUTHOR: KW, GW
PURPOSE: input, clean, and aggregate data from CACES
*******************************************************************/
/*******************
INPUT RAW CACES DATA
********************/
* Read the raw CACES extract. The first reshape below requires that
* tract-year-pollutant identifies each row uniquely.
import delimited "${data_directory}caces_raw.csv", encoding(ISO-8859-1)

* Give the tract identifier and the concentration the names used downstream.
rename (fips pred_wght) (geo2010 poll_)
format geo2010 %15.0g
label variable geo2010 "Concatenation of state, county and tract for 2010 Census Tracts"

* Retain only the identifier, year, pollutant name and concentration.
keep geo2010 pollutant year poll_
order geo2010 year pollutant poll_
sort geo2010 year pollutant

* Step 1: pollutants go wide, giving one row per tract-year.
reshape wide poll_, i(geo2010 year) j(pollutant) string

* Turn poll_<pollutant> into <pollutant>_ so that the year can be appended
* as a suffix by the second reshape.
foreach p in co no2 o3 pm10 pm25 so2 {
	rename poll_`p' `p'_
}

* Step 2: years go wide, giving one row per tract.
reshape wide co_ no2_ o3_ pm10_ pm25_ so2_, i(geo2010) j(year)

* Quick look at the wide variables, then store the intermediate file.
summarize co_1980-so2_2015
save "${savedata_directory}cacesv1.dta", replace
clear
/******************************
2010 TO 2000 CENSUS TRACT XWALK
*******************************/
* Re-express the 2010-tract CACES values on 2000 tract boundaries as a
* weighted average of the 2010 tracts that the crosswalk links to each
* 2000 tract. The result overwrites the intermediate file cacesv1.dta.

* Load the tract relationship file and keep the two tract identifiers and
* the two land-area percentage columns.
insheet using "${crosswalk_directory}us2010trf.txt", clear delim(",")
keep v4 v13 v22 v24
rename v4 geo2000
rename v13 geo2010
rename v22 landpct00
rename v24 landpct10
label var geo2000 "Concatenation of 2000 state, county and tract for Census 2000 Tract"
label var geo2010 "Concatenation of state, county and tract for 2010 Census Tracts"
label var landpct00 "Percent of the total 2000 tract land area represents"
label var landpct10 "Percent of the total 2010 tract land area represents"
format geo2000 %15.0g
format geo2010 %15.0g

* maxfreq = number of crosswalk records in which each 2010 tract appears.
sort geo2010 geo2000
by geo2010: gen freq=_n
by geo2010: egen maxfreq=max(freq)

* Raw weight: 2010 tracts that appear on several records use landpct10;
* 2010 tracts that appear on a single record use landpct00.
gen xwgt=landpct10/100 if maxfreq>1
replace xwgt=landpct00/100 if maxfreq==1

* Diagnostic: distribution of the raw weight totals within 2010 tracts.
egen sumwt = total(xwgt), by(geo2010)
sum sumwt, det
drop sumwt

* Rescale the weights to sum to one within each 2000 tract, then confirm.
sort geo2000 geo2010
egen sumwt = total(xwgt), by(geo2000)
replace xwgt = xwgt/sumwt
drop sumwt
egen sumwt = total(xwgt), by(geo2000)
sum sumwt, det
drop sumwt

keep geo* xwgt
sort geo2010 geo2000

* Attach the 2010-tract pollutant values to every crosswalk record.
merge m:1 geo2010 using "${savedata_directory}cacesv1.dta"
tab _merge
* Keep matched records only. Using-only records have no 2000 tract and no
* weight; keeping them would create a spurious record with geo2000 missing.
keep if _merge == 3
drop _merge
sort geo2010 geo2000

* Weighted mean rather than a sum of pre-multiplied values:
*  - collapse (sum) returns 0 when every contributing value is missing,
*    which would turn "no data" into a zero concentration; the weighted
*    mean returns missing in that case;
*  - the weights are renormalised, variable by variable, over the records
*    that actually carry a value, so 2000 tracts that are only partly
*    covered after the merge are not biased toward zero.
* Where all contributing records are present with non-missing values the
* result equals the sum of value*xwgt, because xwgt sums to one.
collapse (mean) co_1980-so2_2015 [aweight=xwgt], by(geo2000)
sort geo2000
sum co_1980-so2_2015
save "${savedata_directory}cacesv1.dta", replace
clear
/***************************************
2000 CENSUS TRACT TO 2000 ZIP CODE XWALK
****************************************/
* Aggregate the 2000-tract values in cacesv1.dta to ZCTAs as a weighted
* average, using the allocation factor afact from the crosswalk file.
use "${crosswalk_directory}crosswalk_lndareawt.dta"

* Build the numeric tract identifier: characters 1-4 and 6-7 of the tract
* string (skipping character 5) appended to the county code.
gen t = substr(tract,1,4)
gen u = substr(tract,6,2)
drop tract
gen tract = t + u
gen geo2000 = county + tract
destring geo2000, replace
format geo2000 %15.0g
destring afact, replace
keep geo2000 zcta5 afact
sort zcta5 geo2000
label var geo2000 "Concatenation of 2000 state, county and tract for Census 2000 Tract"
label var zcta5 "2000 ZIP Codes"
label var afact "Portion of 2000 Censust tract land area located within 2000 ZIP/ZCTA"

* Diagnostic: distribution of the raw allocation-factor totals within tracts.
egen sumafact = total(afact), by(geo2000)
sum sumafact, det
drop sumafact

* Rescale the factors to sum to one within each ZCTA, then confirm.
egen sumwt = total(afact), by(zcta5)
replace afact = afact/sumwt
drop sumwt
egen sumwt = total(afact), by(zcta5)
sum sumwt, det
drop sumwt

* Attach the 2000-tract pollutant values; keep matched records only.
merge m:1 geo2000 using "${savedata_directory}cacesv1.dta"
tab _merge
keep if _merge == 3
drop _merge
sort zcta5 geo2000

* Weighted mean rather than a sum of pre-multiplied values:
*  - collapse (sum) returns 0 when every contributing value is missing,
*    which would record "no data" as a zero concentration; the weighted
*    mean returns missing in that case;
*  - the weights are renormalised, variable by variable, over the tracts
*    that actually carry a value, so ZCTAs whose tracts were only partly
*    matched are not biased toward zero.
* Where all contributing tracts are present with non-missing values the
* result equals the sum of value*afact, because afact sums to one.
collapse (mean) co_1980-so2_2015 [aweight=afact], by(zcta5)
sum co_1980-so2_2015
/**********
CLEAN ZCTAs
***********/
* Discard ZCTA codes that contain "HH" or "XX", then convert the code to a
* numeric variable.
drop if strpos(zcta5, "HH") > 0 | strpos(zcta5, "XX") > 0
destring zcta5, replace

/*********
SAVE CACES
**********/
* Write the final ZCTA-level file and delete the intermediate tract file.
save "${savedata_directory}v01_caces.dta", replace
erase "${savedata_directory}cacesv1.dta"

/***********
DESCRIPTIVES
************/
* The codebook call is left disabled.
*codebook

* Empty memory and close the run log.
clear
log close