This repository was archived by the owner on Oct 10, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path03_create_v01_ncdb.do
More file actions
327 lines (261 loc) · 8.17 KB
/
03_create_v01_ncdb.do
File metadata and controls
327 lines (261 loc) · 8.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
* Session setup: reset the command delimiter to end-of-line, clear memory,
* close any log left open by a previous run, and turn off output paging.
* The two "capture" prefixes keep the script going if there is nothing to
* clear or no log to close.
#delimit cr
capture clear all
capture log close
set more off
* Project directories. Each path ends with a backslash so that a file name
* can be appended directly after the macro reference.
* data_directory: raw NCDB csv extracts
global data_directory "C:\Users\wodtke\Desktop\projects\nhood_mediation_toxins\data\ncdb\csvFiles\"
* savedata_directory: intermediate and final .dta output
global savedata_directory "C:\Users\wodtke\Desktop\projects\nhood_mediation_toxins\data\ncdb\"
* log_directory: run logs
global log_directory "C:\Users\wodtke\Desktop\projects\nhood_mediation_toxins\programs\_LOGS\"
* crosswalk_directory: tract and ZIP crosswalk inputs
global crosswalk_directory "C:\Users\wodtke\Desktop\projects\nhood_mediation_toxins\data\crosswalks\"
* Start the log for this run, overwriting the log from any previous run.
log using "${log_directory}03_create_v01_ncdb.log", replace
/******************************************************************
PROGRAM NAME: 03_create_v01_ncdb.do
AUTHOR: KW, GW
PURPOSE: input, clean, interpolate, and aggregate data from NCDB
*******************************************************************/
/******************
INPUT RAW NCDB DATA
*******************/
* Read the 2010 extract first and park it in a Stata-managed temporary file,
* then read the 2000 extract and join the two 1:1 on geo2010.
* A tempfile replaces the former hard-coded scratch path: it needs no
* project-specific folder to exist and Stata removes it automatically when
* the do-file finishes, so no explicit erase is required.
tempfile ncdb2010_temp
import delimited "${data_directory}ncdb2010.csv", encoding(ISO-8859-1)
save "`ncdb2010_temp'"
clear
import delimited "${data_directory}ncdb2000.csv", encoding(ISO-8859-1)
merge 1:1 geo2010 using "`ncdb2010_temp'"
* The match indicator is not inspected; unmatched rows from either file stay.
drop _merge
/**********
RENAME VARS
***********/
* Give every measure a name of the form <stub>_<year> so that the later
* interpolation and reshape steps can address the 2000 and 2010 values by stub.

* Tract population: suffix 0 is the 2000 value, suffix 1 the 2010 value.
rename trctpop0 trctpop_2000
rename trctpop1 trctpop_2010

* Education counts: raw suffix 0 becomes _2000, raw suffix 1a becomes _2010.
foreach s in educ8 educ11 educ12 educ15 educa educ16 educpp {
    rename `s'0  `s'_2000
    rename `s'1a `s'_2010
}

* Female-headed families, poverty and unemployment each come as a
* denominator (d) and numerator (n) pair for both years.
foreach s in ffh povrat unempt {
    foreach part in d n {
        rename `s'0`part'  `s'`part'_2000
        rename `s'1a`part' `s'`part'_2010
    }
}

* Race counts: only the numerators are renamed.
foreach s in shrwht shrblk {
    rename `s'0n `s'n_2000
    rename `s'1n `s'n_2010
}

* Keep the identifiers and renamed measures, then store the tract-level file.
keep geo2010 state region division county ///
    trctpop* educ*_* ffhd* ffhn* ///
    povratd* povratn* unemptd* unemptn* shrwhtn* shrblkn*
sort geo2010
save "${savedata_directory}ncdbv1.dta", replace
clear
/******************************
2010 TO 2000 CENSUS TRACT XWALK
*******************************/
* Load the 2010-to-2000 tract relationship file and keep the two tract
* identifiers plus the two housing-unit percentage columns.
insheet using "${crosswalk_directory}us2010trf.txt", clear delim(",")
keep v4 v13 v29 v30
rename (v4 v13 v29 v30) (geo2000 geo2010 hupct00 hupct10)
label var geo2000 "Concatenation of 2000 state, county and tract for Census 2000 Tract"
label var geo2010 "Concatenation of state, county and tract for 2010 Census Tracts"
label var hupct00 "Percent of the HU00 this record contains"
label var hupct10 "Percent of the HU10 this record contains"
format geo2000 geo2010 %15.0g

* Number of crosswalk rows per 2010 tract.
sort geo2010 geo2000
by geo2010: generate n_parts = _N

* Allocation weight: hupct10 when the 2010 tract appears on several rows,
* hupct00 when it appears on exactly one row.
* NOTE(review): the weighted values are summed to geo2000 further down, so
* using hupct00 for single-row 2010 tracts scales those tracts by the share
* of the 2000 tract they represent - confirm this is the intended allocation.
generate xwgt = cond(n_parts > 1, hupct10, hupct00)/100

* Diagnostic only: distribution of the weight total within each 2010 tract.
egen sumwt = total(xwgt), by(geo2010)
summarize sumwt, detail
drop sumwt

keep geo* xwgt

* Attach the NCDB measures; crosswalk rows with no NCDB record are dropped,
* NCDB tracts with no crosswalk row are kept.
merge m:1 geo2010 using "${savedata_directory}ncdbv1.dta"
drop if _merge == 1
drop _merge
sort geo2010 geo2000

* Weight every measure and add the pieces up within each 2000 tract.
* NOTE(review): the range trctpop_2000-povratd_2010 depends on the column
* order of the merged file - TODO confirm it spans every measure used later.
foreach measure of varlist trctpop_2000-povratd_2010 {
    replace `measure' = `measure'*xwgt
}
collapse (sum) trctpop_2000-povratd_2010 ///
    (firstnm) state division county, by(geo2000)
sort geo2000
save "${savedata_directory}ncdbv1.dta", replace
clear
/***************************************
2000 CENSUS TRACT TO 2000 ZIP CODE XWALK
****************************************/
* Load the population-weighted tract-to-ZIP crosswalk and rebuild the numeric
* 2000 tract id: county code followed by the tract code with its 5th
* character (the separator between the 4-character and 2-character parts)
* removed.
use "${crosswalk_directory}crosswalk_popsizewt.dta", clear
gen geo2000 = county + substr(tract,1,4) + substr(tract,6,2)
destring geo2000, replace
format geo2000 %15.0g
destring afact, replace

* Remove ZIP codes that are not purely numeric before converting zcta5.
* Codes containing "HH" are filtered together with the "X" codes, matching
* the land-area step of this script: a single remaining non-numeric code
* would make destring leave zcta5 as a string and break the numeric 1:1
* merge on zcta5 later on.
gen byte zipx = (strpos(zcta5, "X") > 0) | (strpos(zcta5, "HH") > 0)
tabulate zipx
drop if zipx == 1
destring zcta5, replace

keep geo2000 zcta5 afact
sort geo2000 zcta5
label var geo2000 "Concatenation of 2000 state, county and tract for Census 2000 Tract"
label var zcta5 "2000 ZIP Codes"
label var afact "Portion of 2000 Censust tract population located within 2000 ZIP/ZCTA"

* Diagnostic only: distribution of the allocation-factor total per tract.
egen sumafact = sum(afact), by(geo2000)
sum sumafact, det
drop sumafact

* Attach the 2000-tract measures; NCDB tracts absent from the crosswalk are
* dropped, crosswalk rows without NCDB data are kept.
merge m:1 geo2000 using "${savedata_directory}ncdbv1.dta"
tab _merge
drop if _merge == 2

* Allocate each measure to ZIP codes with afact and total within ZIP code.
foreach var of varlist ///
    trctpop_2000-povratd_2010 {
    replace `var'= `var'*afact
}
collapse (sum) trctpop_2000-povratd_2010 (firstnm) state division county, by(zcta5)
sort zcta5
save "${savedata_directory}ncdbv1.dta", replace
/********
LAND AREA
*********/
* Total land area (landsqmi) per ZIP code from the land-area crosswalk,
* merged onto the ZIP-level NCDB file.
* The tract id is not rebuilt here: the data are collapsed by zcta5 only, so
* geo2000 (and afact) would be discarded unused.
use "${crosswalk_directory}crosswalk_lndareawt.dta", clear
destring landsqmi, replace
keep zcta5 landsqmi

* Drop the non-numeric "HH" and "XX" ZIP codes up front so that zcta5 can be
* converted to a number after the collapse.
drop if regexm(zcta5, "HH") | regexm(zcta5, "XX")
collapse (sum) landsqmi, by(zcta5)
destring zcta5, replace
drop if zcta5==.

* Keep only ZIP codes present in both the land-area and the NCDB file.
merge 1:1 zcta5 using "${savedata_directory}ncdbv1.dta"
keep if _merge==3
drop _merge
/*****************************************
LINEAR INTERPOLATION FOR INTERCENSAL YEARS
******************************************/
* Measures to interpolate; each must exist as <name>_2000 and <name>_2010.
capture macro drop vars08
global vars08 ///
    educ8 educ11 educ12 educ15 educa educ16 educpp ///
    ffhd ffhn povratd povratn unemptd unemptn ///
    trctpop shrwhtn shrblkn

* For every measure create <name>_2001 ... <name>_2009 on the straight line
* between the 2000 and 2010 values.
foreach measure of global vars08 {
    forvalues yr = 2001/2009 {
        generate `measure'_`yr' = `measure'_2000 + ///
            (`measure'_2010 - `measure'_2000)*((`yr' - 2000)/(2010 - 2000))
    }
}
/***********
RESHAPE DATA
************/
* Go from one row per ZIP code to one row per ZIP code and year.
local measures educ8 educ11 educ12 educ15 educa educ16 educpp ///
    ffhd ffhn povratd povratn unemptd unemptn ///
    trctpop shrwhtn shrblkn

* The reshape stubs are the measure names with the trailing underscore that
* separates them from the year suffix.
capture macro drop stubs
foreach m of local measures {
    global stubs $stubs `m'_
}
reshape long $stubs, i(zcta5) j(year)

* Strip the now-dangling underscore from each measure name.
foreach m of local measures {
    rename `m'_ `m'
}
/*******************
CREATE NEW VARIABLES
********************/
/***EDUCATIONAL COMPOSITION***/
* Education groups as a fraction of educpp.
generate nhlesshs = (educ8 + educ11)/educpp
generate nhhsgrad = educ12/educpp
generate nhsomcol = (educ15 + educa)/educpp
generate nhcolgrd = educ16/educpp

* Blank out fractions from 1 through 99 inclusive.
* NOTE(review): this also blanks a fraction of exactly 1, whereas the other
* shares in this script use a strict >1 rule - confirm which is intended.
foreach share of varlist nhlesshs nhhsgrad nhsomcol nhcolgrd {
    replace `share' = . if (`share' >= 1) & (`share' <= 99)
}
summarize nhlesshs-nhcolgrd
/***FEMALE-HEADED FAMILIES WITH CHILDREN***/
* Ratio of ffhn to ffhd; values above 1 are set to missing.
generate nhfemhd = ffhn/ffhd
replace nhfemhd = . if nhfemhd > 1
summarize nhfemhd

/***POVERTY RATE***/
* Ratio of povratn to povratd (no range check applied).
generate nhpovrt = povratn/povratd
summarize nhpovrt

/***UNEMPLOYMENT RATE*/
* Ratio of unemptn to unemptd (no range check applied).
generate nhunemprt = unemptn/unemptd
summarize nhunemprt
/***RACIAL COMPOSITION***/
* nhshrwht is stored as ONE MINUS shrwhtn/trctpop, i.e. the complement of the
* share counted in shrwhtn.
gen nhshrwht=1-(shrwhtn/trctpop)
* Because of the complement, an impossible share (shrwhtn > trctpop) shows
* up as a NEGATIVE value, which a ">1" test alone never catches. Blank
* anything outside the 0-1 range so both kinds of invalid value are removed
* (already-missing values stay missing).
replace nhshrwht=. if !inrange(nhshrwht,0,1)
sum nhshrwht

* Share counted in shrblkn; values above 1 are set to missing.
gen nhshrblk=shrblkn/trctpop
replace nhshrblk=. if nhshrblk>1
sum nhshrblk
/***COMPOSITE NH DADVG SCALE***/
* Principal components of the five indicators, fitted on the pooled
* ZIP-by-year rows; nhdadvg holds the prediction from that fit.
* NOTE(review): predict is called without options - presumably this yields
* the first-component score; confirm against the pca documentation.
pca nhpovrt nhunemprt nhfemhd nhlesshs nhshrwht
predict nhdadvg

/***POPULATION DENSITY***/
* trctpop per unit of landsqmi.
generate nhpopden = trctpop/landsqmi
summarize nhpopden, detail

/***CENSUS REGION***/
* Collapse the nine division codes into four labelled regions.
recode division (1 2 = 1) (3 4 = 2) (5 6 7 = 3) (8 9 = 4), generate(region)
label define region_lbl 1 "Northeast" 2 "Midwest" 3 "South" 4 "West"
label values region region_lbl
/***********
RESHAPE DATA
************/
* Back to one row per ZIP code: keep the identifiers and the derived
* measures (every variable whose name starts with n), then spread the
* years into columns.
sort zcta5 year
keep zcta5 state region division county year n*
capture macro drop stubs
global stubs nhlesshs nhhsgrad nhsomcol nhcolgrd nhfemhd nhpovrt ///
    nhunemprt nhshrwht nhshrblk nhpopden nhdadvg
reshape wide $stubs, i(zcta5) j(year)

* Shorten the four-digit year suffix to two digits (2000 -> 00, ..., 2010 -> 10).
foreach v of global stubs {
    forvalues yr = 2000/2010 {
        local yy = substr("`yr'", 3, 2)
        rename `v'`yr' `v'`yy'
    }
}
/********
SAVE NCDB
*********/
* Write the final file and remove the intermediate ncdbv1.dta left behind by
* the earlier steps. The intermediate file is no longer re-saved first:
* that copy was erased immediately afterwards, so the write was redundant.
save "${savedata_directory}v01_ncdb.dta", replace
erase "${savedata_directory}ncdbv1.dta"
/***********
DESCRIPTIVES
************/
*codebook
* Deciles (10th through 90th percentile) of the 2001 poverty rate and the
* 2001 composite score, reported one variable at a time.
foreach v in nhpovrt01 nhdadvg01 {
    centile `v', centile(10(10)90)
}

* Release the data and close the log opened at the top of the script.
clear
log close