1+ """
2+ Census Geocoder wrapper
3+ For details on the API, see:
4+ http://geocoding.geo.census.gov/geocoder/Geocoding_Services_API.pdf
5+ """
6+
7+ # Copyright (C) 2015-9 Neil Freeman
8+
9+ # This program is free software: you can redistribute it and/or modify
10+ # it under the terms of the GNU General Public License as published by
11+ # the Free Software Foundation, either version 3 of the License, or
12+ # (at your option) any later version.
13+
14+ # This program is distributed in the hope that it will be useful,
15+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17+ # GNU General Public License for more details.
18+
19+ # You should have received a copy of the GNU General Public License
20+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
21+
22+ # noqa
23+
24+ import csv
25+ import io
26+ import warnings
27+
28+ import requests
29+ from requests .exceptions import RequestException
30+ from requests_toolbelt .multipart .encoder import MultipartEncoder
31+
32+
# Fallback values used by CensusGeocode when the caller does not choose a
# benchmark (locator version) or a vintage (geography version) explicitly.
DEFAULT_BENCHMARK = "Public_AR_Current"
DEFAULT_VINTAGE = "Current_Current"
35+
36+
class CensusGeocode:
    """
    Fetch results from the Census Geocoder.

    Single lookups (`coordinates`, `address`, `onelineaddress`) return an
    `AddressResult` or `GeographyResult`; `addressbatch` returns a list of dicts,
    one per row of the CSV that the batch endpoint sends back.
    """

    _url = "https://geocoding.geo.census.gov/geocoder/{returntype}/{searchtype}"
    returntypes = ["geographies", "locations"]

    # Column names applied to the header-less CSV returned by the batch endpoint,
    # keyed by return type.
    batchfields = {
        "locations": [
            "id",
            "address",
            "match",
            "matchtype",
            "parsed",
            "coordinate",
            "tigerlineid",
            "side",
        ],
        "geographies": [
            "id",
            "address",
            "match",
            "matchtype",
            "parsed",
            "coordinate",
            "tigerlineid",
            "side",
            "statefp",
            "countyfp",
            "tract",
            "block",
        ],
    }

    def __init__(self, benchmark=None, vintage=None):
        """
        Arguments:
            benchmark (str): A name that references the version of the locator to use.
                See https://geocoding.geo.census.gov/geocoder/benchmarks
            vintage (str): The geography part of the desired vintage.
                See: https://geocoding.geo.census.gov/geocoder/vintages?form

        Falsy values (including the default ``None``) select DEFAULT_BENCHMARK
        and DEFAULT_VINTAGE respectively.

        Example:
            CensusGeocode(benchmark='Public_AR_Current', vintage='Current_Current')
        """

        self._benchmark = benchmark or DEFAULT_BENCHMARK
        self._vintage = vintage or DEFAULT_VINTAGE

    def _geturl(self, searchtype, returntype=None):
        """
        Construct an URL for the geocoder.

        Arguments:
            searchtype (str): Last path segment, e.g. "address" or "addressbatch".
            returntype (str): "geographies" or "locations"; a falsy value selects
                the first entry of `returntypes`.
        """

        returntype = returntype or self.returntypes[0]
        return self._url.format(returntype=returntype, searchtype=searchtype)

    def _fetch(self, searchtype, fields, **kwargs):
        """
        Fetch a response from the Geocoding API.

        Arguments:
            searchtype (str): Endpoint name ("coordinates", "address", "onelineaddress").
            fields (dict): Query parameters specific to the search type.
            **kwargs: Recognised keys are `layers`, `returntype` (default
                "geographies") and `timeout` (passed to `requests.get`).

        Returns:
            AddressResult if the response carries "addressMatches",
            GeographyResult if it carries "geographies".

        Raises:
            ValueError: if the response cannot be parsed or has neither key.
            requests.exceptions.RequestException: on transport failures.
        """

        # Work on a copy so the dict handed in by the caller is never modified.
        params = dict(fields)
        params["vintage"] = self.vintage
        params["benchmark"] = self.benchmark
        params["format"] = "json"

        if "layers" in kwargs:
            params["layers"] = kwargs["layers"]

        returntype = kwargs.get("returntype", "geographies")
        url = self._geturl(searchtype, returntype)

        try:
            with requests.get(url, params=params, timeout=kwargs.get("timeout")) as r:
                content = r.json()

                # Guard against payloads that are valid JSON but not the expected
                # object, so they surface as the ValueError below rather than as
                # an AttributeError/TypeError.
                result = content.get("result") if isinstance(content, dict) else None
                if not isinstance(result, dict):
                    raise ValueError("unexpected response structure")

                if "addressMatches" in result:
                    return AddressResult(content)

                if "geographies" in result:
                    return GeographyResult(content)

                raise ValueError("response has neither addressMatches nor geographies")

        except (ValueError, KeyError) as err:
            # Chain the cause so the underlying problem stays visible in tracebacks.
            raise ValueError("Unable to parse response from Census") from err

    def coordinates(self, x, y, **kwargs):
        """
        Geocode a (lon, lat) coordinate.

        The return type is always forced to "geographies", overriding any
        `returntype` keyword given by the caller.
        """

        kwargs["returntype"] = "geographies"
        fields = {"x": x, "y": y}

        return self._fetch("coordinates", fields, **kwargs)

    def address(self, street, city=None, state=None, **kwargs):
        """
        Geocode an address.

        The ZIP code may be supplied as the keyword `zip` or `zipcode`; `zip` wins
        when both are truthy. Remaining keywords are forwarded to `_fetch`.
        """

        fields = {
            "street": street,
            "city": city,
            "state": state,
            "zip": kwargs.get("zip") or kwargs.get("zipcode"),
        }

        return self._fetch("address", fields, **kwargs)

    def onelineaddress(self, address, **kwargs):
        """
        Geocode an address passed as one string.

        e.g. "4600 Silver Hill Rd, Suitland, MD 20746"
        """

        fields = {
            "address": address,
        }

        return self._fetch("onelineaddress", fields, **kwargs)

    def set_benchmark(self, benchmark):
        """
        Set the Census Geocoding API benchmark the class will use.

        See: https://geocoding.geo.census.gov/geocoder/benchmarks
        """

        self._benchmark = benchmark

    @property
    def benchmark(self):
        """
        Give the Census Geocoding API benchmark the class is using.

        See: https://geocoding.geo.census.gov/geocoder/benchmarks
        """

        return self._benchmark

    def set_vintage(self, vintage):
        """
        Set the Census Geocoding API vintage the class will use.

        See: https://geocoding.geo.census.gov/geocoder/vintages?form
        """

        self._vintage = vintage

    @property
    def vintage(self):
        """
        Give the Census Geocoding API vintage the class is using.

        See: https://geocoding.geo.census.gov/geocoder/vintages?form
        """

        return self._vintage

    def _parse_batch_result(self, data, returntype):
        """
        Parse the batch address results returned from the Census Geocoding API.

        Arguments:
            data (str): Header-less CSV text.
            returntype (str): Key into `batchfields` selecting the column names.

        Returns:
            list of dicts. In each row the "coordinate" column is replaced by
            float "lon" and "lat" entries (None when absent or unparseable) and
            "match" becomes a bool that is True only for the literal "Match".

        Raises:
            ValueError: if `returntype` is not a key of `batchfields`.
        """

        try:
            fieldnames = self.batchfields[returntype]
        except KeyError as err:
            raise ValueError("unknown returntype: {}".format(returntype)) from err

        def parse(row):
            row["lat"], row["lon"] = None, None

            if row["coordinate"]:
                try:
                    row["lon"], row["lat"] = tuple(float(a) for a in row["coordinate"].split(","))
                except ValueError:
                    # Non-numeric text or a wrong number of parts: keep lat/lon as None.
                    pass

            del row["coordinate"]
            row["match"] = row["match"] == "Match"
            return row

        # return as list of dicts
        with io.StringIO(data) as f:
            reader = csv.DictReader(f, fieldnames=fieldnames)
            return [parse(row) for row in reader]

    def _post_batch(self, data=None, f=None, **kwargs):
        """
        Send batch address file to the Census Geocoding API.

        Arguments:
            data: Iterable of dicts with keys id, street, city, state, zip. A missing
                "id" is filled with the 1-based row number. The dicts themselves
                are not modified.
            f: File-like object holding the CSV; used only when `data` is falsy.
            **kwargs: Recognised keys are `returntype` (default "geographies") and
                `timeout` (passed to `requests.post`).

        Returns:
            list of dicts as produced by `_parse_batch_result`.

        Raises:
            ValueError: if `data` is falsy and `f` is None.
            requests.exceptions.RequestException: on transport failures.

        Note:
            The file object is closed before returning, including one supplied by
            the caller.
        """

        returntype = kwargs.get("returntype", "geographies")
        url = self._geturl("addressbatch", returntype)

        if data:
            # For Python 3, compile data into a StringIO
            f = io.StringIO()
            writer = csv.DictWriter(f, fieldnames=["id", "street", "city", "state", "zip"])
            for i, row in enumerate(data, 1):
                # Merge into a new dict so the caller's row is left untouched;
                # an "id" already present in the row takes precedence.
                writer.writerow({"id": i, **row})
                if i == 10001:
                    warnings.warn("Sending more than 10,000 records, the upper limit for the Census Geocoder. Request will likely fail")

            f.seek(0)

        elif f is None:
            raise ValueError("Need either data or a file for CensusGeocode.addressbatch")

        try:
            form = MultipartEncoder(
                fields={
                    "vintage": self.vintage,
                    "benchmark": self.benchmark,
                    "addressFile": ("batch.csv", f, "text/plain"),
                }
            )
            headers = {"Content-Type": form.content_type}

            with requests.post(url, data=form, timeout=kwargs.get("timeout"), headers=headers) as r:
                # return as list of dicts
                return self._parse_batch_result(r.text, returntype)

        finally:
            f.close()

    def addressbatch(self, data, **kwargs):
        """
        Send either a CSV file or data to the addressbatch API.

        According to the Census, "there is currently an upper limit of 10,000 records per batch file."

        If a file, can either be a file-like with a `read()` method, or a `str` that's a path to the
        file. Either way, it must have no header and have fields id,street,city,state,zip

        If data, should be an iterable of dicts with the above fields (although ID is optional).

        Keyword arguments `returntype` and `timeout` are forwarded to `_post_batch`.

        Returns:
            list of dicts, one per row of the response.
        """

        # Does data quack like a file handle?
        if hasattr(data, "read"):
            return self._post_batch(f=data, **kwargs)

        # If it is a string, assume it's a filename
        if isinstance(data, str):
            with open(data, "rb") as f:
                return self._post_batch(f=f, **kwargs)

        # Otherwise, assume an iterable of dicts
        return self._post_batch(data=data, **kwargs)
283+
class GeographyResult(dict):
    """
    Wrapper for geography objects returned by the Census Geocoding API.

    The mapping holds the "geographies" object of the response. The "input"
    object of the response (or an empty dict when absent) is kept on the
    `input` attribute. Each geography record gains two entries, "CENT" and
    "INTPT", holding a (lon, lat) tuple of floats, or an empty tuple when the
    source values are missing or not numeric.
    """

    def __init__(self, data):
        """
        Arguments:
            data (dict): Decoded JSON response; must contain data["result"]["geographies"].

        Raises:
            KeyError: if "result" or "geographies" is missing.
        """
        result = data["result"]
        self.input = result.get("input", {})
        super().__init__(result["geographies"])

        # create float coordinate tuples
        for geolist in self.values():
            for geo in geolist:
                geo["CENT"] = self._point(geo, "CENTLON", "CENTLAT")
                geo["INTPT"] = self._point(geo, "INTPTLON", "INTPTLAT")

    @staticmethod
    def _point(geo, lonkey, latkey):
        """Return (lon, lat) as floats from `geo`, or () if either value is unusable."""
        try:
            return float(geo[lonkey]), float(geo[latkey])
        except (KeyError, TypeError, ValueError):
            # Missing key, None value, or non-numeric text: report "no point"
            # instead of failing the whole result.
            return ()
304+
class AddressResult(list):
    """
    List of address matches returned by the Census Geocoding API.

    The items are the entries of the response's "addressMatches" array; the
    response's "input" object (an empty dict when absent) is exposed as the
    `input` attribute.
    """

    def __init__(self, data):
        payload = data["result"]
        self.input = payload.get("input", {})
        matches = payload["addressMatches"]
        super().__init__(matches)
0 commit comments