3 # © Copyright 2022, Scott Gasch
5 """Wrapper around US Census address geocoder API described here:
7 * https://www2.census.gov/geo/pdfs/maps-data/data/Census_Geocoder_User_Guide.pdf
8 * https://geocoding.geo.census.gov/geocoder/Geocoding_Services_API.pdf
13 --form benchmark=2020 \
14 https://geocoding.geo.census.gov/geocoder/locations/addressbatch \
15 --output geocoderesult.csv
20 from typing import Any, Dict, List, Optional
23 from requests.utils import requote_uri
27 logger = logging.getLogger(__name__)
def geocode_address(address: str) -> Optional[Dict[str, Any]]:
    """Send a single address to the US Census geocoding API in order to
    lookup relevant data about it (including, if possible, its
    lat/long).  The response is a parsed JSON chunk of data with N
    addressMatches in the result section; each match carries its own
    details.

    Args:
        address: the full address to lookup in the form: "STREET
            ADDRESS, CITY, STATE, ZIPCODE".  These components may be
            omitted and the service will make educated guesses but
            the commas delimiting each component must be included.

    Returns:
        A parsed json dict with a bunch of information about the
        address contained within it.  Each 'addressMatch'
        in the JSON describes the details of a possible match.
        Returns None if there was an error (request failure, non-200
        status, body that is not JSON) or the address is invalid
        (including an empty / whitespace-only address, which is
        rejected without contacting the service).

    >>> json = geocode_address('4600 Silver Hill Rd,, 20233')
    >>> json['result']['addressMatches'][0]['matchedAddress']
    '4600 SILVER HILL RD, WASHINGTON, DC, 20233'

    >>> json['result']['addressMatches'][0]['coordinates']
    {'x': -76.9274328556918, 'y': 38.845989080537514}
    """
    # Function-scope import so this block does not depend on edits to
    # the module's import section.
    from urllib.parse import quote

    if not address or not address.strip():
        # Nothing to look up; skip the pointless network round trip.
        return None

    # Percent-encode the caller-supplied text before it goes into the
    # query string.  requote_uri() below deliberately leaves characters
    # such as '&', '#', '+' and '%' alone, so without this an address
    # like "12 Main St #4" would cut the query off at the fragment
    # marker and silently drop the remaining parameters.
    encoded_address = quote(address, safe='')

    url = 'https://geocoding.geo.census.gov/geocoder/geographies/onelineaddress'
    url += f'?address={encoded_address}'
    url += '&returntype=geographies&layers=all&benchmark=4&vintage=4&format=json'
    url = requote_uri(url)
    logger.debug('GET: %s', url)
    try:
        r = requests.get(url)
    except Exception as e:
        logger.exception(e)
        return None

    if r.status_code != 200:
        logger.debug(r.text)
        logger.error('Unexpected response code %d, wanted 200. Fail.', r.status_code)
        return None

    # Parse the body exactly once; a 200 response that is not JSON is
    # reported as an error (None) instead of raising to the caller.
    try:
        parsed = r.json()
    except ValueError:
        logger.exception('Response body could not be parsed as JSON.')
        return None

    # Pretty-printing can be costly, so only do it when it will be logged.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug('Response: %s', json.dumps(parsed, indent=4, sort_keys=True))
    return parsed
def batch_geocode_addresses(addresses: List[str]) -> Optional[List[str]]:
    """Send a list of addresses for batch geocoding to a web service
    operated by the US Census Bureau.

    Args:
        addresses: a list of addresses to geocode.  Each line of the
            input list should be a single address in the form: "STREET
            ADDRESS, CITY, STATE, ZIPCODE".  Individual address components
            may be omitted and the service will make educated guesses but
            the commas delimiters between address components may not be
            omitted.  Line breaks embedded in an address are replaced
            by spaces so that every address stays on one input line.

    Returns:
        An array of the same size as the input array with one
        answer record per line.  Returns None on error.  An empty
        input list yields an empty list without contacting the service.

    Note: this code will deal with requests >10k addresses by chunking
    them internally because the census website disallows requests >
    10k addresses.

    >>> batch_geocode_addresses(
    ...     [
    ...         '4600 Silver Hill Rd, Washington, DC, 20233',
    ...         '935 Pennsylvania Avenue, NW, Washington, DC, 20535-0001',
    ...         '1600 Pennsylvania Avenue NW, Washington, DC, 20500',
    ...         '700 Pennsylvania Avenue NW, Washington, DC, 20408',
    ...     ]
    ... )
    ['"1"," 4600 Silver Hill Rd, Washington, DC, 20233","Match","Exact","4600 SILVER HILL RD, WASHINGTON, DC, 20233","-76.92743285599994,38.84598908100003","76355984","L","24","033","802405","2004"', '"2"," 935 Pennsylvania Avenue, NW, Washington, DC","No_Match"', '"3"," 1600 Pennsylvania Avenue NW, Washington, DC, 20500","Match","Exact","1600 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20500","-77.03534009999998,38.89875363300007","76225813","L","11","001","980000","1034"', '"4"," 700 Pennsylvania Avenue NW, Washington, DC, 20408","Match","Exact","700 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20408","-77.02304089899997,38.89361872300003","76226346","L","11","001","980000","1025"']
    """
    if not addresses:
        # Nothing to geocode; avoid a needless request.
        return []

    url = 'https://geocoding.geo.census.gov/geocoder/geographies/addressbatch'
    payload = {'benchmark': '4', 'vintage': '4'}
    out: List[str] = []
    n = 1  # Record id; keeps counting across chunks so ids stay unique.
    for chunk in list_utils.shard(addresses, 9999):
        # Build the upload as a list of lines and join once rather than
        # growing a string with += for every address.  Line breaks inside
        # an address are flattened to spaces: an embedded newline would
        # otherwise start a new CSV record and misalign the ids.
        rows = []
        for address in chunk:
            one_line = ' '.join(address.splitlines())
            rows.append(f'{n}, {one_line}\n')
            n += 1
        raw_file = ''.join(rows)

        files = {'addressFile': ('input.csv', raw_file)}
        logger.debug('POST: %s', url)
        try:
            r = requests.post(url, files=files, data=payload)
        except Exception as e:
            logger.exception(e)
            return None

        if r.status_code != 200:
            logger.debug(r.text)
            logger.error('Unexpected response code %d, wanted 200. Fail.', r.status_code)
            return None
        logger.debug('Response: %s', r.text)

        # One answer record per non-blank line of the response.
        for line in r.text.split('\n'):
            line = line.strip()
            if line:
                out.append(line)
    return out
136 if __name__ == '__main__':