3 # © Copyright 2022, Scott Gasch
5 """Wrapper around US Census address geocoder API described here:
7 * https://www2.census.gov/geo/pdfs/maps-data/data/Census_Geocoder_User_Guide.pdf
8 * https://geocoding.geo.census.gov/geocoder/Geocoding_Services_API.pdf
13 --form benchmark=2020 \
14 https://geocoding.geo.census.gov/geocoder/locations/addressbatch \
15 --output geocoderesult.csv
21 from typing import Any, Dict, List, Optional
24 from requests.utils import requote_uri
26 from pyutils import list_utils
28 logger = logging.getLogger(__name__)
31 @functools.lru_cache(maxsize=256)
32 def geocode_address(address: str) -> Optional[Dict[str, Any]]:
33 """Send a single address to the US Census geocoding API in order to
34 look up relevant data about it (including, if possible, its
35 lat/long). The response is a parsed JSON chunk of data with N
36 addressMatches in the result section and the details of each match
40 address: the full address to look up in the form: "STREET
41 ADDRESS, CITY, STATE, ZIPCODE". These components may be
42 omitted and the service will make educated guesses but
43 the commas delimiting each component must be included.
46 A parsed json dict with a bunch of information about the
47 address contained within it. Each 'addressMatch'
48 in the JSON describes the details of a possible match.
49 Returns None if there was an error or the address is
52 >>> json = geocode_address('4600 Silver Hill Rd,, 20233')
53 >>> json['result']['addressMatches'][0]['matchedAddress']
54 '4600 SILVER HILL RD, WASHINGTON, DC, 20233'
56 >>> json['result']['addressMatches'][0]['coordinates']
57 {'x': -76.9274328556918, 'y': 38.845989080537514}
59 url = "https://geocoding.geo.census.gov/geocoder/geographies/onelineaddress"
60 url += f"?address={address}"
61 url += "&returntype=geographies&layers=all&benchmark=4&vintage=4&format=json"
62 url = requote_uri(url)
63 logger.debug("GET: %s", url)
65 r = requests.get(url, timeout=10.0)
66 except Exception as e:
72 logger.error("Unexpected response code %d, wanted 200. Fail.", r.status_code)
74 logger.debug("Response: %s", json.dumps(r.json(), indent=4, sort_keys=True))
78 def batch_geocode_addresses(addresses: List[str]) -> Optional[List[str]]:
79 """Send a list of addresses for batch geocoding to a web service
80 operated by the US Census Bureau.
83 addresses: a list of addresses to geocode. Each line of the
84 input list should be a single address in the form: "STREET
85 ADDRESS, CITY, STATE, ZIPCODE". Individual address components
86 may be omitted and the service will make educated guesses but
87 the comma delimiters between address components may not be
91 An array of the same size as the input array with one
92 answer record per line. Returns None on error.
94 Note: this code will deal with requests >10k addresses by chunking
95 them internally because the census website disallows requests >
98 >>> batch_geocode_addresses(
100 ... '4600 Silver Hill Rd, Washington, DC, 20233',
101 ... '935 Pennsylvania Avenue, NW, Washington, DC, 20535-0001',
102 ... '1600 Pennsylvania Avenue NW, Washington, DC, 20500',
103 ... '700 Pennsylvania Avenue NW, Washington, DC, 20408',
106 ['"1"," 4600 Silver Hill Rd, Washington, DC, 20233","Match","Exact","4600 SILVER HILL RD, WASHINGTON, DC, 20233","-76.92743285599994,38.84598908100003","76355984","L","24","033","802405","2004"', '"2"," 935 Pennsylvania Avenue, NW, Washington, DC","No_Match"', '"3"," 1600 Pennsylvania Avenue NW, Washington, DC, 20500","Match","Exact","1600 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20500","-77.03654072899997,38.89874352700008","76225813","L","11","001","980000","1034"', '"4"," 700 Pennsylvania Avenue NW, Washington, DC, 20408","Match","Exact","700 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20408","-77.02304089899997,38.89361872300003","76226346","L","11","001","980000","1025"']
110 url = "https://geocoding.geo.census.gov/geocoder/geographies/addressbatch"
111 payload = {"benchmark": "4", "vintage": "4"}
113 for chunk in list_utils.shard(addresses, 9999):
115 for address in chunk:
116 raw_file += f"{n}, {address}\n"
118 files = {"addressFile": ("input.csv", raw_file)}
119 logger.debug("POST: %s", url)
121 r = requests.post(url, files=files, data=payload, timeout=10.0)
122 except Exception as e:
129 "Unexpected response code %d, wanted 200. Fail.", r.status_code
132 logger.debug("Response: %s", r.text)
133 for line in r.text.split("\n"):
140 if __name__ == "__main__":