3 """Wrapper around US Census address geocoder API described here:
4 https://www2.census.gov/geo/pdfs/maps-data/data/Census_Geocoder_User_Guide.pdf"""
8 from typing import Dict, Optional
11 from bs4 import BeautifulSoup
12 from requests.utils import requote_uri
# Module-level logger named after this module, so the application can
# configure or filter this wrapper's output separately.
logger = logging.getLogger(__name__)
def geocode_address(address: str) -> Optional[Dict[str, str]]:
    """Send a single address to the US Census geocoding API.

    The address is submitted to the ``onelineaddress`` endpoint and the
    address-result block of the returned HTML page is flattened into a
    mapping of label -> value.

    Args:
        address: Single-line street address to look up.

    Returns:
        A dict keyed by the labels found in the result block (for example
        ``'Matched Address'``), or ``None`` when the server does not answer
        with HTTP 200 or the page contains no address-result block.

    >>> out = geocode_address('5 Shelbern Dr,,, 07738')
    >>> out['Matched Address']
    '5 SHELBERN DR, LINCROFT, NJ, 07738'

    """
    # Function-scope imports keep this block self-contained.
    from urllib.parse import quote

    import requests

    # Percent-encode the address on its own: requote_uri() leaves reserved
    # characters such as '&', '#' and '+' untouched, so an address containing
    # them would otherwise be split into bogus query parameters or truncated.
    encoded_address = quote(address, safe='')

    url = 'https://geocoding.geo.census.gov/geocoder/geographies/onelineaddress'
    url += f'?address={encoded_address}'
    url += '&layers=all&benchmark=4&vintage=4'
    url = requote_uri(url)
    logger.debug('GET: %s', url)
    r = requests.get(url)
    if r.status_code != 200:
        logger.error('Unexpected response code %d, wanted 200. Fail.', r.status_code)
        return None

    soup = BeautifulSoup(r.text, 'html.parser')
    result = soup.find('div', id='pl_gov_census_geo_geocoder_domain_AddressResult')
    logger.debug('Unhelpful result blurb: "%s"', result)
    if result is None:
        # find() returns None when the div is missing; bail out instead of
        # crashing with AttributeError on get_text().
        logger.error('No address result block found in response. Fail.')
        return None
    output = result.get_text('\n')

    out: Dict[str, str] = {}
    label: Optional[str] = None
    for line in output.split('\n'):
        if re.match(r'.*: *$', line):
            # A line ending in ':' (plus optional spaces) names the field
            # whose value follows; drop the colon to get the dict key.
            label = line.rstrip(' ')[:-1].strip()
            logger.debug('Label is: "%s"', label)
            continue

        value = line.strip()
        if not value or label is None:
            # Skip blank filler lines and text that precedes any label, so
            # they can neither clobber a parsed value nor create a None key.
            continue
        out[label] = value
        logger.debug('Value is: "%s"', out[label])

    return out
53 if __name__ == '__main__':