3 # © Copyright 2022, Scott Gasch
5 """Wrapper around US Census address geocoder API described here:
6 https://www2.census.gov/geo/pdfs/maps-data/data/Census_Geocoder_User_Guide.pdf"""
10 from typing import Any, Dict, Optional
13 from bs4 import BeautifulSoup
14 from requests.utils import requote_uri
# Module-level logger named after this module (__name__); the geocoding
# code below sends its debug and error messages through it.
18 logger = logging.getLogger(__name__)
# Looks up one free-form address via the Census "onelineaddress" geocoder
# endpoint and scrapes labelled fields out of the HTML that comes back.
# The return annotation permits None as well as a dict.
21 def geocode_address(address: str) -> Optional[Dict[str, Any]]:
22 """Send a single address to the US Census geocoding API.
24 >>> out = geocode_address('4600 Silver Hill Rd,, 20233')
25 >>> out['Matched Address']
26 '4600 SILVER HILL RD, WASHINGTON, DC, 20233'
27 >>> out['Interpolated Longitude (X) Coordinates']
29 >>> out['Interpolated Latitude (Y) Coordinates']
# Build the request URL: fixed endpoint, the caller's address as the
# `address` query parameter, then fixed layers/benchmark/vintage parameters.
33 url = 'https://geocoding.geo.census.gov/geocoder/geographies/onelineaddress'
# The address is interpolated as-is; requote_uri() below is the only
# escaping applied to it.
# NOTE(review): requote_uri appears to leave reserved characters such as
# '&' and '#' untouched, so an address containing them would alter the
# query string -- confirm, and consider percent-encoding the address.
34 url += f'?address={address}'
35 url += '&layers=all&benchmark=4&vintage=4'
36 url = requote_uri(url)
37 logger.debug('GET: %s', url)
# Broad catch: any Exception type is handled here (presumably this guards
# the HTTP request that produces `r` -- verify).
40 except Exception as e:
# Any status other than 200 is reported through logger.error.
44 if r.status_code != 200:
45 logger.error('Unexpected response code %d, wanted 200. Fail.', r.status_code)
# Parse the response body as HTML and look up the address-result <div>
# by its id attribute.
48 soup = BeautifulSoup(r.text, 'html.parser')
49 result = soup.find('div', id='pl_gov_census_geo_geocoder_domain_AddressResult')
50 logger.debug('Unhelpful result blurb: "%s"', result)
# Flatten the div to text, using '\n' as the separator between fragments.
# NOTE(review): soup.find() yields None when no such div is present, in
# which case this call raises AttributeError -- a None check before it
# looks warranted.
51 output = result.get_text('\n')
# Walk the flattened text one line at a time.
54 for line in output.split('\n'):
# Lines ending in ':' (optionally followed by spaces) take this branch;
# judging by the log message that follows, they are treated as labels.
55 if re.match(r'.*: *$', line):
58 logger.debug('Label is: "%s"', label)
# `value` is tested with the string_utils numeric predicates, integer
# check first (presumably so numeric text can be converted to int or
# float -- verify).
62 if string_utils.is_integer_number(value):
64 elif string_utils.is_number(value):
66 logger.debug('Value is: "%s"', value)
71 if __name__ == '__main__':