1ee7f20574e98b4ce7123eeab06c84e9d832e3f0
[python_utils.git] / geocode.py
1 #!/usr/bin/env python3
2
3 """Wrapper around US Census address geocoder API described here:
4 https://www2.census.gov/geo/pdfs/maps-data/data/Census_Geocoder_User_Guide.pdf"""
5
6 import logging
7 import re
8 from typing import Dict, Optional
9
10 import requests
11 from bs4 import BeautifulSoup
12 from requests.utils import quote
13
14 logger = logging.getLogger(__name__)
15
16
def _parse_labeled_text(text: str) -> Dict[str, str]:
    """Collect "Label:" / value line pairs from *text* into a dict.

    A line that ends in a colon (optionally followed by spaces) starts a new
    label; every following non-label line is stored, stripped, as that
    label's value, so the last such line before the next label wins.  Lines
    seen before the first label are ignored.
    """
    out: Dict[str, str] = {}
    label = None
    for line in text.split('\n'):
        candidate = line.rstrip(' ')
        if candidate.endswith(':'):
            # Drop exactly the trailing colon.  Slicing off a fixed two
            # characters would truncate "Label:" and leave the colon on
            # "Label:   ".
            label = candidate[:-1].strip()
            logger.debug('Label is: "%s"', label)
        elif label:
            out[label] = line.strip()
            logger.debug('Value is: "%s"', out[label])
    return out


def geocode_address(
    address: str, timeout: Optional[float] = 30.0
) -> Optional[Dict[str, str]]:
    """Send a single address to the US Census geocoding API.

    The one-line-address endpoint is queried and the labeled fields of the
    HTML result block are scraped into a dict.

    Args:
        address: single-line address to look up; it is URL-quoted before
            being placed in the query string.
        timeout: seconds to wait for the HTTP request, passed straight to
            requests.get.  None disables the timeout.

    Returns:
        A dict mapping each label found in the result block (without its
        trailing colon) to the text that follows it, or None when the
        response status is not 200 or the result block is missing from the
        returned page.

    Exceptions raised by requests.get (including timeouts) are not caught
    here and propagate to the caller.

    >>> out = geocode_address('5 Shelbern Dr,,, 07738')
    >>> out['Matched Address']
    '5 SHELBERN DR, LINCROFT, NJ, 07738'

    """
    encoded_address = quote(address)
    url = 'https://geocoding.geo.census.gov/geocoder/geographies/onelineaddress'
    url += f'?address={encoded_address}'
    url += '&layers=all&benchmark=4&vintage=4'
    logger.debug('GET: %s', url)
    r = requests.get(url, timeout=timeout)
    if r.status_code != 200:
        logger.error('Unexpected response code %s, wanted 200.  Fail.', r.status_code)
        return None

    soup = BeautifulSoup(r.text, 'html.parser')
    result = soup.find('div', id='pl_gov_census_geo_geocoder_domain_AddressResult')
    logger.debug('Unhelpful result blurb: "%s"', result)
    if result is None:
        # find() returns None when the page has no result block; report it
        # instead of failing with AttributeError on get_text().
        logger.error('No address result block found in geocoder response.  Fail.')
        return None
    return _parse_labeled_text(result.get_text('\n'))
50
51
# Script entry point: run this module's doctests.  The geocode_address
# example calls the function, which issues a real HTTP request.
if __name__ == '__main__':
    from doctest import testmod

    testmod()