Handle census site unavailability w/o throwing.
[python_utils.git] / geocode.py
1 #!/usr/bin/env python3
2
3 # © Copyright 2022, Scott Gasch
4
5 """Wrapper around US Census address geocoder API described here:
6 https://www2.census.gov/geo/pdfs/maps-data/data/Census_Geocoder_User_Guide.pdf"""
7
8 import logging
9 import re
10 from typing import Any, Dict, Optional
11
12 import requests
13 from bs4 import BeautifulSoup
14 from requests.utils import requote_uri
15
16 import string_utils
17
18 logger = logging.getLogger(__name__)
19
20
def geocode_address(address: str) -> Optional[Dict[str, Any]]:
    """Send a single address to the US Census geocoding API.

    Args:
        address: the one-line address to look up.  It is sent as the
            ``address`` query parameter and may contain any characters;
            URL encoding is handled here.

    Returns:
        A dict mapping each label found in the geocoder's result blurb
        (the label text with its trailing colon removed) to the text that
        follows it.  Values accepted by ``string_utils.is_integer_number``
        are converted to int and values accepted by
        ``string_utils.is_number`` are converted to float; everything else
        stays a str.  Returns None (after logging) if the request fails,
        if the server answers with a status other than 200, or if the
        response does not contain the expected result section.

    >>> out = geocode_address('4600 Silver Hill Rd,, 20233')
    >>> out['Matched Address']
    '4600 SILVER HILL RD, WASHINGTON, DC, 20233'
    >>> out['Interpolated Longitude (X) Coordinates']
    -76.92743
    >>> out['Interpolated Latitude (Y) Coordinates']
    38.84599

    """
    url = 'https://geocoding.geo.census.gov/geocoder/geographies/onelineaddress'
    # Hand the query to requests as a mapping so that every value is
    # percent-encoded; an address containing '&', '#', '+' or '=' would
    # otherwise corrupt a hand-built query string.
    params = {
        'address': address,
        'layers': 'all',
        'benchmark': '4',
        'vintage': '4',
    }
    logger.debug('GET: %s params=%s', url, params)
    try:
        # Without a timeout an unresponsive server would block the caller
        # indefinitely.
        r = requests.get(url, params=params, timeout=30)
    except Exception:
        # Deliberately broad: callers rely on this function reporting any
        # failure to reach the site by returning None rather than raising.
        logger.exception('Request to the Census geocoder failed.')
        return None

    if r.status_code != 200:
        logger.error('Unexpected response code %d, wanted 200.  Fail.', r.status_code)
        return None

    soup = BeautifulSoup(r.text, 'html.parser')
    result = soup.find('div', id='pl_gov_census_geo_geocoder_domain_AddressResult')
    logger.debug('Unhelpful result blurb: "%s"', result)
    if result is None:
        # A 200 response without the result div (e.g. an error or
        # maintenance page) must not blow up in get_text() below.
        logger.error('Response did not contain an address result section.  Fail.')
        return None

    # The blurb is a sequence of "Label:" lines, each followed by the line(s)
    # holding its value; remember the most recent label and attach the
    # following text to it.
    out: Dict[str, Any] = {}
    label: Optional[str] = None
    for line in result.get_text('\n').split('\n'):
        if re.match(r'.*: *$', line):
            # Drop surrounding whitespace and the trailing colon.
            label = line.strip()[:-1]
            logger.debug('Label is: "%s"', label)
            continue
        if not label:
            # Text that precedes the first label belongs to no field.
            continue
        value: Any = line.strip()
        if string_utils.is_integer_number(value):
            value = int(value)
        elif string_utils.is_number(value):
            value = float(value)
        logger.debug('Value is: "%s"', value)
        out[label] = value
    return out
69
70
if __name__ == '__main__':
    # When executed as a script, run the doctest examples embedded in this
    # module's docstrings.  Note that the geocode_address example calls
    # the live Census geocoder.
    from doctest import testmod

    testmod()