Change address, recognize numbers, add copyright.
[python_utils.git] / geocode.py
1 #!/usr/bin/env python3
2
3 # © Copyright 2021-2022, Scott Gasch
4
5 """Wrapper around US Census address geocoder API described here:
6 https://www2.census.gov/geo/pdfs/maps-data/data/Census_Geocoder_User_Guide.pdf"""
7
8 import logging
9 import re
10 from typing import Dict, Optional
11
12 import requests
13 from bs4 import BeautifulSoup
14 from requests.utils import requote_uri
15
16 import string_utils
17
18 logger = logging.getLogger(__name__)
19
20
def geocode_address(address: str) -> Optional[Dict[str, str]]:
    """Send a single address to the US Census geocoding API.

    The address is passed to the one-line-address endpoint as a query
    parameter and the HTML response is scraped: every line of the result
    blurb that ends in a colon is treated as a label, and the text that
    follows it is recorded as that label's value.

    Args:
        address: the free-form, one-line address to look up, as plain
            (not URL-escaped) text.  Escaping is done here, so characters
            such as '#' or '&' in the address are sent to the server
            intact instead of being interpreted as URL syntax.

    Returns:
        A dict mapping each label found in the result blurb to its value,
        or None if the server did not answer with HTTP 200 or the response
        did not contain the expected result section.  Values that
        string_utils recognizes as numbers are converted to int or float,
        so not every value is a str despite the annotation.

    Raises:
        Nothing is caught here: network-level errors raised by
        requests.get (including the timeout set below) propagate to the
        caller.

    >>> out = geocode_address('4600 Silver Hill Rd,, 20233')
    >>> out['Matched Address']
    '4600 SILVER HILL RD, WASHINGTON, DC, 20233'
    >>> out['Interpolated Longitude (X) Coordinates']
    -76.92743
    >>> out['Interpolated Latitude (Y) Coordinates']
    38.84599

    """
    url = 'https://geocoding.geo.census.gov/geocoder/geographies/onelineaddress'
    # Let requests build and escape the query string.  Interpolating the
    # raw address into the URL breaks on addresses containing '#', '&',
    # '+' or '=' (e.g. "123 Main St #4" would turn the rest of the query
    # into a URL fragment that is never sent).
    params = {
        'address': address,
        'layers': 'all',
        'benchmark': '4',
        'vintage': '4',
    }
    logger.debug('GET: %s params=%s', url, params)
    # Without a timeout a stalled server would block the caller forever.
    r = requests.get(url, params=params, timeout=30.0)
    if r.status_code != 200:
        logger.error('Unexpected response code %d, wanted 200.  Fail.', r.status_code)
        return None

    soup = BeautifulSoup(r.text, 'html.parser')
    result = soup.find('div', id='pl_gov_census_geo_geocoder_domain_AddressResult')
    logger.debug('Unhelpful result blurb: "%s"', result)
    if result is None:
        # find() returns None when the page has no such div; report the
        # failure the same way as a bad status code instead of crashing.
        logger.error('Response did not contain an address result section.  Fail.')
        return None

    # A label is any line that ends with a colon, optionally followed by
    # spaces.
    label_re = re.compile(r'.*: *$')
    label = None
    out = {}
    for line in result.get_text('\n').split('\n'):
        if label_re.match(line):
            # Drop the surrounding whitespace and the trailing colon.
            label = line.strip()[:-1]
            logger.debug('Label is: "%s"', label)
        elif label:
            # Any non-label line is taken as the value of the most recent
            # label; a later line for the same label overwrites it.
            value = line.strip()
            if string_utils.is_integer_number(value):
                value = int(value)
            elif string_utils.is_number(value):
                value = float(value)
            logger.debug('Value is: "%s"', value)
            out[label] = value
    return out
64
65
if __name__ == '__main__':
    # When executed as a script, run the doctest examples embedded in this
    # module's docstrings.
    from doctest import testmod

    testmod()