#!/usr/bin/env python3
"""Wrapper around US Census address geocoder API described here:
https://www2.census.gov/geo/pdfs/maps-data/data/Census_Geocoder_User_Guide.pdf"""

import logging
import re
from typing import Dict, Optional
from urllib.parse import quote, urlencode

import requests
from bs4 import BeautifulSoup
from requests.utils import requote_uri

logger = logging.getLogger(__name__)

# Endpoint that takes a whole address in a single ``address`` query parameter.
_GEOCODER_URL = 'https://geocoding.geo.census.gov/geocoder/geographies/onelineaddress'

# ``id`` of the <div> that geocode_address() extracts the result text from.
_RESULT_DIV_ID = 'pl_gov_census_geo_geocoder_domain_AddressResult'

# A "label" line ends with a colon, optionally followed by spaces only.
_LABEL_LINE = re.compile(r'.*: *$')


def _parse_labelled_text(text: str) -> Dict[str, str]:
    """Turn ``Label:`` / value lines of text into a ``{label: value}`` dict.

    A line ending in a colon (plus optional trailing spaces) starts a new
    label.  Every other line seen while a non-empty label is active is
    stored, stripped, as that label's value, so when several such lines follow
    one label the last one wins.  Lines that appear before the first label are
    ignored.
    """
    out: Dict[str, str] = {}
    label = None
    for line in text.split('\n'):
        if _LABEL_LINE.match(line):
            # After stripping, the line ends with ':'; drop it to get the key.
            label = line.strip()[:-1]
            logger.debug('Label is: "%s"', label)
        elif label:
            out[label] = line.strip()
            logger.debug('Value is: "%s"', out[label])
    return out


def geocode_address(address: str, *,
                    timeout: Optional[float] = 30.0) -> Optional[Dict[str, str]]:
    """Send a single address to the US Census geocoding API.

    Args:
        address: The raw (not URL-encoded) one-line address.  It is
            percent-encoded here, so characters such as ``&``, ``#`` and
            ``+`` are sent as part of the address instead of altering the
            query string.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``.  ``None`` waits indefinitely.

    Returns:
        A dict mapping each label found in the result section of the response
        page to its value, or ``None`` if the server did not answer with
        HTTP 200 or the page contained no result section.

    Raises:
        requests.exceptions.RequestException: network failures and timeouts
            raised by ``requests.get`` are not caught here.

    >>> out = geocode_address('5 Shelbern Dr,,, 07738')
    >>> out['Matched Address']
    '5 SHELBERN DR, LINCROFT, NJ, 07738'
    """
    # quote_via=quote encodes spaces as %20 (not '+') and escapes every
    # reserved character inside the values.
    query = urlencode(
        {
            'address': address,
            'layers': 'all',
            'benchmark': '4',
            'vintage': '4',
        },
        quote_via=quote,
    )
    # The URL is already fully encoded; requote_uri is kept as a final
    # normalisation pass and leaves existing percent-escapes intact.
    url = requote_uri(f'{_GEOCODER_URL}?{query}')
    logger.debug('GET: %s', url)

    r = requests.get(url, timeout=timeout)
    if r.status_code != 200:
        logger.error('Unexpected response code %d, wanted 200. Fail.',
                     r.status_code)
        return None

    soup = BeautifulSoup(r.text, 'html.parser')
    result = soup.find('div', id=_RESULT_DIV_ID)
    logger.debug('Unhelpful result blurb: "%s"', result)
    if result is None:
        # find() returns None when nothing matches; report it the same way as
        # any other failed lookup rather than raising AttributeError below.
        logger.error('Response contained no <div id="%s">. Fail.',
                     _RESULT_DIV_ID)
        return None

    return _parse_labelled_text(result.get_text('\n'))


if __name__ == '__main__':
    import doctest
    doctest.testmod()