#!/usr/bin/env python3

# © Copyright 2022, Scott Gasch

"""Wrapper around US Census address geocoder API described here:
https://www2.census.gov/geo/pdfs/maps-data/data/Census_Geocoder_User_Guide.pdf"""

import logging
import re
from typing import Any, Dict, Optional
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
from requests.utils import requote_uri

import string_utils

logger = logging.getLogger(__name__)

# Endpoint for one-line address lookups.
_GEOCODER_URL = 'https://geocoding.geo.census.gov/geocoder/geographies/onelineaddress'

# id of the <div> in the response page that holds the geocoding result.
_RESULT_DIV_ID = 'pl_gov_census_geo_geocoder_domain_AddressResult'

# A "label" line is any line that ends in a colon (optionally followed by spaces).
_LABEL_LINE = re.compile(r'.*: *$')


def geocode_address(address: str, *, timeout: float = 30.0) -> Optional[Dict[str, Any]]:
    """Send a single address to the US Census geocoding API.

    The response page is scraped: the text of the result <div> is split
    into lines, every line ending in a colon is treated as a label, and
    the lines that follow it (up to the next label) are treated as its
    value.  If several value lines follow one label, the last one wins.
    Values that string_utils recognizes as integers / numbers are
    converted to int / float; everything else is kept as a stripped str.

    Args:
        address: the free-form, one-line address to look up.  It is
            percent-encoded here, so pass it unencoded.
        timeout: seconds to wait for the server before giving up; passed
            straight through to requests.get.

    Returns:
        A dict mapping label text (without the trailing colon) to its
        value, or None if the server did not answer with HTTP 200 or the
        response page did not contain a result section.

    Raises:
        requests.exceptions.RequestException: network-level failures
            (including timeouts) are not caught here and propagate to
            the caller.

    >>> out = geocode_address('4600 Silver Hill Rd,, 20233')
    >>> out['Matched Address']
    '4600 SILVER HILL RD, WASHINGTON, DC, 20233'
    >>> out['Interpolated Longitude (X) Coordinates']
    -76.92743
    >>> out['Interpolated Latitude (Y) Coordinates']
    38.84599

    """
    # Encode the address ourselves: requote_uri leaves reserved characters
    # such as '#' and '&' alone, which would truncate the address or
    # clobber the query parameters that follow it.
    url = _GEOCODER_URL
    url += f'?address={quote(address, safe="")}'
    url += '&layers=all&benchmark=4&vintage=4'
    url = requote_uri(url)
    logger.debug('GET: %s', url)

    # Without a timeout a stalled server would block this call forever.
    r = requests.get(url, timeout=timeout)
    if r.status_code != 200:
        logger.error('Unexpected response code %d, wanted 200. Fail.', r.status_code)
        return None

    soup = BeautifulSoup(r.text, 'html.parser')
    result = soup.find('div', id=_RESULT_DIV_ID)
    logger.debug('Unhelpful result blurb: "%s"', result)
    if result is None:
        # find() yields None when the page has no such <div>; there is
        # nothing to parse in that case.
        logger.error('Response page contains no "%s" section. Fail.', _RESULT_DIV_ID)
        return None

    output = result.get_text('\n')
    label: Optional[str] = None
    out: Dict[str, Any] = {}
    for line in output.split('\n'):
        if _LABEL_LINE.match(line):
            # Drop surrounding whitespace, then the trailing colon.
            label = line.strip()[:-1]
            logger.debug('Label is: "%s"', label)
            continue

        # Lines seen before the first label (or after an empty label)
        # have nothing to be attached to.
        if not label:
            continue

        value: Any = line.strip()
        if string_utils.is_integer_number(value):
            value = int(value)
        elif string_utils.is_number(value):
            value = float(value)
        logger.debug('Value is: "%s"', value)
        out[label] = value
    return out


if __name__ == '__main__':
    import doctest

    doctest.testmod()