#!/usr/bin/env python3

"""Wrapper around US Census address geocoder API described here:
https://www2.census.gov/geo/pdfs/maps-data/data/Census_Geocoder_User_Guide.pdf"""

# Provenance: added as geocode.py in commit
# 7ea467428820e0c18a759f553452907a18faca2e ("Geocoder.") by Scott Gasch,
# Tue, 26 Apr 2022.

import logging
import re
from typing import Dict, Optional

import requests
from bs4 import BeautifulSoup
from requests.utils import quote

logger = logging.getLogger(__name__)

# Endpoint queried by geocode_address(); the address is passed as a query arg.
_GEOCODER_URL = 'https://geocoding.geo.census.gov/geocoder/geographies/onelineaddress'

# id of the <div> in the response page whose text holds the "Label: value" data.
_RESULT_DIV_ID = 'pl_gov_census_geo_geocoder_domain_AddressResult'

# A "label" line is any line ending in a colon followed by zero or more spaces.
# Group 1 is the text before that final colon.
_LABEL_LINE = re.compile(r'(.*): *$')


def _parse_labeled_text(text: str) -> Dict[str, str]:
    """Turn newline-separated "Label:" / value lines into a dict.

    A line ending in a colon (optionally followed by spaces) starts a new
    label.  Every following non-label line is stored, stripped, as that
    label's value; if several such lines follow one label, the last one wins.
    Lines seen before the first label (or after an empty label) are ignored.

    Args:
        text: The text to parse, with fields separated by newlines.

    Returns:
        A mapping of label to the (stripped) value line that followed it.
    """
    label = None
    out: Dict[str, str] = {}
    for line in text.split('\n'):
        found = _LABEL_LINE.match(line)
        if found:
            # Take the label from the capture group rather than chopping a
            # fixed number of characters: the colon may be followed by any
            # number of spaces, including none.
            label = found.group(1).strip()
            logger.debug('Label is: "%s"', label)
        elif label:
            out[label] = line.strip()
            logger.debug('Value is: "%s"', out[label])
    return out


def geocode_address(address: str, *, timeout: float = 30.0) -> Optional[Dict[str, str]]:
    """Send a single address to the US Census geocoding API.

    Args:
        address: The one-line address to look up.  It is URL-quoted before
            being placed in the request.
        timeout: Seconds to wait for the server before giving up, passed
            straight to ``requests.get``.

    Returns:
        A dict mapping each label found in the result section of the response
        to its value, or None if the server answered with a status other than
        200 or the response contained no result section.

    Raises:
        requests.RequestException: If the request itself fails (for example a
            connection error or the timeout expiring).

    >>> out = geocode_address('5 Shelbern Dr,,, 07738')
    >>> out['Matched Address']
    '5 SHELBERN DR, LINCROFT, NJ, 07738'

    """
    encoded_address = quote(address)
    url = _GEOCODER_URL
    url += f'?address={encoded_address}'
    url += '&layers=all&benchmark=4&vintage=4'
    logger.debug('GET: %s', url)

    # Always bound the wait: without a timeout an unresponsive server would
    # block the caller indefinitely.
    r = requests.get(url, timeout=timeout)
    if r.status_code != 200:
        logger.error('Unexpected response code %s, wanted 200. Fail.', r.status_code)
        return None

    soup = BeautifulSoup(r.text, 'html.parser')
    result = soup.find('div', id=_RESULT_DIV_ID)
    logger.debug('Unhelpful result blurb: "%s"', result)
    if result is None:
        # find() returns None when no such element exists; report it as a
        # failed lookup instead of crashing on the attribute access below.
        logger.error('No address result section found in response for "%s". Fail.', address)
        return None

    # Put each text fragment on its own line so labels and values can be
    # told apart line by line.
    return _parse_labeled_text(result.get_text('\n'))


if __name__ == '__main__':
    import doctest

    doctest.testmod()