Geocoder.
author Scott Gasch <[email protected]>
Wed, 27 Apr 2022 01:04:46 +0000 (18:04 -0700)
committer Scott Gasch <[email protected]>
Wed, 27 Apr 2022 01:04:46 +0000 (18:04 -0700)
geocode.py [new file with mode: 0644]

diff --git a/geocode.py b/geocode.py
new file mode 100644 (file)
index 0000000..1ee7f20
--- /dev/null
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+
+"""Wrapper around US Census address geocoder API described here:
+https://www2.census.gov/geo/pdfs/maps-data/data/Census_Geocoder_User_Guide.pdf"""
+
+import logging
+import re
+from typing import Dict, Optional
+
+import requests
+from bs4 import BeautifulSoup
+from requests.utils import quote
+
+logger = logging.getLogger(__name__)
+
+
+def geocode_address(address: str, *, timeout: float = 30.0) -> Optional[Dict[str, str]]:
+    """Send a single address to the US Census geocoding API.
+
+    The HTML response is scraped: the text of the address-result <div> is
+    split into lines, every line ending in a colon is taken as a label, and
+    each following line is stored as that label's value (the last one wins).
+
+    Args:
+        address: one-line address to look up, e.g. '5 Shelbern Dr,,, 07738'.
+        timeout: seconds to wait on the server before requests gives up.
+
+    Returns:
+        A dict mapping each label to its value, or None when the server
+        answers with a status other than 200 or the response contains no
+        address-result <div>.
+
+    Raises:
+        requests.RequestException: propagated unchanged from requests.get
+            (connection failures, timeouts).
+
+    >>> out = geocode_address('5 Shelbern Dr,,, 07738')
+    >>> out['Matched Address']
+    '5 SHELBERN DR, LINCROFT, NJ, 07738'
+
+    """
+    encoded_address = quote(address)
+    url = 'https://geocoding.geo.census.gov/geocoder/geographies/onelineaddress'
+    url += f'?address={encoded_address}'
+    url += '&layers=all&benchmark=4&vintage=4'
+    logger.debug('GET: %s', url)
+    # Without an explicit timeout requests waits indefinitely on a stalled server.
+    r = requests.get(url, timeout=timeout)
+    if r.status_code != 200:
+        logger.error('Unexpected response code %s, wanted 200.  Fail.', r.status_code)
+        return None
+
+    soup = BeautifulSoup(r.text, 'html.parser')
+    result = soup.find('div', id='pl_gov_census_geo_geocoder_domain_AddressResult')
+    logger.debug('Unhelpful result blurb: "%s"', result)
+    if result is None:
+        # find() yields None when the page has no result section; bail out
+        # instead of raising AttributeError on get_text below.
+        logger.error('No address result section in response for "%s".  Fail.', address)
+        return None
+
+    out: Dict[str, str] = {}
+    label = None
+    for line in result.get_text('\n').split('\n'):
+        head = line.rstrip(' ')
+        if head.endswith(':'):
+            # Drop exactly the trailing colon, however many spaces followed it,
+            # so 'Foo:' and 'Foo:  ' both give 'Foo' just like 'Foo: ' does.
+            label = head[:-1].strip()
+            logger.debug('Label is: "%s"', label)
+        elif label:
+            # Lines seen before the first label (or after an empty one) are ignored.
+            out[label] = line.strip()
+            logger.debug('Value is: "%s"', out[label])
+    return out
+
+
+if __name__ == '__main__':
+    # Executed as a script: run the doctests embedded in this module's
+    # docstrings.  Note that the geocode_address example issues a live
+    # HTTP request to the Census geocoder.
+    from doctest import testmod
+
+    testmod()