Reduce the doctest lease duration...
[python_utils.git] / geocode.py
index 3e83d023bca918588056dd5e9b8db110d3286641..e9e5c35c5fbec6a272518351c461ec5a4fed4243 100644 (file)
@@ -3,10 +3,11 @@
 # © Copyright 2022, Scott Gasch
 
 """Wrapper around US Census address geocoder API described here:
-https://www2.census.gov/geo/pdfs/maps-data/data/Census_Geocoder_User_Guide.pdf
-https://geocoding.geo.census.gov/geocoder/Geocoding_Services_API.pdf
 
-Also try:
+* https://www2.census.gov/geo/pdfs/maps-data/data/Census_Geocoder_User_Guide.pdf
+* https://geocoding.geo.census.gov/geocoder/Geocoding_Services_API.pdf
+
+Also try::
 
     $ curl --form addressFile=@localfile.csv \
            --form benchmark=2020 \
@@ -27,17 +28,31 @@ logger = logging.getLogger(__name__)
 
 
 def geocode_address(address: str) -> Optional[Dict[str, Any]]:
-    """Send a single address to the US Census geocoding API.  The response
-    is a parsed JSON chunk of data with N addressMatches in the result
-    section and the details of each match within it.  Returns None on error.
+    """Send a single address to the US Census geocoding API in order to
+    look up relevant data about it (including, if possible, its
+    lat/long).  The response is a parsed JSON chunk of data with N
+    addressMatches in the result section and the details of each match
+    within it.
+
+    Args:
+        address: the full address to look up in the form: "STREET
+            ADDRESS, CITY, STATE, ZIPCODE".  These components may be
+            omitted and the service will make educated guesses but
+            the commas delimiting each component must be included.
+
+    Returns:
+        A parsed JSON dict with a bunch of information about the
+        address contained within it.  Each 'addressMatch'
+        in the JSON describes the details of a possible match.
+        Returns None if there was an error or the address is
+        not known.
 
     >>> json = geocode_address('4600 Silver Hill Rd,, 20233')
     >>> json['result']['addressMatches'][0]['matchedAddress']
     '4600 SILVER HILL RD, WASHINGTON, DC, 20233'
 
     >>> json['result']['addressMatches'][0]['coordinates']
-    {'x': -76.92743, 'y': 38.84599}
-
+    {'x': -76.9274328556918, 'y': 38.845989080537514}
     """
     url = 'https://geocoding.geo.census.gov/geocoder/geographies/onelineaddress'
     url += f'?address={address}'
@@ -58,15 +73,25 @@ def geocode_address(address: str) -> Optional[Dict[str, Any]]:
     return r.json()
 
 
-def batch_geocode_addresses(addresses: List[str]):
-    """Send up to addresses for batch geocoding.  Each line of the input
-    list should be a single address of the form: STREET ADDRESS, CITY,
-    STATE, ZIP.  Components may be omitted but the commas may not be.
-    Result is an array of the same size as the input array with one
-    answer record per line.  Returns None on error.
+def batch_geocode_addresses(addresses: List[str]) -> Optional[List[str]]:
+    """Send a list of addresses for batch geocoding to a web service
+    operated by the US Census Bureau.
+
+    Args:
+        addresses: a list of addresses to geocode.  Each line of the
+            input list should be a single address in the form: "STREET
+            ADDRESS, CITY, STATE, ZIPCODE".  Individual address components
+            may be omitted and the service will make educated guesses but
+            the comma delimiters between address components may not be
+            omitted.
+
+    Returns:
+        An array of the same size as the input array with one
+        answer record per line.  Returns None on error.
 
-    This code will deal with requests >10k addresses by chunking them
-    internally because the census website disallows requests > 10k lines.
+    Note: this code will deal with requests >10k addresses by chunking
+    them internally because the census website disallows requests >
+    10k lines.
 
     >>> batch_geocode_addresses(
     ...     [
@@ -76,7 +101,7 @@ def batch_geocode_addresses(addresses: List[str]):
     ...         '700 Pennsylvania Avenue NW, Washington, DC, 20408',
     ...     ]
     ... )
-    ['"1"," 4600 Silver Hill Rd,  Washington,  DC,  20233","Match","Exact","4600 SILVER HILL RD, WASHINGTON, DC, 20233","-76.92743,38.84599","76355984","L","24","033","802405","2004"', '"2"," 935 Pennsylvania Avenue,  NW,  Washington,  DC","No_Match"', '"3"," 1600 Pennsylvania Avenue NW,  Washington,  DC,  20500","Match","Exact","1600 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20500","-77.03534,38.898754","76225813","L","11","001","980000","1034"', '"4"," 700 Pennsylvania Avenue NW,  Washington,  DC,  20408","Match","Exact","700 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20408","-77.02304,38.89362","76226346","L","11","001","980000","1025"']
+    ['"1"," 4600 Silver Hill Rd,  Washington,  DC,  20233","Match","Exact","4600 SILVER HILL RD, WASHINGTON, DC, 20233","-76.92743285599994,38.84598908100003","76355984","L","24","033","802405","2004"', '"2"," 935 Pennsylvania Avenue,  NW,  Washington,  DC","No_Match"', '"3"," 1600 Pennsylvania Avenue NW,  Washington,  DC,  20500","Match","Exact","1600 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20500","-77.03534009999998,38.89875363300007","76225813","L","11","001","980000","1034"', '"4"," 700 Pennsylvania Avenue NW,  Washington,  DC,  20408","Match","Exact","700 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20408","-77.02304089899997,38.89361872300003","76226346","L","11","001","980000","1025"']
     """
 
     n = 1