import requests
from requests.utils import requote_uri
+import list_utils
+
logger = logging.getLogger(__name__)
Result is an array of the same size as the input array with one
answer record per line. Returns None on error.
+ Inputs of more than 10k addresses are handled by splitting them into
+ chunks internally, because the census website rejects requests > 10k lines.
+
>>> batch_geocode_addresses(
... [
... '4600 Silver Hill Rd, Washington, DC, 20233',
['"1"," 4600 Silver Hill Rd, Washington, DC, 20233","Match","Exact","4600 SILVER HILL RD, WASHINGTON, DC, 20233","-76.92743,38.84599","76355984","L","24","033","802405","2004"', '"2"," 935 Pennsylvania Avenue, NW, Washington, DC","No_Match"', '"3"," 1600 Pennsylvania Avenue NW, Washington, DC, 20500","Match","Exact","1600 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20500","-77.03534,38.898754","76225813","L","11","001","980000","1034"', '"4"," 700 Pennsylvania Avenue NW, Washington, DC, 20408","Match","Exact","700 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20408","-77.02304,38.89362","76226346","L","11","001","980000","1025"']
"""
- # TODO: use list_utils.shard to break up the input if it's >10k records
- # b/c the census website has a hard limit at 10k.
-
+ n = 1
url = 'https://geocoding.geo.census.gov/geocoder/geographies/addressbatch'
payload = {'benchmark': '4', 'vintage': '4'}
- raw_file = ''
- for n, address in enumerate(addresses):
- raw_file += f'{n+1}, {address}\n'
- files = {'addressFile': ('input.csv', raw_file)}
- logger.debug('POST: %s', url)
- try:
- r = requests.post(url, files=files, data=payload)
- except Exception as e:
- logger.exception(e)
- return None
- if r.status_code != 200:
- print(r.text)
- logger.error('Unexpected response code %d, wanted 200. Fail.', r.status_code)
- return None
-
out = []
- for line in r.text.split('\n'):
- line = line.strip()
- if len(line) > 0:
- out.append(line)
+ for chunk in list_utils.shard(addresses, 9999):
+ raw_file = ''
+ for address in chunk:
+ raw_file += f'{n}, {address}\n'
+ n += 1
+ files = {'addressFile': ('input.csv', raw_file)}
+ logger.debug('POST: %s', url)
+ try:
+ r = requests.post(url, files=files, data=payload)
+ except Exception as e:
+ logger.exception(e)
+ return None
+ if r.status_code != 200:
+ print(r.text)
+ logger.error('Unexpected response code %d, wanted 200. Fail.', r.status_code)
+ return None
+ for line in r.text.split('\n'):
+ line = line.strip()
+ if len(line) > 0:
+ out.append(line)
return out