+ logger.debug('Response: %s', json.dumps(r.json(), indent=4, sort_keys=True))
+ return r.json()
+
+
+def batch_geocode_addresses(addresses: List[str]) -> Optional[List[str]]:
+ """Send a list of addresses for batch geocoding to a web service
+ operated by the US Census Bureau.
+
+ Args:
+ addresses: a list of addresses to geocode. Each line of the
+ input list should be a single address in the form: "STREET
+ ADDRESS, CITY, STATE, ZIPCODE". Individual address components
+ may be omitted and the service will make educated guesses but
+ the comma delimiters between address components may not be
+ omitted.
+
+ Returns:
+ An array of the same size as the input array with one
+ answer record per line. Returns None on error.
+
+ Note: this code will deal with requests >10k addresses by chunking
+ them internally because the census website disallows requests >
+ 10k lines.
+
+ >>> batch_geocode_addresses(
+ ... [
+ ... '4600 Silver Hill Rd, Washington, DC, 20233',
+ ... '935 Pennsylvania Avenue, NW, Washington, DC, 20535-0001',
+ ... '1600 Pennsylvania Avenue NW, Washington, DC, 20500',
+ ... '700 Pennsylvania Avenue NW, Washington, DC, 20408',
+ ... ]
+ ... )
+ ['"1"," 4600 Silver Hill Rd, Washington, DC, 20233","Match","Exact","4600 SILVER HILL RD, WASHINGTON, DC, 20233","-76.92743285599994,38.84598908100003","76355984","L","24","033","802405","2004"', '"2"," 935 Pennsylvania Avenue, NW, Washington, DC","No_Match"', '"3"," 1600 Pennsylvania Avenue NW, Washington, DC, 20500","Match","Exact","1600 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20500","-77.03534009999998,38.89875363300007","76225813","L","11","001","980000","1034"', '"4"," 700 Pennsylvania Avenue NW, Washington, DC, 20408","Match","Exact","700 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20408","-77.02304089899997,38.89361872300003","76226346","L","11","001","980000","1025"']
+ """
+
+ n = 1
+ url = 'https://geocoding.geo.census.gov/geocoder/geographies/addressbatch'
+ payload = {'benchmark': '4', 'vintage': '4'}
+ out = []
+ for chunk in list_utils.shard(addresses, 9999):
+ raw_file = ''
+ for address in chunk:
+ raw_file += f'{n}, {address}\n'
+ n += 1
+ files = {'addressFile': ('input.csv', raw_file)}
+ logger.debug('POST: %s', url)
+ try:
+ r = requests.post(url, files=files, data=payload)
+ except Exception as e:
+ logger.exception(e)
+ return None
+
+ if r.status_code != 200:
+ logger.debug(r.text)
+ logger.error('Unexpected response code %d, wanted 200. Fail.', r.status_code)
+ return None
+ logger.debug('Response: %s', r.text)
+ for line in r.text.split('\n'):
+ line = line.strip()
+ if len(line) > 0:
+ out.append(line)