Last Friday I took the code from an earlier go at this and made it filter census data by county to only have data in the target area. You can also view my results of the program.
#!/usr/bin/env python
import sys
import urllib
def dms(d,m,s):
return d+m/60.+s/3600.
def parsedms(f):
"""
Imports a coordinates from tsv file and returns it as a list
"""
a=[]
for line in f:
t=line.rsplit("t")
lon=dms(int(t[0]),int(t[1]),int(t[2]))
lat=dms(int(t[3]),int(t[4]),int(t[5]))
a.append((-lon,lat))
return a
print >> sys.stderr, "loading river data"
river_file = urllib.urlopen("http://collincode.files.wordpress.com/2008/02/mississippi-river.txt")
print >> sys.stderr, "parsing river data"
river = parsedms(river_file)
def distance(Lon1,Lat1,Lon2,Lat2):
"""
Calculates the distance between two coordinates
Based on calcDistance in http://nmviewogc.cr.usgs.gov/javascript/aimsMap.js
"""
import math as Math
Lon1 = Lon1 * Math.pi / 180;
Lon2 = Lon2 * Math.pi / 180;
Lat1 = Lat1 * Math.pi / 180;
Lat2 = Lat2 * Math.pi / 180;
LonDist = Lon1-Lon2;
LatDist = Lat1-Lat2;
A = Math.pow(Math.sin(LatDist / 2),2) + Math.cos(Lat1) * Math.cos(Lat2) * Math.pow(Math.sin(LonDist /2),2);
C = 2 * Math.asin(Math.sqrt(A));
D = (3963.1676 - 13 * Math.sin((Lat1 + Lat2) / 2)) * C;
return D
def dist2river(lon,lat,river):
'returns the distance and closest point on the river to the point'
smallest = distance(river[0][0],river[0][1],lon,lat) #check against initial point
for point in river:
d = distance(point[0],point[1],lon,lat)
if d <= smallest: #see if this point is closer
thepoint=point
smallest=d
return smallest, thepoint
COLUMNS = (
(1, 2, 'state', str),
(8, 71, 'name', lambda x: str(x).strip()),
(72, 80, 'pop', int),
(81, 89, 'houses', long),
(118, 129, 'land', float),
(130, 141, 'water', float),
(142, 151, 'lat', float),
(152, 162, 'lon', float),
)
def parse_line(line):
return dict([(name, klass(line[s-1:e])) for s, e, name, klass in COLUMNS])
def parse_data(county_file):
for line in county_file:
if line:
yield parse_line(line)
print >> sys.stderr, "loading county data"
county_file = urllib.urlopen("http://www.census.gov/tiger/tms/gazetteer/county2k.txt")
print >> sys.stderr, "parsing county data"
data = []
for county in parse_data(county_file):
if county['name'].endswith(' County') or county['name'].endswith(' Parish'):
county['name'] = county['name'][:-len(' County')]
data.append(county)
print >> sys.stderr, "calculating distances to river"
distances = {}
for county in data:
key = "%s, %s" % (county['name'], county['state'])
key = key.lower()
dist = dist2river(county['lon'], county['lat'], river)[0]
if dist < 305:
distances[key] = dist
print >> sys.stderr, "processing your data"
def runfile(i, o):
for line in i:
line = line.strip("n")
columns = line.split('t')
county = columns[0]
key = county.strip().lower()
if key in distances:
print >> o, "%.3ft%s" % (distances[key], line)
import os
for x in os.listdir('.'):
if x.startswith('b'):
print "running %s" % x
runfile(file(x), file('o' + x, 'w'))