Counties in the Mississippi River Valley 

Last Friday I took the code from an earlier go at this and made it filter census data by county to only have data in the target area. You can also view my results of the program.

#!/usr/bin/env python
import sys
import urllib

def dms(d,m,s):
    return d+m/60.+s/3600.

def parsedms(f):
    """
    Imports a coordinates from tsv file and returns it as a list
    """
    a=[]
    for line in f:
        t=line.rsplit("t")
        lon=dms(int(t[0]),int(t[1]),int(t[2]))
        lat=dms(int(t[3]),int(t[4]),int(t[5]))
        a.append((-lon,lat))
    return a

print >> sys.stderr, "loading river data"
river_file = urllib.urlopen("http://collincode.files.wordpress.com/2008/02/mississippi-river.txt")
print >> sys.stderr, "parsing river data"
river = parsedms(river_file)

def distance(Lon1,Lat1,Lon2,Lat2):
    """
    Calculates the distance between two coordinates
    Based on calcDistance in http://nmviewogc.cr.usgs.gov/javascript/aimsMap.js
    """
    import math as Math
    Lon1 = Lon1 * Math.pi / 180;
    Lon2 = Lon2 * Math.pi / 180;
    Lat1 = Lat1 * Math.pi / 180;
    Lat2 = Lat2 * Math.pi / 180;
    LonDist = Lon1-Lon2;
    LatDist = Lat1-Lat2;
    A = Math.pow(Math.sin(LatDist / 2),2) + Math.cos(Lat1) * Math.cos(Lat2) * Math.pow(Math.sin(LonDist /2),2);
    C = 2 * Math.asin(Math.sqrt(A));
    D = (3963.1676 - 13 * Math.sin((Lat1 + Lat2) / 2)) * C;
    return D

def dist2river(lon,lat,river):
    'returns the distance and closest point on the river to the point'
    smallest = distance(river[0][0],river[0][1],lon,lat) #check against initial point
    for point in river:
        d = distance(point[0],point[1],lon,lat)
        if d <= smallest: #see if this point is closer
            thepoint=point
            smallest=d
    return smallest, thepoint


COLUMNS = (
    (1, 2, 'state', str),
    (8, 71, 'name', lambda x: str(x).strip()),
    (72, 80, 'pop', int),
    (81, 89, 'houses', long),
    (118, 129, 'land', float),
    (130, 141, 'water', float),
    (142, 151, 'lat', float),
    (152, 162, 'lon', float),
)

def parse_line(line):
    return dict([(name, klass(line[s-1:e])) for s, e, name, klass in COLUMNS])

def parse_data(county_file):
    for line in county_file:
        if line:
            yield parse_line(line)

print >> sys.stderr, "loading county data"
county_file = urllib.urlopen("http://www.census.gov/tiger/tms/gazetteer/county2k.txt")
print >> sys.stderr, "parsing county data"
data = []
for county in parse_data(county_file):
    if county['name'].endswith(' County') or  county['name'].endswith(' Parish'):
        county['name'] = county['name'][:-len(' County')]
        data.append(county)

print >> sys.stderr, "calculating distances to river"
distances = {}
for county in data:
    key = "%s, %s" % (county['name'], county['state'])
    key = key.lower()
    dist = dist2river(county['lon'], county['lat'], river)[0]
    if dist < 305:
        distances[key] = dist


print >> sys.stderr, "processing your data"
def runfile(i, o):
    for line in i:
        line = line.strip("n")
        columns = line.split('t')
        county = columns[0]
        key = county.strip().lower()
        if key in distances:
            print >> o, "%.3ft%s" % (distances[key], line)

import os
for x in os.listdir('.'):
    if x.startswith('b'):
        print "running %s" % x
        runfile(file(x), file('o' + x, 'w'))