"""Interactively remove duplicate entries from a postmiles file.

Some postmiles files have duplicate entries: the same postmile but slightly
different street names.  This program presents each run of duplicates to the
user, who selects which street name to keep; the others are discarded.

Rows are considered duplicates when their lat/long pair (4th and 5th fields)
equals that of the row immediately before them.  This only works when the
duplicates are in successive rows of the file, which is how it should be if
the file is sorted by postmile.  Sometimes there is weird data where two
SLIGHTLY different postmiles have the same lat/long; those rows are treated
as duplicates too, and you will have to deal with them by hand.

Input is read from the file name(s) given on the command line.

Expected input format, six comma-separated fields per row:
    route, direction, <third field, presumably the postmile>, lat, long, name
Assumes commas are separators and don't appear in the name.

The script runs under both Python 2 and Python 3; every print call passes a
single string so the output is identical on both.
"""

import fileinput

try:
    _read_line = raw_input  # Python 2
except NameError:
    _read_line = input  # Python 3

# Results are written to this file (name kept from the original script).
outputFile = "uniqFile"


def prompt_for_choice(streets, read_line=None):
    """Ask the user which candidate street name to keep and return it.

    streets   -- list of candidate names, in the order they were displayed.
    read_line -- optional callable taking the prompt text and returning the
                 user's reply; defaults to the interactive console reader.

    An empty reply selects the first candidate.  Anything that is not a
    number between 1 and len(streets) is rejected and the question is asked
    again, instead of crashing or silently indexing from the end of the list.
    """
    if read_line is None:
        read_line = _read_line
    prompt = "choose one of " + str(len(streets)) + ": "
    while True:
        reply = read_line(prompt).strip()
        if not reply:
            return streets[0]
        try:
            number = int(reply)
        except ValueError:
            number = 0  # falls through to the range check below
        if 1 <= number <= len(streets):
            return streets[number - 1]
        print("Please enter a number from 1 to " + str(len(streets))
              + ", or press Enter for 1.")


def _resolve_group(fields, streets, choose):
    """Return the output row for one finished group of rows.

    fields  -- the split fields of the last row in the group; its first five
               fields are the ones written out.
    streets -- every street name seen in the group.
    choose  -- callable used to pick a name when the group has duplicates.
    """
    if len(streets) > 1:
        street = choose(streets)
        # Two spaces after the colon reproduce the original
        # `print " OK, you selected: ", name` output.
        print(" OK, you selected:  " + street)
        print("")
        print("-----------")
    else:
        street = streets[0]
    return ",".join(fields[:5] + [street])


def dedupe_rows(lines, choose=prompt_for_choice):
    """Yield de-duplicated output rows (without newline) for the input lines.

    lines  -- iterable of input rows in the format described in the module
              docstring.  Blank lines are skipped.
    choose -- callable given the list of street names of a duplicate group;
              must return the name to keep.

    Each row is emitted only once the following row shows that its group is
    complete; the final group is emitted when the input is exhausted, so a
    run of duplicates at the very end of the file is also offered to the
    user.  Nothing is yielded for empty input.

    Raises ValueError for a non-blank row with fewer than six fields.
    """
    pending = None  # fields of the most recent row of the current group
    streets = []    # street names collected for the current group
    for line in lines:
        text = line.rstrip()  # trim trailing whitespace and newline
        if not text:
            continue
        fields = text.split(",")
        if len(fields) < 6:
            raise ValueError(
                "expected at least 6 comma-separated fields: %r" % text)
        same_spot = (pending is not None
                     and fields[3] == pending[3]
                     and fields[4] == pending[4])
        if same_spot:
            if len(streets) == 1:
                # First duplicate of this group: show the header line with
                # the name of the row that started the group.
                print(fields[0] + "\t" + streets[0])
            print("\t\t" + fields[5])
            streets.append(fields[5])
        else:
            if pending is not None:
                yield _resolve_group(pending, streets, choose)
            streets = [fields[5]]
        pending = fields
    if pending is not None:
        yield _resolve_group(pending, streets, choose)


def main():
    """Read the files named on the command line and write outputFile."""
    with open(outputFile, "w") as out_file:
        for row in dedupe_rows(fileinput.input()):
            out_file.write(row + "\n")
    # Two spaces reproduce the original `print "...", outputFile` output.
    print("All Done, output in  " + outputFile)


if __name__ == "__main__":
    main()