import fileinput
import itertools

# Some postmiles files have duplicate entries:
# same postmile but slightly different street names.
# This program presents the duplicates to the user to
# select which one to keep and discards the others.
#
# Only works when duplicates are in successive rows of the file, which is
# how it should be if it's sorted by postmile. But sometimes
# there's weird data where two SLIGHTLY different postmiles have
# the same latlong, and you'll have to deal with these by hand.
#
# Reads from the file name(s) provided on the command line (or stdin).
# Expected input: one record per line, comma-separated, at least six fields.
# Fields 3 and 4 (0-based) are compared as the lat/long pair and field 5 is
# the street name. Assumes commas are separators and don't appear in the name.
# NOTE(review): the original header listed "route, direction, lat, long, name"
# (five fields) - presumably a postmile column sits at index 2; confirm
# against the real data.

try:
    _ask_user = raw_input  # Python 2
except NameError:
    _ask_user = input  # Python 3

outputFile = "uniqFile"  # results are written here

_FIELD_COUNT = 6  # minimum number of fields a record must have


def _parse_records(lines):
    """Yield the comma-split fields of every non-blank line in *lines*.

    Trailing whitespace (including the newline) is stripped first. Blank
    lines are skipped; a line with fewer than six fields raises ValueError
    so that malformed data is reported instead of failing with IndexError.
    """
    for line in lines:
        text = line.rstrip()
        if not text:
            continue
        fields = text.split(",")
        if len(fields) < _FIELD_COUNT:
            raise ValueError(
                "expected at least %d comma-separated fields, got %d: %r"
                % (_FIELD_COUNT, len(fields), text)
            )
        yield fields


def _coords_key(fields):
    """Return the lat/long pair used to decide whether two rows match."""
    return (fields[3], fields[4])


def _choose_street(streets, ask):
    """Ask the user which of *streets* to keep and return it.

    An empty answer selects the first street. Anything that is not a number
    between 1 and len(streets) is rejected and the question is repeated
    (previously 0 or a negative number silently picked from the end of the
    list, and other bad input crashed the program).
    """
    prompt = "choose one of " + str(len(streets)) + ": "
    while True:
        answer = ask(prompt).strip()
        if not answer:
            return streets[0]
        try:
            index = int(answer) - 1
        except ValueError:
            index = -1
        if 0 <= index < len(streets):
            return streets[index]
        print("Please enter a number from 1 to " + str(len(streets)))


def remove_duplicates(lines, out, ask=None):
    """Copy records from *lines* to *out*, collapsing runs of duplicates.

    Successive records with the same lat/long pair form one group. A group
    of one is written unchanged (first six fields). For a larger group the
    candidate street names are printed, the user picks one through *ask*,
    and a single record is written: the first five fields of the group's
    last row followed by the chosen street name.

    lines -- iterable of text lines (e.g. fileinput.input())
    out   -- object with a write() method receiving the result
    ask   -- callable taking a prompt and returning the user's answer;
             defaults to raw_input / input
    """
    if ask is None:
        ask = _ask_user

    groups = itertools.groupby(_parse_records(lines), key=_coords_key)
    for _, group in groups:
        rows = list(group)
        chosen = rows[-1][5]
        if len(rows) > 1:
            streets = [row[5] for row in rows]
            # First candidate is shown next to its route, the rest indented.
            print(rows[0][0] + "\t" + streets[0])
            for street in streets[1:]:
                print("\t\t" + street)
            chosen = _choose_street(streets, ask)
            # Double space matches the original `print a, b` output.
            print(" OK, you selected:  " + chosen)
            print("")
            print("-----------")
        out.write(",".join(rows[-1][:5] + [chosen]) + "\n")


def main():
    """Deduplicate the input named on the command line into outputFile."""
    # `with` guarantees the output is flushed and closed even on an error.
    with open(outputFile, "w") as outFile:
        remove_duplicates(fileinput.input(), outFile)
    # Double space matches the original `print a, b` output.
    print("All Done, output in  " + outputFile)


if __name__ == "__main__":
    main()