Skip to content

Commit 177cd7d

Browse files
authored
Merge pull request #1 from mjanowiecki/master
Update with columnName
2 parents 91a5082 + 961939f commit 177cd7d

File tree

1 file changed

+8
-3
lines changed

1 file changed

+8
-3
lines changed

stringComparisonFromCSV.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,24 +5,29 @@
55

66
parser = argparse.ArgumentParser()
77
parser.add_argument('-f', '--fileName', help='the CSV file of headings. optional - if not provided, the script will ask for input')
8-
parser.add_argument('-t', '--threshold', help='the threshold (e.g. \'90\' means the strings are 90% similar and 10% different ). optional - if not provided, the script will ask for input')
8+
parser.add_argument('-c', '--columnName', help='the name of the column in the CSV file containing the strings to be compared. optional - if not provided, the script will ask for input')
9+
parser.add_argument('-t', '--threshold', help='the threshold (e.g. \'90\' means the strings are 90% similar and 10% different). optional - if not provided, the script will ask for input')
910
args = parser.parse_args()
1011

1112
if args.fileName:
1213
fileName = args.fileName
1314
else:
1415
fileName = raw_input('Enter the file name of the CSV of headings (including \'.csv\'): ')
16+
if args.columnName:
17+
columnName = args.columnName
18+
else:
19+
columnName = raw_input('Enter the name of the column in the CSV file containing the strings to be compared: ')
1520
if args.threshold:
1621
threshold = int(args.threshold)
1722
else:
18-
threshold = int(raw_input('Enter threshold (e.g. \'90\' means the strings are 90% similar and 10% different ): '))
23+
threshold = int(raw_input('Enter threshold (e.g. \'90\' means the strings are 90% similar and 10% different): '))
1924

2025
startTime = time.time()
2126
nameList = []
2227
with open(fileName) as csvfile:
2328
reader = csv.DictReader(csvfile)
2429
for row in reader:
25-
nameList.append(str(row['prefLabel']))
30+
nameList.append(str(row[columnName]))
2631
counter = len(nameList)
2732
f=csv.writer(open(fileName[:fileName.index('.')]+'NearMatches.csv','wb'))
2833
f.writerow(['percentage']+['name1']+['name2'])

0 commit comments

Comments
 (0)