-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathfilters.py
59 lines (30 loc) · 983 Bytes
/
filters.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# -*- coding: utf-8 -*-
import numpy
import soundex
def topns(matrix, n):
    """Return the indices of the ``n`` largest entries of ``matrix``.

    Args:
        matrix: Array (or array-like) whose entries are ranked.
        n: Maximum number of index tuples to return. ``None`` returns the
            index of every entry; a negative value returns an empty list.

    Returns:
        A list of 1-D integer arrays, one per selected entry, each holding
        that entry's multi-dimensional index. The list is ordered from the
        largest value to the smallest.
    """
    matrix = numpy.asarray(matrix)
    # argsort is ascending; reversing it ranks the largest values first.
    order = numpy.argsort(matrix.flatten())[::-1]
    if n is not None:
        # Previously ``n`` was ignored and every index was returned.
        order = order[:max(n, 0)]
    coords = numpy.unravel_index(order, matrix.shape)
    return list(numpy.vstack(coords).transpose())
def leven(gnodes, tnodes, pairs):
    """Placeholder with the same signature as ``sondx``; does nothing.

    NOTE(review): the name suggests a Levenshtein-based similarity was
    planned here, but no implementation exists. All arguments are ignored.

    Returns:
        None.
    """
    return None
def sondx(gnodes, tnodes, pairs):
    """Score each index pair with ``soundex.Soundex().compare``.

    Args:
        gnodes: Indexable collection looked up with ``pair[1]``.
        tnodes: Indexable collection looked up with ``pair[0]``.
        pairs: Iterable of index pairs ``(tnodes index, gnodes index)``.

    Returns:
        A list with one ``compare`` result per pair, in the order of
        ``pairs``.
    """
    def _score(pair):
        # Look up both entries before building the comparer, one per pair.
        left = gnodes[pair[1]]
        right = tnodes[pair[0]]
        return soundex.Soundex().compare(left, right)

    return [_score(pair) for pair in pairs]
def start(matrix, gnodes, tnodes):
    """Rank every (gnode, tnode) pair by its matrix score and add a Soundex score.

    Rows of ``matrix`` are indexed like ``tnodes`` and columns like
    ``gnodes``.

    Args:
        matrix: 2-D array-like of scores.
        gnodes: Iterable of entries matching the matrix columns.
        tnodes: Iterable of entries matching the matrix rows.

    Returns:
        A list of ``[gnode, tnode, matrix_value, similarity]`` lists, one per
        matrix cell, in the order produced by ``topns`` (largest matrix
        value first). ``similarity`` is the value ``sondx`` computed for
        that pair.

    Side effects:
        Sets NumPy's global print threshold so arrays are printed in full.
    """
    import sys  # local import: only needed for the print threshold below

    # threshold="nan" is rejected by current NumPy ("threshold must be
    # numeric"); sys.maxsize is the supported way to disable truncation.
    numpy.set_printoptions(threshold=sys.maxsize)
    gnodes = list(gnodes)
    tnodes = list(tnodes)
    matrix = numpy.array(matrix)
    pairs = topns(matrix, matrix.shape[0] * matrix.shape[1])
    similaritys = sondx(gnodes, tnodes, pairs)
    return [
        [gnodes[pair[1]], tnodes[pair[0]], matrix[pair[0]][pair[1]], similarity]
        for pair, similarity in zip(pairs, similaritys)
    ]