-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalysis.go
More file actions
190 lines (154 loc) · 5.11 KB
/
analysis.go
File metadata and controls
190 lines (154 loc) · 5.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
package gohoa
import (
"fmt"
"os"
"sort"
"strconv"
"strings"
"github.com/agnivade/levenshtein"
"github.com/avito-tech/normalize"
)
// Tunables for address parsing and fuzzy name matching.
const (
// DEFAULT_ADDR_SEG_LEN is the street segment length NewAnalysis uses unless
// the ADDR_SEG_LENGTH environment variable supplies a parsable override.
DEFAULT_ADDR_SEG_LEN = 2
// SIM_THRESHOLD is the threshold handed to normalize.AreStringsSimilar when
// comparing first and last names in hasFuzzyContactMatch.
SIM_THRESHOLD = 0.7
)
// Analysis bundles the member database with the street-name parser used to
// build lookup keys for it.
type Analysis struct {
// db stores members; it is populated by LoadAllMembers and queried by Mkey.
db MemberDB
// ap splits a raw street name into segments (see parseStreetName).
ap AddressLineParser
// segLen is passed to ap.ParseStreetSegments on every street-name parse.
segLen int
}
// rankContact pairs a candidate contact with the edit distances computed for
// it in hasFuzzyContactMatch, so candidates can be ordered by closeness.
type rankContact struct {
// contact is the candidate taken from the member's contact list.
contact Contact
// distance is the sum distF + distL and is the sort key.
distance int
// distF is the Levenshtein distance between the normalized first names.
distF int
// distL is the Levenshtein distance between the normalized last names.
distL int
}
// NewAnalysis builds an Analysis backed by a fresh member database and the
// segment-based street parser.
//
// The street segment length starts at DEFAULT_ADDR_SEG_LEN. When the
// ADDR_SEG_LENGTH environment variable is set and parses as a base-10 integer
// that fits in 16 bits, that value is used instead; a value that does not
// parse is ignored and the default is kept.
func NewAnalysis() *Analysis {
	analysis := &Analysis{
		db:     NewMemberDB(),
		ap:     &segmentBasedStreetParser{},
		segLen: DEFAULT_ADDR_SEG_LEN,
	}

	raw, isSet := os.LookupEnv("ADDR_SEG_LENGTH")
	if !isSet {
		return analysis
	}

	override, err := strconv.ParseInt(raw, 10, 16)
	if err != nil {
		// Best effort: an unparsable override silently falls back to the default.
		return analysis
	}

	analysis.segLen = int(override)
	return analysis
}
// LoadAllMembers adds every member in allMembers to the database, keyed by
// the result of MakeKey for that member.
func (a *Analysis) LoadAllMembers(allMembers *AllMembers) {
	for _, m := range allMembers.Members {
		// Store a pointer to a per-iteration copy rather than to the loop
		// variable itself.
		stored := m
		key := MakeKey(&m)
		a.db.Add(key, &stored)
	}
}
// CrossCheckOrders looks up every order in ao against the member database.
//
// An order is a match when a member exists at the order's parsed street name
// and street number AND MatchFuzzyNameFromOrder finds the order's name among
// that member's contacts. Every other order is reported as a miss, and a line
// describing the reason is printed to standard output.
//
// It returns the matches, the misses, and a non-nil error stating the number
// of misses whenever there is at least one. The matches and misses are
// returned in full even when the error is non-nil.
func (a *Analysis) CrossCheckOrders(ao *AllOrders) ([]OrderMatch, []OrderMiss, error) {
	var (
		matched []OrderMatch
		missed  []OrderMiss
	)

	for _, order := range ao.orders {
		key := Mkey{a.parseStreetName(order.StreetName), order.StreetNumber}

		member, addressKnown := a.db.Fetch(key)
		if !addressKnown {
			fmt.Printf("Could not find Mkey: %v\n", key)
			missed = append(missed, OrderMiss(order))
			continue
		}

		// The address exists; the name on the order must also belong to one
		// of the contacts registered there.
		contact, nameKnown := MatchFuzzyNameFromOrder(member, order)
		if !nameKnown {
			fmt.Printf("Could not find name from order in contact list: %v\n", order.Name)
			missed = append(missed, OrderMiss(order))
			continue
		}

		matched = append(matched, OrderMatch{
			NeighborName:     order.Name,
			StreetNumber:     order.StreetNumber,
			StreetName:       order.StreetName,
			DiretoryContacts: member.Contacts,
			MatchedContact:   contact,
		})
	}

	if len(missed) == 0 {
		return matched, missed, nil
	}
	return matched, missed, fmt.Errorf("found %d misses", len(missed))
}
// FetchInhabitants returns the contacts of the member stored at the given
// street number and (parsed) street name. When no member is stored under that
// key it prints a message to standard output and returns nil.
func (a *Analysis) FetchInhabitants(streetNum int, streetName string) []Contact {
	key := Mkey{a.parseStreetName(streetName), streetNum}

	if member, ok := a.db.Fetch(key); ok {
		return member.Contacts
	}

	fmt.Printf("Could not find Mkey: %v\n", key)
	return nil
}
// parseStreetName runs rawStreetName through the configured address parser
// with the configured segment length and joins the resulting segments with a
// single space.
func (a *Analysis) parseStreetName(rawStreetName string) string {
	segments := a.ap.ParseStreetSegments(rawStreetName, a.segLen)
	return strings.Join(segments, " ")
}
// MatchFuzzyNameFromOrder splits the order's name on whitespace and, when it
// consists of exactly two tokens, treats them as first and last name and
// delegates to hasFuzzyContactMatch. Names with any other number of tokens
// never match: the zero Contact and false are returned.
func MatchFuzzyNameFromOrder(m *Member, o Order) (Contact, bool) {
	parts := strings.Fields(o.Name)
	if len(parts) != 2 {
		return Contact{}, false
	}

	first, last := parts[0], parts[1]
	return hasFuzzyContactMatch(m, first, last)
}
// hasFuzzyContactMatch checks whether memberf has a contact matching the
// given first and last name.
//
// Matching is done in two passes over memberf.Contacts:
//
//  1. Exact pass: the first contact whose FirstName and LastName are both
//     equal (case-sensitively) to first and last is returned immediately.
//  2. Fuzzy pass: every contact whose first AND last names are both judged
//     similar by normalize.AreStringsSimilar at SIM_THRESHOLD becomes a
//     candidate. Candidates are ranked by the sum of the Levenshtein
//     distances between the normalized first names and the normalized last
//     names, and the candidate with the smallest sum is returned. Candidates
//     with equal sums keep their order in the contact list, so the earliest
//     one wins.
//
// It returns the matched contact and true, or the zero Contact and false when
// neither pass finds anything.
//
// Historical note: an earlier fuzzy-matching library was dropped because it
// required every character of the input to be present in the target; a single
// extra character turned the whole comparison into a no-match.
func hasFuzzyContactMatch(memberf *Member, first string, last string) (contactf Contact, found bool) {
	// Pass 1: strict equality.
	for _, contact := range memberf.Contacts {
		if last == contact.LastName && first == contact.FirstName {
			return contact, true
		}
	}

	// The normalized input does not depend on the contact, so compute it once.
	inputNorm := normalize.Many([]string{first, last})

	// Pass 2: Levenshtein-distance-backed ranking of the similar contacts.
	var rc []rankContact
	for _, contact := range memberf.Contacts {
		similarFirst := normalize.AreStringsSimilar(contact.FirstName, first, SIM_THRESHOLD)
		similarLast := normalize.AreStringsSimilar(contact.LastName, last, SIM_THRESHOLD)
		if !similarFirst || !similarLast {
			continue
		}

		targetNorm := normalize.Many([]string{contact.FirstName, contact.LastName})
		distF := levenshtein.ComputeDistance(targetNorm[0], inputNorm[0])
		distL := levenshtein.ComputeDistance(targetNorm[1], inputNorm[1])
		rc = append(rc, rankContact{contact, distF + distL, distF, distL})
	}

	if len(rc) == 0 {
		return Contact{}, false
	}

	// A stable sort keeps equally distant candidates in contact-list order,
	// which sort.Slice does not guarantee.
	sort.SliceStable(rc, func(i, j int) bool {
		return rc[i].distance < rc[j].distance
	})
	return rc[0].contact, true
}
// FetchSingle returns the member stored under the key built by MakeKeyV from
// the parsed street name and the street number, together with a flag
// reporting whether such a member exists.
func (a *Analysis) FetchSingle(streetNum int, streetName string) (*Member, bool) {
	key := MakeKeyV(a.parseStreetName(streetName), streetNum)
	return a.db.Fetch(key)
}
// UniqeStreetNames returns each distinct street name present among the
// database keys exactly once, sorted in ascending order so the result is the
// same on every call. It returns nil when the database holds no keys.
//
// The database must be a *memberMapDB; the unchecked type assertion panics
// for any other MemberDB implementation.
func (a *Analysis) UniqeStreetNames() []string {
	keys := a.db.(*memberMapDB).getAllMKeys()

	seen := make(map[string]struct{}, len(keys))
	var uniqueNames []string
	for _, key := range keys {
		if _, dup := seen[key.streetName]; dup {
			continue
		}
		seen[key.streetName] = struct{}{}
		uniqueNames = append(uniqueNames, key.streetName)
	}

	// Sort so the result does not depend on the order getAllMKeys returns.
	sort.Strings(uniqueNames)
	return uniqueNames
}
// FetchAllMembersOnStreet returns the members the database reports for the
// parsed form of streetName.
//
// The database must be a *memberMapDB; the unchecked type assertion panics
// for any other MemberDB implementation.
func (a *Analysis) FetchAllMembersOnStreet(streetName string) []*Member {
	store := a.db.(*memberMapDB)
	cleaned := a.parseStreetName(streetName)
	return store.getAllMembersOnStreet(cleaned)
}