forked from mtibben/confusables
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfusables.go
82 lines (67 loc) · 1.99 KB
/
confusables.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
//go:generate go run maketables.go > tables.go
package confusables
import (
"bytes"
"golang.org/x/text/unicode/norm"
)
// TODO: document casefolding approaches
// (suggest to force casefold strings; explain how to catch paypal - pAypal)
// TODO: DOC you might want to store the Skeleton and check against it later
// TODO: implement xidmodifications.txt restricted characters
// lookupFunc maps a rune to its replacement string. The callers in this file
// treat an empty result as "no replacement for this rune".
type lookupFunc func(rune) string
// lookupReplacement returns the confusablesMap entry for r.
func lookupReplacement(r rune) string {
	replacement := confusablesMap[r]
	return replacement
}
// lookupReplacementTweaked returns the replacement for r, giving entries in
// tweaksMap precedence over confusablesMap. A rune that is present in
// tweaksMap yields its tweaksMap value even when that value is empty.
func lookupReplacementTweaked(r rune) string {
	override, found := tweaksMap[r]
	if !found {
		return confusablesMap[r]
	}
	return override
}
// skeletonBase computes the skeleton of s: it NFD-normalizes s, substitutes
// every rune for which lookup returns a non-empty string with that string,
// and NFD-normalizes the result again.
func skeletonBase(s string, lookup lookupFunc) string {
	// Step 1: bring the input into NFD form.
	text := norm.NFD.String(s)

	// Step 2: map each rune through the supplied lookup. Nothing is written
	// to out until the first mapped rune is found, so text that contains no
	// mapped rune is passed on without being rebuilt.
	var out bytes.Buffer
	modified := false
	// runStart is the byte offset of the current run of unmapped runes that
	// has not been copied to out yet, or -1 when there is no such run.
	runStart := 0
	for pos, r := range text {
		mapped := lookup(r)
		if mapped == "" {
			if runStart < 0 {
				runStart = pos
			}
			continue
		}
		// Copy the untouched text preceding this rune, then its replacement.
		if runStart >= 0 {
			out.WriteString(text[runStart:pos])
			runStart = -1
		}
		out.WriteString(mapped)
		modified = true
	}
	if modified {
		// Copy whatever unmapped text follows the last replacement.
		if runStart >= 0 {
			out.WriteString(text[runStart:])
		}
		text = out.String()
	}

	// Step 3: reapply NFD to the mapped text.
	return norm.NFD.String(text)
}
// Skeleton returns the "skeleton" form of s, using lookupReplacement as the
// character mapping. The skeleton transformation is described in
// http://www.unicode.org/reports/tr39/#Confusable_Detection
func Skeleton(s string) string {
	skeleton := skeletonBase(s, lookupReplacement)
	return skeleton
}
// SkeletonTweaked behaves like Skeleton, except that the character mapping
// is lookupReplacementTweaked, which applies some custom overrides to the
// confusables table (currently it removes the m -> rn mapping).
func SkeletonTweaked(s string) string {
	skeleton := skeletonBase(s, lookupReplacementTweaked)
	return skeleton
}
// Confusable reports whether x and y have the same skeleton, as computed
// by Skeleton.
func Confusable(x, y string) bool {
	skeletonX := Skeleton(x)
	skeletonY := Skeleton(y)
	return skeletonX == skeletonY
}