@@ -5,6 +5,42 @@ import {
55 sanitize ,
66 removeUnicode
77} from '../../commons/text' ;
8+ import stem from 'wink-porter2-stemmer' ;
9+
// Minimum cosine similarity at which isStringContained treats two texts as a match (compared with >=).
10+ const threshold = 0.75 ;
11+
/**
 * Normalise a string for comparison: apply Unicode NFKC normalisation,
 * lower-case it, strip zero-width characters and trim surrounding whitespace.
 *
 * Normalisation runs before lower-casing on purpose: some compatibility
 * characters (e.g. U+210C) have no lower-case mapping of their own but
 * decompose to an upper-case letter, which would otherwise survive.
 *
 * @param {String|null|undefined} str text to clean
 * @returns {String|undefined} the cleaned text, or `undefined` when `str` is nullish
 */
function cleanText(str) {
  return str
    ?.normalize('NFKC')
    .toLowerCase()
    .replace(/[\u200B-\u200D\u2060\uFEFF]/g, '')
    .trim();
}
19+
/**
 * Replace known synonyms in a text and re-join its words with single spaces.
 *
 * The text is tokenised on runs of characters that are neither letters nor
 * digits. Synonym keys that themselves contain such characters (e.g. "&")
 * would be thrown away by that tokeniser, so they are expanded to their
 * replacement word before the split happens.
 *
 * @param {String} text text to process
 * @returns {String} the words of `text`, synonyms replaced, joined by spaces
 */
function replaceSynonyms(text) {
  // A Map avoids accidental hits on inherited object properties such as
  // "constructor" when a word from the text is used as the lookup key.
  const synonymMap = new Map([['&', 'and']]);
  const wordSeparator = /[^\p{L}\p{N}]+/u;

  let expanded = text;
  synonymMap.forEach((replacement, key) => {
    if (wordSeparator.test(key)) {
      // Pad with spaces so the replacement always becomes its own token.
      expanded = expanded.split(key).join(` ${replacement} `);
    }
  });

  return expanded
    .split(wordSeparator)
    .map(word => synonymMap.get(word) ?? word)
    .join(' ');
}
29+
/**
 * Reduce every word of a text to its stem.
 *
 * Synonyms are replaced first, then each non-empty word is cleaned, stripped
 * of any remaining character that is neither a letter nor a digit, and passed
 * to the stemmer. If the stemmer throws, the cleaned word is kept unchanged.
 *
 * @param {String} str text to stem
 * @returns {String} the stemmed words joined by single spaces
 */
function stringStemmer(str) {
  const stems = [];
  const words = replaceSynonyms(str).split(/[^\p{L}\p{N}]+/u);

  for (const word of words) {
    if (!word) {
      continue;
    }
    const cleaned = cleanText(word).replace(/[^\p{L}\p{N}]/gu, '');
    let stemmed;
    try {
      stemmed = stem(cleaned);
    } catch (err) {
      // Best effort: fall back to the cleaned, unstemmed word.
      stemmed = cleaned;
    }
    stems.push(stemmed);
  }

  return stems.join(' ');
}
844
945/**
1046 * Check if a given text exists in another
@@ -14,12 +50,45 @@ import {
1450 * @returns {Boolean }
1551 */
function isStringContained(compare, compareWith) {
  // Nullish input can never match. Bail out before stemming, which would
  // otherwise throw while trying to split a non-string.
  if (compare == null || compareWith == null) {
    return false;
  }

  const stemmedCompare = stringStemmer(compare);
  const stemmedCompareWith = stringStemmer(compareWith);

  const curatedCompareWith = curateString(stemmedCompareWith);
  const curatedCompare = curateString(stemmedCompare);
  if (!curatedCompareWith || !curatedCompare) {
    return false;
  }

  // Fast path: the curated text is literally contained in the other one.
  if (curatedCompareWith.includes(curatedCompare)) {
    return true;
  }

  // Otherwise fall back to the cosine similarity of the stemmed word counts.
  const similarity = cosineSimilarity(
    countTokens(stemmedCompare),
    countTokens(stemmedCompareWith)
  );
  return similarity >= threshold;
}

/**
 * Count how often each word occurs in a text.
 * Words are runs of letters/digits; empty tokens produced by leading or
 * trailing separators are not counted as terms.
 *
 * @private
 * @param {String} text
 * @returns {Map<String, Number>} occurrences per word
 */
function countTokens(text) {
  // A Map (rather than a plain object) keeps words such as "constructor"
  // from colliding with inherited object properties.
  const counts = new Map();
  for (const token of text.split(/[^\p{L}\p{N}]+/u)) {
    if (token) {
      counts.set(token, (counts.get(token) ?? 0) + 1);
    }
  }
  return counts;
}

/**
 * Cosine similarity of two term-frequency tables.
 *
 * @private
 * @param {Map<String, Number>} freqA
 * @param {Map<String, Number>} freqB
 * @returns {Number} dot product divided by the product of the magnitudes;
 *   0 when either table is empty
 */
function cosineSimilarity(freqA, freqB) {
  let dot = 0;
  let magA = 0;
  let magB = 0;

  for (const [term, a] of freqA) {
    // Only terms present in both tables contribute to the dot product.
    dot += a * (freqB.get(term) ?? 0);
    magA += a * a;
  }
  for (const b of freqB.values()) {
    magB += b * b;
  }

  return magA > 0 && magB > 0
    ? dot / (Math.sqrt(magA) * Math.sqrt(magB))
    : 0;
}
2493
2594/**
@@ -32,7 +101,8 @@ function curateString(str) {
32101 const noUnicodeStr = removeUnicode ( str , {
33102 emoji : true ,
34103 nonBmp : true ,
35- punctuations : true
104+ punctuations : true ,
105+ whitespace : true
36106 } ) ;
37107 return sanitize ( noUnicodeStr ) ;
38108}
@@ -52,9 +122,11 @@ function labelContentNameMismatchEvaluate(node, options, virtualNode) {
52122 subtreeDescendant : true ,
53123 ignoreIconLigature : true ,
54124 pixelThreshold,
55- occurrenceThreshold
125+ occurrenceThreshold,
126+ ignoreNativeTextAlternative : true // To Skip for nativeTextAlternative
56127 } )
57128 ) . toLowerCase ( ) ;
129+
58130 if ( ! visibleText ) {
59131 return true ;
60132 }
0 commit comments