Skip to content

Commit

Permalink
create string utils with methods for normalizing, computing similarit…
Browse files Browse the repository at this point in the history
…y score, re #10780
  • Loading branch information
whatisgalen committed Aug 15, 2024
1 parent dab8c82 commit 63c0d30
Showing 1 changed file with 22 additions and 0 deletions.
22 changes: 22 additions & 0 deletions arches/app/media/js/utils/strings.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
define(['jquery', 'knockout', 'arches'], function($, ko, arches) {
    // Matches runs of characters that are NOT Unicode letters, combining marks,
    // numbers or underscore. For ASCII input this is exactly what /\W+/g matches,
    // but unlike \W it does not delete accented or non-Latin letters.
    const NON_WORD_CHARS = /[^\p{L}\p{M}\p{N}_]+/gu;

    /**
     * Coerces a value to a string so the utilities never throw on missing input.
     * @param {*} value - Any value.
     * @returns {string} '' for null/undefined, otherwise String(value).
     */
    const toText = function(value) {
        return value == null ? '' : String(value);
    };

    /**
     * Collects the distinct bigrams (adjacent two-code-unit slices) of a string.
     * @param {string} text - The string to split.
     * @returns {Set<string>} The unique bigrams; empty when text is shorter than 2.
     */
    const collectBigrams = function(text) {
        const bigrams = new Set();
        for (let i = 0; i < text.length - 1; i++) {
            bigrams.add(text.slice(i, i + 2));
        }
        return bigrams;
    };

    const stringUtils = {
        /**
         * Scores the similarity of two strings with the Dice coefficient computed
         * over their sets of distinct bigrams: 2 * |A ∩ B| / (|A| + |B|).
         *
         * The comparison is case- and whitespace-sensitive; pass both values
         * through normalizeText first when that is not wanted.
         *
         * @param {*} str1 - First value; null/undefined are treated as ''.
         * @param {*} str2 - Second value; null/undefined are treated as ''.
         * @returns {number} A score between 0 (no shared bigrams) and 1 (same
         *     bigram sets). Identical non-empty strings always score 1; otherwise
         *     any string shorter than two characters scores 0.
         */
        compareTwoStrings: function(str1, str2) {
            const first = toText(str1);
            const second = toText(str2);

            // A single character has no bigrams, but equal strings are still a
            // perfect match. Empty strings carry no information and stay at 0.
            if (first.length > 0 && first === second) return 1;
            if (first.length < 2 || second.length < 2) return 0;

            const firstBigrams = collectBigrams(first);
            const secondBigrams = collectBigrams(second);

            let shared = 0;
            firstBigrams.forEach(function(bigram) {
                if (secondBigrams.has(bigram)) shared += 1;
            });

            return (2 * shared) / (firstBigrams.size + secondBigrams.size);
        },

        /**
         * Lower-cases text and removes everything that is not a letter, combining
         * mark, number or underscore (whitespace and punctuation are dropped).
         *
         * @param {*} text - Value to normalize; null/undefined are treated as ''.
         * @returns {string} The normalized string.
         */
        normalizeText: function(text) {
            return toText(text).toLowerCase().replace(NON_WORD_CHARS, '');
        },
    };
    return stringUtils;
});

0 comments on commit 63c0d30

Please sign in to comment.