// Source: arches/app/media/js/utils/strings.js, introduced by commit
// 63c0d300c0d14b9c0b3d8667ac9f43cf30933c8f ("create string utils with methods
// for normalizing, computing similarity score, re #10780").
define(['jquery', 'knockout', 'arches'], function($, ko, arches) {
    // Matches runs of characters that are not letters, combining marks, digits
    // or underscore. Unicode-aware counterpart of /\W+/g: ASCII input is
    // treated exactly as before, but accented and non-Latin letters survive.
    const NON_WORD_CHARS = /[^\p{L}\p{M}\p{N}_]+/gu;

    /**
     * Collects the distinct bigrams (pairs of adjacent UTF-16 code units)
     * found in a string.
     *
     * @param {string} str - Text to split into bigrams.
     * @returns {Set<string>} The unique two-character substrings of `str`.
     */
    const collectBigrams = function(str) {
        const bigrams = new Set();
        for (let i = 0; i < str.length - 1; i++) {
            bigrams.add(str.slice(i, i + 2));
        }
        return bigrams;
    };

    const stringUtils = {
        /**
         * Scores the similarity of two strings with the Dice coefficient
         * computed over their sets of distinct bigrams.
         *
         * @param {string} str1 - First string.
         * @param {string} str2 - Second string.
         * @returns {number} A score between 0 (no shared bigrams) and 1
         *     (same bigram set). Identical non-empty strings always score 1;
         *     otherwise a string shorter than two characters scores 0.
         */
        compareTwoStrings: function(str1, str2) {
            // A one-character string has no bigrams, so without this check
            // 'a' compared with 'a' would fall through to the guard below
            // and be reported as completely dissimilar.
            if (str1.length > 0 && str1 === str2) return 1;
            if (str1.length < 2 || str2.length < 2) return 0;

            const set1 = collectBigrams(str1);
            const set2 = collectBigrams(str2);

            let sharedCount = 0;
            for (const bigram of set1) {
                if (set2.has(bigram)) sharedCount++;
            }
            return (2 * sharedCount) / (set1.size + set2.size);
        },

        /**
         * Lower-cases text and removes everything that is not a letter,
         * combining mark, digit or underscore, in any script.
         *
         * @param {*} text - Value to normalize; null and undefined yield ''.
         * @returns {string} The normalized text.
         */
        normalizeText: function(text) {
            if (text == null) return '';
            return String(text).toLowerCase().replace(NON_WORD_CHARS, '');
        },
    };
    return stringUtils;
});