Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: remove entities from spotlight #10862

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions packages/client/utils/smartGroup/computeJaccardDistanceMatrix.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/**
 * Builds a square matrix of pairwise Jaccard distances between reflections.
 *
 * Every reflection is tokenized once; entry [i][j] is then
 * 1 - (shared distinct words / total distinct words) for reflections i and j.
 * Per the original note, the resulting matrix is what gets fed to clusterfck.
 *
 * @param reflections - plain text of each reflection
 * @returns one row per reflection, each row holding its distance to every reflection (itself included)
 */
export const computeJaccardDistanceMatrix = (reflections: string[]) => {
  const tokenLists = reflections.map((reflection) => tokenize(reflection))

  const matrix: number[][] = []
  for (const rowTokens of tokenLists) {
    const row: number[] = []
    for (const columnTokens of tokenLists) {
      row.push(jaccardDistance(rowTokens, columnTokens))
    }
    matrix.push(row)
  }
  return matrix
}

/**
 * Splits text into lowercase word tokens.
 *
 * Punctuation and symbols are deleted rather than replaced by a space, so
 * "don't" yields the single token "dont". Letters, combining marks and digits
 * of any script are kept, as is the underscore.
 *
 * @param text - raw text to tokenize
 * @returns tokens in order of appearance; an empty array when the text contains no word characters
 */
const tokenize = (text: string): string[] => {
  return text
    .toLowerCase()
    // \w only covers [A-Za-z0-9_], which would strip accented and non-Latin letters;
    // Unicode property escapes keep them while still removing punctuation
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, '')
    .split(/\s+/)
    .filter(Boolean)
}

/**
 * Jaccard distance between two token lists: 1 - |A ∩ B| / |A ∪ B|,
 * computed over the distinct tokens of each list.
 *
 * @param aTokens - tokens of the first text (duplicates are ignored)
 * @param bTokens - tokens of the second text (duplicates are ignored)
 * @returns 0 when the distinct token sets are equal (including both empty), 1 when they share no token
 */
const jaccardDistance = (aTokens: string[], bTokens: string[]): number => {
  const uniqueA = new Set(aTokens)
  const uniqueB = new Set(bTokens)

  let shared = 0
  uniqueA.forEach((token) => {
    if (uniqueB.has(token)) shared += 1
  })

  // |A ∪ B| = |A| + |B| - |A ∩ B|
  const total = uniqueA.size + uniqueB.size - shared
  // Both inputs empty: report them as identical instead of dividing by zero
  if (total === 0) return 0

  return 1 - shared / total
}
75 changes: 23 additions & 52 deletions packages/client/utils/smartGroup/groupReflections.ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,5 @@
import computeDistanceMatrix from './computeDistanceMatrix'
import getAllLemmasFromReflections from './getAllLemmasFromReflections'
import {computeJaccardDistanceMatrix} from './computeJaccardDistanceMatrix'
import getGroupMatrix from './getGroupMatrix'
import getTitleFromComputedGroup from './getTitleFromComputedGroup'

/*
* Read each reflection, parse the content for entities (i.e. nouns), group the reflections based on common themes
*/

/**
 * One entity parsed out of a reflection (the file header describes entities as nouns).
 * NOTE(review): field meanings below are inferred from names only; confirm against the producer.
 */
type Entity = {
// presumably the normalized (lemmatized) form of the entity; verify
lemma: string
// presumably the entity text as it appeared in the reflection; verify
name: string
// presumably a relevance weight for the entity; range not shown here, TODO confirm
salience: number
}

/**
 * Placement of a single reflection after grouping, as built inside groupReflections.
 */
type GroupedReflectionRes = {
// id of the reflection this record describes
reflectionId: string
// entities carried over from the input reflection
entities: Entity[]
// group the reflection belonged to before regrouping
oldReflectionGroupId: string
// index of the reflection within its computed group
sortOrder: number
// group the reflection is assigned to after regrouping
reflectionGroupId: string
}

export type GroupingOptions = {
groupingThreshold: number
Expand All @@ -28,78 +8,69 @@ export type GroupingOptions = {
}

/**
 * Groups reflections by similarity: builds a distance matrix, asks getGroupMatrix
 * to cluster its rows, then maps every clustered row back to its reflection and
 * assigns each cluster a reflectionGroupId.
 *
 * NOTE(review): this span is a diff rendering in which removed and added lines
 * appear together without +/- markers (two generic constraints, two
 * distanceMatrix declarations, two forEach headers, two nextThresh entries).
 * It is not compilable as shown; resolve which side applies before relying on it.
 */
const groupReflections = <
T extends {entities: any[]; reflectionGroupId: string; id: string; plaintextContent: string}
T extends {id: string; reflectionGroupId: string; plaintextContent: string}
>(
reflections: T[],
groupingOptions: GroupingOptions
) => {
const allReflectionEntities = reflections.map(({entities}) => entities)
const oldReflectionGroupIds = reflections.map(({reflectionGroupId}) => reflectionGroupId)
// missing or empty plaintextContent falls back to an empty string
const reflectionTexts = reflections.map((r) => r.plaintextContent || '')
const distanceMatrix = computeJaccardDistanceMatrix(reflectionTexts)

// create a unique array of all entity names mentioned in the meeting's reflect phase
const uniqueLemmaArr = getAllLemmasFromReflections(allReflectionEntities)
// create a distance vector for each reflection
const distanceMatrix = computeDistanceMatrix(allReflectionEntities, uniqueLemmaArr)
const {
groups: groupedArrays,
thresh,
nextThresh
} = getGroupMatrix(distanceMatrix, groupingOptions)
// replace the arrays with reflections
const updatedReflections = [] as GroupedReflectionRes[]
const reflectionGroupMapping = {} as Record<string, string>

const updatedReflections: Array<{
reflectionId: string
oldReflectionGroupId: string
sortOrder: number
reflectionGroupId: string
}> = []

// maps each previous group id to the group id its reflection ends up in
const reflectionGroupMapping: Record<string, string> = {}
const oldReflectionGroupIds = reflections.map((r) => r.reflectionGroupId)

const updatedGroups = groupedArrays.map((group) => {
// look up the reflection by its vector, put them all in the same group
let reflectionGroupId = ''

const groupedReflectionsRes = group.map((reflectionDistanceArr, sortOrder) => {
// indexOf compares by reference, so this assumes getGroupMatrix hands back
// the same row arrays it received in distanceMatrix; TODO confirm
const idx = distanceMatrix.indexOf(reflectionDistanceArr)
const reflection = reflections[idx]!
// the first reflection in the group donates its existing group id to the whole group
reflectionGroupId = reflectionGroupId || reflection.reflectionGroupId
return {
reflectionId: reflection.id,
entities: reflection.entities,
oldReflectionGroupId: reflection.reflectionGroupId,
sortOrder,
reflectionGroupId
} as GroupedReflectionRes
}
})

const groupedReflectionEntities = groupedReflectionsRes
.map(({entities}) => entities)
.filter(Boolean)
const smartTitle = getTitleFromComputedGroup(
uniqueLemmaArr,
group,
groupedReflectionEntities,
reflections
)

updatedReflections.push(...groupedReflectionsRes)

groupedReflectionsRes.forEach((groupedReflection) => {
reflectionGroupMapping[groupedReflection.oldReflectionGroupId] = reflectionGroupId
groupedReflectionsRes.forEach(({oldReflectionGroupId}) => {
reflectionGroupMapping[oldReflectionGroupId] = reflectionGroupId
})

return {
id: reflectionGroupId,
smartTitle,
title: smartTitle
id: reflectionGroupId
}
})

const newReflectionGroupIds = new Set(
updatedReflections.map(({reflectionGroupId}) => reflectionGroupId)
)
// previous group ids that no reflection is assigned to after regrouping
const removedReflectionGroupIds = oldReflectionGroupIds.filter(
(groupId) => !newReflectionGroupIds.has(groupId)
(oldId) => !newReflectionGroupIds.has(oldId)
)

return {
autoGroupThreshold: thresh,
groups: updatedGroups,
groupedReflectionsRes: updatedReflections,
reflectionGroupMapping,
removedReflectionGroupIds,
nextThresh: nextThresh as number
nextThresh
}
}

Expand Down