Skip to content

Commit

Permalink
Merge pull request #465 from easyops-cn/steve/perf
Browse files Browse the repository at this point in the history
fix: limit the number of terms, closes #312
  • Loading branch information
weareoutman authored Oct 9, 2024
2 parents 50f93f6 + c889047 commit 736344b
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 18 deletions.
11 changes: 0 additions & 11 deletions docusaurus-search-local/src/client/utils/smartQueries.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,17 +81,6 @@ export function smartQueries(
refinedTerms = terms.slice();
}

const MAX_TERMS = 10;
if (refinedTerms.length > MAX_TERMS) {
// Sort terms by length in ascending order,
// and keep only the 10 shortest terms.
refinedTerms.sort((a, b) => a.length - b.length);
refinedTerms.splice(MAX_TERMS, refinedTerms.length - MAX_TERMS);

terms.sort((a, b) => a.length - b.length);
terms.splice(MAX_TERMS, terms.length - MAX_TERMS);
}

// Also try to add extra terms which miss one of the searched tokens,
// when the term contains 3 or more tokens,
// to improve the search precision.
Expand Down
51 changes: 44 additions & 7 deletions docusaurus-search-local/src/client/utils/smartTerms.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import { SmartTerm } from "../../shared/interfaces";
import { cutZhWords } from "./cutZhWords";

const MAX_TERMS = 12;
const HALF_MAX_TERMS = MAX_TERMS / 2;

/**
* Get all possible terms for a list of tokens consisting of words mixed in Chinese and non-Chinese,
* by a Chinese words dictionary.
Expand All @@ -14,19 +17,53 @@ export function smartTerms(
tokens: string[],
zhDictionary: string[]
): SmartTerm[] {
const tokenTerms = tokens.map((token) => {
if (/\p{Unified_Ideograph}/u.test(token)) {
return cutZhWords(token, zhDictionary);
const tokenTerms = tokens
.map((token) => {
if (/\p{Unified_Ideograph}/u.test(token)) {
return cutZhWords(token, zhDictionary);
} else {
return [{ value: token }];
}
})
.slice(0, MAX_TERMS);

const tokenTermsThatAreMultiple = tokenTerms.filter(
(tokenTerm) => tokenTerm.length > 1
);

let termsProduct = 1;
let overflowed = false;

for (const tokenTerm of tokenTermsThatAreMultiple) {
if (overflowed) {
tokenTerm.splice(1, tokenTerm.length - 1);
} else {
return [{ value: token }];
if (tokenTerm.length > HALF_MAX_TERMS) {
tokenTerm.splice(HALF_MAX_TERMS, tokenTerm.length - HALF_MAX_TERMS);
}
const product = termsProduct * tokenTerm.length;
if (product >= MAX_TERMS) {
if (product > MAX_TERMS) {
const max = Math.floor(MAX_TERMS / termsProduct);
tokenTerm.splice(max, tokenTerm.length - max);
termsProduct = max * termsProduct;
} else {
termsProduct = product;
}
if (termsProduct > HALF_MAX_TERMS) {
overflowed = true;
}
} else {
termsProduct = product;
}
}
});
}

// Get all possible combinations of terms.
const terms: SmartTerm[] = [];
function combine(index: number, carry: SmartTerm): void {
if (index === tokenTerms.length) {
terms.push(carry);
if (index === tokenTerms.length || carry.length >= MAX_TERMS) {
terms.push(carry.slice(0, MAX_TERMS));
return;
}
for (const term of tokenTerms[index]) {
Expand Down

0 comments on commit 736344b

Please sign in to comment.