Skip to content

Commit 05aea41

Browse files
authored
Merge pull request #300 from weaviate/feat/bm25-minimumShouldMatch
Extend BM25 and Hybrid search with minimum_should_match semantics.
2 parents 64d2a2b + 6132589 commit 05aea41

File tree

6 files changed

+229
-2
lines changed

6 files changed

+229
-2
lines changed

src/collections/query/index.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@ export {
247247
BaseHybridOptions,
248248
BaseNearOptions,
249249
BaseNearTextOptions,
250+
Bm25OperatorOptions,
250251
Bm25Options,
251252
FetchObjectByIdOptions,
252253
FetchObjectsOptions,
@@ -266,3 +267,5 @@ export {
266267
QueryReturn,
267268
SearchOptions,
268269
} from './types.js';
270+
271+
export { Bm25Operator } from './utils.js';

src/collections/query/integration.test.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
/* eslint-disable @typescript-eslint/no-non-null-assertion */
22
/* eslint-disable @typescript-eslint/no-non-null-asserted-optional-chain */
3+
import { requireAtLeast } from '../../../test/version.js';
34
import { WeaviateUnsupportedFeatureError } from '../../errors.js';
45
import weaviate, { WeaviateClient } from '../../index.js';
56
import { Collection } from '../collection/index.js';
67
import { CrossReference, Reference } from '../references/index.js';
78
import { GroupByOptions } from '../types/index.js';
9+
import { Bm25Operator } from './utils.js';
810

911
describe('Testing of the collection.query methods with a simple collection', () => {
1012
let client: WeaviateClient;
@@ -132,6 +134,32 @@ describe('Testing of the collection.query methods with a simple collection', ()
132134
expect(ret.objects[0].uuid).toEqual(id);
133135
});
134136

137+
// Suite covering the BM25 operator options on both `query.bm25` and `query.hybrid`.
// NOTE(review): requireAtLeast(1, 31, 0) presumably gates the suite on a server version >= 1.31.0 —
// confirm against test/version.js.
requireAtLeast(
138+
1,
139+
31,
140+
0
141+
)('bm25 search operator (minimum_should_match)', () => {
142+
// Keyword search using the 'Or' operator variant with minimumMatch = 1.
it('should query with bm25 + operator', async () => {
143+
const ret = await collection.query.bm25('carrot', {
144+
limit: 1,
145+
operator: Bm25Operator.or({ minimumMatch: 1 }),
146+
});
147+
expect(ret.objects.length).toEqual(1);
148+
expect(ret.objects[0].properties.testProp).toEqual('carrot');
149+
expect(ret.objects[0].uuid).toEqual(id);
150+
});
151+
152+
// Hybrid search passing the 'And' operator variant through the `bm25Operator` option.
it('should query with hybrid + bm25Operator', async () => {
153+
const ret = await collection.query.hybrid('carrot', {
154+
limit: 1,
155+
bm25Operator: Bm25Operator.and(),
156+
});
157+
expect(ret.objects.length).toEqual(1);
158+
expect(ret.objects[0].properties.testProp).toEqual('carrot');
159+
expect(ret.objects[0].uuid).toEqual(id);
160+
});
161+
});
162+
135163
it('should query with hybrid and vector', async () => {
136164
const ret = await collection.query.hybrid('carrot', {
137165
limit: 1,

src/collections/query/types.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,15 @@ export type Bm25QueryProperty<T> = {
8484
weight: number;
8585
};
8686

87+
/**
 * BM25 operator variant tagged `'Or'`; it carries a required numeric `minimumMatch`.
 * NOTE(review): presumably the minimum number of query tokens that must match — confirm against the server docs.
 */
export type Bm25OperatorOr = { operator: 'Or'; minimumMatch: number };
88+
/** BM25 operator variant tagged `'And'`; it carries no additional settings. */
export type Bm25OperatorAnd = { operator: 'And' };
89+
90+
/** Union of the BM25 operator variants, discriminated by the `operator` field. */
export type Bm25OperatorOptions = Bm25OperatorOr | Bm25OperatorAnd;
91+
8792
/** Keyword-search options; `Serialize.bm25Search` reads `queryProperties` and `operator` from this shape. */
export type Bm25SearchOptions<T> = {
8893
/** Which properties of the collection to perform the keyword search on. */
8994
queryProperties?: (PrimitiveKeys<T> | Bm25QueryProperty<T>)[];
95+
/** Optional BM25 operator settings: either the `'And'` or the `'Or'` variant of `Bm25OperatorOptions`. */
operator?: Bm25OperatorOptions;
9096
};
9197

9298
/** Base options available in the `query.bm25` method */
@@ -115,6 +121,7 @@ export type HybridSearchOptions<T> = {
115121
targetVector?: TargetVectorInputType;
116122
/** The specific vector to search for or a specific vector subsearch. If not specified, the query is vectorized and used in the similarity search. */
117123
vector?: NearVectorInputType | HybridNearTextSubSearch | HybridNearVectorSubSearch;
124+
bm25Operator?: Bm25OperatorOptions;
118125
};
119126

120127
/** Base options available in the `query.hybrid` method */

src/collections/query/utils.ts

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { MultiTargetVectorJoin } from '../index.js';
2-
import { NearVectorInputType, TargetVectorInputType } from './types.js';
2+
import { Bm25OperatorOptions, Bm25OperatorOr, NearVectorInputType, TargetVectorInputType } from './types.js';
33

44
export class NearVectorInputGuards {
55
public static is1DArray(input: NearVectorInputType): input is number[] {
@@ -34,3 +34,13 @@ export class TargetVectorInputGuards {
3434
return i.combination !== undefined && i.targetVectors !== undefined;
3535
}
3636
}
37+
38+
/** Static factory helpers that build `Bm25OperatorOptions` values. */
export class Bm25Operator {
39+
/** Returns the `'And'` operator variant. */
static and(): Bm25OperatorOptions {
40+
return { operator: 'And' };
41+
}
42+
43+
/**
 * Returns the `'Or'` operator variant.
 *
 * @param opts The `Bm25OperatorOr` fields other than `operator` (i.e. `minimumMatch`).
 */
static or(opts: Omit<Bm25OperatorOr, 'operator'>): Bm25OperatorOptions {
44+
// `operator` is written after the spread, so it overrides any same-named key present in `opts`.
return { ...opts, operator: 'Or' };
45+
}
46+
}

src/collections/serialize/index.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ import {
1515
NearThermalSearch,
1616
NearVector,
1717
NearVideoSearch,
18+
SearchOperatorOptions,
19+
SearchOperatorOptions_Operator,
1820
Targets,
1921
VectorForTarget,
2022
WeightsForTarget,
@@ -115,6 +117,7 @@ import {
115117
import {
116118
BaseHybridOptions,
117119
BaseNearOptions,
120+
Bm25OperatorOptions,
118121
Bm25Options,
119122
Bm25QueryProperty,
120123
Bm25SearchOptions,
@@ -960,10 +963,26 @@ export class Serialize {
960963
});
961964
};
962965

966+
/**
 * Converts client-side BM25 operator options into a `SearchOperatorOptions` message.
 *
 * @param searchOperator Optional operator options (`'And'` or `'Or'` variant).
 * @returns The message built with `SearchOperatorOptions.fromPartial`, or `undefined` when no options were given.
 */
private static bm25SearchOperator = (
967+
searchOperator?: Bm25OperatorOptions
968+
): SearchOperatorOptions | undefined => {
969+
if (searchOperator) {
970+
return SearchOperatorOptions.fromPartial(
971+
// 'And' maps to OPERATOR_AND with no extra fields; the other variant maps to OPERATOR_OR
// and forwards `minimumMatch` as `minimumOrTokensMatch`.
searchOperator.operator === ('And' as const)
972+
? { operator: SearchOperatorOptions_Operator.OPERATOR_AND }
973+
: {
974+
operator: SearchOperatorOptions_Operator.OPERATOR_OR,
975+
minimumOrTokensMatch: searchOperator.minimumMatch,
976+
}
977+
);
978+
}
979+
// Falls through without an explicit return (i.e. yields undefined) when `searchOperator` is falsy.
};
980+
963981
/**
 * Builds the `BM25` message for a keyword search.
 *
 * @param args The query string plus the optional `queryProperties` and `operator` settings.
 * @returns The message produced by `BM25.fromPartial`.
 */
public static bm25Search = <T>(args: { query: string } & Bm25SearchOptions<T>): BM25 => {
964982
return BM25.fromPartial({
965983
query: args.query,
966984
properties: this.bm25QueryProperties(args.queryProperties),
985+
// `bm25SearchOperator` returns undefined when `args.operator` is not provided.
searchOperator: this.bm25SearchOperator(args.operator),
967986
});
968987
};
969988

@@ -1074,6 +1093,7 @@ export class Serialize {
10741093
vectorBytes: vectorBytes,
10751094
vectorDistance: args.maxVectorDistance,
10761095
fusionType: fusionType(args.fusionType),
1096+
bm25SearchOperator: this.bm25SearchOperator(args.bm25Operator),
10771097
targetVectors,
10781098
targets,
10791099
nearText,

0 commit comments

Comments
 (0)