Skip to content

Commit f185924

Browse files
author
Bruno Selva
committed
Add pagination support to SearchIndexTool and GetSegmentsTool to prevent token overflow
1 parent fa73f6c commit f185924

File tree

5 files changed

+285
-17
lines changed

5 files changed

+285
-17
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ The following tools are available but disabled by default. To enable them, see t
7777
- `opensearch_url` (optional): The OpenSearch cluster URL to connect to
7878
- `index` (required): The name of the index to search in
7979
- `query` (required): The search query in OpenSearch Query DSL format
80+
- `size` (optional): Maximum number of hits to return (default: 10, max: 100; larger values are capped at 100). Limits response size to prevent token overflow.
81+
- `from` (optional): Starting offset for pagination (default: 0). Combine with `size` to page through results.
8082

8183
- **GetShardsTool**
8284
- `opensearch_url` (optional): The OpenSearch cluster URL to connect to
@@ -115,6 +117,7 @@ The following tools are available but disabled by default. To enable them, see t
115117

116118
- `opensearch_url` (optional): The OpenSearch cluster URL to connect to
117119
- `index` (optional): Limit the information returned to the specified indices. If not provided, returns segments for all indices
120+
- `limit` (optional): Maximum number of segments to return (default: 1000). Limits response size to prevent token overflow
118121

119122
- **CatNodesTool**
120123

src/opensearch/helper.py

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,28 @@ def get_index_mapping(args: GetIndexMappingArgs) -> json:
4545

4646

4747
def search_index(args: SearchIndexArgs) -> json:
    """Search an index with pagination support.

    Pagination is written into a shallow copy of the query, so the object
    held by ``args.query`` is never modified. Any ``size`` / ``from`` keys
    already present in the query are overridden by the tool arguments.

    Args:
        args: SearchIndexArgs containing index, query, and optional pagination
            params (``size`` and ``from_``). ``query`` may be a dict, a JSON
            string that decodes to an object, or ``None`` (empty body).

    Returns:
        json: Search results from OpenSearch

    Raises:
        ValueError: If ``args.query`` is not a dict, ``None``, or a JSON
            string that decodes to an object.
    """
    import json

    from .client import initialize_client

    default_size = 10
    # Hard cap on hits per request to prevent token overflow.
    max_size = 100

    query = args.query
    if isinstance(query, str):
        # Query DSL frequently arrives serialized; decode it instead of
        # silently falling back to an empty (match-all) body.
        try:
            query = json.loads(query)
        except ValueError as err:
            raise ValueError('query must be a dict or a JSON object string') from err
    if query is None:
        query = {}
    if not isinstance(query, dict):
        raise ValueError(
            f'query must be a dict or a JSON object string, got {type(query).__name__}'
        )

    # Copy so the pagination keys do not leak into the caller's query object.
    query_body = dict(query)
    query_body['size'] = min(args.size or default_size, max_size)
    query_body['from'] = args.from_ if args.from_ is not None else 0

    client = initialize_client(args)
    response = client.search(index=args.index, body=query_body)
    return response
5371

5472

@@ -62,21 +80,26 @@ def get_shards(args: GetShardsArgs) -> json:
6280

6381
def get_segments(args: GetSegmentsArgs) -> json:
    """Fetch Lucene segment information via the cat segments API.

    Args:
        args: GetSegmentsArgs carrying an optional index filter and an
            optional limit on the number of returned segments.

    Returns:
        json: Segment rows for the requested indices (or all indices when no
            index is given), truncated to ``args.limit`` entries when the
            response is a list and a limit is set.
    """
    from .client import initialize_client

    client = initialize_client(args)

    # An empty or missing index means "no filter", i.e. all indices.
    segments = client.cat.segments(index=args.index or None, format='json')

    # Only list responses can be truncated; anything else is passed through.
    if isinstance(segments, list) and args.limit:
        return segments[: args.limit]
    return segments
81104

82105

src/tools/tool_params.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,21 @@ class GetIndexMappingArgs(baseToolArgs):
3131
class SearchIndexArgs(baseToolArgs):
    # Arguments for the SearchIndexTool: target index, query, and pagination.
    index: str = Field(description='The name of the index to search in')
    query: Any = Field(description='The search query in OpenSearch query DSL format')

    # Page size. Only the lower bound is validated here; the description
    # advertises a cap of 100 for larger values.
    size: Optional[int] = Field(
        default=10,
        ge=1,
        description='Maximum number of hits to return (default: 10, max: 100). Limits response size to prevent token overflow. Values exceeding 100 will be capped at 100.',
    )

    # `from` is a reserved word in Python, so the attribute is named `from_`
    # and exposed under the alias `from`.
    from_: Optional[int] = Field(
        default=0,
        ge=0,
        alias='from',
        serialization_alias='from',
        description='Starting offset for pagination (default: 0). Use with size for pagination.',
    )

    class Config:
        # Allow the field to be populated by its name (`from_`) as well as
        # by its alias (`from`).
        populate_by_name = True
3449

3550

3651
class GetShardsArgs(baseToolArgs):
@@ -65,12 +80,17 @@ class Config:
6580

6681
class GetSegmentsArgs(baseToolArgs):
6782
"""Arguments for the GetSegmentsTool."""
68-
83+
6984
index: Optional[str] = Field(
70-
default=None,
85+
default=None,
7186
description='Limit the information returned to the specified indices. If not provided, returns segments for all indices.'
7287
)
73-
88+
limit: Optional[int] = Field(
89+
default=1000,
90+
description='Maximum number of segments to return (default: 1000). Limits response size to prevent token overflow.',
91+
ge=1,
92+
)
93+
7494
class Config:
7595
json_schema_extra = {
7696
"examples": [

src/tools/tools.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,7 @@ async def get_long_running_tasks_tool(args: GetLongRunningTasksArgs) -> list[dic
499499
},
500500
'SearchIndexTool': {
501501
'display_name': 'SearchIndexTool',
502-
'description': 'Searches an index using a query written in query domain-specific language (DSL) in OpenSearch',
502+
'description': 'Searches an index using a query written in query domain-specific language (DSL) in OpenSearch. Supports pagination with size (default: 10, max: 100) and from parameters to limit response size and prevent token overflow.',
503503
'input_schema': SearchIndexArgs.model_json_schema(),
504504
'function': search_index_tool,
505505
'args_model': SearchIndexArgs,
@@ -524,7 +524,7 @@ async def get_long_running_tasks_tool(args: GetLongRunningTasksArgs) -> list[dic
524524
},
525525
'GetSegmentsTool': {
526526
'display_name': 'GetSegmentsTool',
527-
'description': 'Gets information about Lucene segments in indices, including memory usage, document counts, and segment sizes. Can be filtered by specific indices.',
527+
'description': 'Gets information about Lucene segments in indices, including memory usage, document counts, and segment sizes. Can be filtered by specific indices. Supports limit parameter (default: 1000) to prevent token overflow.',
528528
'input_schema': GetSegmentsArgs.model_json_schema(),
529529
'function': get_segments_tool,
530530
'args_model': GetSegmentsArgs,

0 commit comments

Comments
 (0)