Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
0977e0f
feature update from custom version of doc processor
Jan 19, 2026
3500040
enhance worker for parsing service
Jan 20, 2026
8c76b15
add doc and modify bt review
Jan 21, 2026
7910ff8
Merge remote-tracking branch 'origin/main' into cjh/update-from-custo…
Jan 30, 2026
f82f0dd
modify code
Jan 30, 2026
e772a42
fix schema_extractor bug
wzh1994 Feb 4, 2026
d5ec170
Merge remote-tracking branch 'origin/main' into cjh/update-from-custo…
Feb 5, 2026
9c714de
temp
wzh1994 Feb 6, 2026
d40ad66
modify
Feb 7, 2026
3c417a3
Merge remote-tracking branch 'origin/main' into wzh/doc_process
wzh1994 Feb 12, 2026
2e64bda
temp code for mock api
Mar 10, 2026
e3e18a6
Merge from origin/main
Mar 10, 2026
809821f
fix dbname init
ChenJiahaoST Mar 10, 2026
61c5bec
fix tidb primary key type
ChenJiahaoST Mar 10, 2026
cea23e3
temp
Mar 11, 2026
56cbcf4
fix api error
Mar 12, 2026
e387b3c
Merge remote-tracking branch 'origin/main' into cjh/update-from-custo…
Mar 12, 2026
e4e1e4d
fix node transform fallback
Mar 12, 2026
03a97e4
update kb api
Mar 12, 2026
aaa3f49
fix error
Mar 12, 2026
ee4ba13
fix delete params
Mar 12, 2026
3c02f74
modify api
Mar 13, 2026
83609a6
Merge remote-tracking branch origin/main into cjh/refact-doc-manager
Mar 13, 2026
c81dafd
temp doc server example
Mar 16, 2026
fd01c8e
Merge remote-tracking branch 'origin/main' into cjh/update-from-custo…
Mar 16, 2026
9ca28cb
lint
Mar 16, 2026
5bb2625
tmp code
Mar 16, 2026
fa3ab7b
tmp example
Mar 16, 2026
24b9ffa
fix docnode copy
Mar 16, 2026
3c7f948
fix sqlmanager pg adaption
Mar 16, 2026
914dd89
Merge remote-tracking branch origin/main into cjh/refact-doc-manager
Mar 17, 2026
ca6e49c
Merge remote-tracking branch origin/main into cjh/refact-doc-manager
Mar 17, 2026
77cc90d
fix reparse whole file
Mar 17, 2026
c73faba
add chunk list api
Mar 18, 2026
318e966
fix opensearch query
Mar 18, 2026
20e9373
support real offset for segment store
Mar 18, 2026
dce9402
Merge remote-tracking branch origin/main into cjh/update-from-custom-…
Mar 18, 2026
72f7406
Merge branch cjh/update-from-custom-processor into cjh/refact-doc-man…
Mar 18, 2026
786ecbe
refactor: expose public doc and processor APIs
Mar 23, 2026
3fb0f51
fix file transfer
Mar 24, 2026
230fdd0
fix
Mar 24, 2026
2ac3cf9
fix: honor excluded metadata in text splitter
Mar 30, 2026
6cc9176
fix: guard doc path conflicts in doc manager
Mar 30, 2026
7541505
fix: make mineru pipeline backend more robust
Mar 30, 2026
084060b
fix: reset doc impl monitor lock on pickle
Mar 30, 2026
251259d
fix opensearch segment deserialization
Apr 1, 2026
b9c835d
fix doc service mock test lint
Apr 2, 2026
5681411
Merge remote-tracking branch 'origin/main' into cjh/refact-doc-manager
Apr 2, 2026
05346f0
Merge branch 'LazyAGI:main' into cjh/refact-doc-manager
ChenJiahaoST Apr 7, 2026
5f7c27f
Converge document manager parameters
Apr 7, 2026
9fe7e1a
Export DocServer and trim docservice docs
Apr 7, 2026
49535df
Unify DocServer doc registration
Apr 7, 2026
473f5e2
Address doc_service review feedback
Apr 8, 2026
baefa88
Tighten doc manager formatting
Apr 8, 2026
50dad37
Finish remaining doc_service review follow-ups
Apr 8, 2026
4c5ff62
Fix doc UI and cancel status regressions
Apr 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ __pycache__
test/
dist/
tmp/
tmp*/
build
*.lock
*.db
Expand Down
53 changes: 22 additions & 31 deletions docs/en/API Reference/tools.md
Original file line number Diff line number Diff line change
Expand Up @@ -200,35 +200,29 @@
members: [find]
exclude-members:

::: lazyllm.tools.rag.DocManager
members: document, list_kb_groups, add_files, reparse_files
exclude-members:
::: lazyllm.tools.rag.doc_service.DocServer
members: [upload, add, reparse, delete, transfer, patch_metadata, list_docs, get_doc, list_tasks, get_task, cancel_task, list_kbs, get_kb, list_chunks, list_algorithms, get_algorithm_info, create_kb, update_kb, batch_get_kbs, delete_kb, delete_kbs]
exclude-members:

::: lazyllm.tools.rag.utils.SqliteDocListManager
members:
- table_inited
- get_status_cond_and_params
- validate_paths
- update_need_reparsing
- list_files
- get_docs
- set_docs_new_meta
- fetch_docs_changed_meta
- list_all_kb_group
- add_kb_group
- list_kb_group_files
- delete_unreferenced_doc
- get_docs_need_reparse
- get_existing_paths_by_pattern
- update_file_message
- update_file_status
- add_files_to_kb_group
- delete_files_from_kb_group
- get_file_status
- update_kb_group
- release
- get_status_cond_and_params
exclude-members:
::: lazyllm.tools.rag.doc_service.base.AddFileItem
members:
exclude-members:

::: lazyllm.tools.rag.doc_service.base.UploadRequest
members:
exclude-members:

::: lazyllm.tools.rag.doc_service.base.AddRequest
members:
exclude-members:

::: lazyllm.tools.rag.doc_service.base.TransferItem
members:
exclude-members:

::: lazyllm.tools.rag.doc_service.base.TransferRequest
members:
exclude-members:

::: lazyllm.tools.rag.data_loaders.DirectoryReader
members: load_data
Expand Down Expand Up @@ -425,9 +419,6 @@
::: lazyllm.tools.rag.rerank.ModuleReranker
members: forward
exclude-members:
::: lazyllm.tools.rag.utils.DocListManager
members:
exclude-members:
::: lazyllm.tools.rag.global_metadata.GlobalMetadataDesc
members:
exclude-members:
Expand Down
5 changes: 2 additions & 3 deletions docs/lazyllm-skill/assets/rag/document.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ def processYml(file):
... print("Call the function processYml.")
... return [DocNode(text=data)]
...
doc1 = Document(dataset_path="your_files_path", create_ui=False)
doc2 = Document(dataset_path="your_files_path", create_ui=False)
doc1 = Document(dataset_path="your_files_path")
doc2 = Document(dataset_path="your_files_path")
doc1.add_reader("**/*.yml", YmlReader)
print(doc1._impl._local_file_reader)
{'**/*.yml': <class '__main__.YmlReader'>}
Expand Down Expand Up @@ -213,4 +213,3 @@ res = retriever(query=query)
print(f"answer: {res}")
```


53 changes: 22 additions & 31 deletions docs/zh/API Reference/tools.md
Original file line number Diff line number Diff line change
Expand Up @@ -189,35 +189,29 @@
members: [find]
exclude-members:

::: lazyllm.tools.rag.DocManager
members: document, list_kb_groups, add_files, reparse_files
exclude-members:
::: lazyllm.tools.rag.doc_service.DocServer
members: [upload, add, reparse, delete, transfer, patch_metadata, list_docs, get_doc, list_tasks, get_task, cancel_task, list_kbs, get_kb, list_chunks, list_algorithms, get_algorithm_info, create_kb, update_kb, batch_get_kbs, delete_kb, delete_kbs]
exclude-members:

::: lazyllm.tools.rag.utils.SqliteDocListManager
members:
- table_inited
- get_status_cond_and_params
- validate_paths
- update_need_reparsing
- list_files
- get_docs
- set_docs_new_meta
- fetch_docs_changed_meta
- list_all_kb_group
- add_kb_group
- list_kb_group_files
- delete_unreferenced_doc
- get_docs_need_reparse
- get_existing_paths_by_pattern
- update_file_message
- update_file_status
- add_files_to_kb_group
- delete_files_from_kb_group
- get_file_status
- update_kb_group
- release
- get_status_cond_and_params
exclude-members:
::: lazyllm.tools.rag.doc_service.base.AddFileItem
members:
exclude-members:

::: lazyllm.tools.rag.doc_service.base.UploadRequest
members:
exclude-members:

::: lazyllm.tools.rag.doc_service.base.AddRequest
members:
exclude-members:

::: lazyllm.tools.rag.doc_service.base.TransferItem
members:
exclude-members:

::: lazyllm.tools.rag.doc_service.base.TransferRequest
members:
exclude-members:

::: lazyllm.tools.rag.data_loaders.DirectoryReader
members: load_data
Expand Down Expand Up @@ -414,9 +408,6 @@
::: lazyllm.tools.rag.rerank.ModuleReranker
members: forward
exclude-members:
::: lazyllm.tools.rag.utils.DocListManager
members:
exclude-members:
::: lazyllm.tools.rag.global_metadata.GlobalMetadataDesc
members:
exclude-members:
Expand Down
85 changes: 85 additions & 0 deletions examples/rag/doc_service_mock_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
'''Connect a Document to a deployed DocServer.

Start DocServer first:
python examples/rag/doc_service_standalone.py --wait

Run this example:
python examples/rag/doc_service_mock_example.py --doc-server-url http://127.0.0.1:8848
'''

from __future__ import annotations

import argparse
import os
import tempfile
import time

from lazyllm import Document
from lazyllm.tools.rag.doc_service import DocServer
from lazyllm.tools.rag.doc_service.base import AddFileItem, AddRequest


def _normalize_base_url(url: str) -> str:
url = url.rstrip('/')
if url.endswith('/_call') or url.endswith('/generate'):
return url.rsplit('/', 1)[0]
return url


def _wait_task(server: DocServer, task_id: str, timeout: float = 30.0):
deadline = time.time() + timeout
while time.time() < deadline:
task = server.get_task(task_id)['data']
if task['status'] in {'SUCCESS', 'FAILED', 'CANCELED', 'DELETED'}:
return task
time.sleep(0.5)
raise TimeoutError(f'task {task_id} did not finish in time')


def main():
    '''Bind a temporary Document to a running DocServer and ingest one file.

    Parses the command line, writes a one-line text file into a temporary
    directory, adds it through the DocServer client, waits for the resulting
    task and finally prints how many documents the knowledge base lists.
    '''
    cli = argparse.ArgumentParser(description='Connect a Document to an existing DocServer.')
    cli.add_argument('--doc-server-url', type=str, required=True, help='Existing DocServer base URL.')
    cli.add_argument('--algo-id', type=str, default='doc_service_demo_algo', help='Document algorithm ID.')
    cli.add_argument('--kb-id', type=str, default='__default__', help='Knowledge base ID.')
    args = cli.parse_args()

    # One normalized URL serves both the client and the printed links.
    base_url = _normalize_base_url(args.doc_server_url)
    doc_server = DocServer(url=base_url)

    with tempfile.TemporaryDirectory(prefix='lazyllm_doc_service_example_') as workdir:
        sample_path = os.path.join(workdir, 'demo.txt')
        with open(sample_path, 'w', encoding='utf-8') as handle:
            handle.write('hello from a real doc_service example\n')

        # Step 1: bind a Document named after the algorithm ID to the DocServer.
        doc = Document(dataset_path=workdir, manager=doc_server, name=args.algo_id)
        doc.start()
        try:
            print(f'DocServer URL: {base_url}')
            print(f'DocServer Docs: {base_url}/docs')

            # Step 2: submit the local file via the DocServer client.
            request = AddRequest(
                kb_id=args.kb_id,
                algo_id=args.algo_id,
                items=[AddFileItem(file_path=sample_path)],
            )
            item = doc_server.add(request)['data']['items'][0]
            print(f'Doc ID: {item["doc_id"]}')
            print(f'Task ID: {item["task_id"]}')

            # Step 3: block until the asynchronous parse task is terminal.
            task = _wait_task(doc_server, item['task_id'])
            print(f'Task Status: {task["status"]}')

            # Step 4: count the documents listed for the target knowledge base.
            listing = doc_server.list_docs(
                kb_id=args.kb_id, algo_id=args.algo_id, include_deleted_or_canceled=False
            )
            docs = listing['data']['items']
            print(f'Doc Count In {args.kb_id}: {len(docs)}')
        finally:
            # Always stop the Document, even when a step above fails.
            doc.stop()


# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Loading
Loading