Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add GET upload file API endpoint to dataset service api #11899

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion api/controllers/service_api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@

from . import index
from .app import app, audio, completion, conversation, file, message, workflow
from .dataset import dataset, document, hit_testing, segment
from .dataset import dataset, document, hit_testing, segment, upload_file
54 changes: 54 additions & 0 deletions api/controllers/service_api/dataset/upload_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from werkzeug.exceptions import NotFound

from controllers.service_api import api
from controllers.service_api.wraps import (
DatasetApiResource,
)
from core.file import helpers as file_helpers
from extensions.ext_database import db
from models.dataset import Dataset
from models.model import UploadFile
from services.dataset_service import DocumentService


class UploadFileApi(DatasetApiResource):
def get(self, tenant_id, dataset_id, document_id):
"""Get upload file."""
# check dataset
dataset_id = str(dataset_id)
tenant_id = str(tenant_id)
dataset = db.session.query(Dataset).filter(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
if not dataset:
raise NotFound("Dataset not found.")
# check document
document_id = str(document_id)
document = DocumentService.get_document(dataset.id, document_id)
if not document:
raise NotFound("Document not found.")
# check upload file
if document.data_source_type != "upload_file":
raise ValueError(f"Document data source type ({document.data_source_type}) is not upload_file.")
data_source_info = document.data_source_info_dict
if data_source_info and "upload_file_id" in data_source_info:
file_id = data_source_info["upload_file_id"]
upload_file = db.session.query(UploadFile).filter(UploadFile.id == file_id).first()
if not upload_file:
raise NotFound("UploadFile not found.")
else:
raise ValueError("Upload file id not found in document data source info.")

url = file_helpers.get_signed_file_url(upload_file_id=upload_file.id)
return {
"id": upload_file.id,
"name": upload_file.name,
"size": upload_file.size,
"extension": upload_file.extension,
"url": url,
"download_url": f"{url}&as_attachment=true",
"mime_type": upload_file.mime_type,
"created_by": upload_file.created_by,
"created_at": upload_file.created_at.timestamp(),
}, 200


api.add_resource(UploadFileApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/upload-file")
51 changes: 51 additions & 0 deletions web/app/(commonLayout)/datasets/template/template.en.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -1069,6 +1069,57 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from

<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/documents/{document_id}/upload-file'
method='GET'
title='Get Upload File'
name='#get_upload_file'
/>
<Row>
<Col>
### Path
<Properties>
<Property name='dataset_id' type='string' key='dataset_id'>
Knowledge ID
</Property>
<Property name='document_id' type='string' key='document_id'>
Document ID
</Property>
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="Request"
tag="GET"
label="/datasets/{dataset_id}/documents/{document_id}/upload-file"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/upload-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
>
```bash {{ title: 'cURL' }}
curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/upload-file' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json'
```
</CodeGroup>
<CodeGroup title="Response">
```json {{ title: 'Response' }}
{
"id": "file_id",
"name": "file_name",
"size": 1024,
"extension": "txt",
"url": "preview_url",
"download_url": "download_url",
"mime_type": "text/plain",
"created_by": "user_id",
"created_at": 1728734540,
}
```
</CodeGroup>
</Col>
</Row>

<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/retrieve'
method='POST'
Expand Down
51 changes: 51 additions & 0 deletions web/app/(commonLayout)/datasets/template/template.zh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -1070,6 +1070,57 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from

<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/documents/{document_id}/upload-file'
method='GET'
title='获取上传文件'
name='#get_upload_file'
/>
<Row>
<Col>
### Path
<Properties>
<Property name='dataset_id' type='string' key='dataset_id'>
知识库 ID
</Property>
<Property name='document_id' type='string' key='document_id'>
文档 ID
</Property>
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="Request"
tag="GET"
label="/datasets/{dataset_id}/documents/{document_id}/upload-file"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/upload-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
>
```bash {{ title: 'cURL' }}
curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/upload-file' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json'
```
</CodeGroup>
<CodeGroup title="Response">
```json {{ title: 'Response' }}
{
"id": "file_id",
"name": "file_name",
"size": 1024,
"extension": "txt",
"url": "preview_url",
"download_url": "download_url",
"mime_type": "text/plain",
"created_by": "user_id",
"created_at": 1728734540,
}
```
</CodeGroup>
</Col>
</Row>

<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/retrieve'
method='POST'
Expand Down
Loading