From 04486e561e584b4ecba5afbc673c3b50acdbd1eb Mon Sep 17 00:00:00 2001 From: Zhang JianAo Date: Fri, 14 Jul 2023 20:35:13 +0800 Subject: [PATCH] Guess file type based on the filename in the request. The original code guessed the file type based on the local temporary file name, which did not have an extension. This commit changes the logic to guess the file type based on the filename in the HTTP request. --- services/file.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/services/file.py b/services/file.py index 136fc17c5..1ae29c310 100644 --- a/services/file.py +++ b/services/file.py @@ -22,12 +22,12 @@ async def get_document_from_file( return doc -def extract_text_from_filepath(filepath: str, mimetype: Optional[str] = None) -> str: +def extract_text_from_filepath(filepath: str, mimetype: Optional[str] = None, filename: Optional[str] = None) -> str: """Return the text content of a file given its filepath.""" - if mimetype is None: + if mimetype is None or mimetype == 'application/octet-stream': # Get the mimetype of the file based on its extension - mimetype, _ = mimetypes.guess_type(filepath) + mimetype, _ = mimetypes.guess_type(filename) if not mimetype: if filepath.endswith(".md"): @@ -105,7 +105,7 @@ async def extract_text_from_form_file(file: UploadFile): f.write(file_stream) try: - extracted_text = extract_text_from_filepath(temp_file_path, mimetype) + extracted_text = extract_text_from_filepath(temp_file_path, mimetype, file.filename) except Exception as e: logger.error(e) os.remove(temp_file_path)