From 5adf13146750da33998a837c60451fb89e81c4db Mon Sep 17 00:00:00 2001 From: ItsRoy69 Date: Thu, 6 Mar 2025 09:47:53 +0530 Subject: [PATCH 1/2] pgvector: replace lone surrogates before hashing content in doc_exists Signed-off-by: ItsRoy69 --- libs/agno/agno/vectordb/pgvector/pgvector.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/libs/agno/agno/vectordb/pgvector/pgvector.py b/libs/agno/agno/vectordb/pgvector/pgvector.py index e2c8f8a972..b6cb4e7cc7 100644 --- a/libs/agno/agno/vectordb/pgvector/pgvector.py +++ b/libs/agno/agno/vectordb/pgvector/pgvector.py @@ -225,7 +225,7 @@ def _record_exists(self, column, value) -> bool: def doc_exists(self, document: Document) -> bool: """ - Check if a document with the same content hash exists in the table. + Check if a document exists in the table. Args: document (Document): The document to check. @@ -234,7 +234,14 @@ def doc_exists(self, document: Document) -> bool: bool: True if the document exists, False otherwise. """ cleaned_content = document.content.replace("\x00", "\ufffd") - content_hash = md5(cleaned_content.encode()).hexdigest() + try: + content_hash = md5(cleaned_content.encode('utf-8')).hexdigest() + except UnicodeEncodeError: + cleaned_content = ''.join( + '\ufffd' if '\ud800' <= c <= '\udfff' else c + for c in cleaned_content + ) + content_hash = md5(cleaned_content.encode('utf-8')).hexdigest() return self._record_exists(self.table.c.content_hash, content_hash) def name_exists(self, name: str) -> bool: From a9c542d1e8949e6998bb1cbdbbdfd4afb941efa9 Mon Sep 17 00:00:00 2001 From: ItsRoy69 Date: Thu, 6 Mar 2025 09:51:32 +0530 Subject: [PATCH 2/2] pgvector: restore "same content hash" wording in doc_exists docstring Signed-off-by: ItsRoy69 --- libs/agno/agno/vectordb/pgvector/pgvector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/agno/agno/vectordb/pgvector/pgvector.py b/libs/agno/agno/vectordb/pgvector/pgvector.py index b6cb4e7cc7..f7f7a79c3e 100644 --- a/libs/agno/agno/vectordb/pgvector/pgvector.py +++ 
b/libs/agno/agno/vectordb/pgvector/pgvector.py @@ -225,7 +225,7 @@ def _record_exists(self, column, value) -> bool: def doc_exists(self, document: Document) -> bool: """ - Check if a document exists in the table. + Check if a document with the same content hash exists in the table. Args: document (Document): The document to check.