Skip to content

Commit c23c62d

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: Update v1 sdk to use new RagFileTransformationConfig proto
PiperOrigin-RevId: 700579567
1 parent a1f8bc2 commit c23c62d

File tree

9 files changed

+420
-359
lines changed

9 files changed

+420
-359
lines changed

google/cloud/aiplatform/utils/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,8 @@
142142
tensorboard_service_client_v1.TensorboardServiceClient,
143143
vizier_service_client_v1.VizierServiceClient,
144144
vertex_rag_service_client_v1.VertexRagServiceClient,
145+
vertex_rag_data_service_async_client_v1.VertexRagDataServiceAsyncClient,
146+
vertex_rag_data_service_client_v1.VertexRagDataServiceClient,
145147
)
146148

147149

tests/unit/vertex_rag/conftest.py

+74-2
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,18 @@
1616
from google import auth
1717
from google.api_core import operation as ga_operation
1818
from google.auth import credentials as auth_credentials
19-
from vertexai.preview import rag
20-
from google.cloud.aiplatform_v1beta1 import (
19+
from vertexai import rag
20+
from vertexai.preview import rag as rag_preview
21+
from google.cloud.aiplatform_v1 import (
2122
DeleteRagCorpusRequest,
2223
VertexRagDataServiceAsyncClient,
2324
VertexRagDataServiceClient,
2425
)
26+
from google.cloud.aiplatform_v1beta1 import (
27+
DeleteRagCorpusRequest as DeleteRagCorpusRequestPreview,
28+
VertexRagDataServiceAsyncClient as VertexRagDataServiceAsyncClientPreview,
29+
VertexRagDataServiceClient as VertexRagDataServiceClientPreview,
30+
)
2531
import test_rag_constants_preview
2632
import mock
2733
import pytest
@@ -75,6 +81,30 @@ def rag_data_client_mock():
7581
yield rag_data_client_mock
7682

7783

84+
@pytest.fixture
def rag_data_client_preview_mock():
    """Patch the preview RAG data-service client factory with a canned client.

    Replaces ``create_rag_data_service_client`` on
    ``rag_preview.utils._gapic_utils`` so that it returns a ``Mock`` spec'd to
    the preview ``VertexRagDataServiceClient``. The mocked client answers
    ``get_rag_corpus``, ``delete_rag_corpus`` and ``get_rag_file`` with the
    preview test constants.

    Yields:
        The mock that stands in for the patched factory function.
    """
    with mock.patch.object(
        rag_preview.utils._gapic_utils, "create_rag_data_service_client"
    ) as patched_factory:
        preview_client = mock.Mock(spec=VertexRagDataServiceClientPreview)

        # delete_rag_corpus returns an operation whose result() is an empty
        # preview DeleteRagCorpusRequest.
        delete_operation = mock.Mock(ga_operation.Operation)
        delete_operation.result.return_value = DeleteRagCorpusRequestPreview()
        preview_client.delete_rag_corpus.return_value = delete_operation

        # Read calls return the canned preview GAPIC resources.
        preview_client.get_rag_corpus.return_value = (
            test_rag_constants_preview.TEST_GAPIC_RAG_CORPUS
        )
        preview_client.get_rag_file.return_value = (
            test_rag_constants_preview.TEST_GAPIC_RAG_FILE
        )

        patched_factory.return_value = preview_client
        yield patched_factory
106+
107+
78108
@pytest.fixture
79109
def rag_data_client_mock_exception():
80110
with mock.patch.object(
@@ -105,6 +135,36 @@ def rag_data_client_mock_exception():
105135
yield rag_data_client_mock_exception
106136

107137

138+
@pytest.fixture
def rag_data_client_preview_mock_exception():
    """Patch the preview RAG data-service client factory with a failing client.

    Replaces ``create_rag_data_service_client`` on
    ``rag_preview.utils._gapic_utils`` so that it returns a ``Mock`` spec'd to
    the preview ``VertexRagDataServiceClient`` whose corpus and file methods
    all raise ``Exception`` when called.

    Yields:
        The mock that stands in for the patched factory function.
    """
    failing_methods = (
        "create_rag_corpus",
        "update_rag_corpus",
        "get_rag_corpus",
        "list_rag_corpora",
        "delete_rag_corpus",
        "upload_rag_file",
        "import_rag_files",
        "get_rag_file",
        "list_rag_files",
        "delete_rag_file",
    )
    with mock.patch.object(
        rag_preview.utils._gapic_utils, "create_rag_data_service_client"
    ) as patched_factory:
        failing_client = mock.Mock(spec=VertexRagDataServiceClientPreview)
        for method_name in failing_methods:
            # Every listed RPC raises as soon as it is invoked.
            getattr(failing_client, method_name).side_effect = Exception
        patched_factory.return_value = failing_client
        yield patched_factory
166+
167+
108168
@pytest.fixture
109169
def rag_data_async_client_mock_exception():
110170
with mock.patch.object(
@@ -115,3 +175,15 @@ def rag_data_async_client_mock_exception():
115175
api_client_mock.import_rag_files.side_effect = Exception
116176
rag_data_client_mock_exception.return_value = api_client_mock
117177
yield rag_data_async_client_mock_exception
178+
179+
180+
@pytest.fixture
def rag_data_async_client_preview_mock_exception():
    """Patch the preview async RAG data-service client factory with a failing client.

    Replaces ``create_rag_data_service_async_client`` on
    ``rag_preview.utils._gapic_utils`` so that it returns a ``Mock`` spec'd to
    the preview ``VertexRagDataServiceAsyncClient`` whose ``import_rag_files``
    raises ``Exception`` when called.

    Yields:
        The mock that stands in for the patched factory function.
    """
    with mock.patch.object(
        rag_preview.utils._gapic_utils, "create_rag_data_service_async_client"
    ) as rag_data_async_client_mock_exception:
        api_client_mock = mock.Mock(spec=VertexRagDataServiceAsyncClientPreview)
        # import_rag_files
        api_client_mock.import_rag_files.side_effect = Exception
        # Attach the failing client to the patch object bound by this `with`.
        # The previous code assigned to `rag_data_client_mock_exception`, a
        # different module-level name, so the patched factory never returned
        # `api_client_mock`.
        rag_data_async_client_mock_exception.return_value = api_client_mock
        yield rag_data_async_client_mock_exception

tests/unit/vertex_rag/test_rag_constants.py

+23-90
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,10 @@
3737
VertexFeatureStore,
3838
)
3939

40-
from google.cloud.aiplatform_v1beta1 import (
40+
from google.cloud.aiplatform_v1 import (
4141
GoogleDriveSource,
4242
RagFileChunkingConfig,
43-
RagFileParsingConfig,
43+
RagFileTransformationConfig,
4444
ImportRagFilesConfig,
4545
ImportRagFilesRequest,
4646
ImportRagFilesResponse,
@@ -49,13 +49,12 @@
4949
RagFile as GapicRagFile,
5050
SharePointSources as GapicSharePointSources,
5151
SlackSource as GapicSlackSource,
52-
RagVectorDbConfig,
5352
)
5453
from google.cloud.aiplatform_v1 import (
5554
RagContexts,
5655
RetrieveContextsResponse,
5756
)
58-
from google.cloud.aiplatform_v1beta1.types import api_auth
57+
from google.cloud.aiplatform_v1.types import api_auth
5958
from google.protobuf import timestamp_pb2
6059

6160

@@ -99,61 +98,6 @@
9998
display_name=TEST_CORPUS_DISPLAY_NAME,
10099
description=TEST_CORPUS_DISCRIPTION,
101100
)
102-
TEST_GAPIC_RAG_CORPUS.rag_embedding_model_config.vertex_prediction_endpoint.endpoint = (
103-
"projects/{}/locations/{}/publishers/google/models/textembedding-gecko".format(
104-
TEST_PROJECT, TEST_REGION
105-
)
106-
)
107-
TEST_GAPIC_RAG_CORPUS_WEAVIATE = GapicRagCorpus(
108-
name=TEST_RAG_CORPUS_RESOURCE_NAME,
109-
display_name=TEST_CORPUS_DISPLAY_NAME,
110-
description=TEST_CORPUS_DISCRIPTION,
111-
rag_vector_db_config=RagVectorDbConfig(
112-
weaviate=RagVectorDbConfig.Weaviate(
113-
http_endpoint=TEST_WEAVIATE_HTTP_ENDPOINT,
114-
collection_name=TEST_WEAVIATE_COLLECTION_NAME,
115-
),
116-
api_auth=api_auth.ApiAuth(
117-
api_key_config=api_auth.ApiAuth.ApiKeyConfig(
118-
api_key_secret_version=TEST_WEAVIATE_API_KEY_SECRET_VERSION
119-
),
120-
),
121-
),
122-
)
123-
TEST_GAPIC_RAG_CORPUS_VERTEX_FEATURE_STORE = GapicRagCorpus(
124-
name=TEST_RAG_CORPUS_RESOURCE_NAME,
125-
display_name=TEST_CORPUS_DISPLAY_NAME,
126-
description=TEST_CORPUS_DISCRIPTION,
127-
rag_vector_db_config=RagVectorDbConfig(
128-
vertex_feature_store=RagVectorDbConfig.VertexFeatureStore(
129-
feature_view_resource_name=TEST_VERTEX_FEATURE_STORE_RESOURCE_NAME
130-
),
131-
),
132-
)
133-
TEST_GAPIC_RAG_CORPUS_VERTEX_VECTOR_SEARCH = GapicRagCorpus(
134-
name=TEST_RAG_CORPUS_RESOURCE_NAME,
135-
display_name=TEST_CORPUS_DISPLAY_NAME,
136-
description=TEST_CORPUS_DISCRIPTION,
137-
rag_vector_db_config=RagVectorDbConfig(
138-
vertex_vector_search=RagVectorDbConfig.VertexVectorSearch(
139-
index_endpoint=TEST_VERTEX_VECTOR_SEARCH_INDEX_ENDPOINT,
140-
index=TEST_VERTEX_VECTOR_SEARCH_INDEX,
141-
),
142-
),
143-
)
144-
TEST_GAPIC_RAG_CORPUS_PINECONE = GapicRagCorpus(
145-
name=TEST_RAG_CORPUS_RESOURCE_NAME,
146-
display_name=TEST_CORPUS_DISPLAY_NAME,
147-
description=TEST_CORPUS_DISCRIPTION,
148-
rag_vector_db_config=RagVectorDbConfig(
149-
pinecone=RagVectorDbConfig.Pinecone(index_name=TEST_PINECONE_INDEX_NAME),
150-
api_auth=api_auth.ApiAuth(
151-
api_key_config=api_auth.ApiAuth.ApiKeyConfig(
152-
api_key_secret_version=TEST_PINECONE_API_KEY_SECRET_VERSION
153-
),
154-
),
155-
),
156-
)
157101
TEST_EMBEDDING_MODEL_CONFIG = EmbeddingModelConfig(
158102
publisher_model="publishers/google/models/textembedding-gecko",
159103
)
@@ -198,7 +142,7 @@
198142
TEST_FILE_DISPLAY_NAME = "my-file.txt"
199143
TEST_FILE_DESCRIPTION = "my file."
200144
TEST_HEADERS = {"X-Goog-Upload-Protocol": "multipart"}
201-
TEST_UPLOAD_REQUEST_URI = "https://{}/upload/v1beta1/projects/{}/locations/{}/ragCorpora/{}/ragFiles:upload".format(
145+
TEST_UPLOAD_REQUEST_URI = "https://{}/upload/v1/projects/{}/locations/{}/ragCorpora/{}/ragFiles:upload".format(
202146
TEST_API_ENDPOINT, TEST_PROJECT_NUMBER, TEST_REGION, TEST_RAG_CORPUS_ID
203147
)
204148
TEST_RAG_FILE_ID = "generate-456"
@@ -215,10 +159,19 @@
215159
TEST_RAG_FILE_JSON_ERROR = {"error": {"code": 13}}
216160
TEST_CHUNK_SIZE = 512
217161
TEST_CHUNK_OVERLAP = 100
162+
TEST_RAG_FILE_TRANSFORMATION_CONFIG = RagFileTransformationConfig(
163+
rag_file_chunking_config=RagFileChunkingConfig(
164+
fixed_length_chunking=RagFileChunkingConfig.FixedLengthChunking(
165+
chunk_size=TEST_CHUNK_SIZE,
166+
chunk_overlap=TEST_CHUNK_OVERLAP,
167+
),
168+
),
169+
)
218170
# GCS
219-
TEST_IMPORT_FILES_CONFIG_GCS = ImportRagFilesConfig()
171+
TEST_IMPORT_FILES_CONFIG_GCS = ImportRagFilesConfig(
172+
rag_file_transformation_config=TEST_RAG_FILE_TRANSFORMATION_CONFIG,
173+
)
220174
TEST_IMPORT_FILES_CONFIG_GCS.gcs_source.uris = [TEST_GCS_PATH]
221-
TEST_IMPORT_FILES_CONFIG_GCS.rag_file_parsing_config.use_advanced_pdf_parsing = False
222175
TEST_IMPORT_REQUEST_GCS = ImportRagFilesRequest(
223176
parent=TEST_RAG_CORPUS_RESOURCE_NAME,
224177
import_rag_files_config=TEST_IMPORT_FILES_CONFIG_GCS,
@@ -231,26 +184,22 @@
231184
TEST_DRIVE_FOLDER_2 = (
232185
f"https://drive.google.com/drive/folders/{TEST_DRIVE_FOLDER_ID}?resourcekey=0-eiOT3"
233186
)
234-
TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER = ImportRagFilesConfig()
187+
TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER = ImportRagFilesConfig(
188+
rag_file_transformation_config=TEST_RAG_FILE_TRANSFORMATION_CONFIG,
189+
)
235190
TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER.google_drive_source.resource_ids = [
236191
GoogleDriveSource.ResourceId(
237192
resource_id=TEST_DRIVE_FOLDER_ID,
238193
resource_type=GoogleDriveSource.ResourceId.ResourceType.RESOURCE_TYPE_FOLDER,
239194
)
240195
]
241-
TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER.rag_file_parsing_config.use_advanced_pdf_parsing = (
242-
False
243-
)
244196
TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER_PARSING = ImportRagFilesConfig()
245197
TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER_PARSING.google_drive_source.resource_ids = [
246198
GoogleDriveSource.ResourceId(
247199
resource_id=TEST_DRIVE_FOLDER_ID,
248200
resource_type=GoogleDriveSource.ResourceId.ResourceType.RESOURCE_TYPE_FOLDER,
249201
)
250202
]
251-
TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER_PARSING.rag_file_parsing_config.use_advanced_pdf_parsing = (
252-
True
253-
)
254203
TEST_IMPORT_REQUEST_DRIVE_FOLDER = ImportRagFilesRequest(
255204
parent=TEST_RAG_CORPUS_RESOURCE_NAME,
256205
import_rag_files_config=TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER,
@@ -263,11 +212,7 @@
263212
TEST_DRIVE_FILE_ID = "456"
264213
TEST_DRIVE_FILE = f"https://drive.google.com/file/d/{TEST_DRIVE_FILE_ID}"
265214
TEST_IMPORT_FILES_CONFIG_DRIVE_FILE = ImportRagFilesConfig(
266-
rag_file_chunking_config=RagFileChunkingConfig(
267-
chunk_size=TEST_CHUNK_SIZE,
268-
chunk_overlap=TEST_CHUNK_OVERLAP,
269-
),
270-
rag_file_parsing_config=RagFileParsingConfig(use_advanced_pdf_parsing=False),
215+
rag_file_transformation_config=TEST_RAG_FILE_TRANSFORMATION_CONFIG,
271216
)
272217
TEST_IMPORT_FILES_CONFIG_DRIVE_FILE.max_embedding_requests_per_min = 800
273218

@@ -322,10 +267,7 @@
322267
],
323268
)
324269
TEST_IMPORT_FILES_CONFIG_SLACK_SOURCE = ImportRagFilesConfig(
325-
rag_file_chunking_config=RagFileChunkingConfig(
326-
chunk_size=TEST_CHUNK_SIZE,
327-
chunk_overlap=TEST_CHUNK_OVERLAP,
328-
)
270+
rag_file_transformation_config=TEST_RAG_FILE_TRANSFORMATION_CONFIG,
329271
)
330272
TEST_IMPORT_FILES_CONFIG_SLACK_SOURCE.slack_source.channels = [
331273
GapicSlackSource.SlackChannels(
@@ -377,10 +319,7 @@
377319
],
378320
)
379321
TEST_IMPORT_FILES_CONFIG_JIRA_SOURCE = ImportRagFilesConfig(
380-
rag_file_chunking_config=RagFileChunkingConfig(
381-
chunk_size=TEST_CHUNK_SIZE,
382-
chunk_overlap=TEST_CHUNK_OVERLAP,
383-
)
322+
rag_file_transformation_config=TEST_RAG_FILE_TRANSFORMATION_CONFIG,
384323
)
385324
TEST_IMPORT_FILES_CONFIG_JIRA_SOURCE.jira_source.jira_queries = [
386325
GapicJiraSource.JiraQueries(
@@ -412,10 +351,7 @@
412351
],
413352
)
414353
TEST_IMPORT_FILES_CONFIG_SHARE_POINT_SOURCE = ImportRagFilesConfig(
415-
rag_file_chunking_config=RagFileChunkingConfig(
416-
chunk_size=TEST_CHUNK_SIZE,
417-
chunk_overlap=TEST_CHUNK_OVERLAP,
418-
),
354+
rag_file_transformation_config=TEST_RAG_FILE_TRANSFORMATION_CONFIG,
419355
share_point_sources=GapicSharePointSources(
420356
share_point_sources=[
421357
GapicSharePointSources.SharePointSource(
@@ -490,10 +426,7 @@
490426
)
491427

492428
TEST_IMPORT_FILES_CONFIG_SHARE_POINT_SOURCE_NO_FOLDERS = ImportRagFilesConfig(
493-
rag_file_chunking_config=RagFileChunkingConfig(
494-
chunk_size=TEST_CHUNK_SIZE,
495-
chunk_overlap=TEST_CHUNK_OVERLAP,
496-
),
429+
rag_file_transformation_config=TEST_RAG_FILE_TRANSFORMATION_CONFIG,
497430
share_point_sources=GapicSharePointSources(
498431
share_point_sources=[
499432
GapicSharePointSources.SharePointSource(

0 commit comments

Comments
 (0)