|
37 | 37 | VertexFeatureStore,
|
38 | 38 | )
|
39 | 39 |
|
40 |
| -from google.cloud.aiplatform_v1beta1 import ( |
| 40 | +from google.cloud.aiplatform_v1 import ( |
41 | 41 | GoogleDriveSource,
|
42 | 42 | RagFileChunkingConfig,
|
43 |
| - RagFileParsingConfig, |
| 43 | + RagFileTransformationConfig, |
44 | 44 | ImportRagFilesConfig,
|
45 | 45 | ImportRagFilesRequest,
|
46 | 46 | ImportRagFilesResponse,
|
|
55 | 55 | RagContexts,
|
56 | 56 | RetrieveContextsResponse,
|
57 | 57 | )
|
58 |
| -from google.cloud.aiplatform_v1beta1.types import api_auth |
| 58 | +from google.cloud.aiplatform_v1.types import api_auth |
59 | 59 | from google.protobuf import timestamp_pb2
|
60 | 60 |
|
61 | 61 |
|
|
99 | 99 | display_name=TEST_CORPUS_DISPLAY_NAME,
|
100 | 100 | description=TEST_CORPUS_DISCRIPTION,
|
101 | 101 | )
|
102 |
| -TEST_GAPIC_RAG_CORPUS.rag_embedding_model_config.vertex_prediction_endpoint.endpoint = ( |
103 |
| - "projects/{}/locations/{}/publishers/google/models/textembedding-gecko".format( |
104 |
| - TEST_PROJECT, TEST_REGION |
105 |
| - ) |
106 |
| -) |
107 |
| -TEST_GAPIC_RAG_CORPUS_WEAVIATE = GapicRagCorpus( |
108 |
| - name=TEST_RAG_CORPUS_RESOURCE_NAME, |
109 |
| - display_name=TEST_CORPUS_DISPLAY_NAME, |
110 |
| - description=TEST_CORPUS_DISCRIPTION, |
111 |
| - rag_vector_db_config=RagVectorDbConfig( |
112 |
| - weaviate=RagVectorDbConfig.Weaviate( |
113 |
| - http_endpoint=TEST_WEAVIATE_HTTP_ENDPOINT, |
114 |
| - collection_name=TEST_WEAVIATE_COLLECTION_NAME, |
115 |
| - ), |
116 |
| - api_auth=api_auth.ApiAuth( |
117 |
| - api_key_config=api_auth.ApiAuth.ApiKeyConfig( |
118 |
| - api_key_secret_version=TEST_WEAVIATE_API_KEY_SECRET_VERSION |
119 |
| - ), |
120 |
| - ), |
121 |
| - ), |
122 |
| -) |
123 |
| -TEST_GAPIC_RAG_CORPUS_VERTEX_FEATURE_STORE = GapicRagCorpus( |
124 |
| - name=TEST_RAG_CORPUS_RESOURCE_NAME, |
125 |
| - display_name=TEST_CORPUS_DISPLAY_NAME, |
126 |
| - description=TEST_CORPUS_DISCRIPTION, |
127 |
| - rag_vector_db_config=RagVectorDbConfig( |
128 |
| - vertex_feature_store=RagVectorDbConfig.VertexFeatureStore( |
129 |
| - feature_view_resource_name=TEST_VERTEX_FEATURE_STORE_RESOURCE_NAME |
130 |
| - ), |
131 |
| - ), |
132 |
| -) |
133 |
| -TEST_GAPIC_RAG_CORPUS_VERTEX_VECTOR_SEARCH = GapicRagCorpus( |
134 |
| - name=TEST_RAG_CORPUS_RESOURCE_NAME, |
135 |
| - display_name=TEST_CORPUS_DISPLAY_NAME, |
136 |
| - description=TEST_CORPUS_DISCRIPTION, |
137 |
| - rag_vector_db_config=RagVectorDbConfig( |
138 |
| - vertex_vector_search=RagVectorDbConfig.VertexVectorSearch( |
139 |
| - index_endpoint=TEST_VERTEX_VECTOR_SEARCH_INDEX_ENDPOINT, |
140 |
| - index=TEST_VERTEX_VECTOR_SEARCH_INDEX, |
141 |
| - ), |
142 |
| - ), |
143 |
| -) |
144 |
| -TEST_GAPIC_RAG_CORPUS_PINECONE = GapicRagCorpus( |
145 |
| - name=TEST_RAG_CORPUS_RESOURCE_NAME, |
146 |
| - display_name=TEST_CORPUS_DISPLAY_NAME, |
147 |
| - description=TEST_CORPUS_DISCRIPTION, |
148 |
| - rag_vector_db_config=RagVectorDbConfig( |
149 |
| - pinecone=RagVectorDbConfig.Pinecone(index_name=TEST_PINECONE_INDEX_NAME), |
150 |
| - api_auth=api_auth.ApiAuth( |
151 |
| - api_key_config=api_auth.ApiAuth.ApiKeyConfig( |
152 |
| - api_key_secret_version=TEST_PINECONE_API_KEY_SECRET_VERSION |
153 |
| - ), |
154 |
| - ), |
155 |
| - ), |
156 |
| -) |
157 | 102 | TEST_EMBEDDING_MODEL_CONFIG = EmbeddingModelConfig(
|
158 | 103 | publisher_model="publishers/google/models/textembedding-gecko",
|
159 | 104 | )
|
|
198 | 143 | TEST_FILE_DISPLAY_NAME = "my-file.txt"
|
199 | 144 | TEST_FILE_DESCRIPTION = "my file."
|
200 | 145 | TEST_HEADERS = {"X-Goog-Upload-Protocol": "multipart"}
|
201 |
| -TEST_UPLOAD_REQUEST_URI = "https://{}/upload/v1beta1/projects/{}/locations/{}/ragCorpora/{}/ragFiles:upload".format( |
| 146 | +TEST_UPLOAD_REQUEST_URI = "https://{}/upload/v1/projects/{}/locations/{}/ragCorpora/{}/ragFiles:upload".format( |
202 | 147 | TEST_API_ENDPOINT, TEST_PROJECT_NUMBER, TEST_REGION, TEST_RAG_CORPUS_ID
|
203 | 148 | )
|
204 | 149 | TEST_RAG_FILE_ID = "generate-456"
|
|
215 | 160 | TEST_RAG_FILE_JSON_ERROR = {"error": {"code": 13}}
|
216 | 161 | TEST_CHUNK_SIZE = 512
|
217 | 162 | TEST_CHUNK_OVERLAP = 100
|
| 163 | +TEST_RAG_FILE_TRANSFORMATION_CONFIG = RagFileTransformationConfig( |
| 164 | + rag_file_chunking_config=RagFileChunkingConfig( |
| 165 | + fixed_length_chunking=RagFileChunkingConfig.FixedLengthChunking( |
| 166 | + chunk_size=TEST_CHUNK_SIZE, |
| 167 | + chunk_overlap=TEST_CHUNK_OVERLAP, |
| 168 | + ), |
| 169 | + ), |
| 170 | +) |
218 | 171 | # GCS
|
219 |
| -TEST_IMPORT_FILES_CONFIG_GCS = ImportRagFilesConfig() |
| 172 | +TEST_IMPORT_FILES_CONFIG_GCS = ImportRagFilesConfig( |
| 173 | + rag_file_transformation_config=TEST_RAG_FILE_TRANSFORMATION_CONFIG, |
| 174 | +) |
220 | 175 | TEST_IMPORT_FILES_CONFIG_GCS.gcs_source.uris = [TEST_GCS_PATH]
|
221 |
| -TEST_IMPORT_FILES_CONFIG_GCS.rag_file_parsing_config.use_advanced_pdf_parsing = False |
222 | 176 | TEST_IMPORT_REQUEST_GCS = ImportRagFilesRequest(
|
223 | 177 | parent=TEST_RAG_CORPUS_RESOURCE_NAME,
|
224 | 178 | import_rag_files_config=TEST_IMPORT_FILES_CONFIG_GCS,
|
|
231 | 185 | TEST_DRIVE_FOLDER_2 = (
|
232 | 186 | f"https://drive.google.com/drive/folders/{TEST_DRIVE_FOLDER_ID}?resourcekey=0-eiOT3"
|
233 | 187 | )
|
234 |
| -TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER = ImportRagFilesConfig() |
| 188 | +TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER = ImportRagFilesConfig( |
| 189 | + rag_file_transformation_config=TEST_RAG_FILE_TRANSFORMATION_CONFIG, |
| 190 | +) |
235 | 191 | TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER.google_drive_source.resource_ids = [
|
236 | 192 | GoogleDriveSource.ResourceId(
|
237 | 193 | resource_id=TEST_DRIVE_FOLDER_ID,
|
238 | 194 | resource_type=GoogleDriveSource.ResourceId.ResourceType.RESOURCE_TYPE_FOLDER,
|
239 | 195 | )
|
240 | 196 | ]
|
241 |
| -TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER.rag_file_parsing_config.use_advanced_pdf_parsing = ( |
242 |
| - False |
243 |
| -) |
244 | 197 | TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER_PARSING = ImportRagFilesConfig()
|
245 | 198 | TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER_PARSING.google_drive_source.resource_ids = [
|
246 | 199 | GoogleDriveSource.ResourceId(
|
247 | 200 | resource_id=TEST_DRIVE_FOLDER_ID,
|
248 | 201 | resource_type=GoogleDriveSource.ResourceId.ResourceType.RESOURCE_TYPE_FOLDER,
|
249 | 202 | )
|
250 | 203 | ]
|
251 |
| -TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER_PARSING.rag_file_parsing_config.use_advanced_pdf_parsing = ( |
252 |
| - True |
253 |
| -) |
254 | 204 | TEST_IMPORT_REQUEST_DRIVE_FOLDER = ImportRagFilesRequest(
|
255 | 205 | parent=TEST_RAG_CORPUS_RESOURCE_NAME,
|
256 | 206 | import_rag_files_config=TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER,
|
|
263 | 213 | TEST_DRIVE_FILE_ID = "456"
|
264 | 214 | TEST_DRIVE_FILE = f"https://drive.google.com/file/d/{TEST_DRIVE_FILE_ID}"
|
265 | 215 | TEST_IMPORT_FILES_CONFIG_DRIVE_FILE = ImportRagFilesConfig(
|
266 |
| - rag_file_chunking_config=RagFileChunkingConfig( |
267 |
| - chunk_size=TEST_CHUNK_SIZE, |
268 |
| - chunk_overlap=TEST_CHUNK_OVERLAP, |
269 |
| - ), |
270 |
| - rag_file_parsing_config=RagFileParsingConfig(use_advanced_pdf_parsing=False), |
| 216 | + rag_file_transformation_config=TEST_RAG_FILE_TRANSFORMATION_CONFIG, |
271 | 217 | )
|
272 | 218 | TEST_IMPORT_FILES_CONFIG_DRIVE_FILE.max_embedding_requests_per_min = 800
|
273 | 219 |
|
|
322 | 268 | ],
|
323 | 269 | )
|
324 | 270 | TEST_IMPORT_FILES_CONFIG_SLACK_SOURCE = ImportRagFilesConfig(
|
325 |
| - rag_file_chunking_config=RagFileChunkingConfig( |
326 |
| - chunk_size=TEST_CHUNK_SIZE, |
327 |
| - chunk_overlap=TEST_CHUNK_OVERLAP, |
328 |
| - ) |
| 271 | + rag_file_transformation_config=TEST_RAG_FILE_TRANSFORMATION_CONFIG, |
329 | 272 | )
|
330 | 273 | TEST_IMPORT_FILES_CONFIG_SLACK_SOURCE.slack_source.channels = [
|
331 | 274 | GapicSlackSource.SlackChannels(
|
|
377 | 320 | ],
|
378 | 321 | )
|
379 | 322 | TEST_IMPORT_FILES_CONFIG_JIRA_SOURCE = ImportRagFilesConfig(
|
380 |
| - rag_file_chunking_config=RagFileChunkingConfig( |
381 |
| - chunk_size=TEST_CHUNK_SIZE, |
382 |
| - chunk_overlap=TEST_CHUNK_OVERLAP, |
383 |
| - ) |
| 323 | + rag_file_transformation_config=TEST_RAG_FILE_TRANSFORMATION_CONFIG, |
384 | 324 | )
|
385 | 325 | TEST_IMPORT_FILES_CONFIG_JIRA_SOURCE.jira_source.jira_queries = [
|
386 | 326 | GapicJiraSource.JiraQueries(
|
|
412 | 352 | ],
|
413 | 353 | )
|
414 | 354 | TEST_IMPORT_FILES_CONFIG_SHARE_POINT_SOURCE = ImportRagFilesConfig(
|
415 |
| - rag_file_chunking_config=RagFileChunkingConfig( |
416 |
| - chunk_size=TEST_CHUNK_SIZE, |
417 |
| - chunk_overlap=TEST_CHUNK_OVERLAP, |
418 |
| - ), |
| 355 | + rag_file_transformation_config=TEST_RAG_FILE_TRANSFORMATION_CONFIG, |
419 | 356 | share_point_sources=GapicSharePointSources(
|
420 | 357 | share_point_sources=[
|
421 | 358 | GapicSharePointSources.SharePointSource(
|
|
490 | 427 | )
|
491 | 428 |
|
492 | 429 | TEST_IMPORT_FILES_CONFIG_SHARE_POINT_SOURCE_NO_FOLDERS = ImportRagFilesConfig(
|
493 |
| - rag_file_chunking_config=RagFileChunkingConfig( |
494 |
| - chunk_size=TEST_CHUNK_SIZE, |
495 |
| - chunk_overlap=TEST_CHUNK_OVERLAP, |
496 |
| - ), |
| 430 | + rag_file_transformation_config=TEST_RAG_FILE_TRANSFORMATION_CONFIG, |
497 | 431 | share_point_sources=GapicSharePointSources(
|
498 | 432 | share_point_sources=[
|
499 | 433 | GapicSharePointSources.SharePointSource(
|
|
0 commit comments