21
21
prepare_import_files_request ,
22
22
set_embedding_model_config ,
23
23
)
24
+ from vertexai .rag .utils .resources import (
25
+ ChunkingConfig ,
26
+ TransformationConfig ,
27
+ )
24
28
from google .cloud .aiplatform_v1beta1 import (
25
29
VertexRagDataServiceAsyncClient ,
26
30
VertexRagDataServiceClient ,
@@ -276,6 +280,18 @@ def list_rag_files_pager_mock():
276
280
yield list_rag_files_pager_mock
277
281
278
282
283
def create_transformation_config(
    chunk_size: int = test_rag_constants_preview.TEST_CHUNK_SIZE,
    chunk_overlap: int = test_rag_constants_preview.TEST_CHUNK_OVERLAP,
):
    """Build a ``TransformationConfig`` wrapping a single ``ChunkingConfig``.

    Shared by the ``prepare_import_files_request`` tests so each one can pass
    ``transformation_config=create_transformation_config()``.

    Args:
        chunk_size: Forwarded to ``ChunkingConfig(chunk_size=...)``. Defaults
            to ``test_rag_constants_preview.TEST_CHUNK_SIZE``.
        chunk_overlap: Forwarded to ``ChunkingConfig(chunk_overlap=...)``.
            Defaults to ``test_rag_constants_preview.TEST_CHUNK_OVERLAP``.

    Returns:
        A ``TransformationConfig`` whose ``chunking_config`` holds the given
        chunk size and overlap.
    """
    chunking = ChunkingConfig(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return TransformationConfig(chunking_config=chunking)
293
+
294
+
279
295
def rag_corpus_eq (returned_corpus , expected_corpus ):
280
296
assert returned_corpus .name == expected_corpus .name
281
297
assert returned_corpus .display_name == expected_corpus .display_name
@@ -309,6 +325,10 @@ def import_files_request_eq(returned_request, expected_request):
309
325
returned_request .import_rag_files_config .rag_file_parsing_config
310
326
== expected_request .import_rag_files_config .rag_file_parsing_config
311
327
)
328
+ assert (
329
+ returned_request .import_rag_files_config .rag_file_transformation_config
330
+ == expected_request .import_rag_files_config .rag_file_transformation_config
331
+ )
312
332
313
333
314
334
@pytest .mark .usefixtures ("google_auth_mock" )
@@ -654,6 +674,17 @@ def test_delete_file_failure(self):
654
674
e .match ("Failed in RagFile deletion due to" )
655
675
656
676
def test_prepare_import_files_request_list_gcs_uris(self):
    """Check the import request built from one GCS path and a transformation config.

    The request returned by ``prepare_import_files_request`` is compared
    against ``TEST_IMPORT_REQUEST_GCS`` with ``import_files_request_eq``.
    """
    built_request = prepare_import_files_request(
        corpus_name=test_rag_constants_preview.TEST_RAG_CORPUS_RESOURCE_NAME,
        paths=[test_rag_constants_preview.TEST_GCS_PATH],
        transformation_config=create_transformation_config(),
    )
    expected_request = test_rag_constants_preview.TEST_IMPORT_REQUEST_GCS
    import_files_request_eq(built_request, expected_request)
686
+
687
+ def test_prepare_import_files_request_list_gcs_uris_no_transformation_config (self ):
657
688
paths = [test_rag_constants_preview .TEST_GCS_PATH ]
658
689
request = prepare_import_files_request (
659
690
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
@@ -676,8 +707,7 @@ def test_prepare_import_files_request_drive_folders(self, path):
676
707
request = prepare_import_files_request (
677
708
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
678
709
paths = [path ],
679
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
680
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
710
+ transformation_config = create_transformation_config (),
681
711
)
682
712
import_files_request_eq (
683
713
request , test_rag_constants_preview .TEST_IMPORT_REQUEST_DRIVE_FOLDER
@@ -694,8 +724,7 @@ def test_prepare_import_files_request_drive_folders_with_pdf_parsing(self, path)
694
724
request = prepare_import_files_request (
695
725
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
696
726
paths = [path ],
697
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
698
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
727
+ transformation_config = create_transformation_config (),
699
728
use_advanced_pdf_parsing = True ,
700
729
)
701
730
import_files_request_eq (
@@ -707,8 +736,7 @@ def test_prepare_import_files_request_drive_files(self):
707
736
request = prepare_import_files_request (
708
737
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
709
738
paths = paths ,
710
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
711
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
739
+ transformation_config = create_transformation_config (),
712
740
max_embedding_requests_per_min = 800 ,
713
741
)
714
742
import_files_request_eq (
@@ -721,8 +749,7 @@ def test_prepare_import_files_request_invalid_drive_path(self):
721
749
prepare_import_files_request (
722
750
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
723
751
paths = paths ,
724
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
725
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
752
+ transformation_config = create_transformation_config (),
726
753
)
727
754
e .match ("is not a valid Google Drive url" )
728
755
@@ -732,17 +759,15 @@ def test_prepare_import_files_request_invalid_path(self):
732
759
prepare_import_files_request (
733
760
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
734
761
paths = paths ,
735
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
736
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
762
+ transformation_config = create_transformation_config (),
737
763
)
738
764
e .match ("path must be a Google Cloud Storage uri or a Google Drive url" )
739
765
740
766
def test_prepare_import_files_request_slack_source (self ):
741
767
request = prepare_import_files_request (
742
768
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
743
769
source = test_rag_constants_preview .TEST_SLACK_SOURCE ,
744
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
745
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
770
+ transformation_config = create_transformation_config (),
746
771
)
747
772
import_files_request_eq (
748
773
request , test_rag_constants_preview .TEST_IMPORT_REQUEST_SLACK_SOURCE
@@ -752,8 +777,7 @@ def test_prepare_import_files_request_jira_source(self):
752
777
request = prepare_import_files_request (
753
778
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
754
779
source = test_rag_constants_preview .TEST_JIRA_SOURCE ,
755
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
756
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
780
+ transformation_config = create_transformation_config (),
757
781
)
758
782
import_files_request_eq (
759
783
request , test_rag_constants_preview .TEST_IMPORT_REQUEST_JIRA_SOURCE
@@ -763,8 +787,7 @@ def test_prepare_import_files_request_sharepoint_source(self):
763
787
request = prepare_import_files_request (
764
788
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
765
789
source = test_rag_constants_preview .TEST_SHARE_POINT_SOURCE ,
766
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
767
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
790
+ transformation_config = create_transformation_config (),
768
791
)
769
792
import_files_request_eq (
770
793
request , test_rag_constants_preview .TEST_IMPORT_REQUEST_SHARE_POINT_SOURCE
@@ -775,8 +798,7 @@ def test_prepare_import_files_request_sharepoint_source_2_drives(self):
775
798
prepare_import_files_request (
776
799
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
777
800
source = test_rag_constants_preview .TEST_SHARE_POINT_SOURCE_2_DRIVES ,
778
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
779
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
801
+ transformation_config = create_transformation_config (),
780
802
)
781
803
e .match ("drive_name and drive_id cannot both be set." )
782
804
@@ -785,8 +807,7 @@ def test_prepare_import_files_request_sharepoint_source_2_folders(self):
785
807
prepare_import_files_request (
786
808
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
787
809
source = test_rag_constants_preview .TEST_SHARE_POINT_SOURCE_2_FOLDERS ,
788
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
789
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
810
+ transformation_config = create_transformation_config (),
790
811
)
791
812
e .match ("sharepoint_folder_path and sharepoint_folder_id cannot both be set." )
792
813
@@ -795,17 +816,15 @@ def test_prepare_import_files_request_sharepoint_source_no_drives(self):
795
816
prepare_import_files_request (
796
817
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
797
818
source = test_rag_constants_preview .TEST_SHARE_POINT_SOURCE_NO_DRIVES ,
798
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
799
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
819
+ transformation_config = create_transformation_config (),
800
820
)
801
821
e .match ("Either drive_name and drive_id must be set." )
802
822
803
823
def test_prepare_import_files_request_sharepoint_source_no_folders (self ):
804
824
request = prepare_import_files_request (
805
825
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
806
826
source = test_rag_constants_preview .TEST_SHARE_POINT_SOURCE_NO_FOLDERS ,
807
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
808
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
827
+ transformation_config = create_transformation_config (),
809
828
)
810
829
import_files_request_eq (
811
830
request ,
0 commit comments