diff --git a/docs/changelog/next_release/144.feature.rst b/docs/changelog/next_release/144.feature.rst new file mode 100644 index 00000000..c26a813d --- /dev/null +++ b/docs/changelog/next_release/144.feature.rst @@ -0,0 +1 @@ +Add Parquet API schema \ No newline at end of file diff --git a/syncmaster/schemas/v1/__init__.py b/syncmaster/schemas/v1/__init__.py index 83a7630f..d181f668 100644 --- a/syncmaster/schemas/v1/__init__.py +++ b/syncmaster/schemas/v1/__init__.py @@ -43,6 +43,7 @@ XML, Excel, JSONLine, + Parquet, ) from syncmaster.schemas.v1.transfers.run import ( CreateRunSchema, diff --git a/syncmaster/schemas/v1/file_formats.py b/syncmaster/schemas/v1/file_formats.py index 26e1f2ea..668338fd 100644 --- a/syncmaster/schemas/v1/file_formats.py +++ b/syncmaster/schemas/v1/file_formats.py @@ -8,3 +8,4 @@ EXCEL_FORMAT = Literal["excel"] XML_FORMAT = Literal["xml"] ORC_FORMAT = Literal["orc"] +PARQUET_FORMAT = Literal["parquet"] diff --git a/syncmaster/schemas/v1/transfers/file/base.py b/syncmaster/schemas/v1/transfers/file/base.py index 4cacb30f..a3e80b49 100644 --- a/syncmaster/schemas/v1/transfers/file/base.py +++ b/syncmaster/schemas/v1/transfers/file/base.py @@ -14,6 +14,7 @@ XML, Excel, JSONLine, + Parquet, ) @@ -21,14 +22,14 @@ # classes are identical but may change in the future class ReadFileTransferSource(BaseModel): directory_path: str - file_format: CSV | JSONLine | JSON | Excel | XML | ORC = Field(..., discriminator="type") + file_format: CSV | JSONLine | JSON | Excel | XML | ORC | Parquet = Field(..., discriminator="type") options: dict[str, Any] class ReadFileTransferTarget(BaseModel): directory_path: str # JSON format is not supported for writing - file_format: CSV | JSONLine | Excel | XML | ORC = Field( + file_format: CSV | JSONLine | Excel | XML | ORC | Parquet = Field( ..., discriminator="type", ) @@ -39,7 +40,7 @@ class ReadFileTransferTarget(BaseModel): # classes are identical but may change in the future class CreateFileTransferSource(BaseModel): directory_path: str - file_format: CSV | JSONLine | JSON | Excel | XML | ORC = Field(..., discriminator="type") + file_format: CSV | JSONLine | JSON | Excel | XML | ORC | Parquet = Field(..., discriminator="type") options: dict[str, Any] = Field(default_factory=dict) class Config: @@ -56,7 +57,7 @@ def _directory_path_is_valid_path(cls, value): class CreateFileTransferTarget(BaseModel): directory_path: str # JSON format is not supported as a target - file_format: CSV | JSONLine | Excel | XML | ORC = Field( + file_format: CSV | JSONLine | Excel | XML | ORC | Parquet = Field( ..., discriminator="type", ) diff --git a/syncmaster/schemas/v1/transfers/file_format.py b/syncmaster/schemas/v1/transfers/file_format.py index 4f04a457..cab10edb 100644 --- a/syncmaster/schemas/v1/transfers/file_format.py +++ b/syncmaster/schemas/v1/transfers/file_format.py @@ -10,6 +10,7 @@ JSON_FORMAT, JSONLINE_FORMAT, ORC_FORMAT, + PARQUET_FORMAT, XML_FORMAT, ) @@ -50,3 +51,7 @@ class XML(BaseModel): class ORC(BaseModel): type: ORC_FORMAT + + +class Parquet(BaseModel): + type: PARQUET_FORMAT diff --git a/tests/test_unit/test_transfers/test_create_transfer.py b/tests/test_unit/test_transfers/test_create_transfer.py index 954872bd..f60526a6 100644 --- a/tests/test_unit/test_transfers/test_create_transfer.py +++ b/tests/test_unit/test_transfers/test_create_transfer.py @@ -636,11 +636,11 @@ async def test_developer_plus_can_not_create_transfer_with_target_format_json( "context": { "discriminator": "'type'", "tag": "json", - "expected_tags": "'csv', 'jsonline', 'excel', 'xml', 'orc'", + "expected_tags": "'csv', 'jsonline', 'excel', 'xml', 'orc', 'parquet'", }, "input": {"type": "json", "lineSep": "\n", "encoding": "utf-8"}, "location": ["body", "target_params", "s3", "file_format"], - "message": "Input tag 'json' found using 'type' does not match any of the expected tags: 'csv', 'jsonline', 'excel', 'xml', 'orc'", + "message": "Input tag 'json' found using 'type' does not match any of the expected tags: 'csv', 'jsonline', 'excel', 'xml', 'orc', 'parquet'", "code": "union_tag_invalid", }, ], diff --git a/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py b/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py index a16cdf20..e5c97c2c 100644 --- a/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py +++ b/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py @@ -73,6 +73,16 @@ "some": "option", }, }, + { + "type": "s3", + "directory_path": "/some/parquet/path", + "file_format": { + "type": "parquet", + }, + "options": { + "some": "option", + }, + }, ], ) async def test_developer_plus_can_create_s3_transfer( @@ -157,6 +167,9 @@ async def test_developer_plus_can_create_s3_transfer( "orc": { "type": "orc", }, + "parquet": { + "type": "parquet", + }, } for params in (transfer.source_params, transfer.target_params): @@ -211,6 +224,13 @@ async def test_developer_plus_can_create_s3_transfer( "type": "orc", }, }, + { + "type": "hdfs", + "directory_path": "/some/parquet/path", + "file_format": { + "type": "parquet", + }, + }, ], ) async def test_developer_plus_can_create_hdfs_transfer( @@ -296,6 +316,9 @@ async def test_developer_plus_can_create_hdfs_transfer( "orc": { "type": "orc", }, + "parquet": { + "type": "parquet", + }, } for params in (transfer.source_params, transfer.target_params): @@ -350,6 +373,13 @@ async def test_developer_plus_can_create_hdfs_transfer( "type": "orc", }, }, + { + "type": "s3", + "directory_path": "some/path", + "file_format": { + "type": "parquet", + }, + }, ], ) async def test_cannot_create_file_transfer_with_relative_path( diff --git a/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py b/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py index 198b2c13..c6fec34c 100644 --- a/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py +++ b/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py @@ -51,6 +51,14 @@ }, "options": {}, }, + { + "type": "s3", + "directory_path": "/some/parquet/path", + "file_format": { + "type": "parquet", + }, + "options": {}, + }, ], ) @pytest.mark.parametrize( diff --git a/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py b/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py index 99f4043f..cdc8b1c9 100644 --- a/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py +++ b/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py @@ -51,6 +51,14 @@ }, "options": {}, }, + { + "type": "s3", + "directory_path": "/some/parquet/path", + "file_format": { + "type": "parquet", + }, + "options": {}, + }, ], ) @pytest.mark.parametrize(