diff --git a/docs/changelog/next_release/143.feature.rst b/docs/changelog/next_release/143.feature.rst new file mode 100644 index 00000000..680a7f30 --- /dev/null +++ b/docs/changelog/next_release/143.feature.rst @@ -0,0 +1 @@ +Add ORC API schema \ No newline at end of file diff --git a/syncmaster/schemas/v1/__init__.py b/syncmaster/schemas/v1/__init__.py index bf3d0e21..83a7630f 100644 --- a/syncmaster/schemas/v1/__init__.py +++ b/syncmaster/schemas/v1/__init__.py @@ -36,7 +36,14 @@ PostgresReadTransferSourceAndTarget, ReadDBTransfer, ) -from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, XML, Excel, JSONLine +from syncmaster.schemas.v1.transfers.file_format import ( + CSV, + JSON, + ORC, + XML, + Excel, + JSONLine, +) from syncmaster.schemas.v1.transfers.run import ( CreateRunSchema, ReadRunSchema, diff --git a/syncmaster/schemas/v1/file_formats.py b/syncmaster/schemas/v1/file_formats.py index b0e038e6..26e1f2ea 100644 --- a/syncmaster/schemas/v1/file_formats.py +++ b/syncmaster/schemas/v1/file_formats.py @@ -7,3 +7,4 @@ JSON_FORMAT = Literal["json"] EXCEL_FORMAT = Literal["excel"] XML_FORMAT = Literal["xml"] +ORC_FORMAT = Literal["orc"] diff --git a/syncmaster/schemas/v1/transfers/file/base.py b/syncmaster/schemas/v1/transfers/file/base.py index d4ddf4af..4cacb30f 100644 --- a/syncmaster/schemas/v1/transfers/file/base.py +++ b/syncmaster/schemas/v1/transfers/file/base.py @@ -7,21 +7,28 @@ from pydantic import BaseModel, Field, field_validator -from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, XML, Excel, JSONLine +from syncmaster.schemas.v1.transfers.file_format import ( + CSV, + JSON, + ORC, + XML, + Excel, + JSONLine, +) # At the moment the ReadTransferSourceParams and ReadTransferTargetParams # classes are identical but may change in the future class ReadFileTransferSource(BaseModel): directory_path: str - file_format: CSV | JSONLine | JSON | Excel | XML = Field(..., discriminator="type") + file_format: CSV | JSONLine | JSON | Excel | XML | ORC = Field(..., discriminator="type") options: dict[str, Any] class ReadFileTransferTarget(BaseModel): directory_path: str # JSON format is not supported for writing - file_format: CSV | JSONLine | Excel | XML = Field( + file_format: CSV | JSONLine | Excel | XML | ORC = Field( ..., discriminator="type", ) @@ -32,7 +39,7 @@ class ReadFileTransferTarget(BaseModel): # classes are identical but may change in the future class CreateFileTransferSource(BaseModel): directory_path: str - file_format: CSV | JSONLine | JSON | Excel | XML = Field(..., discriminator="type") + file_format: CSV | JSONLine | JSON | Excel | XML | ORC = Field(..., discriminator="type") options: dict[str, Any] = Field(default_factory=dict) class Config: @@ -49,7 +56,7 @@ def _directory_path_is_valid_path(cls, value): class CreateFileTransferTarget(BaseModel): directory_path: str # JSON format is not supported as a target - file_format: CSV | JSONLine | Excel | XML = Field( + file_format: CSV | JSONLine | Excel | XML | ORC = Field( ..., discriminator="type", ) diff --git a/syncmaster/schemas/v1/transfers/file_format.py b/syncmaster/schemas/v1/transfers/file_format.py index da0f7e53..4f04a457 100644 --- a/syncmaster/schemas/v1/transfers/file_format.py +++ b/syncmaster/schemas/v1/transfers/file_format.py @@ -9,6 +9,7 @@ EXCEL_FORMAT, JSON_FORMAT, JSONLINE_FORMAT, + ORC_FORMAT, XML_FORMAT, ) @@ -45,3 +46,7 @@ class XML(BaseModel): type: XML_FORMAT root_tag: str row_tag: str + + +class ORC(BaseModel): + type: ORC_FORMAT diff --git a/tests/test_unit/test_transfers/test_create_transfer.py b/tests/test_unit/test_transfers/test_create_transfer.py index 03245e54..954872bd 100644 --- a/tests/test_unit/test_transfers/test_create_transfer.py +++ b/tests/test_unit/test_transfers/test_create_transfer.py @@ -636,11 +636,11 @@ async def test_developer_plus_can_not_create_transfer_with_target_format_json( "context": { "discriminator": "'type'", "tag": "json", - "expected_tags": "'csv', 'jsonline', 'excel', 'xml'", + "expected_tags": "'csv', 'jsonline', 'excel', 'xml', 'orc'", }, "input": {"type": "json", "lineSep": "\n", "encoding": "utf-8"}, "location": ["body", "target_params", "s3", "file_format"], - "message": "Input tag 'json' found using 'type' does not match any of the expected tags: 'csv', 'jsonline', 'excel', 'xml'", + "message": "Input tag 'json' found using 'type' does not match any of the expected tags: 'csv', 'jsonline', 'excel', 'xml', 'orc'", "code": "union_tag_invalid", }, ], diff --git a/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py b/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py index 7f584075..a16cdf20 100644 --- a/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py +++ b/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py @@ -63,6 +63,16 @@ "some": "option", }, }, + { + "type": "s3", + "directory_path": "/some/orc/path", + "file_format": { + "type": "orc", + }, + "options": { + "some": "option", + }, + }, ], ) async def test_developer_plus_can_create_s3_transfer( @@ -144,6 +154,9 @@ async def test_developer_plus_can_create_s3_transfer( "root_tag": "data", "row_tag": "record", }, + "orc": { + "type": "orc", + }, } for params in (transfer.source_params, transfer.target_params): @@ -191,6 +204,13 @@ async def test_developer_plus_can_create_s3_transfer( "row_tag": "record", }, }, + { + "type": "hdfs", + "directory_path": "/some/orc/path", + "file_format": { + "type": "orc", + }, + }, ], ) async def test_developer_plus_can_create_hdfs_transfer( @@ -273,6 +293,9 @@ async def test_developer_plus_can_create_hdfs_transfer( "root_tag": "data", "row_tag": "record", }, + "orc": { + "type": "orc", + }, } for params in (transfer.source_params, transfer.target_params): @@ -320,6 +343,13 @@ async def test_developer_plus_can_create_hdfs_transfer( "row_tag": "record", }, }, + { + "type": "s3", + "directory_path": "some/path", + "file_format": { + "type": "orc", + }, + }, ], ) async def test_cannot_create_file_transfer_with_relative_path( diff --git a/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py b/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py index 5380f185..198b2c13 100644 --- a/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py +++ b/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py @@ -43,6 +43,14 @@ }, "options": {}, }, + { + "type": "s3", + "directory_path": "/some/orc/path", + "file_format": { + "type": "orc", + }, + "options": {}, + }, ], ) @pytest.mark.parametrize( diff --git a/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py b/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py index 50398a11..99f4043f 100644 --- a/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py +++ b/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py @@ -43,6 +43,14 @@ }, "options": {}, }, + { + "type": "s3", + "directory_path": "/some/orc/path", + "file_format": { + "type": "orc", + }, + "options": {}, + }, ], ) @pytest.mark.parametrize(