Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/changelog/next_release/144.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add Parquet API schema
1 change: 1 addition & 0 deletions syncmaster/schemas/v1/file_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@
EXCEL_FORMAT = Literal["excel"]
XML_FORMAT = Literal["xml"]
ORC_FORMAT = Literal["orc"]
PARQUET_FORMAT = Literal["parquet"]
9 changes: 5 additions & 4 deletions syncmaster/schemas/v1/transfers/file/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,22 @@
XML,
Excel,
JSONLine,
Parquet,
)


# At the moment the ReadFileTransferSource and ReadFileTransferTarget
# classes are nearly identical (only the target rejects JSON) but may diverge further in the future
class ReadFileTransferSource(BaseModel):
directory_path: str
file_format: CSV | JSONLine | JSON | Excel | XML | ORC = Field(..., discriminator="type")
file_format: CSV | JSONLine | JSON | Excel | XML | ORC | Parquet = Field(..., discriminator="type")
options: dict[str, Any]


class ReadFileTransferTarget(BaseModel):
directory_path: str
# JSON format is not supported for writing
file_format: CSV | JSONLine | Excel | XML | ORC = Field(
file_format: CSV | JSONLine | Excel | XML | ORC | Parquet = Field(
...,
discriminator="type",
)
Expand All @@ -39,7 +40,7 @@ class ReadFileTransferTarget(BaseModel):
# classes are identical but may change in the future
class CreateFileTransferSource(BaseModel):
directory_path: str
file_format: CSV | JSONLine | JSON | Excel | XML | ORC = Field(..., discriminator="type")
file_format: CSV | JSONLine | JSON | Excel | XML | ORC | Parquet = Field(..., discriminator="type")
options: dict[str, Any] = Field(default_factory=dict)

class Config:
Expand All @@ -56,7 +57,7 @@ def _directory_path_is_valid_path(cls, value):
class CreateFileTransferTarget(BaseModel):
directory_path: str
# JSON format is not supported as a target
file_format: CSV | JSONLine | Excel | XML | ORC = Field(
file_format: CSV | JSONLine | Excel | XML | ORC | Parquet = Field(
...,
discriminator="type",
)
Expand Down
5 changes: 5 additions & 0 deletions syncmaster/schemas/v1/transfers/file_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
JSON_FORMAT,
JSONLINE_FORMAT,
ORC_FORMAT,
PARQUET_FORMAT,
XML_FORMAT,
)

Expand Down Expand Up @@ -50,3 +51,7 @@ class XML(BaseModel):

class ORC(BaseModel):
    """File format schema for ORC.

    Carries no format-specific options; the only field is the ``type`` tag,
    which is restricted to the ORC format literal.
    """

    # Tag value identifying this format inside a ``file_format`` union.
    type: ORC_FORMAT


class Parquet(BaseModel):
    """File format schema for Parquet.

    Carries no format-specific options; the only field is the ``type`` tag,
    which is restricted to the Parquet format literal.
    """

    # Tag value identifying this format inside a ``file_format`` union.
    type: PARQUET_FORMAT
4 changes: 2 additions & 2 deletions tests/test_unit/test_transfers/test_create_transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,11 +636,11 @@ async def test_developer_plus_can_not_create_transfer_with_target_format_json(
"context": {
"discriminator": "'type'",
"tag": "json",
"expected_tags": "'csv', 'jsonline', 'excel', 'xml', 'orc'",
"expected_tags": "'csv', 'jsonline', 'excel', 'xml', 'orc', 'parquet'",
},
"input": {"type": "json", "lineSep": "\n", "encoding": "utf-8"},
"location": ["body", "target_params", "s3", "file_format"],
"message": "Input tag 'json' found using 'type' does not match any of the expected tags: 'csv', 'jsonline', 'excel', 'xml', 'orc'",
"message": "Input tag 'json' found using 'type' does not match any of the expected tags: 'csv', 'jsonline', 'excel', 'xml', 'orc', 'parquet'",
"code": "union_tag_invalid",
},
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,16 @@
"some": "option",
},
},
{
"type": "s3",
"directory_path": "/some/parquet/path",
"file_format": {
"type": "parquet",
},
"options": {
"some": "option",
},
},
],
)
async def test_developer_plus_can_create_s3_transfer(
Expand Down Expand Up @@ -157,6 +167,9 @@ async def test_developer_plus_can_create_s3_transfer(
"orc": {
"type": "orc",
},
"parquet": {
"type": "parquet",
},
}

for params in (transfer.source_params, transfer.target_params):
Expand Down Expand Up @@ -211,6 +224,13 @@ async def test_developer_plus_can_create_s3_transfer(
"type": "orc",
},
},
{
"type": "hdfs",
"directory_path": "/some/parquet/path",
"file_format": {
"type": "parquet",
},
},
],
)
async def test_developer_plus_can_create_hdfs_transfer(
Expand Down Expand Up @@ -296,6 +316,9 @@ async def test_developer_plus_can_create_hdfs_transfer(
"orc": {
"type": "orc",
},
"parquet": {
"type": "parquet",
},
}

for params in (transfer.source_params, transfer.target_params):
Expand Down Expand Up @@ -350,6 +373,13 @@ async def test_developer_plus_can_create_hdfs_transfer(
"type": "orc",
},
},
{
"type": "s3",
"directory_path": "some/path",
"file_format": {
"type": "parquet",
},
},
],
)
async def test_cannot_create_file_transfer_with_relative_path(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@
},
"options": {},
},
{
"type": "s3",
"directory_path": "/some/parquet/path",
"file_format": {
"type": "parquet",
},
"options": {},
},
],
)
@pytest.mark.parametrize(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@
},
"options": {},
},
{
"type": "s3",
"directory_path": "/some/parquet/path",
"file_format": {
"type": "parquet",
},
"options": {},
},
],
)
@pytest.mark.parametrize(
Expand Down