Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/changelog/next_release/144.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add Parquet API schema
1 change: 1 addition & 0 deletions syncmaster/schemas/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
XML,
Excel,
JSONLine,
Parquet,
)
from syncmaster.schemas.v1.transfers.run import (
CreateRunSchema,
Expand Down
1 change: 1 addition & 0 deletions syncmaster/schemas/v1/file_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@
EXCEL_FORMAT = Literal["excel"]
XML_FORMAT = Literal["xml"]
ORC_FORMAT = Literal["orc"]
PARQUET_FORMAT = Literal["parquet"]
9 changes: 5 additions & 4 deletions syncmaster/schemas/v1/transfers/file/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,22 @@
XML,
Excel,
JSONLine,
Parquet,
)


# At the moment the ReadFileTransferSource and ReadFileTransferTarget
# classes are nearly identical (only the source accepts JSON) but may change in the future
class ReadFileTransferSource(BaseModel):
directory_path: str
file_format: CSV | JSONLine | JSON | Excel | XML | ORC = Field(..., discriminator="type")
file_format: CSV | JSONLine | JSON | Excel | XML | ORC | Parquet = Field(..., discriminator="type")
options: dict[str, Any]


class ReadFileTransferTarget(BaseModel):
directory_path: str
# JSON format is not supported for writing
file_format: CSV | JSONLine | Excel | XML | ORC = Field(
file_format: CSV | JSONLine | Excel | XML | ORC | Parquet = Field(
...,
discriminator="type",
)
Expand All @@ -39,7 +40,7 @@ class ReadFileTransferTarget(BaseModel):
# classes are identical but may change in the future
class CreateFileTransferSource(BaseModel):
directory_path: str
file_format: CSV | JSONLine | JSON | Excel | XML | ORC = Field(..., discriminator="type")
file_format: CSV | JSONLine | JSON | Excel | XML | ORC | Parquet = Field(..., discriminator="type")
options: dict[str, Any] = Field(default_factory=dict)

class Config:
Expand All @@ -56,7 +57,7 @@ def _directory_path_is_valid_path(cls, value):
class CreateFileTransferTarget(BaseModel):
directory_path: str
# JSON format is not supported as a target
file_format: CSV | JSONLine | Excel | XML | ORC = Field(
file_format: CSV | JSONLine | Excel | XML | ORC | Parquet = Field(
...,
discriminator="type",
)
Expand Down
5 changes: 5 additions & 0 deletions syncmaster/schemas/v1/transfers/file_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
JSON_FORMAT,
JSONLINE_FORMAT,
ORC_FORMAT,
PARQUET_FORMAT,
XML_FORMAT,
)

Expand Down Expand Up @@ -50,3 +51,7 @@ class XML(BaseModel):

# ORC file-format schema. It declares no format-specific options, only the
# `type` tag, which is typed as ORC_FORMAT (Literal["orc"]).
class ORC(BaseModel):
    # Tag value; the file_format unions pick a model via Field(discriminator="type").
    type: ORC_FORMAT


# Parquet file-format schema. It declares no format-specific options, only the
# `type` tag, which is typed as PARQUET_FORMAT (Literal["parquet"]).
class Parquet(BaseModel):
    # Tag value; the file_format unions pick a model via Field(discriminator="type").
    type: PARQUET_FORMAT
4 changes: 2 additions & 2 deletions tests/test_unit/test_transfers/test_create_transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,11 +636,11 @@ async def test_developer_plus_can_not_create_transfer_with_target_format_json(
"context": {
"discriminator": "'type'",
"tag": "json",
"expected_tags": "'csv', 'jsonline', 'excel', 'xml', 'orc'",
"expected_tags": "'csv', 'jsonline', 'excel', 'xml', 'orc', 'parquet'",
},
"input": {"type": "json", "lineSep": "\n", "encoding": "utf-8"},
"location": ["body", "target_params", "s3", "file_format"],
"message": "Input tag 'json' found using 'type' does not match any of the expected tags: 'csv', 'jsonline', 'excel', 'xml', 'orc'",
"message": "Input tag 'json' found using 'type' does not match any of the expected tags: 'csv', 'jsonline', 'excel', 'xml', 'orc', 'parquet'",
"code": "union_tag_invalid",
},
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,16 @@
"some": "option",
},
},
{
"type": "s3",
"directory_path": "/some/parquet/path",
"file_format": {
"type": "parquet",
},
"options": {
"some": "option",
},
},
],
)
async def test_developer_plus_can_create_s3_transfer(
Expand Down Expand Up @@ -157,6 +167,9 @@ async def test_developer_plus_can_create_s3_transfer(
"orc": {
"type": "orc",
},
"parquet": {
"type": "parquet",
},
}

for params in (transfer.source_params, transfer.target_params):
Expand Down Expand Up @@ -211,6 +224,13 @@ async def test_developer_plus_can_create_s3_transfer(
"type": "orc",
},
},
{
"type": "hdfs",
"directory_path": "/some/parquet/path",
"file_format": {
"type": "parquet",
},
},
],
)
async def test_developer_plus_can_create_hdfs_transfer(
Expand Down Expand Up @@ -296,6 +316,9 @@ async def test_developer_plus_can_create_hdfs_transfer(
"orc": {
"type": "orc",
},
"parquet": {
"type": "parquet",
},
}

for params in (transfer.source_params, transfer.target_params):
Expand Down Expand Up @@ -350,6 +373,13 @@ async def test_developer_plus_can_create_hdfs_transfer(
"type": "orc",
},
},
{
"type": "s3",
"directory_path": "some/path",
"file_format": {
"type": "parquet",
},
},
],
)
async def test_cannot_create_file_transfer_with_relative_path(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@
},
"options": {},
},
{
"type": "s3",
"directory_path": "/some/parquet/path",
"file_format": {
"type": "parquet",
},
"options": {},
},
],
)
@pytest.mark.parametrize(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@
},
"options": {},
},
{
"type": "s3",
"directory_path": "/some/parquet/path",
"file_format": {
"type": "parquet",
},
"options": {},
},
],
)
@pytest.mark.parametrize(
Expand Down
Loading