Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/changelog/next_release/140.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add Excel API schema
2 changes: 1 addition & 1 deletion syncmaster/schemas/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
PostgresReadTransferSourceAndTarget,
ReadDBTransfer,
)
from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, JSONLine
from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, Excel, JSONLine
from syncmaster.schemas.v1.transfers.run import (
CreateRunSchema,
ReadRunSchema,
Expand Down
1 change: 1 addition & 0 deletions syncmaster/schemas/v1/file_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
# Discriminator tags for the file-format union used by transfer schemas.
# Each Literal is the value of the pydantic `type` field that selects the
# corresponding format model (CSV, JSONLine, JSON, Excel).
CSV_FORMAT = Literal["csv"]
JSONLINE_FORMAT = Literal["jsonline"]
JSON_FORMAT = Literal["json"]
EXCEL_FORMAT = Literal["excel"]
10 changes: 5 additions & 5 deletions syncmaster/schemas/v1/transfers/file/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,28 @@

from pydantic import BaseModel, Field, field_validator

from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, JSONLine
from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, Excel, JSONLine


# At the moment the ReadTransferSourceParams and ReadTransferTargetParams
# classes are identical but may change in the future
class ReadFileTransferSource(BaseModel):
directory_path: str
file_format: CSV | JSONLine | JSON = Field(..., discriminator="type")
file_format: CSV | JSONLine | JSON | Excel = Field(..., discriminator="type")
options: dict[str, Any]


class ReadFileTransferTarget(BaseModel):
directory_path: str
file_format: CSV | JSONLine = Field(..., discriminator="type") # JSON format is not supported for writing
file_format: CSV | JSONLine | Excel = Field(..., discriminator="type") # JSON format is not supported for writing
options: dict[str, Any]


# At the moment the CreateTransferSourceParams and CreateTransferTargetParams
# classes are identical but may change in the future
class CreateFileTransferSource(BaseModel):
directory_path: str
file_format: CSV | JSONLine | JSON = Field(..., discriminator="type")
file_format: CSV | JSONLine | JSON | Excel = Field(..., discriminator="type")
options: dict[str, Any] = Field(default_factory=dict)

class Config:
Expand All @@ -44,7 +44,7 @@ def _directory_path_is_valid_path(cls, value):

class CreateFileTransferTarget(BaseModel):
directory_path: str
file_format: CSV | JSONLine = Field(..., discriminator="type") # JSON FORMAT IS NOT SUPPORTED AS A TARGET !
file_format: CSV | JSONLine | Excel = Field(..., discriminator="type") # JSON FORMAT IS NOT SUPPORTED AS A TARGET !
options: dict[str, Any] = Field(default_factory=dict)

class Config:
Expand Down
15 changes: 13 additions & 2 deletions syncmaster/schemas/v1/transfers/file_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@

from pydantic import BaseModel

from syncmaster.schemas.v1.file_formats import CSV_FORMAT, JSON_FORMAT, JSONLINE_FORMAT
from syncmaster.schemas.v1.file_formats import (
CSV_FORMAT,
EXCEL_FORMAT,
JSON_FORMAT,
JSONLINE_FORMAT,
)


class CSV(BaseModel):
Expand All @@ -13,7 +18,7 @@ class CSV(BaseModel):
encoding: str = "utf-8"
quote: str = '"'
escape: str = "\\"
header: bool = False
include_header: bool = False
line_sep: str = "\n"


Expand All @@ -27,3 +32,9 @@ class JSON(BaseModel):
type: JSON_FORMAT
encoding: str = "utf-8"
line_sep: str = "\n"


class Excel(BaseModel):
    """Excel file-format options for file-based transfers (source and target)."""

    # Discriminator value: always "excel" (see EXCEL_FORMAT in file_formats.py).
    type: EXCEL_FORMAT
    # Whether the first row of the sheet holds column names.
    include_header: bool = False
    # Top-left cell of the data range, e.g. "A1"; None means the whole sheet.
    # NOTE(review): format of the cell address is not validated here — TODO confirm
    # downstream reader accepts arbitrary strings.
    start_cell: str | None = None
18 changes: 9 additions & 9 deletions tests/resources/file_df_connection/generate_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,11 @@ def _to_string(obj):
return obj


def _write_csv(data: list[dict], file: TextIO, header: bool = False, **kwargs) -> None:
def _write_csv(data: list[dict], file: TextIO, include_header: bool = False, **kwargs) -> None:
columns = list(data[0].keys())
writer = csv.DictWriter(file, fieldnames=columns, lineterminator="\n", **kwargs)

if header:
if include_header:
writer.writeheader()

for row in data:
Expand All @@ -123,7 +123,7 @@ def save_as_csv_without_header(data: list[dict], path: Path) -> None:
def save_as_csv_with_header(data: list[dict], path: Path) -> None:
    """Write *data* to ``path/file.csv`` with a header row.

    Creates *path* (and parents) if missing, then delegates to ``_write_csv``
    with ``include_header=True`` so column names are emitted first.
    """
    path.mkdir(parents=True, exist_ok=True)
    with open(path / "file.csv", "w", newline="") as file:
        # The pasted diff kept both the old `header=True` call and the new
        # `include_header=True` call; only the renamed keyword matches
        # _write_csv's current signature, so a single call is correct here.
        _write_csv(data, file, include_header=True)


def save_as_csv_with_delimiter(data: list[dict], path: Path) -> None:
Expand Down Expand Up @@ -403,12 +403,12 @@ def save_as_xlsx(data: list[dict], path: Path) -> None:
shutil.rmtree(root, ignore_errors=True)
root.mkdir(parents=True, exist_ok=True)

save_as_xlsx_with_options(data, root / "without_header", header=False)
save_as_xlsx_with_options(data, root / "with_header", header=True)
save_as_xlsx_with_options(data, root / "without_header", include_header=False)
save_as_xlsx_with_options(data, root / "with_header", include_header=True)
save_as_xlsx_with_options(
data,
root / "with_data_address",
header=False,
include_header=False,
sheet_name="ABC",
startcol=10,
startrow=5,
Expand All @@ -420,12 +420,12 @@ def save_as_xls(data: list[dict], path: Path) -> None:
shutil.rmtree(root, ignore_errors=True)
root.mkdir(parents=True, exist_ok=True)

save_as_xls_with_options(data, root / "without_header", header=False)
save_as_xls_with_options(data, root / "with_header", header=True)
save_as_xls_with_options(data, root / "without_header", include_header=False)
save_as_xls_with_options(data, root / "with_header", include_header=True)
save_as_xls_with_options(
data,
root / "with_data_address",
header=False,
include_header=False,
sheet_name="ABC",
startcol=10,
startrow=5,
Expand Down
8 changes: 6 additions & 2 deletions tests/test_unit/test_transfers/test_create_transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,10 +633,14 @@ async def test_developer_plus_can_not_create_transfer_with_target_format_json(
"message": "Invalid request",
"details": [
{
"context": {"discriminator": "'type'", "tag": "json", "expected_tags": "'csv', 'jsonline'"},
"context": {
"discriminator": "'type'",
"tag": "json",
"expected_tags": "'csv', 'jsonline', 'excel'",
},
"input": {"type": "json", "lineSep": "\n", "encoding": "utf-8"},
"location": ["body", "target_params", "s3", "file_format"],
"message": "Input tag 'json' found using 'type' does not match any of the expected tags: 'csv', 'jsonline'",
"message": "Input tag 'json' found using 'type' does not match any of the expected tags: 'csv', 'jsonline', 'excel'",
"code": "union_tag_invalid",
},
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,24 @@
"directory_path": "/some/pure/path",
"file_format": {
"type": "csv",
"delimiter": ",",
"encoding": "utf-8",
"quote": '"',
"escape": "\\",
"include_header": False,
"line_sep": "\n",
},
"options": {
"some": "option",
},
},
{
"type": "s3",
"directory_path": "/some/excel/path",
"file_format": {
"type": "excel",
"include_header": True,
"start_cell": "A1",
},
"options": {
"some": "option",
Expand Down Expand Up @@ -94,11 +112,28 @@ async def test_developer_plus_can_create_s3_transfer(
"queue_id": transfer.queue_id,
}

expected_file_formats = {
"csv": {
"type": "csv",
"delimiter": ",",
"encoding": "utf-8",
"quote": '"',
"escape": "\\",
"include_header": False,
"line_sep": "\n",
},
"excel": {
"type": "excel",
"include_header": True,
"start_cell": "A1",
},
}

for params in (transfer.source_params, transfer.target_params):
assert params["type"] == "s3"
assert params["directory_path"] == "/some/pure/path"
assert params["file_format"]["type"] == "csv"
assert params["type"] == target_source_params["type"]
assert params["directory_path"] == target_source_params["directory_path"]
assert params["options"] == {"some": "option"}
assert params["file_format"] == expected_file_formats[params["file_format"]["type"]]


@pytest.mark.parametrize(
Expand All @@ -121,6 +156,15 @@ async def test_developer_plus_can_create_s3_transfer(
"type": "csv",
},
},
{
"type": "hdfs",
"directory_path": "/some/excel/path",
"file_format": {
"type": "excel",
"include_header": True,
"start_cell": "A1",
},
},
],
)
async def test_developer_plus_can_create_hdfs_transfer(
Expand Down Expand Up @@ -183,10 +227,27 @@ async def test_developer_plus_can_create_hdfs_transfer(
"queue_id": transfer.queue_id,
}

expected_file_formats = {
"csv": {
"type": "csv",
"delimiter": ",",
"encoding": "utf-8",
"quote": '"',
"escape": "\\",
"include_header": False,
"line_sep": "\n",
},
"excel": {
"type": "excel",
"include_header": True,
"start_cell": "A1",
},
}

for params in (transfer.source_params, transfer.target_params):
assert params["type"] == "hdfs"
assert params["directory_path"] == "/some/pure/path"
assert params["file_format"]["type"] == "csv"
assert params["type"] == target_source_params["type"]
assert params["directory_path"] == target_source_params["directory_path"]
assert params["file_format"] == expected_file_formats[params["file_format"]["type"]]
assert params["options"] == {}


Expand All @@ -211,6 +272,14 @@ async def test_developer_plus_can_create_hdfs_transfer(
"type": "csv",
},
},
{
"type": "s3",
"directory_path": "some/path",
"file_format": {
"type": "excel",
"include_header": True,
},
},
],
)
async def test_cannot_create_file_transfer_with_relative_path(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,23 @@
"delimiter": ",",
"encoding": "utf-8",
"escape": "\\",
"header": False,
"include_header": False,
"line_sep": "\n",
"quote": '"',
"type": "csv",
},
"options": {},
},
{
"type": "s3",
"directory_path": "/some/excel/path",
"file_format": {
"type": "excel",
"include_header": True,
"start_cell": "A1",
},
"options": {},
},
],
)
@pytest.mark.parametrize(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,23 @@
"delimiter": ",",
"encoding": "utf-8",
"escape": "\\",
"header": False,
"include_header": False,
"line_sep": "\n",
"quote": '"',
"type": "csv",
},
"options": {},
},
{
"type": "s3",
"directory_path": "/some/excel/path",
"file_format": {
"type": "excel",
"include_header": True,
"start_cell": "A1",
},
"options": {},
},
],
)
@pytest.mark.parametrize(
Expand Down Expand Up @@ -54,7 +64,7 @@ async def test_developer_plus_can_update_s3_transfer(
"source_params": {
"type": "s3",
"directory_path": "/some/new/test/directory",
"file_format": {"type": "jsonline"},
"file_format": create_transfer_data["file_format"],
"options": {"some": "option"},
},
},
Expand All @@ -65,14 +75,11 @@ async def test_developer_plus_can_update_s3_transfer(
source_params.update(
{
"directory_path": "/some/new/test/directory",
"file_format": {
"encoding": "utf-8",
"line_sep": "\n",
"type": "jsonline",
},
"file_format": create_transfer_data["file_format"],
"options": {"some": "option"},
},
)

# Assert
assert result.status_code == 200
assert result.json() == {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ async def group_transfers(
"delimiter": ",",
"encoding": "utf-8",
"escape": "\\",
"header": False,
"include_header": False,
"line_sep": "\n",
"quote": '"',
"type": "csv",
Expand Down
Loading