diff --git a/docs/changelog/next_release/140.feature.rst b/docs/changelog/next_release/140.feature.rst new file mode 100644 index 00000000..2ac222fb --- /dev/null +++ b/docs/changelog/next_release/140.feature.rst @@ -0,0 +1 @@ +Add Excel API schema \ No newline at end of file diff --git a/syncmaster/schemas/v1/__init__.py b/syncmaster/schemas/v1/__init__.py index aeffd086..6fa935b0 100644 --- a/syncmaster/schemas/v1/__init__.py +++ b/syncmaster/schemas/v1/__init__.py @@ -36,7 +36,7 @@ PostgresReadTransferSourceAndTarget, ReadDBTransfer, ) -from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, JSONLine +from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, Excel, JSONLine from syncmaster.schemas.v1.transfers.run import ( CreateRunSchema, ReadRunSchema, diff --git a/syncmaster/schemas/v1/file_formats.py b/syncmaster/schemas/v1/file_formats.py index a0175e99..ebf3e389 100644 --- a/syncmaster/schemas/v1/file_formats.py +++ b/syncmaster/schemas/v1/file_formats.py @@ -5,3 +5,4 @@ CSV_FORMAT = Literal["csv"] JSONLINE_FORMAT = Literal["jsonline"] JSON_FORMAT = Literal["json"] +EXCEL_FORMAT = Literal["excel"] diff --git a/syncmaster/schemas/v1/transfers/file/base.py b/syncmaster/schemas/v1/transfers/file/base.py index e52808da..3a131a4f 100644 --- a/syncmaster/schemas/v1/transfers/file/base.py +++ b/syncmaster/schemas/v1/transfers/file/base.py @@ -7,20 +7,20 @@ from pydantic import BaseModel, Field, field_validator -from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, JSONLine +from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, Excel, JSONLine # At the moment the ReadTransferSourceParams and ReadTransferTargetParams # classes are identical but may change in the future class ReadFileTransferSource(BaseModel): directory_path: str - file_format: CSV | JSONLine | JSON = Field(..., discriminator="type") + file_format: CSV | JSONLine | JSON | Excel = Field(..., discriminator="type") options: dict[str, Any] class ReadFileTransferTarget(BaseModel): directory_path: str - file_format: CSV | JSONLine = Field(..., discriminator="type") # JSON format is not supported for writing + file_format: CSV | JSONLine | Excel = Field(..., discriminator="type") # JSON format is not supported for writing options: dict[str, Any] @@ -28,7 +28,7 @@ class ReadFileTransferTarget(BaseModel): # classes are identical but may change in the future class CreateFileTransferSource(BaseModel): directory_path: str - file_format: CSV | JSONLine | JSON = Field(..., discriminator="type") + file_format: CSV | JSONLine | JSON | Excel = Field(..., discriminator="type") options: dict[str, Any] = Field(default_factory=dict) class Config: @@ -44,7 +44,7 @@ def _directory_path_is_valid_path(cls, value): class CreateFileTransferTarget(BaseModel): directory_path: str - file_format: CSV | JSONLine = Field(..., discriminator="type") # JSON FORMAT IS NOT SUPPORTED AS A TARGET ! + file_format: CSV | JSONLine | Excel = Field(..., discriminator="type") # JSON FORMAT IS NOT SUPPORTED AS A TARGET ! options: dict[str, Any] = Field(default_factory=dict) class Config: diff --git a/syncmaster/schemas/v1/transfers/file_format.py b/syncmaster/schemas/v1/transfers/file_format.py index cdf8496d..2c91ef8d 100644 --- a/syncmaster/schemas/v1/transfers/file_format.py +++ b/syncmaster/schemas/v1/transfers/file_format.py @@ -4,7 +4,12 @@ from pydantic import BaseModel -from syncmaster.schemas.v1.file_formats import CSV_FORMAT, JSON_FORMAT, JSONLINE_FORMAT +from syncmaster.schemas.v1.file_formats import ( + CSV_FORMAT, + EXCEL_FORMAT, + JSON_FORMAT, + JSONLINE_FORMAT, +) class CSV(BaseModel): @@ -13,7 +18,7 @@ class CSV(BaseModel): encoding: str = "utf-8" quote: str = '"' escape: str = "\\" - header: bool = False + include_header: bool = False line_sep: str = "\n" @@ -27,3 +32,9 @@ class JSON(BaseModel): type: JSON_FORMAT encoding: str = "utf-8" line_sep: str = "\n" + + +class Excel(BaseModel): + type: EXCEL_FORMAT + include_header: bool = False + start_cell: str | None = None diff --git a/tests/resources/file_df_connection/generate_files.py b/tests/resources/file_df_connection/generate_files.py index b4782d6c..12d2654a 100644 --- a/tests/resources/file_df_connection/generate_files.py +++ b/tests/resources/file_df_connection/generate_files.py @@ -103,11 +103,11 @@ def _to_string(obj): return obj -def _write_csv(data: list[dict], file: TextIO, header: bool = False, **kwargs) -> None: +def _write_csv(data: list[dict], file: TextIO, include_header: bool = False, **kwargs) -> None: columns = list(data[0].keys()) writer = csv.DictWriter(file, fieldnames=columns, lineterminator="\n", **kwargs) - if header: + if include_header: writer.writeheader() for row in data: @@ -123,7 +123,7 @@ def save_as_csv_without_header(data: list[dict], path: Path) -> None: def save_as_csv_with_header(data: list[dict], path: Path) -> None: path.mkdir(parents=True, exist_ok=True) with open(path / "file.csv", "w", newline="") as file: - _write_csv(data, file, header=True) + _write_csv(data, file, include_header=True) def save_as_csv_with_delimiter(data: list[dict], path: Path) -> None: @@ -403,12 +403,12 @@ def save_as_xlsx(data: list[dict], path: Path) -> None: shutil.rmtree(root, ignore_errors=True) root.mkdir(parents=True, exist_ok=True) - save_as_xlsx_with_options(data, root / "without_header", header=False) - save_as_xlsx_with_options(data, root / "with_header", header=True) + save_as_xlsx_with_options(data, root / "without_header", include_header=False) + save_as_xlsx_with_options(data, root / "with_header", include_header=True) save_as_xlsx_with_options( data, root / "with_data_address", - header=False, + include_header=False, sheet_name="ABC", startcol=10, startrow=5, @@ -420,12 +420,12 @@ def save_as_xls(data: list[dict], path: Path) -> None: shutil.rmtree(root, ignore_errors=True) root.mkdir(parents=True, exist_ok=True) - save_as_xls_with_options(data, root / "without_header", header=False) - save_as_xls_with_options(data, root / "with_header", header=True) + save_as_xls_with_options(data, root / "without_header", include_header=False) + save_as_xls_with_options(data, root / "with_header", include_header=True) save_as_xls_with_options( data, root / "with_data_address", - header=False, + include_header=False, sheet_name="ABC", startcol=10, startrow=5, diff --git a/tests/test_unit/test_transfers/test_create_transfer.py b/tests/test_unit/test_transfers/test_create_transfer.py index 35b98ea4..3826adde 100644 --- a/tests/test_unit/test_transfers/test_create_transfer.py +++ b/tests/test_unit/test_transfers/test_create_transfer.py @@ -633,10 +633,14 @@ async def test_developer_plus_can_not_create_transfer_with_target_format_json( "message": "Invalid request", "details": [ { - "context": {"discriminator": "'type'", "tag": "json", "expected_tags": "'csv', 'jsonline'"}, + "context": { + "discriminator": "'type'", + "tag": "json", + "expected_tags": "'csv', 'jsonline', 'excel'", + }, "input": {"type": "json", "lineSep": "\n", "encoding": "utf-8"}, "location": ["body", "target_params", "s3", "file_format"], - "message": "Input tag 'json' found using 'type' does not match any of the expected tags: 'csv', 'jsonline'", + "message": "Input tag 'json' found using 'type' does not match any of the expected tags: 'csv', 'jsonline', 'excel'", "code": "union_tag_invalid", }, ], diff --git a/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py b/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py index ad8c2c27..f60d8782 100644 --- a/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py +++ b/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py @@ -28,6 +28,24 @@ "directory_path": "/some/pure/path", "file_format": { "type": "csv", + "delimiter": ",", + "encoding": "utf-8", + "quote": '"', + "escape": "\\", + "include_header": False, + "line_sep": "\n", + }, + "options": { + "some": "option", + }, + }, + { + "type": "s3", + "directory_path": "/some/excel/path", + "file_format": { + "type": "excel", + "include_header": True, + "start_cell": "A1", }, "options": { "some": "option", @@ -94,11 +112,28 @@ async def test_developer_plus_can_create_s3_transfer( "queue_id": transfer.queue_id, } + expected_file_formats = { + "csv": { + "type": "csv", + "delimiter": ",", + "encoding": "utf-8", + "quote": '"', + "escape": "\\", + "include_header": False, + "line_sep": "\n", + }, + "excel": { + "type": "excel", + "include_header": True, + "start_cell": "A1", + }, + } + for params in (transfer.source_params, transfer.target_params): - assert params["type"] == "s3" - assert params["directory_path"] == "/some/pure/path" - assert params["file_format"]["type"] == "csv" + assert params["type"] == target_source_params["type"] + assert params["directory_path"] == target_source_params["directory_path"] assert params["options"] == {"some": "option"} + assert params["file_format"] == expected_file_formats[params["file_format"]["type"]] @pytest.mark.parametrize( @@ -121,6 +156,15 @@ async def test_developer_plus_can_create_s3_transfer( "type": "csv", }, }, + { + "type": "hdfs", + "directory_path": "/some/excel/path", + "file_format": { + "type": "excel", + "include_header": True, + "start_cell": "A1", + }, + }, ], ) async def test_developer_plus_can_create_hdfs_transfer( @@ -183,10 +227,27 @@ async def test_developer_plus_can_create_hdfs_transfer( "queue_id": transfer.queue_id, } + expected_file_formats = { + "csv": { + "type": "csv", + "delimiter": ",", + "encoding": "utf-8", + "quote": '"', + "escape": "\\", + "include_header": False, + "line_sep": "\n", + }, + "excel": { + "type": "excel", + "include_header": True, + "start_cell": "A1", + }, + } + for params in (transfer.source_params, transfer.target_params): - assert params["type"] == "hdfs" - assert params["directory_path"] == "/some/pure/path" - assert params["file_format"]["type"] == "csv" + assert params["type"] == target_source_params["type"] + assert params["directory_path"] == target_source_params["directory_path"] + assert params["file_format"] == expected_file_formats[params["file_format"]["type"]] assert params["options"] == {} @@ -211,6 +272,14 @@ async def test_developer_plus_can_create_hdfs_transfer( "type": "csv", }, }, + { + "type": "s3", + "directory_path": "some/path", + "file_format": { + "type": "excel", + "include_header": True, + }, + }, ], ) async def test_cannot_create_file_transfer_with_relative_path( diff --git a/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py b/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py index ad68ec29..bb5e9e3a 100644 --- a/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py +++ b/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py @@ -16,13 +16,23 @@ "delimiter": ",", "encoding": "utf-8", "escape": "\\", - "header": False, + "include_header": False, "line_sep": "\n", "quote": '"', "type": "csv", }, "options": {}, }, + { + "type": "s3", + "directory_path": "/some/excel/path", + "file_format": { + "type": "excel", + "include_header": True, + "start_cell": "A1", + }, + "options": {}, + }, ], ) @pytest.mark.parametrize( diff --git a/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py b/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py index cc86107c..182bab00 100644 --- a/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py +++ b/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py @@ -16,13 +16,23 @@ "delimiter": ",", "encoding": "utf-8", "escape": "\\", - "header": False, + "include_header": False, "line_sep": "\n", "quote": '"', "type": "csv", }, "options": {}, }, + { + "type": "s3", + "directory_path": "/some/excel/path", + "file_format": { + "type": "excel", + "include_header": True, + "start_cell": "A1", + }, + "options": {}, + }, ], ) @pytest.mark.parametrize( @@ -54,7 +64,7 @@ async def test_developer_plus_can_update_s3_transfer( "source_params": { "type": "s3", "directory_path": "/some/new/test/directory", - "file_format": {"type": "jsonline"}, + "file_format": create_transfer_data["file_format"], "options": {"some": "option"}, }, }, @@ -65,14 +75,11 @@ async def test_developer_plus_can_update_s3_transfer( source_params.update( { "directory_path": "/some/new/test/directory", - "file_format": { - "encoding": "utf-8", - "line_sep": "\n", - "type": "jsonline", - }, + "file_format": create_transfer_data["file_format"], "options": {"some": "option"}, }, ) + # Assert assert result.status_code == 200 assert result.json() == { diff --git a/tests/test_unit/test_transfers/transfer_fixtures/transfers_fixture.py b/tests/test_unit/test_transfers/transfer_fixtures/transfers_fixture.py index 55f37e4f..39568e5c 100644 --- a/tests/test_unit/test_transfers/transfer_fixtures/transfers_fixture.py +++ b/tests/test_unit/test_transfers/transfer_fixtures/transfers_fixture.py @@ -57,7 +57,7 @@ async def group_transfers( "delimiter": ",", "encoding": "utf-8", "escape": "\\", - "header": False, + "include_header": False, "line_sep": "\n", "quote": '"', "type": "csv",