-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactor(cli): added refactoring for split_csv cli.
- Loading branch information
1 parent
e6a0e23
commit 92558c0
Showing
3 changed files
with
192 additions
and
112 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,64 +1,130 @@ | ||
"""Tests for the split_csv CLI command.""" | ||
|
||
import hashlib | ||
import pathlib | ||
import os | ||
import tempfile | ||
from typing import Any, Callable | ||
from pathlib import Path | ||
|
||
import pytest | ||
from click.testing import CliRunner | ||
|
||
from src.stimulus.cli.split_csv import main | ||
from stimulus.cli.main import cli | ||
from stimulus.cli.split_csv import split_csv | ||
|
||
|
||
# Fixtures | ||
@pytest.fixture | ||
def csv_path_no_split() -> str: | ||
"""Fixture that returns the path to a CSV file without split column.""" | ||
return "tests/test_data/dna_experiment/test.csv" | ||
def csv_path() -> str: | ||
"""Get path to test CSV file.""" | ||
return str( | ||
Path(__file__).parent.parent / "test_data" / "titanic" / "titanic_stimulus.csv", | ||
) | ||
|
||
|
||
@pytest.fixture | ||
def csv_path_with_split() -> str: | ||
"""Fixture that returns the path to a CSV file with split column.""" | ||
return "tests/test_data/dna_experiment/test_with_split.csv" | ||
"""Get path to test CSV file with split column.""" | ||
return str( | ||
Path(__file__).parent.parent / "test_data" / "titanic" / "titanic_stimulus_split.csv", | ||
) | ||
|
||
|
||
@pytest.fixture | ||
def yaml_path() -> str: | ||
"""Fixture that returns the path to a YAML config file.""" | ||
return "tests/test_data/dna_experiment/dna_experiment_config_template_0.yaml" | ||
|
||
|
||
# Test cases | ||
test_cases = [ | ||
("csv_path_no_split", "yaml_path", False, None), | ||
("csv_path_with_split", "yaml_path", False, ValueError), | ||
("csv_path_no_split", "yaml_path", True, None), | ||
("csv_path_with_split", "yaml_path", True, None), | ||
] | ||
|
||
|
||
# Tests | ||
@pytest.mark.skip(reason="There is an issue with non-deterministic output") | ||
@pytest.mark.parametrize(("csv_type", "yaml_type", "force", "error"), test_cases) | ||
def test_split_csv( | ||
request: pytest.FixtureRequest, | ||
snapshot: Callable[[], Any], | ||
csv_type: str, | ||
yaml_type: str, | ||
force: bool, | ||
error: Exception | None, | ||
"""Get path to test config YAML file.""" | ||
return str( | ||
Path(__file__).parent.parent / "test_data" / "titanic" / "titanic_unique_split.yaml", | ||
) | ||
|
||
|
||
def test_split_csv_main( | ||
csv_path: str, | ||
yaml_path: str, | ||
) -> None: | ||
"""Tests the CLI command with correct and wrong YAML files.""" | ||
csv_path = request.getfixturevalue(csv_type) | ||
yaml_path = request.getfixturevalue(yaml_type) | ||
tmpdir = pathlib.Path(tempfile.gettempdir()) | ||
if error: | ||
with pytest.raises(error): # type: ignore[call-overload] | ||
main(csv_path, yaml_path, str(tmpdir / "test.csv"), force=force) | ||
else: | ||
filename = f"{csv_type}_{force}.csv" | ||
main(csv_path, yaml_path, str(tmpdir / filename), force=force) | ||
with open(tmpdir / filename) as file: | ||
"""Test that split_csv runs without errors. | ||
Args: | ||
csv_path: Path to test CSV data. | ||
yaml_path: Path to test config YAML. | ||
""" | ||
# Verify required files exist | ||
assert os.path.exists(csv_path), f"CSV file not found at {csv_path}" | ||
assert os.path.exists(yaml_path), f"YAML config not found at {yaml_path}" | ||
|
||
# Create temporary output file | ||
with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp_file: | ||
output_path = tmp_file.name | ||
|
||
try: | ||
# Run main function - should complete without errors | ||
split_csv( | ||
data_csv=csv_path, | ||
config_yaml=yaml_path, | ||
out_path=output_path, | ||
) | ||
|
||
# Verify output file exists and has content | ||
assert os.path.exists(output_path), "Output file was not created" | ||
with open(output_path) as file: | ||
hash = hashlib.md5(file.read().encode()).hexdigest() # noqa: S324 | ||
assert hash == snapshot | ||
assert hash # Verify we got a hash (file not empty) | ||
|
||
finally: | ||
# Clean up temporary file | ||
if os.path.exists(output_path): | ||
os.unlink(output_path) | ||
|
||
|
||
def test_split_csv_with_force( | ||
csv_path_with_split: str, | ||
yaml_path: str, | ||
) -> None: | ||
"""Test split_csv with force flag on file that already has split column. | ||
Args: | ||
csv_path_with_split: Path to test CSV data with split column. | ||
yaml_path: Path to test config YAML. | ||
""" | ||
with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp_file: | ||
output_path = tmp_file.name | ||
|
||
try: | ||
split_csv( | ||
data_csv=csv_path_with_split, | ||
config_yaml=yaml_path, | ||
out_path=output_path, | ||
force=True, | ||
) | ||
assert os.path.exists(output_path) | ||
|
||
finally: | ||
if os.path.exists(output_path): | ||
os.unlink(output_path) | ||
|
||
|
||
def test_cli_invocation( | ||
csv_path: str, | ||
yaml_path: str, | ||
) -> None: | ||
"""Test the CLI invocation of split-csv command. | ||
Args: | ||
csv_path: Path to test CSV data. | ||
yaml_path: Path to test config YAML. | ||
""" | ||
runner = CliRunner() | ||
with runner.isolated_filesystem(): | ||
output_path = "output.csv" | ||
result = runner.invoke( | ||
cli, | ||
[ | ||
"split-csv", | ||
"-c", | ||
csv_path, | ||
"-y", | ||
yaml_path, | ||
"-o", | ||
output_path, | ||
], | ||
) | ||
assert result.exit_code == 0 | ||
assert os.path.exists(output_path), "Output file was not created" |