Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions .github/workflows/deliver.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Workflow: run the coffee-productivity analysis and publish the outputs
# as a build artifact. Structure reconstructed (indentation was lost in
# extraction); content otherwise unchanged.
name: Deliver results
on: [push] # for now, to get latest results

jobs:
  make-plot:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout the code
        uses: actions/checkout@v6

      - name: Install pixi
        uses: prefix-dev/[email protected]
        with:
          pixi-version: v0.62.2 # good to pin this since pixi is not 1.0 yet!

      - name: run analysis
        run: |
          cd analysis
          pixi run pkoffee analyze --data-file coffee_productivity.csv --output fitted_models.toml --show-rankings
          pixi run pkoffee plot --data-file coffee_productivity.csv --models fitted_models.toml --output analysis.png --y-max 7

      - name: upload results
        uses: actions/upload-artifact@v6
        with:
          name: results
          path: |
            analysis/analysis.png
            analysis/fitted_models.toml
46 changes: 46 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Workflow: run the unit-test matrix (3 OSes x 3 pixi environments).
# Structure reconstructed (indentation was lost in extraction); content
# otherwise unchanged.
name: Test Matrix
on: [push]

jobs:
  code-quality:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout the code
        uses: actions/checkout@v6

      - name: Install pixi
        uses: prefix-dev/[email protected]
        with:
          pixi-version: v0.62.2 # good to pin this since pixi is not 1.0 yet!

      # NOTE(review): the only real check in this job is commented out, so the
      # job currently does nothing but still gates unit-tests below — either
      # re-enable ruff or drop the `needs:` dependency. TODO confirm intent.
      # - name: look for syntax errors with ruff
      #   run: pixi run ruff check

  unit-tests:
    runs-on: ${{ matrix.os }}
    needs: code-quality # don't bother running tests if code-checks fails.

    strategy:
      fail-fast: false # don't stop all jobs if one fails
      matrix:
        os:
          - ubuntu-latest
          - macos-latest
          - windows-latest
        environment:
          - test
          - test314
          - test312

    steps:
      - name: Checkout the code
        uses: actions/checkout@v6

      - name: Install pixi
        uses: prefix-dev/[email protected]
        with:
          pixi-version: v0.62.2
          environments: ${{ matrix.environment }}

      - run: pixi run -e ${{ matrix.environment }} pytest
5,424 changes: 4,554 additions & 870 deletions pixi.lock

Large diffs are not rendered by default.

25 changes: 25 additions & 0 deletions pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ python = ["3.12.*", "3.13.*", "3.14.*"]

[dependencies]
pkoffee = { path = "." }
numpy = ">=2.4.1,<3"
matplotlib = ">=3.10.8,<4"

[package]
name = "pkoffee"
Expand Down Expand Up @@ -61,14 +63,37 @@ python=">=3.14,<3.15"
[feature.dev.dependencies]
jupyterlab = ">=4.4.3,<5"

[feature.test.dependencies]
# example task converts notebooks to markdown to include so they can be included in documentation
nbconvert = ">=7.16.6,<8"
pytest = ">=9,<10"
pytest-cov = ">=7,<8"

[feature.test.tasks.test]
args = [
{ "arg" = "coverage_dir", "default" = "htmlcov" },
{ "arg" = "cobertura_report", "default" = "cobertura_report.xml" },
{ "arg" = "junit_report", "default" = "junit_report.xml"}
]
cmd = "pytest -vv --cov-report=html:{{ coverage_dir }} --cov-report=xml:{{ cobertura_report }} --junitxml={{ junit_report }}"


[tasks]
analyse = "pkoffee analyze --data-file analysis/coffee_productivity.csv --output analysis/fitted_models.toml --show-rankings"
plot = "pkoffee plot --data-file analysis/coffee_productivity.csv --models analysis/fitted_models.toml --output analysis/analysis.png --y-max 7"


[environments]
prod = { features = ["py313"], solve-group = "prod" }
test = { features = ["test", "py313"], solve-group = "prod" }
# use default for the dev environment so tasks are resolved in this environment (avoids having to specify env)
# no need to include doc: pixi will figure the environment to use from the task if it is unique
default = { features = ["dev", "py313"], solve-group = "prod" }

# python 3.14 environments for testing
prod314 = { features = ["py314"], solve-group = "prod314" }
test314 = { features = ["test", "py314"], solve-group = "prod314" }

# python 3.12 environments for testing
prod312 = { features = ["py312"], solve-group = "prod312" }
test312 = { features = ["test", "py312"], solve-group = "prod312" }
1 change: 1 addition & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Unit tests initialization."""
140 changes: 140 additions & 0 deletions tests/test_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
"""Unit tests for data loading and preprocessing."""

from pathlib import Path

import numpy as np
import pandas as pd
import pytest

from pkoffee.data import (
ColumnTypeError,
CSVReadError,
MissingColumnsError,
RequiredColumn,
curate,
data_dtype,
extract_arrays,
load_csv,
validate,
)


def test_validate() -> None:
    """validate returns None for a valid DataFrame with all required columns."""
    frame = pd.DataFrame({"cups": [0], "productivity": [1.2]})
    assert validate(frame) is None


def test_validate_wrong_type() -> None:
    """validate raises ColumnTypeError when a required column has the wrong dtype."""
    bad_frame = pd.DataFrame({"cups": [0], "productivity": ["a"]})
    with pytest.raises(ColumnTypeError):
        validate(bad_frame)


def test_validate_missing_column() -> None:
    """validate raises MissingColumnsError when a required column is absent."""
    frame = pd.DataFrame({f"{RequiredColumn.CUPS}": [1], "notproductivity": [1.2]})
    with pytest.raises(MissingColumnsError):
        validate(frame)


@pytest.mark.parametrize(
    ("data", "expected"),
    [
        # one NaN in either column drops that whole row
        (
            pd.DataFrame({"cups": [1, np.nan, 2], "productivity": [1.2, 2.1, np.nan]}),
            pd.DataFrame({"cups": [1.0], "productivity": [1.2]}),
        ),
        # every row contains a NaN -> empty result
        (
            pd.DataFrame({"cups": [np.nan, np.nan, np.nan], "productivity": [1.2, 2.1, 3.4]}),
            pd.DataFrame({"cups": [], "productivity": []}),
        ),
        # no NaN values -> data passes through unchanged
        (
            pd.DataFrame({"cups": [1, 1, 4], "productivity": [1.2, 2.1, 0.5]}),
            pd.DataFrame({"cups": [1, 1, 4], "productivity": [1.2, 2.1, 0.5]}),
        ),
    ],
)
def test_curate(data: pd.DataFrame, expected: pd.DataFrame) -> None:
    """Test curate with different DataFrames containing NaNs.

    Renamed from test_currate (typo) — safe: pytest discovers any test_*
    function and nothing references the old name.
    """
    assert curate(data).equals(expected)


def test_load_csv_valid_file(tmp_path: Path) -> None:
    """load_csv parses a well-formed CSV and preserves column values and dtypes."""
    csv_path = tmp_path / "valid.csv"
    cups_ref = np.array([1, 2, 3], dtype=int)
    productivity_ref = np.array([2.3, 1.2, 4.8], dtype=data_dtype)
    np.savetxt(
        csv_path,
        np.stack([cups_ref, productivity_ref], axis=1),
        fmt=["%d", "%10.4f"],
        delimiter=",",
        header=f"{RequiredColumn.CUPS},{RequiredColumn.PRODUCTIVITY}",
        comments="",
    )

    loaded = load_csv(csv_path)

    for column in (RequiredColumn.CUPS, RequiredColumn.PRODUCTIVITY):
        assert column in loaded.columns
    assert np.isclose(loaded[RequiredColumn.CUPS].to_numpy(), cups_ref).all()
    assert np.isclose(loaded[RequiredColumn.PRODUCTIVITY].to_numpy(), productivity_ref).all()
    assert loaded.dtypes[RequiredColumn.CUPS] == np.int64
    assert loaded.dtypes[RequiredColumn.PRODUCTIVITY] == np.float64


def test_load_csv_missing_file() -> None:
    """load_csv propagates FileNotFoundError for a path that does not exist."""
    missing = Path("nonexistent_file.csv")
    with pytest.raises(FileNotFoundError):
        load_csv(missing)


def test_load_csv_missing_columns(tmp_path: Path) -> None:
    """load_csv raises MissingColumnsError when a required column header is absent."""
    csv_path = tmp_path / "missing_columns.csv"
    np.savetxt(
        csv_path,
        np.stack([[1], [2.3]], axis=1),
        fmt=["%d", "%10.4f"],
        delimiter=",",
        # deliberately replace the productivity column with an unknown one
        header=f"{RequiredColumn.CUPS},wrong_column",
        comments="",
    )

    with pytest.raises(MissingColumnsError, match="Missing required columns"):
        load_csv(csv_path)


def test_load_data_with_nan_values(tmp_path: Path) -> None:
    """load_csv drops rows that contain NaN values."""
    csv_path = tmp_path / "valid_with_nan.csv"
    lines = [
        f"{RequiredColumn.CUPS},{RequiredColumn.PRODUCTIVITY}\n",
        "1,10.5\n",
        "2,\n",  # Missing productivity
        "3,18.2\n",
    ]
    with csv_path.open("w") as fh:
        fh.writelines(lines)

    expected = pd.DataFrame({RequiredColumn.CUPS: [1, 3], RequiredColumn.PRODUCTIVITY: [10.5, 18.2]})
    assert load_csv(csv_path).equals(expected)


def test_load_data_with_extra_values(tmp_path: Path) -> None:
    """Test that CSVReadError is raised when the CSV cannot be read.

    The file handle is still open for writing when load_csv is called, so the
    header/row are presumably still in the write buffer and the on-disk file
    is empty or partial — TODO confirm this is the intended failure mode.
    (Docstring and filename fixed: both were copy-pasted from the NaN test
    above and described dropping NaN rows, which this test does not do.)
    """
    data_file = tmp_path / "partially_written.csv"
    with data_file.open("w") as fh:
        fh.write(f"{RequiredColumn.CUPS},{RequiredColumn.PRODUCTIVITY}\n")
        fh.write("1,2.1\n")
        # try to read the file while it is open for write
        with pytest.raises(CSVReadError):
            load_csv(data_file)


def test_extract_arrays() -> None:
    """extract_arrays returns the cups and productivity columns as numpy arrays."""
    cups_ref = np.array([1, 2, 3], dtype=int)
    productivity_ref = np.array([10.5, 15.3, 18.2], dtype=np.float64)
    frame = pd.DataFrame(
        {RequiredColumn.CUPS: cups_ref, RequiredColumn.PRODUCTIVITY: productivity_ref}
    )

    cups_out, productivity_out = extract_arrays(frame)

    assert np.allclose(cups_ref, cups_out)
    assert np.allclose(productivity_ref, productivity_out)
Loading
Loading