From 3b9d8a35500f25244c7eda2ff329b4827521459e Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 12 Feb 2025 21:09:32 +0100 Subject: [PATCH 1/3] Chore: Improve linting. Remove Black and isort, use Ruff. Format code. --- .gitignore | 1 + examples/tracking_merlion.py | 5 +- examples/tracking_pycaret.py | 2 +- pyproject.toml | 144 ++++++++++++++++------------------- tests/test_examples.py | 12 +-- 5 files changed, 75 insertions(+), 89 deletions(-) diff --git a/.gitignore b/.gitignore index 528e4e4..f3d3f8f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ /.cache /.idea +/.mypy_cache /.pytest_cache /.ruff_cache /.vagrant diff --git a/examples/tracking_merlion.py b/examples/tracking_merlion.py index 9d0ab8e..251deaa 100644 --- a/examples/tracking_merlion.py +++ b/examples/tracking_merlion.py @@ -108,7 +108,8 @@ def import_data(data_table_name: str, anomalies_table_name: str): ("2014-02-07 14:55:00.000000", "2014-02-09 14:05:00.000000"), ] cursor.executemany( - f"INSERT INTO {anomalies_table_name} (ts_start, ts_end) VALUES (?, ?)", known_anomalies # noqa: S608 + f"INSERT INTO {anomalies_table_name} (ts_start, ts_end) VALUES (?, ?)", # noqa: S608 + known_anomalies, # noqa: S608 ) @@ -222,7 +223,7 @@ def run_experiment(time_series: pd.DataFrame, anomalies_table_name: str): r = TSADMetric.Recall.value(ground_truth=test_labels, predict=test_pred) f1 = TSADMetric.F1.value(ground_truth=test_labels, predict=test_pred) mttd = TSADMetric.MeanTimeToDetect.value(ground_truth=test_labels, predict=test_pred) - print(f"Precision: {p:.4f}, Recall: {r:.4f}, F1: {f1:.4f}\n" f"Mean Time To Detect: {mttd}") # noqa: T201 + print(f"Precision: {p:.4f}, Recall: {r:.4f}, F1: {f1:.4f}\nMean Time To Detect: {mttd}") # noqa: T201 mlflow.log_input(mlflow.data.from_pandas(input_test_data), context="training") mlflow.log_metric("precision", p) diff --git a/examples/tracking_pycaret.py b/examples/tracking_pycaret.py index d6a099b..c1f784f 100644 --- a/examples/tracking_pycaret.py +++ b/examples/tracking_pycaret.py @@ -147,7 +147,7 @@ def read_data(table_name: str) -> pd.DataFrame: FROM {table_name} GROUP BY month ORDER BY month - """ + """ # noqa: S608 with connect_database() as conn: data = pd.read_sql(query, conn) diff --git a/pyproject.toml b/pyproject.toml index 59336d1..c94adc1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,16 +11,10 @@ requires = [ "versioningit", ] -[tool.versioningit.vcs] -method = "git" -default-tag = "0.0.0" - [project] name = "mlflow-cratedb" description = "MLflow adapter for CrateDB" readme = "README.md" -requires-python = ">=3.8,<3.12" -license = {text = "Apache License 2.0"} keywords = [ "ai", "cratedb", @@ -32,9 +26,11 @@ keywords = [ "mlflow-tracking", "mlops", ] +license = { text = "Apache License 2.0" } authors = [ - {name = "Andreas Motl", email = "andreas.motl@crate.io"}, + { name = "Andreas Motl", email = "andreas.motl@crate.io" }, ] +requires-python = ">=3.8,<3.12" classifiers = [ "Development Status :: 4 - Beta", "Environment :: Console", @@ -84,51 +80,75 @@ dependencies = [ "sqlparse<0.6", ] -[project.optional-dependencies] -develop = [ - "black<25", +optional-dependencies.develop = [ "mypy<1.15", "poethepoet<1", "pyproject-fmt<2.6", - "ruff<0.9", + "ruff<0.10", "validate-pyproject<0.24", ] -examples = [ - 'pycaret[analysis,models,parallel,test,tuner]==3.3.2; platform_machine != "aarch64"', +optional-dependencies.examples = [ + "pycaret[analysis,models,parallel,test,tuner]==3.3.2; platform_machine!='aarch64'", "salesforce-merlion<2.1", "werkzeug==2.2.3", ] -release = [ +optional-dependencies.release = [ "build<2", "twine<7", ] -test = [ +optional-dependencies.test = [ "psutil==5.9.8", "pytest<9", "pytest-cov<7", ] -[project.scripts] -mlflow-cratedb = "mlflow_cratedb.cli:cli" -[project.entry-points."mlflow.app"] -mlflow-cratedb = "mlflow_cratedb.server:app" +urls.changelog = "https://github.com/crate/mlflow-cratedb/blob/main/CHANGES.md" +urls.documentation = "https://github.com/crate/mlflow-cratedb" +urls.homepage = "https://github.com/crate/mlflow-cratedb" +urls.repository = "https://github.com/crate/mlflow-cratedb" +scripts.mlflow-cratedb = "mlflow_cratedb.cli:cli" +entry-points."mlflow.app".mlflow-cratedb = "mlflow_cratedb.server:app" + [tool.setuptools] # https://setuptools.pypa.io/en/latest/userguide/package_discovery.html -packages = ["mlflow_cratedb"] - -[project.urls] -changelog = "https://github.com/crate/mlflow-cratedb/blob/main/CHANGES.md" -documentation = "https://github.com/crate/mlflow-cratedb" -homepage = "https://github.com/crate/mlflow-cratedb" -repository = "https://github.com/crate/mlflow-cratedb" -[tool.black] +packages = [ "mlflow_cratedb" ] + +[tool.ruff] line-length = 120 -extend-exclude = "tests/test_tracking.py" +extend-exclude = [ + "tests/test_tracking.py", +] + +lint.select = [ + # Builtins + "A", + # Bugbear + "B", + # comprehensions + "C4", + # Pycodestyle + "E", + # eradicate + "ERA", + # Pyflakes + "F", + # isort + "I", + # pandas-vet + "PD", + # return + "RET", + # Bandit + "S", + # print + "T20", + "W", + # flake8-2020 + "YTT", +] -[tool.isort] -profile = "black" -skip_glob = "**/site-packages/**" -skip_gitignore = false +lint.per-file-ignores."tests/*" = [ "S101" ] # Use of `assert` detected +lint.per-file-ignores."tests/conftest.py" = [ "E402" ] # Module level import not at top of file [tool.pytest.ini_options] minversion = "2.0" @@ -139,7 +159,7 @@ addopts = """ """ log_level = "DEBUG" log_cli_level = "DEBUG" -testpaths = ["tests"] +testpaths = [ "tests" ] xfail_strict = true markers = [ "examples", @@ -149,9 +169,9 @@ markers = [ [tool.coverage.run] branch = false -source = ["mlflow_cratedb"] +source = [ "mlflow_cratedb" ] omit = [ - "tests/*", + "tests/*", ] [tool.coverage.report] @@ -159,7 +179,7 @@ fail_under = 0 show_missing = true [tool.mypy] -packages = ["mlflow_cratedb"] +packages = [ "mlflow_cratedb" ] exclude = [ ] check_untyped_defs = true @@ -173,45 +193,9 @@ strict_equality = true warn_unused_ignores = true warn_redundant_casts = true -[tool.ruff] -line-length = 120 - -lint.select = [ - # Bandit - "S", - # Bugbear - "B", - # Builtins - "A", - # comprehensions - "C4", - # eradicate - "ERA", - # flake8-2020 - "YTT", - # isort - "I", - # pandas-vet - "PD", - # print - "T20", - # Pycodestyle - "E", - "W", - # Pyflakes - "F", - # return - "RET", -] - -extend-exclude = [ -] - - -[tool.ruff.lint.per-file-ignores] -"tests/*" = ["S101"] # Use of `assert` detected -"tests/conftest.py" = ["E402"] # Module level import not at top of file - +[tool.versioningit.vcs] +method = "git" +default-tag = "0.0.0" # =================== # Tasks configuration @@ -219,16 +203,16 @@ extend-exclude = [ [tool.poe.tasks] format = [ - { cmd = "black ." }, + { cmd = "ruff format ." }, # Configure Ruff not to auto-fix (remove!): # Ignore unused imports (F401), unused variables (F841), `print` statements (T201), and commented-out code (ERA001). - { cmd = "ruff --fix --ignore=ERA --ignore=F401 --ignore=F841 --ignore=T20 --ignore=ERA001 ." }, + { cmd = "ruff check --fix --ignore=ERA --ignore=F401 --ignore=F841 --ignore=T20 --ignore=ERA001 ." }, { cmd = "pyproject-fmt --keep-full-version pyproject.toml" }, ] lint = [ + { cmd = "ruff format --check ." }, { cmd = "ruff check ." }, - { cmd = "black --check ." }, { cmd = "validate-pyproject pyproject.toml" }, { cmd = "mypy" }, ] @@ -239,8 +223,8 @@ test-fast = [ { cmd = "pytest -m 'not slow'" }, ] build = { cmd = "python -m build" } -check = ["lint", "test"] -check-fast = ["lint", "test-fast"] +check = [ "lint", "test" ] +check-fast = [ "lint", "test-fast" ] release = [ { cmd = "python -m build" }, diff --git a/tests/test_examples.py b/tests/test_examples.py index 2144c53..f40b371 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -183,15 +183,15 @@ def test_tracking_pycaret(reset_database, engine: sa.Engine, tracking_store: Sql # We have 2 experiments - one for "Default" experiment and one for the example assert session.query(SqlExperiment).count() == 2, "experiments should have 2 rows" # We have 32 distinct runs in the experiment which produced metrics - assert ( - session.query(sa.func.count(sa.distinct(SqlMetric.run_uuid))).scalar() == 32 - ), "metrics should have 32 distinct run_uuid" + assert session.query(sa.func.count(sa.distinct(SqlMetric.run_uuid))).scalar() == 32, ( + "metrics should have 32 distinct run_uuid" + ) # We have 33 runs in total (1 parent + 32 child runs) assert session.query(SqlRun).count() == 33, "runs should have 33 rows" # We have 33 distinct runs which have parameters (1 parent + 32 child runs) - assert ( - session.query(sa.func.count(sa.distinct(SqlParam.run_uuid))).scalar() == 33 - ), "params should have 33 distinct run_uuid" + assert session.query(sa.func.count(sa.distinct(SqlParam.run_uuid))).scalar() == 33, ( + "params should have 33 distinct run_uuid" + ) # We have one model registered assert session.query(SqlRegisteredModel).count() == 1, "registered_models should have 1 row" From b8508f5af47cac0a44b7739f706ed79a90c8b68b Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 12 Feb 2025 21:12:43 +0100 Subject: [PATCH 2/3] CI: Run tests on Python 3.12 --- .github/workflows/main.yml | 6 +++++- CHANGES.md | 1 + pyproject.toml | 3 ++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 25086f9..4fe271f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -27,7 +27,11 @@ jobs: fail-fast: false matrix: os: ["ubuntu-latest"] - python-version: ["3.10", "3.11"] + python-version: [ + "3.10", + "3.11", + "3.12", + ] env: OS: ${{ matrix.os }} diff --git a/CHANGES.md b/CHANGES.md index 88cdc51..ec4ffb2 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,7 @@ ## in progress +- CI: Run tests on Python 3.12 ## 2024-06-25 v2.14.1 - Started using more SQLAlchemy patches and polyfills from `sqlalchemy-cratedb` diff --git a/pyproject.toml b/pyproject.toml index c94adc1..883c788 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ license = { text = "Apache License 2.0" } authors = [ { name = "Andreas Motl", email = "andreas.motl@crate.io" }, ] -requires-python = ">=3.8,<3.12" +requires-python = ">=3.8,<3.13" classifiers = [ "Development Status :: 4 - Beta", "Environment :: Console", @@ -48,6 +48,7 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Topic :: Communications", "Topic :: Database", "Topic :: Database :: Database Engines/Servers", From eee82b2cfe820763ff458b4962e1172c86a56449 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 12 Feb 2025 21:13:07 +0100 Subject: [PATCH 3/3] OCI: Update to Python 3.12 --- CHANGES.md | 1 + release/oci-runtime/Dockerfile | 2 +- release/oci-server/Dockerfile | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index ec4ffb2..90ba46d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,7 @@ ## in progress - CI: Run tests on Python 3.12 +- OCI: Update to Python 3.12 ## 2024-06-25 v2.14.1 - Started using more SQLAlchemy patches and polyfills from `sqlalchemy-cratedb` diff --git a/release/oci-runtime/Dockerfile b/release/oci-runtime/Dockerfile index 5869550..d3190b2 100644 --- a/release/oci-runtime/Dockerfile +++ b/release/oci-runtime/Dockerfile @@ -4,7 +4,7 @@ # - https://vsupalov.com/buildkit-cache-mount-dockerfile/ # - https://github.com/FernandoMiguel/Buildkit#mounttypecache -FROM python:3.11-slim-bullseye +FROM python:3.12-slim-bullseye ENV DEBIAN_FRONTEND noninteractive ENV TERM linux diff --git a/release/oci-server/Dockerfile b/release/oci-server/Dockerfile index 7677c31..8b8824a 100644 --- a/release/oci-server/Dockerfile +++ b/release/oci-server/Dockerfile @@ -4,7 +4,7 @@ # - https://vsupalov.com/buildkit-cache-mount-dockerfile/ # - https://github.com/FernandoMiguel/Buildkit#mounttypecache -FROM python:3.11-slim-bullseye +FROM python:3.12-slim-bullseye ENV DEBIAN_FRONTEND noninteractive ENV TERM linux