Skip to content

Commit

Permalink
fix: use mypy pre-commit in local environment (narwhals-dev#1966)
Browse files Browse the repository at this point in the history
Co-authored-by: Dan Redding <[email protected]>
  • Loading branch information
EdAbati and dangotbanned authored Feb 11, 2025
1 parent b96c0e8 commit 2d6df36
Show file tree
Hide file tree
Showing 34 changed files with 242 additions and 200 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/check_tpch_queries.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "pyproject.toml"
- name: local-install
run: uv pip install -U --pre -e ".[dev, core, dask]" --system
run: uv pip install -U --pre -e ".[tests, core, dask]" --system
- name: generate-data
run: cd tpch && python generate_data.py
- name: tpch-tests
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/downstream_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ jobs:
run: |
cd tea-tasting
pdm remove narwhals
pdm add ./..[dev]
pdm add ./..[tests]
- name: show-deps
run: |
cd tea-tasting
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/extremes.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
run: uv pip install pipdeptree tox virtualenv setuptools pandas==0.25.3 polars==0.20.3 numpy==1.17.5 pyarrow==11.0.0 "pyarrow-stubs<17" scipy==1.5.0 scikit-learn==1.1.0 duckdb==1.0 tzdata --system
- name: install-reqs
run: |
uv pip install -e ".[dev]" --system
uv pip install -e ".[tests]" --system
- name: show-deps
run: uv pip freeze
- name: Assert dependencies
Expand Down Expand Up @@ -64,7 +64,7 @@ jobs:
- name: install-pretty-old-versions
run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.5 polars==0.20.3 numpy==1.17.5 pyarrow==11.0.0 "pyarrow-stubs<17" pyspark==3.5.0 scipy==1.5.0 scikit-learn==1.1.0 duckdb==1.0 tzdata --system
- name: install-reqs
run: uv pip install -e ".[dev]" --system
run: uv pip install -e ".[tests]" --system
- name: show-deps
run: uv pip freeze
- name: show-deptree
Expand Down Expand Up @@ -103,7 +103,7 @@ jobs:
- name: install-not-so-old-versions
run: uv pip install tox virtualenv setuptools pandas==2.0.3 polars==0.20.8 numpy==1.24.4 pyarrow==15.0.0 "pyarrow-stubs<17" pyspark==3.5.0 scipy==1.8.0 scikit-learn==1.3.0 duckdb==1.0 dask[dataframe]==2024.10 tzdata --system
- name: install-reqs
run: uv pip install -e ".[dev]" --system
run: uv pip install -e ".[tests]" --system
- name: show-deps
run: uv pip freeze
- name: Assert not so old versions dependencies
Expand Down Expand Up @@ -140,7 +140,7 @@ jobs:
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "pyproject.toml"
- name: install-reqs
run: uv pip install -e ".[dev]" --system
run: uv pip install -e ".[tests]" --system
- name: install-kaggle
run: uv pip install kaggle --system
- name: Download Kaggle notebook artifact
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
cache-dependency-glob: "pyproject.toml"
- name: install-reqs
# Python3.8 is technically at end-of-life, so we don't test everything
run: uv pip install -e ".[dev, core]" --system
run: uv pip install -e ".[tests, core]" --system
- name: show-deps
run: uv pip freeze
- name: Run pytest
Expand All @@ -49,7 +49,7 @@ jobs:
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "pyproject.toml"
- name: install-reqs
run: uv pip install -e ".[dev, core, extra, dask, modin]" --system
run: uv pip install -e ".[tests, core, extra, dask, modin]" --system
- name: install pyspark
run: uv pip install -e ".[pyspark]" --system
# PySpark is not yet available on Python3.12+
Expand Down Expand Up @@ -83,7 +83,7 @@ jobs:
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "pyproject.toml"
- name: install-reqs
run: uv pip install -e ".[dev, core, extra, modin, dask]" --system
run: uv pip install -e ".[tests, core, extra, modin, dask]" --system
- name: install pyspark
run: uv pip install -e ".[pyspark]" --system
# PySpark is not yet available on Python3.12+
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/random_ci_pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
- name: install-random-verions
run: uv pip install -r random-requirements.txt --system
- name: install-narwhals
run: uv pip install -e ".[dev]" --system
run: uv pip install -e ".[tests]" --system
- name: show versions
run: uv pip freeze
- name: Run pytest
Expand Down
40 changes: 40 additions & 0 deletions .github/workflows/typing.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# CI workflow: run mypy via `make typing` inside a uv-managed virtualenv.
# The scrape had flattened all indentation; structure reconstructed per the
# GitHub Actions workflow schema (jobs -> strategy/steps nesting).
name: Type checking

on:
  pull_request:
  push:
    branches: [main]

jobs:
  mypy:
    strategy:
      matrix:
        # Quoted so YAML does not read the version as the float 3.11
        python-version: ["3.11"]
        os: [ubuntu-latest]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          # setup-uv expects string inputs, hence the quoted boolean
          enable-cache: "true"
          cache-suffix: ${{ matrix.python-version }}
          cache-dependency-glob: "pyproject.toml"
      - name: Create venv
        # Local .venv so the Makefile's $(VENV_BIN)/mypy resolves
        run: uv venv .venv
      - name: install-reqs
        # TODO: add more dependencies/backends incrementally
        run: |
          source .venv/bin/activate
          uv pip install -e ".[tests, typing, core]"
      - name: show-deps
        run: |
          source .venv/bin/activate
          uv pip freeze
      - name: Run mypy
        # Delegates to the Makefile target so CI and local runs stay identical
        run: |
          source .venv/bin/activate
          make typing
14 changes: 8 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
ci:
autoupdate_schedule: monthly
skip: [mypy]
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
Expand All @@ -13,12 +14,6 @@ repos:
- id: ruff
alias: check-docstrings
entry: python utils/check_docstrings.py
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v1.14.1'
hooks:
- id: mypy
additional_dependencies: ['polars==1.4.1', 'pytest==8.3.2']
files: ^(narwhals|tests)/
- repo: https://github.com/codespell-project/codespell
rev: 'v2.4.1'
hooks:
Expand Down Expand Up @@ -84,6 +79,13 @@ repos:
entry: pull_request_target
language: pygrep
files: ^\.github/workflows/
- id: mypy
name: mypy
entry: make typing
files: ^(narwhals|tests)/
language: system
types: [python]
require_serial: true
- repo: https://github.com/adamchainz/blacken-docs
rev: "1.19.1" # replace with latest tag on GitHub
hooks:
Expand Down
23 changes: 23 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Mostly based on polars Makefile
# https://github.com/pola-rs/polars/blob/main/py-polars/Makefile
#
# NOTE(review): the scraped copy had lost the leading tabs on recipe lines;
# GNU Make requires a literal tab (not spaces) to introduce each recipe line,
# so they are restored here.

.DEFAULT_GOAL := help

SHELL=bash
VENV=./.venv

# Windows virtualenvs place executables under Scripts/, POSIX under bin/
ifeq ($(OS),Windows_NT)
	VENV_BIN=$(VENV)/Scripts
else
	VENV_BIN=$(VENV)/bin
endif


.PHONY: help
help: ## Display this help screen
	@echo -e "\033[1mAvailable commands:\033[0m"
	@grep -E '^[a-z.A-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "  \033[36m%-22s\033[0m %s\n", $$1, $$2}' | sort

.PHONY: typing
typing: ## Run typing checks
	$(VENV_BIN)/mypy
5 changes: 3 additions & 2 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from narwhals._arrow.namespace import ArrowNamespace
from narwhals.dtypes import DType
from narwhals.typing import _1DArray
from narwhals.typing import _2DArray
from narwhals.utils import Version


Expand Down Expand Up @@ -340,7 +341,7 @@ def __getitem__(self: Self, idx: int | slice | Sequence[int]) -> Any | Self:
def scatter(self: Self, indices: int | Sequence[int], values: Any) -> Self:
import numpy as np # ignore-banned-import

mask = np.zeros(self.len(), dtype=bool)
mask: _1DArray = np.zeros(self.len(), dtype=bool)
mask[indices] = True
if isinstance(values, self.__class__):
ser, values = broadcast_and_extract_native(
Expand Down Expand Up @@ -729,7 +730,7 @@ def to_dummies(self: Self, *, separator: str, drop_first: bool) -> ArrowDataFram
name = self._name
da = series.dictionary_encode(null_encoding="encode").combine_chunks()

columns = np.zeros((len(da.dictionary), len(da)), np.int8)
columns: _2DArray = np.zeros((len(da.dictionary), len(da)), np.int8)
columns[da.indices, np.arange(len(da))] = 1
null_col_pa, null_col_pl = f"{name}{separator}None", f"{name}{separator}null"
cols = [
Expand Down
17 changes: 7 additions & 10 deletions narwhals/_duckdb/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@

import duckdb
from duckdb import ColumnExpression
from duckdb import ConstantExpression
from duckdb import FunctionExpression

from narwhals._duckdb.utils import ExprKind
from narwhals._duckdb.utils import lit
from narwhals._duckdb.utils import native_to_narwhals_dtype
from narwhals._duckdb.utils import parse_exprs
from narwhals.dependencies import get_duckdb
Expand Down Expand Up @@ -145,7 +145,7 @@ def aggregate(self: Self, *exprs: DuckDBExpr) -> Self:
new_columns_map = parse_exprs(self, *exprs)
return self._from_native_frame(
self._native_frame.aggregate(
[val.alias(col) for col, val in new_columns_map.items()]
[val.alias(col) for col, val in new_columns_map.items()] # type: ignore[arg-type]
),
validate_column_names=False,
)
Expand Down Expand Up @@ -302,7 +302,7 @@ def join(
raise NotImplementedError(msg)
rel = self._native_frame.set_alias("lhs").cross( # pragma: no cover
other._native_frame.set_alias("rhs")
)
) # type: ignore[operator]
else:
# help mypy
assert left_on is not None # noqa: S101
Expand Down Expand Up @@ -467,9 +467,9 @@ def explode(self: Self, columns: list[str]) -> Self:
rel = self._native_frame
original_columns = self.columns

not_null_condition = (
col_to_explode.isnotnull() & FunctionExpression("len", col_to_explode) > 0
)
not_null_condition = col_to_explode.isnotnull() & FunctionExpression(
"len", col_to_explode
) > lit(0)
non_null_rel = rel.filter(not_null_condition).select(
*(
FunctionExpression("unnest", col_to_explode).alias(col)
Expand All @@ -480,10 +480,7 @@ def explode(self: Self, columns: list[str]) -> Self:
)

null_rel = rel.filter(~not_null_condition).select(
*(
ConstantExpression(None).alias(col) if col in columns else col
for col in original_columns
)
*(lit(None).alias(col) if col in columns else col for col in original_columns)
)

return self._from_native_frame(
Expand Down
Loading

0 comments on commit 2d6df36

Please sign in to comment.