chore: add workaround for narwhals bug

Opened an upstream issue (narwhals-dev/narwhals#1897). Marking the review thread (#3631 (comment)) as resolved.
vega · Jan 30, 2025 · e68ab89 · e68ab89
1 parent 2203972
commit e68ab89
Show file tree

Hide file tree

Showing 2 changed files with 23 additions and 7 deletions.
diff --git a/altair/datasets/_constraints.py b/altair/datasets/_constraints.py
@@ -2,6 +2,8 @@
 
 from __future__ import annotations
 
+import functools
+import operator
 from collections.abc import Set
 from itertools import chain
 from typing import TYPE_CHECKING, Any
@@ -59,7 +61,23 @@ def collect(**kwds: Unpack[Metadata]) -> Metadata:
         return dict(self)
 
     def to_expr(self) -> nw.Expr:
-        return nw.all_horizontal(nw.col(name) == val for name, val in self)
+        """
+        Convert constraint into a narwhals expression.
+
+        Notes
+        -----
+        Works around `issue`_ by performing the reduction with ``stdlib`` (see `discussion`_).
+
+        .. _issue:
+            https://github.com/narwhals-dev/narwhals/issues/1897
+        .. _discussion:
+            https://github.com/vega/altair/pull/3631#discussion_r1934313255
+        """
+        if not self:
+            msg = f"Unable to convert an empty set to an expression:\n\n{self!r}"
+            raise TypeError(msg)
+        exprs = (nw.col(name) == val for name, val in self)
+        return functools.reduce(operator.and_, exprs)
 
     def isdisjoint(self, other: Iterable[Any]) -> bool:
         return super().isdisjoint(other)

diff --git a/altair/datasets/_reader.py b/altair/datasets/_reader.py
@@ -171,13 +171,11 @@ def profile(self, mode: Literal["any", "each"]):
             )
             frame = self._scan_metadata().select("dataset_name", *relevant_columns)
             it = (impl._include_expr for impl in self._read)
-            # BUG: ``narwhals`` raises a ``ValueError`` when ``__invert__``-ing a previously used Expr?
-            # - Can't reproduce trivially
-            # - Doesnt seem to be related to genexp
             inc_expr = nw.any_horizontal(*it)
-            include = _dataset_names(frame, inc_expr)
-            exclude = _dataset_names(frame, ~nw.col("dataset_name").is_in(include))
-            return {"include": include, "exclude": exclude}
+            return {
+                "include": _dataset_names(frame, inc_expr),
+                "exclude": _dataset_names(frame, ~inc_expr),
+            }
         elif mode == "each":
             # FIXME: Rough draft of how to group results
             # - Don't really want a nested dict