Implement @shoyer's stricter equivalent() function

max-sixty · max-sixty · commit 42e5c9d1922f · 2025-09-15T10:04:10.000-07:00
- Rewrite equivalent() to reject non-boolean comparison results
- Accept numpy bool scalars (np.bool_) but reject other non-bool types
- Simplify equivalent_attrs() since equivalent() now handles non-bool cases
- Update tests to reflect stricter behavior with non-standard __eq__ methods

This makes comparisons more predictable by treating ambiguous results as
not equivalent, e.g. Dataset comparisons or custom objects whose __eq__
returns a non-boolean value. The tradeoff is that this is less permissive
than Python's standard 'if a == b:' truthiness behavior.
diff --git a/xarray/core/utils.py b/xarray/core/utils.py
@@ -239,18 +239,34 @@ def equivalent(first: T, second: T) -> bool:
     """Compare two objects for equivalence (identity or equality), using
     array_equiv if either object is an ndarray. If both objects are lists,
     equivalent is sequentially called on all the elements.
+
+    Returns False when ``==`` yields anything other than a ``bool`` or
+    ``np.bool_`` (e.g. a Dataset or Series), which makes this function
+    safer to use with objects that have non-standard __eq__ implementations.
     """
     # TODO: refactor to avoid circular import
     from xarray.core import duck_array_ops
 
     if first is second:
         return True
+
     if isinstance(first, np.ndarray) or isinstance(second, np.ndarray):
         return duck_array_ops.array_equiv(first, second)
+
     if isinstance(first, list) or isinstance(second, list):
         return list_equiv(first, second)  # type: ignore[arg-type]
 
-    return (first == second) or (pd.isnull(first) and pd.isnull(second))  # type: ignore[call-overload]
+    # For non-array/list types, use == but require boolean result
+    result = first == second
+    if not isinstance(result, bool):
+        # Accept numpy bool scalars as well
+        if isinstance(result, np.bool_):
+            return bool(result)
+        # Reject any other non-boolean type (Dataset, Series, custom objects, etc.)
+        return False
+
+    # Check for NaN equivalence
+    return result or (pd.isnull(first) and pd.isnull(second))  # type: ignore[call-overload]
 
 
 def list_equiv(first: Sequence[T], second: Sequence[T]) -> bool:
diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py
@@ -614,31 +614,12 @@ def equivalent_attrs(a: Any, b: Any) -> bool:
     This handles cases like numpy arrays with ambiguous truth values
     and xarray Datasets which can't be directly converted to numpy arrays.
 
-    For non-boolean results, we use truthiness (consistent with `if a == b`).
-    This is an imperfect but pragmatic choice:
-
-    Pros of truthiness:
-    - Consistent with Python's normal `if a == b:` behavior
-    - Preserves numpy scalars (np.bool_(True)) and similar types
-    - More permissive for common use cases
-
-    Cons of truthiness:
-    - Keeps attrs when __eq__ returns truthy non-bool (e.g., "error")
-    - Drops attrs when __eq__ returns falsy non-bool (e.g., 0, [])
-
-    The alternative (strict bool checking) would be safer but would drop
-    many legitimate comparisons. We choose consistency with Python's
-    standard behavior, accepting edge cases with pathological __eq__ methods.
-
-    TODO: Revisit this behavior in the future - consider strict type checking
-    or a more sophisticated approach to handling non-boolean comparisons.
+    Since equivalent() now handles non-boolean returns by returning False,
+    this wrapper mainly catches exceptions from comparisons that can't be
+    evaluated at all.
     """
     try:
-        result = equivalent(a, b)
-        # Use truthiness, consistent with `if a == b:` behavior
-        # Note: This means non-boolean returns are interpreted by truthiness,
-        # which can lead to false positives/negatives but is more permissive
-        return bool(result)
+        return equivalent(a, b)
     except (ValueError, TypeError):
         # These exceptions indicate the comparison is truly ambiguous
         # (e.g., numpy arrays that would raise "ambiguous truth value")
diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py
@@ -236,13 +236,7 @@ def test_merge_attrs_drop_conflicts(self):
         assert_identical(actual, expected)
 
     def test_merge_attrs_drop_conflicts_non_bool_eq(self):
-        """Test drop_conflicts behavior when __eq__ returns non-bool values.
-
-        When comparing attribute values, the _equivalent_drop_conflicts() function
-        uses == which can return non-bool values. The new behavior treats ambiguous
-        or falsy equality results as non-equivalent, dropping the attribute rather
-        than raising an error.
-        """
+        """Test drop_conflicts behavior when __eq__ returns non-bool values."""
         import warnings
 
         import numpy as np
@@ -295,14 +289,14 @@ def __repr__(self):
         with warnings.catch_warnings():
             warnings.filterwarnings("ignore", category=DeprecationWarning)
 
-            # With truthiness: objects returning [True] are kept (truthy)
+            # Objects returning arrays are dropped (non-boolean return)
             actual = xr.merge([ds4, ds5], combine_attrs="drop_conflicts")
-            assert "custom" in actual.attrs  # Kept - [True] is truthy
+            assert "custom" not in actual.attrs  # Dropped - returns array, not bool
             assert actual.attrs["x"] == 1
 
-            # Objects with different values: equivalent returns False (bool), dropped
+            # Different values also dropped (returns array, not bool)
             actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts")
-            assert "custom" not in actual.attrs  # Dropped - different values
+            assert "custom" not in actual.attrs  # Dropped - returns non-boolean
             assert actual.attrs["x"] == 1
             assert actual.attrs["y"] == 2
 
@@ -426,10 +420,9 @@ def test_merge_attrs_drop_conflicts_pathological_cases(self):
         assert "dataset_attr" not in actual.attrs  # Dropped due to TypeError
         assert actual.attrs["scalar"] == 42
 
-        # With truthiness: identical datasets are kept
-        # The comparison returns a truthy Dataset, so they're treated as equal
+        # Identical datasets are also dropped (comparison returns Dataset, not bool)
         actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts")
-        assert "dataset_attr" in actual.attrs  # Kept with truthiness approach
+        assert "dataset_attr" not in actual.attrs  # Dropped - returns Dataset, not bool
         assert actual.attrs["other"] == 99
 
         # Test 3: Pandas Series (raises ValueError due to ambiguous truth value)
@@ -457,22 +450,16 @@ def test_merge_attrs_drop_conflicts_pathological_cases(self):
             assert "series" not in actual.attrs  # Dropped due to ValueError
             assert actual.attrs["value"] == "a"
 
-    def test_merge_attrs_drop_conflicts_truthiness_edge_cases(self):
-        """Test edge cases demonstrating the truthiness tradeoff.
-
-        We deliberately use truthiness for consistency with Python's `if a == b:`
-        behavior. This test documents the implications of this design choice
-        with objects that have non-standard __eq__ methods.
-        """
+    def test_merge_attrs_drop_conflicts_non_boolean_eq_returns(self):
+        """Test objects with non-boolean __eq__ returns are dropped."""
 
-        # Case 1: Objects whose __eq__ returns truthy non-booleans
-        # These are kept because we respect truthiness
+        # Case 1: Objects whose __eq__ returns non-boolean strings
         class ReturnsString:
             def __init__(self, value):
                 self.value = value
 
             def __eq__(self, other):
-                # Always returns a string (truthy if non-empty)
+                # Always returns a string (non-boolean)
                 return "comparison result"
 
         obj1 = ReturnsString("A")
@@ -483,18 +470,16 @@ def __eq__(self, other):
 
         actual = xr.merge([ds1, ds2], combine_attrs="drop_conflicts")
 
-        # Truthiness behavior: keeps attribute because "comparison result" is truthy
-        # This is the expected behavior when respecting truthiness
-        assert "obj" in actual.attrs
+        # Strict behavior: drops attribute because __eq__ returns non-boolean
+        assert "obj" not in actual.attrs
 
-        # Case 2: Objects whose __eq__ returns falsy non-booleans
-        # These are dropped because we respect truthiness
+        # Case 2: Objects whose __eq__ returns numbers
         class ReturnsZero:
             def __init__(self, value):
                 self.value = value
 
             def __eq__(self, other):
-                # Always returns 0 (falsy) even if values match
+                # Always returns 0 (non-boolean)
                 return 0
 
         obj3 = ReturnsZero("same")
@@ -505,13 +490,9 @@ def __eq__(self, other):
 
         actual = xr.merge([ds3, ds4], combine_attrs="drop_conflicts")
 
-        # Truthiness behavior: drops attribute because 0 is falsy
-        # This is the expected behavior when respecting truthiness
+        # Strict behavior: drops attribute because __eq__ returns non-boolean
         assert "zero" not in actual.attrs
 
-        # Note: These edge cases demonstrate the tradeoff of using truthiness.
-        # Well-behaved __eq__ methods return booleans and work correctly.
-        # We accept these edge cases for consistency with Python's standard behavior.
 
     def test_merge_attrs_no_conflicts_compat_minimal(self):
         """make sure compat="minimal" does not silence errors"""