Skip to content

Commit e273125

Browse files
committed
Use truthiness in equivalent_attrs for consistency with Python
- Choose truthiness over strict boolean checking for consistency with Python's standard 'if a == b:' behavior - Accept edge cases with non-standard __eq__ methods as a deliberate tradeoff - Add comprehensive tests documenting the behavior and edge cases - Add clear documentation explaining the design choice and its implications This approach is more permissive and works better with numpy scalars and similar types, while still catching truly ambiguous cases (ValueError/TypeError).
1 parent d4c5522 commit e273125

File tree

2 files changed

+91
-17
lines changed

2 files changed

+91
-17
lines changed

xarray/structure/merge.py

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -610,19 +610,38 @@ def merge_coords(
610610
def equivalent_attrs(a: Any, b: Any) -> bool:
611611
"""Check if two attribute values are equivalent.
612612
613-
Returns False if the comparison raises ValueError or TypeError,
614-
or if the result is not a boolean. This handles cases like:
615-
- numpy arrays with ambiguous truth values
616-
- xarray Datasets which can't be directly converted to numpy arrays
617-
- pandas Series which return Series from comparisons
613+
Returns False if the comparison raises ValueError or TypeError.
614+
This handles cases like numpy arrays with ambiguous truth values
615+
and xarray Datasets which can't be directly converted to numpy arrays.
616+
617+
For non-boolean results, we use truthiness (consistent with `if a == b`).
618+
This is an imperfect but pragmatic choice:
619+
620+
Pros of truthiness:
621+
- Consistent with Python's normal `if a == b:` behavior
622+
- Preserves numpy scalars (np.bool_(True)) and similar types
623+
- More permissive for common use cases
624+
625+
Cons of truthiness:
626+
- Keeps attrs when __eq__ returns truthy non-bool (e.g., "error")
627+
- Drops attrs when __eq__ returns falsy non-bool (e.g., 0, [])
628+
629+
The alternative (strict bool checking) would be safer but would drop
630+
many legitimate comparisons. We choose consistency with Python's
631+
standard behavior, accepting edge cases with pathological __eq__ methods.
632+
633+
TODO: Revisit this behavior in the future - consider strict type checking
634+
or a more sophisticated approach to handling non-boolean comparisons.
618635
"""
619636
try:
620637
result = equivalent(a, b)
621-
# Ensure we have a boolean result, not an array-like or Dataset
622-
if not isinstance(result, bool):
623-
return False
624-
return result
638+
# Use truthiness, consistent with `if a == b:` behavior
639+
# Note: This means non-boolean returns are interpreted by truthiness,
640+
# which can lead to false positives/negatives but is more permissive
641+
return bool(result)
625642
except (ValueError, TypeError):
643+
# These exceptions indicate the comparison is truly ambiguous
644+
# (e.g., numpy arrays that would raise "ambiguous truth value")
626645
return False
627646

628647

xarray/tests/test_merge.py

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -295,15 +295,14 @@ def __repr__(self):
295295
with warnings.catch_warnings():
296296
warnings.filterwarnings("ignore", category=DeprecationWarning)
297297

298-
# Objects with custom __eq__ that returns non-bool are dropped
299-
# even if they're "equal" because the comparison result is ambiguous
298+
# With truthiness: objects returning [True] are kept (truthy)
300299
actual = xr.merge([ds4, ds5], combine_attrs="drop_conflicts")
301-
assert "custom" not in actual.attrs # Dropped due to non-bool comparison
300+
assert "custom" in actual.attrs # Kept - [True] is truthy
302301
assert actual.attrs["x"] == 1
303302

304-
# Objects with different values also get dropped
303+
# Objects with different values: equivalent returns False (bool), dropped
305304
actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts")
306-
assert "custom" not in actual.attrs # Dropped due to non-bool comparison
305+
assert "custom" not in actual.attrs # Dropped - different values
307306
assert actual.attrs["x"] == 1
308307
assert actual.attrs["y"] == 2
309308

@@ -427,10 +426,10 @@ def test_merge_attrs_drop_conflicts_pathological_cases(self):
427426
assert "dataset_attr" not in actual.attrs # Dropped due to TypeError
428427
assert actual.attrs["scalar"] == 42
429428

430-
# Even identical datasets cause TypeError in equivalent() and get dropped
431-
# because equivalent() tries to convert them to numpy arrays
429+
# With truthiness: identical datasets are kept
430+
# The comparison returns a truthy Dataset, so they're treated as equal
432431
actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts")
433-
assert "dataset_attr" not in actual.attrs # Dropped due to TypeError
432+
assert "dataset_attr" in actual.attrs # Kept with truthiness approach
434433
assert actual.attrs["other"] == 99
435434

436435
# Test 3: Pandas Series (raises ValueError due to ambiguous truth value)
@@ -458,6 +457,62 @@ def test_merge_attrs_drop_conflicts_pathological_cases(self):
458457
assert "series" not in actual.attrs # Dropped due to ValueError
459458
assert actual.attrs["value"] == "a"
460459

460+
def test_merge_attrs_drop_conflicts_truthiness_edge_cases(self):
461+
"""Test edge cases demonstrating the truthiness tradeoff.
462+
463+
We deliberately use truthiness for consistency with Python's `if a == b:`
464+
behavior. This test documents the implications of this design choice
465+
with objects that have non-standard __eq__ methods.
466+
"""
467+
468+
# Case 1: Objects whose __eq__ returns truthy non-booleans
469+
# These are kept because we respect truthiness
470+
class ReturnsString:
471+
def __init__(self, value):
472+
self.value = value
473+
474+
def __eq__(self, other):
475+
# Always returns a string (truthy if non-empty)
476+
return "comparison result"
477+
478+
obj1 = ReturnsString("A")
479+
obj2 = ReturnsString("B") # Different object
480+
481+
ds1 = xr.Dataset(attrs={"obj": obj1})
482+
ds2 = xr.Dataset(attrs={"obj": obj2})
483+
484+
actual = xr.merge([ds1, ds2], combine_attrs="drop_conflicts")
485+
486+
# Truthiness behavior: keeps attribute because "comparison result" is truthy
487+
# This is the expected behavior when respecting truthiness
488+
assert "obj" in actual.attrs
489+
490+
# Case 2: Objects whose __eq__ returns falsy non-booleans
491+
# These are dropped because we respect truthiness
492+
class ReturnsZero:
493+
def __init__(self, value):
494+
self.value = value
495+
496+
def __eq__(self, other):
497+
# Always returns 0 (falsy) even if values match
498+
return 0
499+
500+
obj3 = ReturnsZero("same")
501+
obj4 = ReturnsZero("same") # Different object, same value
502+
503+
ds3 = xr.Dataset(attrs={"zero": obj3})
504+
ds4 = xr.Dataset(attrs={"zero": obj4})
505+
506+
actual = xr.merge([ds3, ds4], combine_attrs="drop_conflicts")
507+
508+
# Truthiness behavior: drops attribute because 0 is falsy
509+
# This is the expected behavior when respecting truthiness
510+
assert "zero" not in actual.attrs
511+
512+
# Note: These edge cases demonstrate the tradeoff of using truthiness.
513+
# Well-behaved __eq__ methods return booleans and work correctly.
514+
# We accept these edge cases for consistency with Python's standard behavior.
515+
461516
def test_merge_attrs_no_conflicts_compat_minimal(self):
462517
"""make sure compat="minimal" does not silence errors"""
463518
ds1 = xr.Dataset({"a": ("x", [], {"a": 0})})

0 commit comments

Comments
 (0)