Skip to content

Commit d4c5522

Browse files
committed
Add pathological test cases suggested by @shoyer
- Test NumPy object arrays with nested arrays - Test xarray.Dataset objects as attributes - Test Pandas Series as attributes - Update equivalent_attrs to catch TypeError in addition to ValueError - Ensure equivalent_attrs always returns a boolean (not Dataset/array-like) These cases all properly get dropped when they can't be reliably compared.
1 parent b7c05aa commit d4c5522

File tree

2 files changed

+102
-7
lines changed

2 files changed

+102
-7
lines changed

xarray/structure/merge.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -610,11 +610,19 @@ def merge_coords(
610610
def equivalent_attrs(a: Any, b: Any) -> bool:
611611
"""Check if two attribute values are equivalent.
612612
613-
Returns False if the comparison raises ValueError (e.g., for numpy arrays).
613+
Returns False if the comparison raises ValueError or TypeError,
614+
or if the result is not a boolean. This handles cases like:
615+
- numpy arrays with ambiguous truth values
616+
- xarray Datasets which can't be directly converted to numpy arrays
617+
- pandas Series which return Series from comparisons
614618
"""
615619
try:
616-
return equivalent(a, b)
617-
except ValueError:
620+
result = equivalent(a, b)
621+
# Ensure we have a boolean result, not an array-like or Dataset
622+
if not isinstance(result, bool):
623+
return False
624+
return result
625+
except (ValueError, TypeError):
618626
return False
619627

620628

xarray/tests/test_merge.py

Lines changed: 91 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -295,14 +295,15 @@ def __repr__(self):
295295
with warnings.catch_warnings():
296296
warnings.filterwarnings("ignore", category=DeprecationWarning)
297297

298-
# Objects with same value (returning truthy array [True])
298+
# Objects with custom __eq__ that returns non-bool are dropped
299+
# even if they're "equal" because the comparison result is ambiguous
299300
actual = xr.merge([ds4, ds5], combine_attrs="drop_conflicts")
300-
assert "custom" in actual.attrs
301+
assert "custom" not in actual.attrs # Dropped due to non-bool comparison
301302
assert actual.attrs["x"] == 1
302303

303-
# Objects with different values (returning falsy array [False])
304+
# Objects with different values also get dropped
304305
actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts")
305-
assert "custom" not in actual.attrs # Dropped due to conflict
306+
assert "custom" not in actual.attrs # Dropped due to non-bool comparison
306307
assert actual.attrs["x"] == 1
307308
assert actual.attrs["y"] == 2
308309

@@ -371,6 +372,92 @@ def __repr__(self):
371372
actual = xr.merge([ds11, ds12], combine_attrs="drop_conflicts")
372373
assert "alltrue" not in actual.attrs # Dropped due to ambiguous comparison
373374

375+
def test_merge_attrs_drop_conflicts_pathological_cases(self):
376+
"""Test drop_conflicts with pathological cases suggested by @shoyer.
377+
378+
These test cases ensure we handle various problematic attribute types
379+
that can raise exceptions during equality comparison.
380+
"""
381+
import warnings
382+
383+
import numpy as np
384+
import pandas as pd
385+
386+
# Test 1: NumPy object arrays with nested arrays
387+
# These can have complex comparison behavior
388+
x = np.array([None], dtype=object)
389+
x[0] = np.arange(3)
390+
y = np.array([None], dtype=object)
391+
y[0] = np.arange(10, 13)
392+
393+
ds1 = xr.Dataset(attrs={"nested_array": x, "common": 1})
394+
ds2 = xr.Dataset(attrs={"nested_array": y, "common": 1})
395+
396+
# Different nested arrays should cause attribute to be dropped
397+
actual = xr.merge([ds1, ds2], combine_attrs="drop_conflicts")
398+
assert (
399+
"nested_array" not in actual.attrs
400+
) # Dropped due to different nested arrays
401+
assert actual.attrs["common"] == 1
402+
403+
# Test with identical nested arrays
404+
# Note: Even identical nested arrays will be dropped because comparison
405+
# raises ValueError due to ambiguous truth value
406+
z = np.array([None], dtype=object)
407+
z[0] = np.arange(3) # Same as x
408+
ds3 = xr.Dataset(attrs={"nested_array": z, "other": 2})
409+
410+
actual = xr.merge([ds1, ds3], combine_attrs="drop_conflicts")
411+
assert (
412+
"nested_array" not in actual.attrs
413+
) # Dropped due to ValueError in comparison
414+
assert actual.attrs["other"] == 2
415+
416+
# Test 2: xarray.Dataset objects as attributes (raises TypeError in equivalent)
417+
attr_ds1 = xr.Dataset({"foo": 1})
418+
attr_ds2 = xr.Dataset({"bar": 1}) # Different dataset
419+
attr_ds3 = xr.Dataset({"foo": 1}) # Same as attr_ds1
420+
421+
ds4 = xr.Dataset(attrs={"dataset_attr": attr_ds1, "scalar": 42})
422+
ds5 = xr.Dataset(attrs={"dataset_attr": attr_ds2, "scalar": 42})
423+
ds6 = xr.Dataset(attrs={"dataset_attr": attr_ds3, "other": 99})
424+
425+
# Different datasets raise TypeError and should be dropped
426+
actual = xr.merge([ds4, ds5], combine_attrs="drop_conflicts")
427+
assert "dataset_attr" not in actual.attrs # Dropped due to TypeError
428+
assert actual.attrs["scalar"] == 42
429+
430+
# Even identical datasets cause TypeError in equivalent() and get dropped
431+
# because equivalent() tries to convert them to numpy arrays
432+
actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts")
433+
assert "dataset_attr" not in actual.attrs # Dropped due to TypeError
434+
assert actual.attrs["other"] == 99
435+
436+
# Test 3: Pandas Series (raises ValueError due to ambiguous truth value)
437+
series1 = pd.Series([1, 2])
438+
series2 = pd.Series([3, 4]) # Different values
439+
series3 = pd.Series([1, 2]) # Same as series1
440+
441+
ds7 = xr.Dataset(attrs={"series": series1, "value": "a"})
442+
ds8 = xr.Dataset(attrs={"series": series2, "value": "a"})
443+
ds9 = xr.Dataset(attrs={"series": series3, "value": "a"})
444+
445+
# Suppress potential warnings from pandas comparisons
446+
with warnings.catch_warnings():
447+
warnings.filterwarnings("ignore", category=DeprecationWarning)
448+
warnings.filterwarnings("ignore", category=FutureWarning)
449+
450+
# Different series raise ValueError and get dropped
451+
actual = xr.merge([ds7, ds8], combine_attrs="drop_conflicts")
452+
assert "series" not in actual.attrs # Dropped due to ValueError
453+
assert actual.attrs["value"] == "a"
454+
455+
# Even identical series raise ValueError in equivalent() and get dropped
456+
# because Series comparison returns another Series with ambiguous truth value
457+
actual = xr.merge([ds7, ds9], combine_attrs="drop_conflicts")
458+
assert "series" not in actual.attrs # Dropped due to ValueError
459+
assert actual.attrs["value"] == "a"
460+
374461
def test_merge_attrs_no_conflicts_compat_minimal(self):
375462
"""make sure compat="minimal" does not silence errors"""
376463
ds1 = xr.Dataset({"a": ("x", [], {"a": 0})})

0 commit comments

Comments
 (0)