From 79671bfa5d3f43857f1d409acd96e36b17c274d1 Mon Sep 17 00:00:00 2001 From: Mike Kryjak Date: Fri, 17 Apr 2026 13:18:34 +0100 Subject: [PATCH 01/13] Handle Pathlib in lazy load The path check assumed the path was a string, which would make the dataset load in a non-lazy way if the path was pathlib.Path --- xbout/load.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xbout/load.py b/xbout/load.py index 8cf714db..a13b2136 100644 --- a/xbout/load.py +++ b/xbout/load.py @@ -291,7 +291,7 @@ def attrs_remove_section(obj, section): if "dump" in input_type or "restart" in input_type: def is_netcdf_collection(datapath): - if not isinstance(datapath, str): + if not _is_path(datapath): return None # Expand globs into a list of files p = Path(datapath) From d929c0330a2e707921e4acabf414cf54d47df22d Mon Sep 17 00:00:00 2001 From: David Bold Date: Mon, 20 Apr 2026 11:07:11 +0200 Subject: [PATCH 02/13] Ensure `dz` is in dataset --- xbout/lazyload.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xbout/lazyload.py b/xbout/lazyload.py index aeb59940..e3eb71e7 100644 --- a/xbout/lazyload.py +++ b/xbout/lazyload.py @@ -338,6 +338,7 @@ def lazy_open_boutdataset( ) # Process all data variables + coords = {} data_vars = {} for name, var in ds.data_vars.items(): if "x" in var.dims and "y" in var.dims: @@ -350,6 +351,8 @@ def lazy_open_boutdataset( attrs=var.attrs, ) elif len(var.dims) == 0: + if name == "dz": + data_vars[name] = var continue # scalars already in metadata elif ("x" not in var.dims) and ("y" not in var.dims): # Take DataArray from first processor @@ -360,7 +363,6 @@ def lazy_open_boutdataset( f"Variable '{name}' has only one of x/y dimensions and will be skipped" ) - coords = {} if "t_array" in ds: coords["t"] = ds["t_array"].values From 3068c035cf9a0769882819957d4004ee7340c9d2 Mon Sep 17 00:00:00 2001 From: David Bold Date: Mon, 20 Apr 2026 11:08:41 +0200 Subject: [PATCH 03/13] Remove double running 3.14 --- 
.github/workflows/master.yml | 2 +- .github/workflows/pythonpackage.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index b16fb83e..e8a753dc 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -20,7 +20,7 @@ jobs: if: always() strategy: matrix: - python-version: ["3.12", "3.13", "3.14", "3.x"] + python-version: ["3.12", "3.13", "3.14"] fail-fast: false steps: diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 66ee6182..55bfbef7 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -20,7 +20,7 @@ jobs: if: always() strategy: matrix: - python-version: ["3.12", "3.13", "3.14", "3.x"] + python-version: ["3.12", "3.13", "3.14"] fail-fast: false steps: From ba8815b53415efd202e7fe1123d3ae4eab267098 Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 28 Apr 2026 06:51:19 +0200 Subject: [PATCH 04/13] Do not add `dz` to metadata in lazyload --- xbout/lazyload.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xbout/lazyload.py b/xbout/lazyload.py index 9969271c..3ad18efd 100644 --- a/xbout/lazyload.py +++ b/xbout/lazyload.py @@ -329,7 +329,9 @@ def lazy_open_boutdataset( # Extract all scalars as metadata metadata = { - name: var.item() for name, var in ds.data_vars.items() if len(var.dims) == 0 + name: var.item() + for name, var in ds.data_vars.items() + if len(var.dims) == 0 and name != "dz" } # Identify processor layout and the array slices from each file From cfa7c9d740f6cfefa747eabf5365d9994dfcf0e5 Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 28 Apr 2026 09:52:51 +0200 Subject: [PATCH 05/13] Remove unused var --- xbout/lazyload.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xbout/lazyload.py b/xbout/lazyload.py index 3ad18efd..198c934c 100644 --- a/xbout/lazyload.py +++ b/xbout/lazyload.py @@ -340,7 +340,6 @@ def lazy_open_boutdataset( ) # 
Process all data variables - coords = {} data_vars = {} for name, var in ds.data_vars.items(): if "x" in var.dims and "y" in var.dims: From d17628f564d578d958beae3400e9537cacc6885a Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 28 Apr 2026 09:53:23 +0200 Subject: [PATCH 06/13] Give more helpful error on reading squashed dataset --- xbout/lazyload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xbout/lazyload.py b/xbout/lazyload.py index 198c934c..9efc4ffa 100644 --- a/xbout/lazyload.py +++ b/xbout/lazyload.py @@ -226,7 +226,10 @@ def make_lazy_array( # Check x and y dimension sizes assert file_shape[xdim] == chunkinfo["nxsub"] - assert file_shape[ydim] == chunkinfo["nysub"] + assert ( + file_shape[ydim] == chunkinfo["nysub"] + ), """Maybe you are trying to read a squashed datafile, which is +not supported with lazyloading. Try loading with setting lazy_load=False""" # The name serves two purposes: # 1. Graph key prefix — it's the first element of every task key tuple From 591991ff9309fe9204db525dcf6b09daec2f0035 Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 28 Apr 2026 09:54:29 +0200 Subject: [PATCH 07/13] Do not open squashed dataset lazy The "squashed" dataset is quite different from a real, squashed dataset. Likely a real one would not work either. 
--- xbout/tests/test_load.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xbout/tests/test_load.py b/xbout/tests/test_load.py index 6b7ad323..339c84dc 100644 --- a/xbout/tests/test_load.py +++ b/xbout/tests/test_load.py @@ -400,6 +400,7 @@ def test_squashed_doublenull_file( datapath=path, keep_xboundaries=keep_xboundaries, keep_yboundaries=keep_yboundaries, + lazy_load=False, ) # bout_xyt_example_files when creating a 'squashed' file just makes it with From 94680563955710b72918b5301b4ec8baad7a7e94 Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 28 Apr 2026 09:55:09 +0200 Subject: [PATCH 08/13] Suppress some warnings --- xbout/tests/test_load.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/xbout/tests/test_load.py b/xbout/tests/test_load.py index 339c84dc..12b10035 100644 --- a/xbout/tests/test_load.py +++ b/xbout/tests/test_load.py @@ -294,9 +294,9 @@ def test_single_file(self, tmp_path_factory, bout_xyt_example_files): with pytest.warns(UserWarning): actual = open_boutdataset(datapath=path, keep_xboundaries=False) expected = create_bout_ds() - expected = expected.set_coords(["t_array", "dx", "dy", "dz"]).rename( - t_array="t" - ) + expected = expected.set_coords(["t_array", "dx", "dy", "dz"]) + with pytest.warns(UserWarning): + expected = expected.rename(t_array="t") xrt.assert_equal( actual.drop_vars(["x", "y", "z"]).load(), expected.drop_vars( @@ -317,9 +317,9 @@ def test_squashed_file(self, tmp_path_factory, bout_xyt_example_files): with pytest.warns(UserWarning): actual = open_boutdataset(datapath=path, keep_xboundaries=False) expected = create_bout_ds(lengths=(6, 8, 12, 7)) - expected = expected.set_coords(["t_array", "dx", "dy", "dz"]).rename( - t_array="t" - ) + expected = expected.set_coords(["t_array", "dx", "dy", "dz"]) + with pytest.warns(UserWarning): + expected = expected.rename(t_array="t") xrt.assert_equal( actual.drop_vars(["x", "y", "z"]).load(), expected.drop_vars( @@ -429,9 
+429,9 @@ def test_combine_along_x(self, tmp_path_factory, bout_xyt_example_files): dim="x", data_vars="minimal", ) - expected = expected.set_coords(["t_array", "dx", "dy", "dz"]).rename( - t_array="t" - ) + expected = expected.set_coords(["t_array", "dx", "dy", "dz"]) + with pytest.warns(UserWarning): + expected = expected.rename(t_array="t") xrt.assert_equal( actual.drop_vars(["x", "y", "z"]).load(), expected.drop_vars( @@ -463,9 +463,9 @@ def test_combine_along_y(self, tmp_path_factory, bout_xyt_example_files): expected = concat( [bout_ds(0), bout_ds(1), bout_ds(2)], dim="y", data_vars="minimal" ) - expected = expected.set_coords(["t_array", "dx", "dy", "dz"]).rename( - t_array="t" - ) + expected = expected.set_coords(["t_array", "dx", "dy", "dz"]) + with pytest.warns(UserWarning): + expected = expected.rename(t_array="t") xrt.assert_equal( actual.drop_vars(["x", "y", "z"]).load(), expected.drop_vars( @@ -526,9 +526,9 @@ def bout_ds(syn_data_type): data_vars="minimal", ) expected = concat([line1, line2, line3], dim="y", data_vars="minimal") - expected = expected.set_coords(["t_array", "dx", "dy", "dz"]).rename( - t_array="t" - ) + expected = expected.set_coords(["t_array", "dx", "dy", "dz"]) + with pytest.warns(UserWarning): + expected = expected.rename(t_array="t") vars_to_drop = METADATA_VARS + _BOUT_PER_PROC_VARIABLES xrt.assert_equal( actual.drop_vars(["x", "y", "z"]).load(), @@ -861,7 +861,7 @@ def test_infer_boundaries_2d_parallelization_doublenull_by_filenum( @pytest.mark.parametrize("is_restart", [False, True]) def test_keep_xboundaries(self, is_restart): ds = create_test_data(0) - ds = ds.rename({"dim2": "x"}) + ds = ds.swap_dims({"dim2": "x"}) # Manually add filename - encoding normally added by xr.open_dataset ds.encoding["source"] = "folder0/BOUT.dmp.0.nc" @@ -883,7 +883,7 @@ def test_keep_xboundaries(self, is_restart): @pytest.mark.parametrize("is_restart", [False, True]) def test_keep_yboundaries(self, is_restart): ds = create_test_data(0) - ds = 
ds.rename({"dim2": "y"}) + ds = ds.swap_dims({"dim2": "y"}) # Manually add filename - encoding normally added by xr.open_dataset ds.encoding["source"] = "folder0/BOUT.dmp.0.nc" @@ -911,7 +911,7 @@ def test_keep_yboundaries_doublenull_by_filenum( self, filenum, lower, upper, is_restart ): ds = create_test_data(0) - ds = ds.rename({"dim2": "y"}) + ds = ds.swap_dims({"dim2": "y"}) # Manually add filename - encoding normally added by xr.open_dataset ds.encoding["source"] = "folder0/BOUT.dmp." + str(filenum) + ".nc" From 270cc02048883d984b43a7f58e9fdb537b4024d6 Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 28 Apr 2026 09:55:21 +0200 Subject: [PATCH 09/13] Fix docs --- xbout/load.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xbout/load.py b/xbout/load.py index a13b2136..1a758317 100644 --- a/xbout/load.py +++ b/xbout/load.py @@ -91,7 +91,7 @@ def open_boutdataset( source term was changed between some of the restarts, but the source term is saved as time-independent, without a t-dimension). In this case one workaround is to pass a list of - variable names to the keyword argument ``drop_vars`` to ignore the + variable names to the keyword argument ``drop_variables`` to ignore the variables with conflicts, e.g. 
if ``"S1"`` and ``"S2"`` have conflicts:: From 02b01b43ad41705353121fbd8accee6b5bf21a8b Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 28 Apr 2026 09:55:42 +0200 Subject: [PATCH 10/13] enable dropping vars with lazy_load --- xbout/lazyload.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/xbout/lazyload.py b/xbout/lazyload.py index 9efc4ffa..520ecfc8 100644 --- a/xbout/lazyload.py +++ b/xbout/lazyload.py @@ -337,6 +337,8 @@ def lazy_open_boutdataset( if len(var.dims) == 0 and name != "dz" } + drop_vars = kwargs.get("drop_variables", []) + # Identify processor layout and the array slices from each file chunkinfo = make_chunkinfo( metadata, keep_xboundaries=keep_xboundaries, keep_yboundaries=keep_yboundaries @@ -345,6 +347,8 @@ def lazy_open_boutdataset( # Process all data variables data_vars = {} for name, var in ds.data_vars.items(): + if name in drop_vars: + continue if "x" in var.dims and "y" in var.dims: # Array distributed over processors in x and y data_vars[name] = xr.DataArray( From f835bcd93b6e459a3b73b4cee8c06062693f91f0 Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 28 Apr 2026 09:56:28 +0200 Subject: [PATCH 11/13] Close result Not doing so results in random warnings of unclosed files, and might also cause deadlocks --- xbout/tests/test_grid.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xbout/tests/test_grid.py b/xbout/tests/test_grid.py index 56d06cc9..cd7027f4 100644 --- a/xbout/tests/test_grid.py +++ b/xbout/tests/test_grid.py @@ -61,6 +61,7 @@ def test_open_grid_extra_dims(self, create_example_grid_file, tmp_path_factory): result = result.drop_vars(["x", "y"]) assert_equal(result, example_grid) example_grid.close() + result.close() def test_open_grid_apply_geometry(self, create_example_grid_file): @register_geometry(name="Schwarzschild") From cb3801f0133042c7f133da04887c361a53aacb57 Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 28 Apr 2026 09:59:55 +0200 Subject: [PATCH 12/13] Suppress all warnings --- 
xbout/tests/test_grid.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/xbout/tests/test_grid.py b/xbout/tests/test_grid.py index cd7027f4..aa8c42de 100644 --- a/xbout/tests/test_grid.py +++ b/xbout/tests/test_grid.py @@ -55,10 +55,11 @@ def test_open_grid_extra_dims(self, create_example_grid_file, tmp_path_factory): merge([example_grid, new_var]).to_netcdf(dodgy_grid_path, engine="h5netcdf") with pytest.warns( - UserWarning, match="drop all variables containing " "the dimensions 'w'" + UserWarning, match="drop all variables containing the dimensions 'w'" ): - with open_boutdataset(datapath=dodgy_grid_path) as result: - result = result.drop_vars(["x", "y"]) + with pytest.warns(UserWarning, match="o geometry type fou"): + with open_boutdataset(datapath=dodgy_grid_path) as result: + result = result.drop_vars(["x", "y"]) assert_equal(result, example_grid) example_grid.close() result.close() From 3e34b46c30280b4c6b15b25dd94b38e2b05adf11 Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 28 Apr 2026 10:00:09 +0200 Subject: [PATCH 13/13] Capture more warnings --- xbout/tests/test_load.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xbout/tests/test_load.py b/xbout/tests/test_load.py index 12b10035..75b1e33d 100644 --- a/xbout/tests/test_load.py +++ b/xbout/tests/test_load.py @@ -633,7 +633,10 @@ def test_restarts(self): datapath = Path(__file__).parent.joinpath( "data", "restart", "BOUT.restart.*.nc" ) - ds = open_boutdataset(datapath, keep_xboundaries=True, keep_yboundaries=True) + with pytest.warns(UserWarning): + ds = open_boutdataset( + datapath, keep_xboundaries=True, keep_yboundaries=True + ) assert "T" in ds