I'm getting a KeyError traceback for several different HDF5 files:
File ~/GitHub/VirtualiZarr/virtualizarr/xarray.py:114, in open_virtual_dataset(filepath, filetype, drop_variables, loadable_variables, indexes, virtual_array_class, reader_options)
106 else:
107 # this is the only place we actually always need to use kerchunk directly
108 # TODO avoid even reading byte ranges for variables that will be dropped later anyway?
109 vds_refs = kerchunk.read_kerchunk_references_from_file(
110 filepath=filepath,
111 filetype=filetype,
112 reader_options=reader_options,
113 )
--> 114 virtual_vars = virtual_vars_from_kerchunk_refs(
115 vds_refs,
116 drop_variables=drop_variables + loadable_variables,
117 virtual_array_class=virtual_array_class,
118 )
119 ds_attrs = kerchunk.fully_decode_arr_refs(vds_refs["refs"]).get(".zattrs", {})
120 coord_names = ds_attrs.pop("coordinates", [])
File ~/GitHub/VirtualiZarr/virtualizarr/xarray.py:247, in virtual_vars_from_kerchunk_refs(refs, drop_variables, virtual_array_class)
241 drop_variables = []
242 var_names_to_keep = [
243 var_name for var_name in var_names if var_name not in drop_variables
244 ]
246 vars = {
--> 247 var_name: variable_from_kerchunk_refs(refs, var_name, virtual_array_class)
248 for var_name in var_names_to_keep
249 }
250 return vars
File ~/GitHub/VirtualiZarr/virtualizarr/xarray.py:293, in variable_from_kerchunk_refs(refs, var_name, virtual_array_class)
290 """Create a single xarray Variable by reading specific keys of a kerchunk references dict."""
292 arr_refs = kerchunk.extract_array_refs(refs, var_name)
--> 293 chunk_dict, zarray, zattrs = kerchunk.parse_array_refs(arr_refs)
295 manifest = ChunkManifest._from_kerchunk_chunk_dict(chunk_dict)
297 # we want to remove the _ARRAY_DIMENSIONS from the final variables' .attrs
File ~/GitHub/VirtualiZarr/virtualizarr/kerchunk.py:186, in parse_array_refs(arr_refs)
183 def parse_array_refs(
184 arr_refs: KerchunkArrRefs,
185 ) -> tuple[dict, ZArray, ZAttrs]:
--> 186 zarray = ZArray.from_kerchunk_refs(arr_refs.pop(".zarray"))
187 zattrs = arr_refs.pop(".zattrs", {})
188 chunk_dict = arr_refs
KeyError: '.zarray'
I'm getting a KeyError traceback for several different HDF5 files.
For example, it happens when I try to use the test file from this kerchunk test:
https://github.com/fsspec/kerchunk/blob/ae692fead51a216691e4db9a67c99194c5ba8e14/kerchunk/tests/test_hdf.py#L307
File ~/GitHub/VirtualiZarr/virtualizarr/xarray.py:114, in open_virtual_dataset(filepath, filetype, drop_variables, loadable_variables, indexes, virtual_array_class, reader_options)
106 else:
107 # this is the only place we actually always need to use kerchunk directly
108 # TODO avoid even reading byte ranges for variables that will be dropped later anyway?
109 vds_refs = kerchunk.read_kerchunk_references_from_file(
110 filepath=filepath,
111 filetype=filetype,
112 reader_options=reader_options,
113 )
--> 114 virtual_vars = virtual_vars_from_kerchunk_refs(
115 vds_refs,
116 drop_variables=drop_variables + loadable_variables,
117 virtual_array_class=virtual_array_class,
118 )
119 ds_attrs = kerchunk.fully_decode_arr_refs(vds_refs["refs"]).get(".zattrs", {})
120 coord_names = ds_attrs.pop("coordinates", [])
File ~/GitHub/VirtualiZarr/virtualizarr/xarray.py:247, in virtual_vars_from_kerchunk_refs(refs, drop_variables, virtual_array_class)
241 drop_variables = []
242 var_names_to_keep = [
243 var_name for var_name in var_names if var_name not in drop_variables
244 ]
246 vars = {
--> 247 var_name: variable_from_kerchunk_refs(refs, var_name, virtual_array_class)
248 for var_name in var_names_to_keep
249 }
250 return vars
File ~/GitHub/VirtualiZarr/virtualizarr/xarray.py:293, in variable_from_kerchunk_refs(refs, var_name, virtual_array_class)
290 """Create a single xarray Variable by reading specific keys of a kerchunk references dict."""
292 arr_refs = kerchunk.extract_array_refs(refs, var_name)
--> 293 chunk_dict, zarray, zattrs = kerchunk.parse_array_refs(arr_refs)
295 manifest = ChunkManifest._from_kerchunk_chunk_dict(chunk_dict)
297 # we want to remove the _ARRAY_DIMENSIONS from the final variables' .attrs
File ~/GitHub/VirtualiZarr/virtualizarr/kerchunk.py:186, in parse_array_refs(arr_refs)
183 def parse_array_refs(
184 arr_refs: KerchunkArrRefs,
185 ) -> tuple[dict, ZArray, ZAttrs]:
--> 186 zarray = ZArray.from_kerchunk_refs(arr_refs.pop(".zarray"))
187 zattrs = arr_refs.pop(".zattrs", {})
188 chunk_dict = arr_refs
KeyError: '.zarray'