2
2
3
3
import base64
4
4
import warnings
5
- from collections .abc import Iterable
5
+ from collections .abc import Iterable , Sequence
6
6
from enum import Enum
7
7
from functools import cached_property
8
- from typing import TYPE_CHECKING , TypedDict , cast
8
+ from typing import TYPE_CHECKING , Any , TypedDict , cast
9
9
10
10
import numcodecs .abc
11
11
12
12
from zarr .abc .metadata import Metadata
13
13
14
14
if TYPE_CHECKING :
15
- from typing import Any , Literal , Self
15
+ from typing import Literal , Self
16
16
17
17
import numpy .typing as npt
18
18
19
19
from zarr .core .buffer import Buffer , BufferPrototype
20
20
from zarr .core .common import ChunkCoords
21
21
22
22
import json
23
+ import numbers
23
24
from dataclasses import dataclass , field , fields , replace
24
25
25
26
import numcodecs
@@ -146,41 +147,39 @@ def _json_convert(
146
147
raise TypeError
147
148
148
149
zarray_dict = self .to_dict ()
150
+ zarray_dict ["fill_value" ] = _serialize_fill_value (self .fill_value , self .dtype )
149
151
zattrs_dict = zarray_dict .pop ("attributes" , {})
150
152
json_indent = config .get ("json_indent" )
151
153
return {
152
154
ZARRAY_JSON : prototype .buffer .from_bytes (
153
- json .dumps (zarray_dict , default = _json_convert , indent = json_indent ).encode ()
155
+ json .dumps (
156
+ zarray_dict , default = _json_convert , indent = json_indent , allow_nan = False
157
+ ).encode ()
154
158
),
155
159
ZATTRS_JSON : prototype .buffer .from_bytes (
156
- json .dumps (zattrs_dict , indent = json_indent ).encode ()
160
+ json .dumps (zattrs_dict , indent = json_indent , allow_nan = False ).encode ()
157
161
),
158
162
}
159
163
160
164
@classmethod
161
165
def from_dict (cls , data : dict [str , Any ]) -> ArrayV2Metadata :
162
- # make a copy to protect the original from modification
166
+ # Make a copy to protect the original from modification.
163
167
_data = data .copy ()
164
- # check that the zarr_format attribute is correct
168
+ # Check that the zarr_format attribute is correct.
165
169
_ = parse_zarr_format (_data .pop ("zarr_format" ))
166
- dtype = parse_dtype (_data ["dtype" ])
167
170
168
- if dtype .kind in "SV" :
169
- fill_value_encoded = _data .get ("fill_value" )
170
- if fill_value_encoded is not None :
171
- fill_value = base64 .standard_b64decode (fill_value_encoded )
172
- _data ["fill_value" ] = fill_value
173
-
174
- # zarr v2 allowed arbitrary keys here.
175
- # We don't want the ArrayV2Metadata constructor to fail just because someone put an
176
- # extra key in the metadata.
171
+ # zarr v2 allowed arbitrary keys in the metadata.
172
+ # Filter the keys to only those expected by the constructor.
177
173
expected = {x .name for x in fields (cls )}
178
- # https://github.com/zarr-developers/zarr-python/issues/2269
179
- # handle the renames
180
174
expected |= {"dtype" , "chunks" }
181
175
182
176
# check if `filters` is an empty sequence; if so use None instead and raise a warning
183
- if _data ["filters" ] is not None and len (_data ["filters" ]) == 0 :
177
+ filters = _data .get ("filters" )
178
+ if (
179
+ isinstance (filters , Sequence )
180
+ and not isinstance (filters , (str , bytes ))
181
+ and len (filters ) == 0
182
+ ):
184
183
msg = (
185
184
"Found an empty list of filters in the array metadata document. "
186
185
"This is contrary to the Zarr V2 specification, and will cause an error in the future. "
@@ -196,13 +195,6 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata:
196
195
def to_dict (self ) -> dict [str , JSON ]:
197
196
zarray_dict = super ().to_dict ()
198
197
199
- if self .dtype .kind in "SV" and self .fill_value is not None :
200
- # There's a relationship between self.dtype and self.fill_value
201
- # that mypy isn't aware of. The fact that we have S or V dtype here
202
- # means we should have a bytes-type fill_value.
203
- fill_value = base64 .standard_b64encode (cast (bytes , self .fill_value )).decode ("ascii" )
204
- zarray_dict ["fill_value" ] = fill_value
205
-
206
198
_ = zarray_dict .pop ("dtype" )
207
199
dtype_json : JSON
208
200
# In the case of zarr v2, the simplest i.e., '|VXX' dtype is represented as a string
@@ -300,7 +292,26 @@ def parse_metadata(data: ArrayV2Metadata) -> ArrayV2Metadata:
300
292
return data
301
293
302
294
303
- def parse_fill_value (fill_value : object , dtype : np .dtype [Any ]) -> Any :
295
def _parse_structured_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> Any:
    """
    Convert a fill value into a scalar of the given structured dtype.

    Parameters
    ----------
    fill_value : Any
        The value to convert. How it is interpreted depends on its type:

        - ``list`` or ``tuple``: treated as one record and converted to a tuple
          before being handed to numpy.
        - ``bytes``: read as a raw buffer of ``dtype`` via ``np.frombuffer``.
        - ``str``: base64-decoded, then read as a raw buffer of ``dtype``.
        - anything else: passed directly to ``np.array``.
    dtype : np.dtype
        The dtype used to build the scalar.

    Returns
    -------
    Any
        The first (or only) element of the array built from ``fill_value``.

    Raises
    ------
    ValueError
        If any step of the conversion fails. The original exception is
        chained as the cause.
    """
    try:
        if isinstance(fill_value, (list, tuple)):
            # Wrap the record in a one-element list so numpy builds a 1-d array
            # holding a single record rather than iterating over the fields.
            return np.array([tuple(fill_value)], dtype=dtype)[0]
        if isinstance(fill_value, (bytes, str)):
            # Strings carry the raw bytes base64-encoded; bytes are used as-is.
            raw = (
                base64.standard_b64decode(fill_value)
                if isinstance(fill_value, str)
                else fill_value
            )
            return np.frombuffer(raw, dtype=dtype)[0]
        return np.array(fill_value, dtype=dtype)[()]
    except Exception as e:
        raise ValueError(f"Fill_value {fill_value} is not valid for dtype {dtype}.") from e
312
+
313
+
314
+ def parse_fill_value (fill_value : Any , dtype : np .dtype [Any ]) -> Any :
304
315
"""
305
316
Parse a potential fill value into a value that is compatible with the provided dtype.
306
317
@@ -317,13 +328,16 @@ def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any:
317
328
"""
318
329
319
330
if fill_value is None or dtype .hasobject :
320
- # no fill value
321
331
pass
332
+ elif dtype .fields is not None :
333
+ # the dtype is structured (has multiple fields), so the fill_value might be a
334
+ # compound value (e.g., a tuple or dict) that needs field-wise processing.
335
+ # We use _parse_structured_fill_value to correctly convert each component.
336
+ fill_value = _parse_structured_fill_value (fill_value , dtype )
322
337
elif not isinstance (fill_value , np .void ) and fill_value == 0 :
323
338
# this should be compatible across numpy versions for any array type, including
324
339
# structured arrays
325
340
fill_value = np .zeros ((), dtype = dtype )[()]
326
-
327
341
elif dtype .kind == "U" :
328
342
# special case unicode because of encoding issues on Windows if passed through numpy
329
343
# https://github.com/alimanfoo/zarr/pull/172#issuecomment-343782713
@@ -332,6 +346,11 @@ def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any:
332
346
raise ValueError (
333
347
f"fill_value { fill_value !r} is not valid for dtype { dtype } ; must be a unicode string"
334
348
)
349
+ elif dtype .kind in "SV" and isinstance (fill_value , str ):
350
+ fill_value = base64 .standard_b64decode (fill_value )
351
+ elif dtype .kind == "c" and isinstance (fill_value , list ) and len (fill_value ) == 2 :
352
+ complex_val = complex (float (fill_value [0 ]), float (fill_value [1 ]))
353
+ fill_value = np .array (complex_val , dtype = dtype )[()]
335
354
else :
336
355
try :
337
356
if isinstance (fill_value , bytes ) and dtype .kind == "V" :
@@ -347,6 +366,39 @@ def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any:
347
366
return fill_value
348
367
349
368
369
def _serialize_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> JSON:
    """
    Convert a fill value into a form that can be written to JSON.

    Parameters
    ----------
    fill_value : Any
        The fill value to serialize.
    dtype : np.dtype
        The dtype associated with the fill value. Only ``dtype.kind`` is
        inspected, to detect the ``S`` and ``V`` kinds.

    Returns
    -------
    JSON
        - ``None`` when ``fill_value`` is ``None``.
        - A base64-encoded ASCII string when ``dtype.kind`` is ``S`` or ``V``.
        - The output of ``np.datetime_as_string`` for ``np.datetime64`` values.
        - An ``int`` for integral values.
        - A ``float`` for finite real values; the strings ``"NaN"``,
          ``"Infinity"`` or ``"-Infinity"`` for non-finite ones.
        - A two-element list ``[real, imag]`` for complex values, each part
          serialized by this same function.
        - ``fill_value`` unchanged in every other case.
    """
    if fill_value is None:
        return None

    if dtype.kind in "SV":
        # The type checker cannot see the link between dtype and fill_value:
        # an S or V dtype means the fill value should be bytes.
        raw = cast(bytes, fill_value)
        return base64.standard_b64encode(raw).decode("ascii")

    if isinstance(fill_value, np.datetime64):
        return np.datetime_as_string(fill_value)

    # Integral must be tested before Real, since every Integral is also a Real.
    if isinstance(fill_value, numbers.Integral):
        return int(fill_value)

    if isinstance(fill_value, numbers.Real):
        as_float = float(fill_value)
        if np.isnan(as_float):
            return "NaN"
        if np.isinf(as_float):
            return "Infinity" if as_float > 0 else "-Infinity"
        return as_float

    if isinstance(fill_value, numbers.Complex):
        return [
            _serialize_fill_value(component, dtype)
            for component in (fill_value.real, fill_value.imag)
        ]

    return fill_value
400
+
401
+
350
402
def _default_fill_value (dtype : np .dtype [Any ]) -> Any :
351
403
"""
352
404
Get the default fill value for a type.
0 commit comments