Feature request: skipna keyword #11

aaronspring · 2021-08-08T16:58:32Z

in xarray many functions drop indexes where NaN is the value.

in bias_correction I sometimes get ValueError: array must not contain infs or NaNs from https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.lstsq.html

This happens when my input data has NaNs.

Traceback:

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/bias_correction.py in correct(self, method, lower_limit, cdf_threshold, vectorize, dask, **apply_ufunc_kwargs)
    259             )
    260         elif method == "normal_mapping":
--> 261             corrected = xr.apply_ufunc(
    262                 normal_correction,
    263                 self.obs_data,

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/xarray/core/computation.py in apply_ufunc(func, input_core_dims, output_core_dims, exclude_dims, vectorize, join, dataset_join, dataset_fill_value, keep_attrs, kwargs, dask, output_dtypes, output_sizes, meta, dask_gufunc_kwargs, *args)
   1160     # feed datasets apply_variable_ufunc through apply_dataset_vfunc
   1161     elif any(is_dict_like(a) for a in args):
-> 1162         return apply_dataset_vfunc(
   1163             variables_vfunc,
   1164             *args,

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/xarray/core/computation.py in apply_dataset_vfunc(func, signature, join, dataset_join, fill_value, exclude_dims, keep_attrs, *args)
    448     args = [getattr(arg, "data_vars", arg) for arg in args]
    449 
--> 450     result_vars = apply_dict_of_variables_vfunc(
    451         func, *args, signature=signature, join=dataset_join, fill_value=fill_value
    452     )

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/xarray/core/computation.py in apply_dict_of_variables_vfunc(func, signature, join, fill_value, *args)
    392     result_vars = {}
    393     for name, variable_args in zip(names, grouped_by_name):
--> 394         result_vars[name] = func(*variable_args)
    395 
    396     if signature.num_outputs > 1:

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/xarray/core/computation.py in apply_variable_ufunc(func, signature, exclude_dims, dask, output_dtypes, vectorize, keep_attrs, dask_gufunc_kwargs, *args)
    740             )
    741 
--> 742     result_data = func(*input_data)
    743 
    744     if signature.num_outputs == 1:

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/numpy/lib/function_base.py in __call__(self, *args, **kwargs)
   2161             vargs.extend([kwargs[_n] for _n in names])
   2162 
-> 2163         return self._vectorize_call(func=func, args=vargs)
   2164 
   2165     def _get_ufunc_and_otypes(self, func, args):

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/numpy/lib/function_base.py in _vectorize_call(self, func, args)
   2235         """Vectorized call to `func` over positional `args`."""
   2236         if self.signature is not None:
-> 2237             res = self._vectorize_call_with_signature(func, args)
   2238         elif not args:
   2239             res = func()

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/numpy/lib/function_base.py in _vectorize_call_with_signature(self, func, args)
   2275 
   2276         for index in np.ndindex(*broadcast_shape):
-> 2277             results = func(*(arg[index] for arg in args))
   2278 
   2279             n_results = len(results) if isinstance(results, tuple) else 1

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/bias_correction.py in normal_correction(obs_data, mod_data, sce_data, cdf_threshold)
    136     obs_len, mod_len, sce_len = [len(x) for x in [obs_data, mod_data, sce_data]]
    137     obs_mean, mod_mean, sce_mean = [x.mean() for x in [obs_data, mod_data, sce_data]]
--> 138     obs_detrended, mod_detrended, sce_detrended = [
    139         detrend(x) for x in [obs_data, mod_data, sce_data]
    140     ]

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/bias_correction.py in <listcomp>(.0)
    137     obs_mean, mod_mean, sce_mean = [x.mean() for x in [obs_data, mod_data, sce_data]]
    138     obs_detrended, mod_detrended, sce_detrended = [
--> 139         detrend(x) for x in [obs_data, mod_data, sce_data]
    140     ]
    141     obs_norm, mod_norm, sce_norm = [

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/scipy/signal/signaltools.py in detrend(data, axis, type, bp, overwrite_data)
   3482             A[:, 0] = np.cast[dtype](np.arange(1, Npts + 1) * 1.0 / Npts)
   3483             sl = slice(bp[m], bp[m + 1])
-> 3484             coef, resids, rank, s = linalg.lstsq(A, newdata[sl])
   3485             newdata[sl] = newdata[sl] - np.dot(A, coef)
   3486         # Put data back in original shape.

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/scipy/linalg/basic.py in lstsq(a, b, cond, overwrite_a, overwrite_b, check_finite, lapack_driver)
   1145     """
   1146     a1 = _asarray_validated(a, check_finite=check_finite)
-> 1147     b1 = _asarray_validated(b, check_finite=check_finite)
   1148     if len(a1.shape) != 2:
   1149         raise ValueError('Input array a should be 2D')

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/scipy/_lib/_util.py in _asarray_validated(a, check_finite, sparse_ok, objects_ok, mask_ok, as_inexact)
    291             raise ValueError('masked arrays are not supported')
    292     toarray = np.asarray_chkfinite if check_finite else np.asarray
--> 293     a = toarray(a)
    294     if not objects_ok:
    295         if a.dtype is np.dtype('O'):

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/numpy/lib/function_base.py in asarray_chkfinite(a, dtype, order)
    486     a = asarray(a, dtype=dtype, order=order)
    487     if a.dtype.char in typecodes['AllFloat'] and not np.isfinite(a).all():
--> 488         raise ValueError(
    489             "array must not contain infs or NaNs")
    490     return a

ValueError: array must not contain infs or NaNs

How to fix: have a check for NaNs and drop them in reference and model or data_to_be_corrected

The text was updated successfully, but these errors were encountered:

Yassminaa · 2022-06-09T13:48:16Z

I wonder if this issue is solved?

aaronspring · 2022-06-11T05:15:23Z

No. Isn’t

Anjali-Thomas95 · 2022-08-17T07:40:24Z

Hi, I am facing the same trouble. If there is NaN values in my data, the bias correction won't work properly.

aaronspring · 2022-08-17T09:56:45Z

You could try to drop NaNs in all arrays where one array has NaNs before using bias_correction. Once that works you could share that code here and we can merged it with the skipna keyword. PR welcome.

Yassminaa · 2022-08-17T14:33:22Z

the point is NANs come because the timeseries of the model and historical and refernce is not equal.
e.g. if you have historical and reference data for 1980-2015, the model 2016 -2022.
in this case you deal with non equal timeseries. to make it consistent in time you have to fill the data in nan in long timeseries 1980-2022.

aaronspring · 2022-08-18T06:26:42Z

Right. So in xarray logic usually the non overlapping part would be dropped. Would be nice to implement that here as well.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Feature request: skipna keyword #11

Feature request: skipna keyword #11

aaronspring commented Aug 8, 2021

Yassminaa commented Jun 9, 2022

aaronspring commented Jun 11, 2022

Anjali-Thomas95 commented Aug 17, 2022

aaronspring commented Aug 17, 2022

Yassminaa commented Aug 17, 2022

aaronspring commented Aug 18, 2022

Feature request: skipna keyword #11

Feature request: skipna keyword #11

Comments

aaronspring commented Aug 8, 2021

Yassminaa commented Jun 9, 2022

aaronspring commented Jun 11, 2022

Anjali-Thomas95 commented Aug 17, 2022

aaronspring commented Aug 17, 2022

Yassminaa commented Aug 17, 2022

aaronspring commented Aug 18, 2022