Skip to content

Commit fd7bfaa

Browse files
authored
BUG: Fix infer_dtype result for float with embedded pd.NA (#61624)
1 parent e1328fc commit fd7bfaa

File tree

6 files changed

+18
-9
lines changed

6 files changed

+18
-9
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -725,6 +725,7 @@ Timezones
725725

726726
Numeric
727727
^^^^^^^
728+
- Bug in :func:`api.types.infer_dtype` returning ``"mixed-integer-float"`` instead of ``"floating"`` for floats mixed with ``pd.NA`` (:issue:`61621`)
728729
- Bug in :meth:`DataFrame.corr` where numerical precision errors resulted in correlations above ``1.0`` (:issue:`61120`)
729730
- Bug in :meth:`DataFrame.cov` raises a ``TypeError`` instead of returning potentially incorrect results or other errors (:issue:`53115`)
730731
- Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` with a list-like ``q`` produced an empty result (:issue:`59035`)

pandas/_libs/lib.pyi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def is_time_array(values: np.ndarray, skipna: bool = ...): ...
6060
def is_date_array(values: np.ndarray, skipna: bool = ...): ...
6161
def is_datetime_array(values: np.ndarray, skipna: bool = ...): ...
6262
def is_string_array(values: np.ndarray, skipna: bool = ...): ...
63-
def is_float_array(values: np.ndarray): ...
63+
def is_float_array(values: np.ndarray, skipna: bool = ...): ...
6464
def is_integer_array(values: np.ndarray, skipna: bool = ...): ...
6565
def is_bool_array(values: np.ndarray, skipna: bool = ...): ...
6666
def fast_multiget(

pandas/_libs/lib.pyx

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1752,7 +1752,7 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
17521752
return "complex"
17531753

17541754
elif util.is_float_object(val):
1755-
if is_float_array(values):
1755+
if is_float_array(values, skipna=skipna):
17561756
return "floating"
17571757
elif is_integer_float_array(values, skipna=skipna):
17581758
if is_integer_na_array(values, skipna=skipna):
@@ -1954,9 +1954,11 @@ cdef class FloatValidator(Validator):
19541954

19551955

19561956
# Note: only python-exposed for tests
1957-
cpdef bint is_float_array(ndarray values):
1957+
cpdef bint is_float_array(ndarray values, bint skipna=True):
19581958
cdef:
1959-
FloatValidator validator = FloatValidator(values.size, values.dtype)
1959+
FloatValidator validator = FloatValidator(values.size,
1960+
values.dtype,
1961+
skipna=skipna)
19601962
return validator.validate(values)
19611963

19621964

pandas/core/dtypes/cast.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1086,10 +1086,7 @@ def convert_dtypes(
10861086
elif (
10871087
infer_objects
10881088
and input_array.dtype == object
1089-
and (
1090-
isinstance(inferred_dtype, str)
1091-
and inferred_dtype == "mixed-integer-float"
1092-
)
1089+
and (isinstance(inferred_dtype, str) and inferred_dtype == "floating")
10931090
):
10941091
inferred_dtype = pandas_dtype_func("Float64")
10951092

pandas/tests/dtypes/test_inference.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1396,6 +1396,15 @@ def test_infer_dtype_period_with_na(self, na_value):
13961396
arr = np.array([na_value, Period("2011-01", freq="D"), na_value])
13971397
assert lib.infer_dtype(arr, skipna=True) == "period"
13981398

1399+
@pytest.mark.parametrize("na_value", [pd.NA, np.nan])
1400+
def test_infer_dtype_numeric_with_na(self, na_value):
1401+
# GH61621
1402+
ser = Series([1, 2, na_value], dtype=object)
1403+
assert lib.infer_dtype(ser, skipna=True) == "integer"
1404+
1405+
ser = Series([1.0, 2.0, na_value], dtype=object)
1406+
assert lib.infer_dtype(ser, skipna=True) == "floating"
1407+
13991408
def test_infer_dtype_all_nan_nat_like(self):
14001409
arr = np.array([np.nan, np.nan])
14011410
assert lib.infer_dtype(arr, skipna=True) == "floating"

pandas/tests/extension/test_arrow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3081,7 +3081,7 @@ def test_infer_dtype_pyarrow_dtype(data, request):
30813081
res = lib.infer_dtype(data)
30823082
assert res != "unknown-array"
30833083

3084-
if data._hasna and res in ["floating", "datetime64", "timedelta64"]:
3084+
if data._hasna and res in ["datetime64", "timedelta64"]:
30853085
mark = pytest.mark.xfail(
30863086
reason="in infer_dtype pd.NA is not ignored in these cases "
30873087
"even with skipna=True in the list(data) check below"

0 commit comments

Comments
 (0)