Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,8 @@ In cases with mixed-resolution inputs, the highest resolution is used:

.. warning:: Many users will now get "M8[us]" dtype data in cases when they used to get "M8[ns]". For most use cases they should not notice a difference. One big exception is converting to integers, which will give integers 1000x smaller.

Similarly, the :class:`Timedelta` constructor and :func:`to_timedelta` with a string input now default to a microsecond unit, using a nanosecond unit only in cases that actually have nanosecond precision.

.. _whatsnew_300.api_breaking.concat_datetime_sorting:

:func:`concat` no longer ignores ``sort`` when all objects have a :class:`DatetimeIndex`
Expand Down
45 changes: 38 additions & 7 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import collections
import re
import warnings

from pandas.util._decorators import set_module
Expand Down Expand Up @@ -448,11 +449,16 @@ def array_to_timedelta64(
ival = parse_iso_format_string(item)
else:
ival = parse_timedelta_string(item)
if not needs_nano_unit(ival, item):
item_reso = NPY_DATETIMEUNIT.NPY_FR_us
ival = ival // 1000
else:
item_reso = NPY_FR_ns

item_reso = NPY_FR_ns
state.update_creso(item_reso)
if infer_reso:
creso = state.creso
if ival != NPY_NAT:
state.update_creso(item_reso)
if infer_reso:
creso = state.creso

elif is_tick_object(item):
item_reso = get_supported_reso(item._creso)
Expand Down Expand Up @@ -722,6 +728,24 @@ cdef timedelta_from_spec(object number, object frac, object unit):
return cast_from_unit(float(n), unit)


cdef bint needs_nano_unit(int64_t ival, str item):
    """
    Decide whether a parsed timedelta string must be stored with nanosecond
    unit, or can safely be downcast to microsecond unit.

    Parameters
    ----------
    ival : int64_t
        The value parsed from `item`, expressed in nanoseconds.
    item : str
        The original timedelta string the value was parsed from.

    Returns
    -------
    bint
        True if nanosecond unit is required, i.e. any of:

        - the parsed nanosecond value has sub-microsecond content
          (not a multiple of 1000) -> certainly needs nano
        - the seconds part in the string contains more than 6 decimals,
          i.e. has trailing zeros beyond the microsecond part
          (e.g. "0.123456000 s") -> treat as nano for consistency
        - the string explicitly mentions nanoseconds (e.g. "1000 ns")
    """
    # TODO: more performant way of doing this check?
    if ival % 1000 != 0:
        # Value itself has sub-microsecond precision; no string inspection needed.
        return True
    # NOTE(review): `re.search` returns a Match object (truthy) or None; the
    # `bint` return type coerces that to True/False.
    # NOTE(review): the plain substring test `"ns" in item` presumably can
    # false-positive on strings containing "ns" in another token (e.g. "mins")
    # — TODO confirm whether such spellings reach this function after parsing.
    return re.search(r"\.\d{7}", item) or "ns" in item or "nano" in item.lower()


cpdef inline str parse_timedelta_unit(str unit):
"""
Parameters
Expand Down Expand Up @@ -2121,10 +2145,17 @@ class Timedelta(_Timedelta):
if (len(value) > 0 and value[0] == "P") or (
len(value) > 1 and value[:2] == "-P"
):
value = parse_iso_format_string(value)
ival = parse_iso_format_string(value)
else:
ival = parse_timedelta_string(value)

if not needs_nano_unit(ival, value):
# If we don't specifically need nanosecond resolution, default
# to microsecond like we do for datetimes
value = np.timedelta64(ival // 1000, "us")
return cls(value)
else:
value = parse_timedelta_string(value)
value = np.timedelta64(value)
value = np.timedelta64(ival, "ns")
elif PyDelta_Check(value):
# pytimedelta object -> microsecond resolution
new_value = delta_to_nanoseconds(
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def _astype_nansafe(
# bc we know arr.dtype == object, this is equivalent to
# `np.asarray(to_timedelta(arr))`, but using a lower-level API that
# does not require a circular import.
tdvals = array_to_timedelta64(arr).view("m8[ns]")
tdvals = array_to_timedelta64(arr)

tda = ensure_wrapped_if_datetimelike(tdvals)
return tda.astype(dtype, copy=False)._ndarray
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8657,7 +8657,8 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt):
rvalues = series._values
if not isinstance(rvalues, np.ndarray):
# TODO(EA2D): no need to special-case with 2D EAs
if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]"):
if lib.is_np_dtype(rvalues.dtype, "mM"):
# i.e. DatetimeArray[tznaive] or TimedeltaArray
# We can losslessly+cheaply cast to ndarray
rvalues = np.asarray(rvalues)
else:
Expand Down
21 changes: 13 additions & 8 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,18 +230,23 @@ def get_loc(self, key):

return Index.get_loc(self, key)

# error: Return type "tuple[Timedelta | NaTType, None]" of "_parse_with_reso"
# incompatible with return type "tuple[datetime, Resolution]" in supertype
# "DatetimeIndexOpsMixin"
def _parse_with_reso(self, label: str) -> tuple[Timedelta | NaTType, None]: # type: ignore[override]
# the "with_reso" is a no-op for TimedeltaIndex
# error: Return type "tuple[Timedelta | NaTType, Resolution]" of
# "_parse_with_reso" incompatible with return type
# "tuple[datetime, Resolution]" in supertype
# "pandas.core.indexes.datetimelike.DatetimeIndexOpsMixin"
def _parse_with_reso(self, label: str) -> tuple[Timedelta | NaTType, Resolution]: # type: ignore[override]
parsed = Timedelta(label)
return parsed, None
reso = Resolution.get_reso_from_freqstr(parsed.unit)
return parsed, reso

def _parsed_string_to_bounds(self, reso, parsed: Timedelta):
def _parsed_string_to_bounds(self, reso: Resolution, parsed: Timedelta):
# reso is unused, included to match signature of DTI/PI
lbound = parsed.round(parsed.resolution_string)
rbound = lbound + to_offset(parsed.resolution_string) - Timedelta(1, "ns")
rbound = (
lbound
+ to_offset(parsed.resolution_string)
- Timedelta(1, unit=self.unit).as_unit(self.unit)
)
return lbound, rbound

# -------------------------------------------------------------------
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -944,8 +944,9 @@ def nanstd(
>>> nanops.nanstd(s.values)
1.0
"""
if values.dtype == "M8[ns]":
values = values.view("m8[ns]")
if values.dtype.kind == "M":
unit = np.datetime_data(values.dtype)[0]
values = values.view(f"m8[{unit}]")

orig_dtype = values.dtype
values, mask = _get_values(values, skipna, mask=mask)
Expand Down
8 changes: 6 additions & 2 deletions pandas/core/ops/array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,9 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape):
# Avoid possible ambiguities with pd.NaT
# GH 52295
if is_unitless(obj.dtype):
obj = obj.astype("datetime64[ns]")
# Use second resolution to ensure that the result of e.g.
# `left - np.datetime64("NaT")` retains the unit of left.unit
obj = obj.astype("datetime64[s]")
elif not is_supported_dtype(obj.dtype):
new_dtype = get_supported_dtype(obj.dtype)
obj = obj.astype(new_dtype)
Expand All @@ -563,7 +565,9 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape):
# we broadcast and wrap in a TimedeltaArray
# GH 52295
if is_unitless(obj.dtype):
obj = obj.astype("timedelta64[ns]")
# Use second resolution to ensure that the result of e.g.
# `left + np.timedelta64("NaT")` retains the unit of left.unit
obj = obj.astype("timedelta64[s]")
elif not is_supported_dtype(obj.dtype):
new_dtype = get_supported_dtype(obj.dtype)
obj = obj.astype(new_dtype)
Expand Down
16 changes: 11 additions & 5 deletions pandas/plotting/_matplotlib/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
from matplotlib.axis import Axis

from pandas._libs.tslibs.offsets import BaseOffset
from pandas._typing import TimeUnit


_mpl_units: dict = {} # Cache for units overwritten by us
Expand Down Expand Up @@ -1099,18 +1100,22 @@ class TimeSeries_TimedeltaFormatter(mpl.ticker.Formatter): # pyright: ignore[re
Formats the ticks along an axis controlled by a :class:`TimedeltaIndex`.
"""

def __init__(self, unit: TimeUnit = "ns"):
self.unit = unit
super().__init__()

axis: Axis

@staticmethod
def format_timedelta_ticks(x, pos, n_decimals: int) -> str:
def format_timedelta_ticks(x, pos, n_decimals: int, exp: int) -> str:
"""
Convert seconds to 'D days HH:MM:SS.F'
"""
s, ns = divmod(x, 10**9) # TODO(non-nano): this looks like it assumes ns
s, ns = divmod(x, 10**exp)
m, s = divmod(s, 60)
h, m = divmod(m, 60)
d, h = divmod(h, 24)
decimals = int(ns * 10 ** (n_decimals - 9))
decimals = int(ns * 10 ** (n_decimals - exp))
s = f"{int(h):02d}:{int(m):02d}:{int(s):02d}"
if n_decimals > 0:
s += f".{decimals:0{n_decimals}d}"
Expand All @@ -1119,6 +1124,7 @@ def format_timedelta_ticks(x, pos, n_decimals: int) -> str:
return s

def __call__(self, x, pos: int | None = 0) -> str:
exp = {"ns": 9, "us": 6, "ms": 3, "s": 0}[self.unit]
(vmin, vmax) = tuple(self.axis.get_view_interval())
n_decimals = min(int(np.ceil(np.log10(100 * 10**9 / abs(vmax - vmin)))), 9)
return self.format_timedelta_ticks(x, pos, n_decimals)
n_decimals = min(int(np.ceil(np.log10(100 * 10**exp / abs(vmax - vmin)))), exp)
return self.format_timedelta_ticks(x, pos, n_decimals, exp)
2 changes: 1 addition & 1 deletion pandas/plotting/_matplotlib/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ def format_dateaxis(
subplot.format_coord = functools.partial(_format_coord, freq)

elif isinstance(index, ABCTimedeltaIndex):
subplot.xaxis.set_major_formatter(TimeSeries_TimedeltaFormatter())
subplot.xaxis.set_major_formatter(TimeSeries_TimedeltaFormatter(index.unit))
else:
raise TypeError("index type not supported")

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@ def test_apply_non_numpy_dtype():

result = df.apply(lambda x: x + pd.Timedelta("1day"))
expected = DataFrame(
{"dt": date_range("2015-01-02", periods=3, tz="Europe/Brussels", unit="ns")}
{"dt": date_range("2015-01-02", periods=3, tz="Europe/Brussels")}
)
tm.assert_frame_equal(result, expected)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/apply/test_series_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def test_apply_box_td64():
# timedelta
vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
ser = Series(vals)
assert ser.dtype == "timedelta64[ns]"
assert ser.dtype == "timedelta64[us]"
res = ser.apply(lambda x: f"{type(x).__name__}_{x.days}", by_row="compat")
exp = Series(["Timedelta_1", "Timedelta_2"])
tm.assert_series_equal(res, exp)
Expand Down
29 changes: 15 additions & 14 deletions pandas/tests/arithmetic/test_datetime64.py
Original file line number Diff line number Diff line change
Expand Up @@ -870,7 +870,7 @@ def test_dt64arr_sub_timedeltalike_scalar(
tm.assert_equal(rng, expected)

def test_dt64_array_sub_dt_with_different_timezone(self, box_with_array):
t1 = date_range("20130101", periods=3, unit="ns").tz_localize("US/Eastern")
t1 = date_range("20130101", periods=3).tz_localize("US/Eastern")
t1 = tm.box_expected(t1, box_with_array)
t2 = Timestamp("20130101").tz_localize("CET")
tnaive = Timestamp(20130101)
Expand All @@ -897,11 +897,11 @@ def test_dt64_array_sub_dt_with_different_timezone(self, box_with_array):
tnaive - t1

def test_dt64_array_sub_dt64_array_with_different_timezone(self, box_with_array):
t1 = date_range("20130101", periods=3, unit="ns").tz_localize("US/Eastern")
t1 = date_range("20130101", periods=3).tz_localize("US/Eastern")
t1 = tm.box_expected(t1, box_with_array)
t2 = date_range("20130101", periods=3, unit="ns").tz_localize("CET")
t2 = date_range("20130101", periods=3).tz_localize("CET")
t2 = tm.box_expected(t2, box_with_array)
tnaive = date_range("20130101", periods=3, unit="ns")
tnaive = date_range("20130101", periods=3)

result = t1 - t2
expected = TimedeltaIndex(
Expand Down Expand Up @@ -947,11 +947,11 @@ def test_dt64arr_add_sub_td64_nat(self, box_with_array, tz_naive_fixture):

def test_dt64arr_add_sub_td64ndarray(self, tz_naive_fixture, box_with_array):
tz = tz_naive_fixture
dti = date_range("2016-01-01", periods=3, tz=tz, unit="ns")
dti = date_range("2016-01-01", periods=3, tz=tz)
tdi = TimedeltaIndex(["-1 Day", "-1 Day", "-1 Day"])
tdarr = tdi.values

expected = date_range("2015-12-31", "2016-01-02", periods=3, tz=tz, unit="ns")
expected = date_range("2015-12-31", "2016-01-02", periods=3, tz=tz)

dtarr = tm.box_expected(dti, box_with_array)
expected = tm.box_expected(expected, box_with_array)
Expand All @@ -961,7 +961,7 @@ def test_dt64arr_add_sub_td64ndarray(self, tz_naive_fixture, box_with_array):
result = tdarr + dtarr
tm.assert_equal(result, expected)

expected = date_range("2016-01-02", "2016-01-04", periods=3, tz=tz, unit="ns")
expected = date_range("2016-01-02", "2016-01-04", periods=3, tz=tz)
expected = tm.box_expected(expected, box_with_array)

result = dtarr - tdarr
Expand All @@ -970,6 +970,7 @@ def test_dt64arr_add_sub_td64ndarray(self, tz_naive_fixture, box_with_array):
[
"cannot subtract DatetimeArray from ndarray",
"cannot subtract a datelike from a TimedeltaArray",
"cannot subtract DatetimeArray from Timedelta",
]
)
with pytest.raises(TypeError, match=msg):
Expand All @@ -991,7 +992,7 @@ def test_dt64arr_add_sub_td64ndarray(self, tz_naive_fixture, box_with_array):
)
def test_dt64arr_sub_dtscalar(self, box_with_array, ts):
# GH#8554, GH#22163 DataFrame op should _not_ return dt64 dtype
idx = date_range("2013-01-01", periods=3, unit="ns")._with_freq(None)
idx = date_range("2013-01-01", periods=3)._with_freq(None)
idx = tm.box_expected(idx, box_with_array)

expected = TimedeltaIndex(["0 Days", "1 Day", "2 Days"])
Expand Down Expand Up @@ -1892,7 +1893,7 @@ def test_sub_single_tz(self, unit):
def test_dt64tz_series_sub_dtitz(self):
# GH#19071 subtracting tzaware DatetimeIndex from tzaware Series
# (with same tz) raises, fixed by #19024
dti = date_range("1999-09-30", periods=10, tz="US/Pacific", unit="ns")
dti = date_range("1999-09-30", periods=10, tz="US/Pacific")
ser = Series(dti)
expected = Series(TimedeltaIndex(["0days"] * 10))

Expand Down Expand Up @@ -2042,7 +2043,7 @@ def test_dti_add_tdi(self, tz_naive_fixture):
tz = tz_naive_fixture
dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10)
tdi = pd.timedelta_range("0 days", periods=10)
expected = date_range("2017-01-01", periods=10, tz=tz, unit="ns")
expected = date_range("2017-01-01", periods=10, tz=tz)
expected = expected._with_freq(None)

# add with TimedeltaIndex
Expand All @@ -2064,7 +2065,7 @@ def test_dti_iadd_tdi(self, tz_naive_fixture):
tz = tz_naive_fixture
dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10)
tdi = pd.timedelta_range("0 days", periods=10)
expected = date_range("2017-01-01", periods=10, tz=tz, unit="ns")
expected = date_range("2017-01-01", periods=10, tz=tz)
expected = expected._with_freq(None)

# iadd with TimedeltaIndex
Expand All @@ -2090,7 +2091,7 @@ def test_dti_sub_tdi(self, tz_naive_fixture):
tz = tz_naive_fixture
dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10)
tdi = pd.timedelta_range("0 days", periods=10)
expected = date_range("2017-01-01", periods=10, tz=tz, freq="-1D", unit="ns")
expected = date_range("2017-01-01", periods=10, tz=tz, freq="-1D")
expected = expected._with_freq(None)

# sub with TimedeltaIndex
Expand Down Expand Up @@ -2479,11 +2480,11 @@ def test_non_nano_dt64_addsub_np_nat_scalars_unitless():
# TODO: Can we default to the ser unit?
ser = Series([1233242342344, 232432434324, 332434242344], dtype="datetime64[ms]")
result = ser - np.datetime64("nat")
expected = Series([NaT] * 3, dtype="timedelta64[ns]")
expected = Series([NaT] * 3, dtype="timedelta64[ms]")
tm.assert_series_equal(result, expected)

result = ser + np.timedelta64("nat")
expected = Series([NaT] * 3, dtype="datetime64[ns]")
expected = Series([NaT] * 3, dtype="datetime64[ms]")
tm.assert_series_equal(result, expected)


Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,9 @@ def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box_with_array
# i.e. resolution is lower -> use lowest supported resolution
dtype = np.dtype("m8[s]")
expected = expected.astype(dtype)
elif type(three_days) is timedelta:
elif type(three_days) is timedelta or (
isinstance(three_days, Timedelta) and three_days.unit == "us"
):
expected = expected.astype("m8[us]")
elif isinstance(
three_days,
Expand Down
8 changes: 2 additions & 6 deletions pandas/tests/arithmetic/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -1361,12 +1361,8 @@ def test_period_add_timestamp_raises(self, box_with_array):
arr + ts
with pytest.raises(TypeError, match=msg):
ts + arr
if box_with_array is pd.DataFrame:
# TODO: before implementing resolution-inference we got the same
# message with DataFrame and non-DataFrame. Why did that change?
msg = "cannot add PeriodArray and Timestamp"
else:
msg = "cannot add PeriodArray and DatetimeArray"

msg = "cannot add PeriodArray and DatetimeArray"
with pytest.raises(TypeError, match=msg):
arr + Series([ts])
with pytest.raises(TypeError, match=msg):
Expand Down
Loading
Loading