Skip to content

Refactor IO api for clarity and allow computing chunks directly on index #17

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Apr 9, 2025
35 changes: 31 additions & 4 deletions swc/aeon/io/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,52 @@
from pathlib import Path

import pandas as pd
from typing_extensions import deprecated

"""The duration of each acquisition chunk, in whole hours."""
CHUNK_DURATION = 1
"""The duration of each acquisition chunk, in whole hours."""

REFERENCE_EPOCH = datetime.datetime(1904, 1, 1)
"""The reference epoch for UTC harp time."""


@deprecated("Please use the to_datetime function instead.")
def aeon(seconds):
    """Converts a Harp timestamp, in seconds, to a datetime object.

    .. deprecated:: 0.2.0
        This function is deprecated and will be removed in a future release.
        Use :func:`to_datetime` instead.

    :param seconds: The Harp timestamp(s), in seconds; forwarded unchanged to :func:`to_datetime`.
    :return: The value returned by :func:`to_datetime` for ``seconds``.
    """
    # Thin backward-compatibility alias: all conversion logic lives in to_datetime.
    return to_datetime(seconds)  # pragma: no cover


def to_datetime(seconds):
    """Converts a Harp timestamp, in seconds, to a datetime object.

    :param seconds: The number of seconds elapsed since ``REFERENCE_EPOCH``. Any value
        accepted by :func:`pandas.to_timedelta` with unit ``"s"`` can be used.
    :return: ``REFERENCE_EPOCH`` shifted forward by the given number of seconds.
    """
    # Use the shared module constant rather than a hard-coded epoch so that
    # to_datetime and to_seconds always agree on the reference point.
    return REFERENCE_EPOCH + pd.to_timedelta(seconds, "s")


def to_seconds(time):
    """Converts a datetime object to a Harp timestamp, in seconds.

    :param time: A datetime object or series of datetimes to convert.
    :return: The number of seconds elapsed between ``REFERENCE_EPOCH`` and ``time``.
        For a series input the result is a series of seconds.
    """
    elapsed = time - REFERENCE_EPOCH
    if isinstance(time, pd.Series):
        # A Series only exposes timedelta methods through the ``.dt`` accessor.
        return elapsed.dt.total_seconds()
    return elapsed.total_seconds()


def chunk(time):
    """Returns the whole hour acquisition chunk for a measurement timestamp.

    :param datetime, DatetimeIndex or Series time:
        A datetime object, index or series specifying the measurement timestamps.
    :return: A datetime object, index or series specifying the acquisition chunk
        for the measurement timestamp.
    """
    if isinstance(time, pd.Series):
        # Series expose datetime fields through the ``.dt`` accessor.
        hour = CHUNK_DURATION * (time.dt.hour // CHUNK_DURATION)
        return pd.to_datetime(time.dt.date) + pd.to_timedelta(hour, "h")

    if isinstance(time, pd.DatetimeIndex):
        # An index exposes the same fields directly as attributes.
        hour = CHUNK_DURATION * (time.hour // CHUNK_DURATION)
        return pd.to_datetime(time.date) + pd.to_timedelta(hour, "h")

    # Scalar case: round the hour down to the start of its chunk and rebuild
    # the timestamp from the calendar date plus that hour.
    hour = CHUNK_DURATION * (time.hour // CHUNK_DURATION)
    return pd.to_datetime(datetime.datetime.combine(time.date(), datetime.time(hour=hour)))
Expand Down Expand Up @@ -54,7 +81,7 @@ def chunk_key(file):

def _set_index(data):
    """Ensures ``data`` is indexed by datetime and names the index ``"time"``.

    The frame is modified in place; nothing is returned.

    :param data: The data frame whose index should be normalised.
    """
    if not isinstance(data.index, pd.DatetimeIndex):
        # A non-datetime index is converted with to_datetime, i.e. treated as
        # Harp timestamps in seconds.
        data.index = to_datetime(data.index)
    # NOTE(review): indentation was lost in this view; naming the index is assumed
    # to apply unconditionally (outside the ``if``) - confirm against the repository.
    data.index.name = "time"


Expand Down
41 changes: 41 additions & 0 deletions tests/io/test_api.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
"""Tests for the aeon API."""

from pathlib import Path
from typing import cast

import pandas as pd
import pytest
from pandas import testing as tm

from swc import aeon
from swc.aeon.io.api import chunk, to_datetime, to_seconds
from tests.schema import exp02, social03

monotonic_path = Path(__file__).parent.parent / "data" / "monotonic"
Expand Down Expand Up @@ -58,5 +61,43 @@ def test_pose_load_nonmonotonic_data_time_start_only_sort_fallback():
assert data.index.is_monotonic_increasing


@pytest.mark.parametrize(
    "seconds",
    [
        0,  # Edge case: REFERENCE_EPOCH
        123456789,  # Arbitrary value
        pd.Series([0.0, 123456789.0]),  # Series value
    ],
)
def test_datetime_seconds_conversion(seconds):
    """Check that to_seconds(to_datetime(x)) returns x for scalar and series inputs."""
    # test round-trip conversion
    converted_datetime = to_datetime(seconds)
    converted_seconds = to_seconds(converted_datetime)
    if isinstance(seconds, pd.Series):
        # Series need the pandas helper: ``==`` would compare element-wise.
        tm.assert_series_equal(converted_seconds, seconds)
    else:
        assert converted_seconds == seconds


@pytest.mark.parametrize(
    "time",
    [
        pd.Timestamp(0),  # Datetime value
        pd.Series([pd.to_datetime(0)]),  # Series value
        pd.DatetimeIndex([pd.to_datetime(0)]),  # Datetime index value
    ],
)
def test_chunk_identity_conversion(time):
    """Check that chunk() returns a whole-hour timestamp unchanged for every input kind."""
    # Every parametrized input is timestamp 0, which already sits exactly on an
    # hour boundary, so chunking it must be a no-op.
    if isinstance(time, pd.Series):
        time_chunk = cast(pd.Series, chunk(time))
        tm.assert_series_equal(time_chunk, time)
    elif isinstance(time, pd.DatetimeIndex):
        time_chunk = cast(pd.DatetimeIndex, chunk(time))
        tm.assert_index_equal(time_chunk, time)
    else:
        time_chunk = chunk(time)
        assert time_chunk == time


# Allow running this test module directly as a script.
if __name__ == "__main__":
    pytest.main()