Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions langfuse/api/core/datetime_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ def serialize_datetime(v: dt.datetime) -> str:
"""
Serialize a datetime including timezone info.

Uses the timezone info provided if present, otherwise uses the current runtime's timezone info.
Assumes naive datetime (without tzinfo) is UTC, as Langfuse infrastructure
expects all timestamps in UTC. This prevents timestamp inconsistencies that
cause duplicate trace records in ClickHouse when the SDK runs in non-UTC timezones.

UTC datetimes end in "Z" while all other timezones are represented as offset from UTC, e.g. +05:00.
"""
Expand All @@ -25,6 +27,7 @@ def _serialize_zoned_datetime(v: dt.datetime) -> str:
if v.tzinfo is not None:
return _serialize_zoned_datetime(v)
else:
local_tz = dt.datetime.now().astimezone().tzinfo
localized_dt = v.replace(tzinfo=local_tz)
return _serialize_zoned_datetime(localized_dt)
# Assume naive datetime is UTC (Langfuse standard)
# This fixes duplicate trace records caused by timezone inconsistencies
utc_dt = v.replace(tzinfo=dt.timezone.utc)
return _serialize_zoned_datetime(utc_dt)
132 changes: 132 additions & 0 deletions tests/test_datetime_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
"""Test suite for datetime_utils module - UTC timestamp serialization."""

import datetime as dt
from datetime import timezone

import pytest

from langfuse.api.core.datetime_utils import serialize_datetime


class TestSerializeDatetime:
    """Behavioural checks for ``serialize_datetime``.

    The cases below pin three expectations: aware UTC values are rendered
    with a trailing ``Z``, aware non-UTC values keep their numeric offset,
    and naive values are rendered as if they were UTC.
    """

    def test_utc_datetime_ends_with_z(self):
        """An aware UTC value is written with 'Z' rather than '+00:00'."""
        aware_utc = dt.datetime(2025, 12, 10, 13, 30, 45, 123456, tzinfo=timezone.utc)
        serialized = serialize_datetime(aware_utc)

        assert serialized.endswith("Z")
        assert "+00:00" not in serialized
        assert serialized == "2025-12-10T13:30:45.123456Z"

    def test_utc_datetime_without_microseconds(self):
        """An aware UTC value with zero microseconds omits the fraction."""
        aware_utc = dt.datetime(2025, 12, 10, 13, 30, 45, tzinfo=timezone.utc)
        serialized = serialize_datetime(aware_utc)

        assert serialized.endswith("Z")
        assert serialized == "2025-12-10T13:30:45Z"

    def test_naive_datetime_assumed_utc(self):
        """A naive value (tzinfo is None) is serialized as UTC.

        The wall-clock fields must pass through unchanged and the suffix
        must be 'Z', i.e. no local-timezone offset may be applied.
        """
        naive = dt.datetime(2025, 12, 10, 13, 30, 45, 123456)
        result = serialize_datetime(naive)

        # A local offset such as +08:00 here would mean the value was
        # interpreted as local time instead of UTC.
        assert result.endswith("Z"), f"Expected UTC suffix 'Z', got: {result}"
        assert result == "2025-12-10T13:30:45.123456Z"

    def test_naive_datetime_without_microseconds(self):
        """A naive value with zero microseconds is serialized as UTC, no fraction."""
        naive = dt.datetime(2025, 12, 10, 13, 30, 45)
        serialized = serialize_datetime(naive)

        assert serialized.endswith("Z")
        assert serialized == "2025-12-10T13:30:45Z"

    def test_non_utc_timezone_uses_offset(self):
        """An aware value at UTC+8 keeps its '+08:00' offset."""
        eight_hours_ahead = timezone(dt.timedelta(hours=8))
        aware = dt.datetime(2025, 12, 10, 21, 30, 45, tzinfo=eight_hours_ahead)
        serialized = serialize_datetime(aware)

        # Offset notation is expected here, not the 'Z' shorthand.
        assert serialized.endswith("+08:00")
        assert serialized == "2025-12-10T21:30:45+08:00"

    def test_negative_timezone_offset(self):
        """An aware value at UTC-5 keeps its '-05:00' offset."""
        five_hours_behind = timezone(dt.timedelta(hours=-5))
        aware = dt.datetime(2025, 12, 10, 8, 30, 45, tzinfo=five_hours_behind)
        serialized = serialize_datetime(aware)

        assert serialized.endswith("-05:00")
        assert serialized == "2025-12-10T08:30:45-05:00"

    def test_consistency_with_internal_timestamp_function(self):
        """Timestamps produced by ``_get_timestamp`` serialize with 'Z'.

        This relies on ``_get_timestamp`` yielding a UTC-aware datetime;
        that helper lives in ``langfuse._utils`` and is imported lazily
        inside the test.
        """
        from langfuse._utils import _get_timestamp

        result = serialize_datetime(_get_timestamp())

        # Only the suffix is checked because the instant itself varies per run.
        assert result.endswith("Z"), f"Expected UTC suffix 'Z', got: {result}"

    def test_multiple_naive_datetimes_serialize_consistently(self):
        """Several naive values one second apart all get the same treatment.

        Each one must end in 'Z' and keep its original calendar date.
        """
        naive_values = [
            dt.datetime(2025, 12, 10, 13, 30, 45),
            dt.datetime(2025, 12, 10, 13, 30, 46),
            dt.datetime(2025, 12, 10, 13, 30, 47),
        ]

        results = []
        for value in naive_values:
            results.append(serialize_datetime(value))

        # First pass: every output carries the UTC suffix.
        for result in results:
            assert result.endswith("Z"), f"Expected UTC suffix 'Z', got: {result}"

        # Second pass: no output drifted onto a different calendar date.
        for result in results:
            assert result.startswith("2025-12-10")

    def test_edge_case_midnight_utc(self):
        """Midnight UTC is written as 00:00:00 with 'Z'."""
        start_of_day = dt.datetime(2025, 12, 10, 0, 0, 0, tzinfo=timezone.utc)

        assert serialize_datetime(start_of_day) == "2025-12-10T00:00:00Z"

    def test_edge_case_end_of_day_utc(self):
        """The last microsecond of the day in UTC keeps full precision."""
        last_instant = dt.datetime(2025, 12, 10, 23, 59, 59, 999999, tzinfo=timezone.utc)

        assert serialize_datetime(last_instant) == "2025-12-10T23:59:59.999999Z"

    def test_iso8601_format_compliance(self):
        """The output has ISO 8601 shape and parses back to an aware datetime."""
        naive = dt.datetime(2025, 12, 10, 13, 30, 45, 123456)
        serialized = serialize_datetime(naive)

        # Expected shape: YYYY-MM-DDTHH:MM:SS.ffffff[Z|+HH:MM|-HH:MM]
        assert "T" in serialized
        assert serialized.count(":") >= 2
        # Swap 'Z' for an explicit offset before parsing, since older
        # ``fromisoformat`` implementations do not accept the 'Z' suffix.
        with_explicit_offset = serialized.replace("Z", "+00:00")
        round_tripped = dt.datetime.fromisoformat(with_explicit_offset)
        assert round_tripped.tzinfo is not None