diff --git a/docs/environment-variables.md b/docs/environment-variables.md
index 70a28ca3e..05cb11655 100644
--- a/docs/environment-variables.md
+++ b/docs/environment-variables.md
@@ -130,6 +130,7 @@ Metrics collection and storage configuration. Controls metrics storage allocatio
 | `AIPERF_METRICS_OSL_MISMATCH_PCT_THRESHOLD` | `5.0` | ≥ 0.0, ≤ 100.0 | Percentage difference threshold for flagging discrepancies between requested and actual output sequence length (default: 5%) |
 | `AIPERF_METRICS_OSL_MISMATCH_MAX_TOKEN_THRESHOLD` | `50` | ≥ 1 | Maximum absolute token threshold for OSL mismatch. The effective threshold is min(requested_osl * pct_threshold, this value). Makes threshold tighter for large OSL values (default: 50 tokens) |
 | `AIPERF_METRICS_TDIGEST_COMPRESSION` | `500` | ≥ 20, ≤ 10000 | t-digest sketch compression for list-valued record metric aggregation. Higher = more centroids, tighter percentile accuracy, larger sketch. Default 500 measured to keep worst-case relative percentile error under 0.05% on 50M-sample workloads (40x under the 0.5% claimed accuracy band) at ~4 KB sketch size. |
+| `AIPERF_METRICS_LIST_BACKEND` | `'ragged'` | — | Storage backend for list-valued RECORD metrics (today: only inter_chunk_latency). 'ragged' (default) keeps every value, enabling exact percentiles and ICL-aware throughput / tokens-in-flight sweep curves. 'tdigest' uses a bounded-memory crick.TDigest sketch (~4 KB regardless of sample count) — percentiles are approximate (≤0.05% relative error at default compression), and ICL-aware sweep curves silently fall back to their non-ICL equivalents that use only request-level (start_ns, generation_start_ns, end_ns) timing. Choose tdigest when records-manager pod memory at 1M+ request scale is the binding constraint. |
 
 ## MLFLOW
 
diff --git a/src/aiperf/common/accumulator_protocols.py b/src/aiperf/common/accumulator_protocols.py
new file mode 100644
index 000000000..687edba76
--- /dev/null
+++ b/src/aiperf/common/accumulator_protocols.py
@@ -0,0 +1,191 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Any, ClassVar, Protocol, runtime_checkable
+
+import numpy as np
+from numpy.typing import NDArray
+
+from aiperf.common.enums.metric_enums import MetricValueTypeVarT
+
+if TYPE_CHECKING:
+    from aiperf.common.models.error_models import ErrorDetailsCount
+    from aiperf.common.models.record_models import MetricResult
+    from aiperf.common.types import MetricTagT
+    from aiperf.exporters.exporter_config import FileExportInfo
+    from aiperf.plugin.enums import AccumulatorType
+
+
+@runtime_checkable
+class AccumulatorResult(Protocol):
+    """Protocol for typed results from accumulator summarize()."""
+
+    def to_json(self) -> Any:
+        """Serialize to JSON-compatible structure."""
+        ...
+
+    def to_csv(self) -> list[dict[str, Any]]:
+        """Serialize to list of CSV-compatible row dicts."""
+        ...
+
+
+@runtime_checkable
+class MetricSeriesProtocol(Protocol[MetricValueTypeVarT]):
+    """Shared interface for run-level record metric series consumers.
+
+    Implemented by any in-memory accumulator that exposes a running sum, a
+    record count, and a finalized ``MetricResult`` summary. Used by the
+    per-tag dispatch path in MetricsAccumulator and by ColumnStore-backed
+    series wrappers so that derived metrics can read values without caring
+    about the underlying storage shape (numpy column, ragged CSR, growable
+    array, etc.).
+    """
+
+    @property
+    def sum(self) -> MetricValueTypeVarT:
+        """Return the accumulated sum of all observed values."""
+
+    def __len__(self) -> int:
+        """Return the number of observed values."""
+
+    def to_result(self, tag: MetricTagT, header: str, unit: str) -> MetricResult:
+        """Summarize the accumulated values as a MetricResult."""
+
+
+@dataclass(frozen=True, slots=True)
+class ExportContext:
+    """Context passed to domain-specific export_results() methods.
+
+    Bundles the profiling time window and error summary so that export_results
+    signatures stay stable as new fields are added.
+    """
+
+    start_ns: int | None = None
+    """Inclusive start of the export time window (ns since epoch), or None for unbounded."""
+
+    end_ns: int | None = None
+    """Exclusive end of the export time window (ns since epoch), or None for unbounded."""
+
+    error_summary: list[ErrorDetailsCount] | None = None
+    """De-duplicated profile-run error counts to surface in the export, if any."""
+
+    cancelled: bool = False
+    """True when the profile run was cancelled — exporters may emit partial artifacts."""
+
+
+@dataclass(slots=True)
+class SummaryContext:
+    """Typed cross-accumulator communication context for dependency-ordered summarization.
+
+    NOT a Pydantic model — this is never serialized over the wire. It is created
+    by RecordsManager._process_results() and passed through the topological-sort
+    pipeline so each accumulator can read outputs from its declared dependencies.
+    """
+
+    accumulators: dict[AccumulatorType, Any] = field(default_factory=dict)
+    """Live accumulator instances keyed by AccumulatorType — analyzers use this to query peer state."""
+
+    accumulator_outputs: dict[str, Any] = field(default_factory=dict)
+    """Already-computed summary payloads keyed by accumulator name — populated as topo-order completes."""
+
+    start_ns: int = 0
+    """Inclusive start of the summarization window (ns since epoch); 0 means full range."""
+
+    end_ns: int = 0
+    """Exclusive end of the summarization window (ns since epoch); 0 means full range."""
+
+    cancelled: bool = False
+    """True when the profile run was cancelled — analyzers may short-circuit."""
+
+    def get_accumulator(self, accumulator_type: AccumulatorType) -> Any | None:
+        """Look up an accumulator by its type. Returns None if not present."""
+        return self.accumulators.get(accumulator_type)
+
+    def get_output(self, accumulator_type: str) -> Any | None:
+        """Look up a previously-computed accumulator output. Returns None if not yet available."""
+        return self.accumulator_outputs.get(accumulator_type)
+
+
+@runtime_checkable
+class AccumulatorProtocol(Protocol):
+    """Protocol for accumulators that ingest records, support time-range queries, and produce summaries.
+
+    Accumulators are the primary data stores in the records pipeline. Each accumulator
+    owns exactly one record type and is fully self-contained — no cross-accumulator
+    dependencies. Derived computations belong on AnalyzerProtocol instead.
+    """
+
+    async def process_record(self, record: Any) -> None:
+        """Ingest a single record into this accumulator's internal storage."""
+        ...
+
+    def query_time_range(self, start_ns: int, end_ns: int) -> NDArray[np.bool_]:
+        """Return a boolean mask where True marks records in [start_ns, end_ns).
+
+        The mask length equals the accumulator's record count. Callers can use
+        ``mask.sum()`` for the count or ``np.where(mask)[0]`` for indices.
+        """
+        ...
+
+    async def summarize(self, ctx: SummaryContext | None = None) -> AccumulatorResult:
+        """Compute and return aggregated metric results.
+
+        Args:
+            ctx: Optional SummaryContext for reading dependency outputs.
+                 None when called for realtime metrics (no cross-processor deps).
+        """
+        ...
+
+    async def export_results(self, ctx: ExportContext) -> Any:
+        """Export final results for this accumulator.
+
+        Called once after profiling completes. Each accumulator returns its own
+        typed result (AccumulatorMetricsSummary, TelemetryExportData, ServerMetricsResults)
+        which is consumed by typed fields on the unified results message.
+
+        Args:
+            ctx: ExportContext with profiling time window, error summary, and cancelled flag.
+        """
+        ...
+
+
+@runtime_checkable
+class AnalyzerProtocol(Protocol):
+    """Protocol for processors that don't ingest records directly but derive results
+    from other accumulators at summarization time.
+
+    Analyzers declare which accumulators they need via required_accumulators
+    and which outputs they depend on via summary_dependencies. They receive
+    accumulator references at construction and a SummaryContext at summarize time.
+    """
+
+    required_accumulators: ClassVar[set[AccumulatorType]]
+    summary_dependencies: ClassVar[list[AccumulatorType]]
+
+    async def summarize(self, ctx: SummaryContext) -> Any:
+        """Compute derived results using data from declared accumulator dependencies."""
+        ...
+
+
+@runtime_checkable
+class StreamExporterProtocol(Protocol):
+    """Protocol for processors that stream each record to an external sink (e.g. JSONL files).
+
+    Stream exporters have no summarization dependencies and are flushed after
+    all accumulators complete.
+    """
+
+    async def process_record(self, record: Any) -> None:
+        """Write a single record to the export sink."""
+        ...
+
+    async def finalize(self) -> None:
+        """Flush any buffered data. Called once after all records are processed."""
+        ...
+
+    def get_export_info(self) -> FileExportInfo:
+        """Return metadata about the file this exporter writes to."""
+        ...
diff --git a/src/aiperf/common/environment.py b/src/aiperf/common/environment.py
index 946470a3c..f4044e24b 100644
--- a/src/aiperf/common/environment.py
+++ b/src/aiperf/common/environment.py
@@ -493,6 +493,10 @@ class _MetricsSettings(BaseSettings):
         default=500,
         description="t-digest sketch compression for list-valued record metric aggregation. Higher = more centroids, tighter percentile accuracy, larger sketch. Default 500 measured to keep worst-case relative percentile error under 0.05% on 50M-sample workloads (40x under the 0.5% claimed accuracy band) at ~4 KB sketch size.",
     )
+    LIST_BACKEND: Literal["ragged", "tdigest"] = Field(
+        default="ragged",
+        description="Storage backend for list-valued RECORD metrics (today: only inter_chunk_latency). 'ragged' (default) keeps every value, enabling exact percentiles and ICL-aware throughput / tokens-in-flight sweep curves. 'tdigest' uses a bounded-memory crick.TDigest sketch (~4 KB regardless of sample count) — percentiles are approximate (≤0.05% relative error at default compression), and ICL-aware sweep curves silently fall back to their non-ICL equivalents that use only request-level (start_ns, generation_start_ns, end_ns) timing. Choose tdigest when records-manager pod memory at 1M+ request scale is the binding constraint.",
+    )
 
 
 class _OTelSettings(BaseSettings):
diff --git a/src/aiperf/metrics/__init__.py b/src/aiperf/metrics/__init__.py
index cfb9570a2..a65164aa5 100644
--- a/src/aiperf/metrics/__init__.py
+++ b/src/aiperf/metrics/__init__.py
@@ -14,6 +14,7 @@
     MetricDictValueTypeVarT,
     MetricRecordDict,
     MetricResultsDict,
+    MetricSeriesProtocol,
 )
 from aiperf.metrics.metric_registry import MetricRegistry
 
@@ -30,5 +31,6 @@
     "MetricRecordDict",
     "MetricRegistry",
     "MetricResultsDict",
+    "MetricSeriesProtocol",
     "RecordMetricT",
 ]
diff --git a/src/aiperf/metrics/_column_store_handlers.py b/src/aiperf/metrics/_column_store_handlers.py
new file mode 100644
index 000000000..487122fd4
--- /dev/null
+++ b/src/aiperf/metrics/_column_store_handlers.py
@@ -0,0 +1,74 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""Per-tag setter closure factories for ``ColumnStore.ingest``.
+
+These closures are resolved on first sighting of each metric tag (via Python
+type dispatch) and cached in ``ColumnStore._tag_handlers``. Subsequent records
+skip the isinstance ladder and the ``_ensure_*_column`` lookups entirely.
+
+Profiling at 50k records (24 numeric tags + ICL) showed this hoist drops
+``ColumnStore.ingest`` wall by ~30% and total ingest function calls by 40%.
+The handlers are invalidated by ``_grow()`` because numeric arrays get
+reallocated; closures captured the old array references and would write to
+garbage. List backends and string lists are unaffected (in-place growth) but
+clearing all handlers on grow is simpler and grow runs ~log2(N) times.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Callable
+from typing import Any
+
+import numpy as np
+from numpy.typing import NDArray
+
+from aiperf.metrics.list_metric_aggregation import TDigestListMetricAggregator
+from aiperf.metrics.ragged_series import RaggedSeries
+
+
+def make_numeric_handler(
+    col: NDArray[np.float64],
+    tag: str,
+    sums: dict[str, float],
+    counts: dict[str, int],
+) -> Callable[[int, Any], None]:
+    """Closure that writes a numeric metric value at ``idx`` and updates the
+    O(1) running sum/count side-channel.
+
+    The ``float()`` cast is intentionally absent: numpy's ``__setitem__``
+    coerces Python ``int`` to ``float64`` automatically, and ``+=`` on the
+    sum dict promotes the int operand the same way. Saves a Python-level
+    function call per numeric metric per record (~5-8% on the scalar path).
+    """
+
+    def handler(idx: int, value: Any) -> None:
+        col[idx] = value
+        sums[tag] = sums[tag] + value
+        counts[tag] = counts[tag] + 1
+
+    return handler
+
+
+def make_string_handler(
+    col: list[str | None],
+) -> Callable[[int, Any], None]:
+    """Closure that writes a string metric value at ``idx``. The list reference
+    survives capacity growth (``list.extend`` is in-place)."""
+
+    def handler(idx: int, value: Any) -> None:
+        col[idx] = value
+
+    return handler
+
+
+def make_list_handler(
+    backend: RaggedSeries | TDigestListMetricAggregator,
+) -> Callable[[int, Any], None]:
+    """Closure that hands a list-valued metric to the configured list backend.
+    The backend reference is stable across ``ColumnStore._grow`` (list backends
+    own their own growth)."""
+
+    def handler(idx: int, value: Any) -> None:
+        backend.add_for_record(idx, value)
+
+    return handler
diff --git a/src/aiperf/metrics/column_store.py b/src/aiperf/metrics/column_store.py
new file mode 100644
index 000000000..df242708a
--- /dev/null
+++ b/src/aiperf/metrics/column_store.py
@@ -0,0 +1,503 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""Session-indexed NaN-sparse columnar storage for per-record metrics."""
+
+from __future__ import annotations
+
+from collections.abc import Callable
+from typing import Any
+
+import numpy as np
+from numpy.typing import NDArray
+
+from aiperf.common.aiperf_logger import AIPerfLogger
+from aiperf.metrics._column_store_handlers import (
+    make_list_handler as _make_list_handler,
+)
+from aiperf.metrics._column_store_handlers import (
+    make_numeric_handler as _make_numeric_handler,
+)
+from aiperf.metrics._column_store_handlers import (
+    make_string_handler as _make_string_handler,
+)
+from aiperf.metrics.list_metric_aggregation import TDigestListMetricAggregator
+from aiperf.metrics.ragged_series import RaggedSeries
+
+_logger = AIPerfLogger(__name__)
+
+# Backends both implement: ``add_for_record(idx, values)``,
+# ``to_result(tag, header, unit)`` (only on TDigest) or per-record accessors
+# (only on RaggedSeries), plus ``SUPPORTS_PER_RECORD_REPLAY`` class flag.
+ListMetricBackendT = RaggedSeries | TDigestListMetricAggregator
+
+
+def _resolve_list_backend_class() -> type[ListMetricBackendT]:
+    """Pick the list-metric backend class from ``Environment.METRICS.LIST_BACKEND``.
+
+    Resolved on each ColumnStore construction so test-time monkey-patching of
+    the env singleton takes effect without a process restart.
+    """
+    # Imported here to avoid a circular import at module load: environment ->
+    # _env_data -> (no metrics dep), but _env_data is read at module init of
+    # several siblings — safer to defer.
+    from aiperf.common.environment import Environment
+
+    if Environment.METRICS.LIST_BACKEND == "tdigest":
+        return TDigestListMetricAggregator
+    return RaggedSeries
+
+
+_BOOL_MISSING = np.uint8(255)
+"""Sentinel for an absent ``metadata_bool`` value (NaN-equivalent for uint8)."""
+
+_CATEGORICAL_MISSING = np.int32(-1)
+"""Sentinel for an absent ``metadata_categorical`` code. int32 (max ~2.1 B
+unique values) avoids the int16 overflow at >32k unique values that
+``x_correlation_id`` can hit on single-turn workloads."""
+
+
+class ColumnStore:
+    """Request-indexed NaN-sparse columnar storage for per-record metrics.
+
+    Uses session_num (credit issuance index) as the canonical array index.
+    Pre-filled with NaN/None; records write to their slot on arrival in any order.
+    """
+
+    __slots__ = (
+        "_capacity",
+        "_count",
+        "_numeric",
+        "_string",
+        "_ragged",
+        "_list_backend_cls",
+        "_sums",
+        "_counts",
+        "_tag_handlers",
+        "_metadata_numeric",
+        "_metadata_string",
+        "_metadata_bool",
+        "_metadata_categorical",
+        "_metadata_categories",
+        "start_ns",
+        "end_ns",
+        "generation_start_ns",
+    )
+
+    def __init__(
+        self,
+        initial_capacity: int = 1024,
+        *,
+        list_backend_cls: type[ListMetricBackendT] | None = None,
+    ) -> None:
+        self._capacity = initial_capacity
+        self._count = 0
+        self._numeric: dict[str, NDArray[np.float64]] = {}
+        self._string: dict[str, list[str | None]] = {}
+        self._ragged: dict[str, ListMetricBackendT] = {}
+        self._list_backend_cls = list_backend_cls or _resolve_list_backend_class()
+        self._sums: dict[str, float] = {}
+        self._counts: dict[str, int] = {}
+        # Per-tag setter closures, resolved on first sighting of each metric tag
+        # (via Python type dispatch: list -> ragged backend, str -> string column,
+        # numeric -> float64 column). Subsequent records skip the isinstance
+        # ladder and the ``_ensure_*_column`` lookups entirely. Cleared by
+        # ``_grow()`` because numeric/metadata-numeric arrays get reallocated;
+        # closures captured the old array references and would write to garbage.
+        self._tag_handlers: dict[str, Callable[[int, Any], None]] = {}
+        # Metadata columns — separate from metric columns so _compute_results()
+        # doesn't pick them up. Caller picks the storage type per field based
+        # on cardinality + semantics; see ``ingest_metadata`` for the trade-off.
+        self._metadata_numeric: dict[str, NDArray[np.float64]] = {}
+        self._metadata_string: dict[str, list[str | None]] = {}
+        self._metadata_bool: dict[str, NDArray[np.uint8]] = {}
+        self._metadata_categorical: dict[str, NDArray[np.int32]] = {}
+        # Per-tag intern table: ``categories[tag][string] = int code``.
+        self._metadata_categories: dict[str, dict[str, int]] = {}
+        self.start_ns = np.full(initial_capacity, np.nan, dtype=np.float64)
+        self.end_ns = np.full(initial_capacity, np.nan, dtype=np.float64)
+        self.generation_start_ns = np.full(initial_capacity, np.nan, dtype=np.float64)
+
+    @property
+    def count(self) -> int:
+        """Number of records written (max session_num + 1)."""
+        return self._count
+
+    def numeric(self, tag: str) -> NDArray[np.float64]:
+        """Return the float64 column for `tag`, sliced to count.
+
+        Returns a NaN-filled array if no record has ingested a value for `tag`.
+        Logs a warning when the column is missing on a non-empty store, since
+        the most common cause is a typo'd tag name silently producing a
+        useless all-NaN result downstream.
+        """
+        col = self._numeric.get(tag)
+        if col is None:
+            if self._count > 0:
+                _logger.warning(
+                    f"ColumnStore.numeric: unknown tag '{tag}' on a non-empty store "
+                    f"(known numeric tags: {sorted(self._numeric.keys())}). "
+                    "Returning NaN-fill — check for a typo or missing ingestion."
+                )
+            return np.full(self._count, np.nan, dtype=np.float64)
+        return col[: self._count]
+
+    def numeric_tags(self) -> list[str]:
+        """Return all numeric column tags."""
+        return list(self._numeric.keys())
+
+    def string(self, tag: str) -> list[str | None]:
+        """Return the string column for `tag`, sliced to count. None where missing."""
+        col = self._string.get(tag)
+        if col is None:
+            return [None] * self._count
+        return col[: self._count]
+
+    def ragged(self, tag: str) -> ListMetricBackendT:
+        """Return the list-valued backend for ``tag``.
+
+        Concrete type is :class:`RaggedSeries` (default) or
+        :class:`TDigestListMetricAggregator` depending on
+        ``Environment.METRICS.LIST_BACKEND``. Both expose
+        ``add_for_record(idx, values)``; only the ragged backend exposes
+        per-record replay accessors (``values``, ``record_indices``,
+        ``offsets``, ``grouped_cumsum``, ``get_values_for_mask``). Consumers
+        that need replay must gate on
+        ``backend.SUPPORTS_PER_RECORD_REPLAY``.
+        """
+        return self._ragged[tag]
+
+    def ragged_tags(self) -> list[str]:
+        """Return all ragged column tags."""
+        return list(self._ragged.keys())
+
+    def numeric_sum(self, tag: str) -> float:
+        """Return the running sum for a numeric column (O(1))."""
+        return self._sums.get(tag, 0.0)
+
+    def numeric_count(self, tag: str) -> int:
+        """Return the count of values ingested for a numeric column (O(1))."""
+        return self._counts.get(tag, 0)
+
+    def metadata_numeric(self, tag: str) -> NDArray[np.float64]:
+        """Return the metadata float64 column for `tag`, sliced to count. NaN where missing."""
+        col = self._metadata_numeric.get(tag)
+        if col is None:
+            return np.full(self._count, np.nan, dtype=np.float64)
+        return col[: self._count]
+
+    def metadata_string(self, tag: str) -> list[str | None]:
+        """Return the metadata string column for `tag`, sliced to count. None where missing."""
+        col = self._metadata_string.get(tag)
+        if col is None:
+            return [None] * self._count
+        return col[: self._count]
+
+    def metadata_bool(self, tag: str) -> NDArray[np.uint8]:
+        """Return the metadata bool column for `tag`, sliced to count.
+
+        Encoding: 0=False, 1=True, 255=missing. Compare against
+        ``_BOOL_MISSING`` (255) to detect absence; cast to ``bool`` otherwise.
+        """
+        col = self._metadata_bool.get(tag)
+        if col is None:
+            return np.full(self._count, _BOOL_MISSING, dtype=np.uint8)
+        return col[: self._count]
+
+    def metadata_categorical(self, tag: str) -> NDArray[np.int32]:
+        """Return the per-record category codes for `tag`. -1 = missing.
+
+        Decode via ``metadata_category_strings(tag)[code]`` (when ``code != -1``).
+        """
+        col = self._metadata_categorical.get(tag)
+        if col is None:
+            return np.full(self._count, _CATEGORICAL_MISSING, dtype=np.int32)
+        return col[: self._count]
+
+    def metadata_category_strings(self, tag: str) -> list[str]:
+        """Reverse lookup: code -> original string for a categorical column."""
+        table = self._metadata_categories.get(tag, {})
+        out = [""] * len(table)
+        for s, code in table.items():
+            out[code] = s
+        return out
+
+    def metadata_categorical_tags(self) -> list[str]:
+        """Return all categorical metadata tags (e.g. for grouping enumeration)."""
+        return list(self._metadata_categorical.keys())
+
+    def unique_categorical_values(self, tag: str) -> list[str]:
+        """Return the unique values that have appeared in categorical column ``tag``.
+
+        Same data as :meth:`metadata_category_strings`; named for the
+        per-X-grouping use case where the caller wants to iterate over
+        groups (e.g. "for each x_correlation_id, compute per-conversation
+        latency stats").
+        """
+        return self.metadata_category_strings(tag)
+
+    def mask_for_categorical(self, tag: str, value: str) -> NDArray[np.bool_]:
+        """Return a boolean mask of records whose ``tag`` column equals ``value``.
+
+        Use case: per-group analyzer queries. Combine with
+        :meth:`MetricsAccumulator.compute_results_for_mask` to compute
+        windowed metrics for a single group:
+
+        .. code-block:: python
+
+            for value in store.unique_categorical_values("x_correlation_id"):
+                mask = store.mask_for_categorical("x_correlation_id", value)
+                results = accumulator.compute_results_for_mask(mask)
+
+        Returns an empty mask if the tag has no column or the value never
+        appeared (no false-positive matches via the missing-sentinel).
+        """
+        table = self._metadata_categories.get(tag)
+        if table is None:
+            return np.zeros(self._count, dtype=np.bool_)
+        code = table.get(value)
+        if code is None:
+            return np.zeros(self._count, dtype=np.bool_)
+        col = self._metadata_categorical.get(tag)
+        if col is None:
+            return np.zeros(self._count, dtype=np.bool_)
+        return col[: self._count] == code
+
+    def query_time_range(self, start_ns: float, end_ns: float) -> NDArray[np.bool_]:
+        """Return a boolean mask of records overlapping ``[start_ns, end_ns]``.
+
+        A record overlaps the window when ``start_ns <= record.end_ns`` and
+        ``record.start_ns <= end_ns``. NaN slots (uningested or partial) are
+        excluded by the standard NaN comparison semantics: every comparison
+        with NaN returns False, so unfilled rows never match. The window
+        endpoints are inclusive.
+        """
+        if self._count == 0:
+            return np.zeros(0, dtype=np.bool_)
+        rec_start = self.start_ns[: self._count]
+        rec_end = self.end_ns[: self._count]
+        return (rec_start <= end_ns) & (rec_end >= start_ns)
+
+    # --- Write API (called from MetricsAccumulator.process_record) ---
+
+    def ingest(
+        self,
+        idx: int,
+        *,
+        record_metrics: dict[str, Any],
+        start_ns: float,
+        end_ns: float,
+        generation_start_ns: float | None,
+    ) -> None:
+        """Write a record's data to slot `idx` (= session_num).
+
+        Grows capacity if idx >= _capacity. Dispatches metric values via cached
+        per-tag setter closures — the isinstance ladder and ``_ensure_*_column``
+        lookups run only on the first record per tag. Profiling at 50k records
+        shows this hoists ~30% of ingest wall time vs the per-record dispatch.
+        """
+        if idx >= self._capacity:
+            self._grow(idx)
+
+        if idx >= self._count:
+            self._count = idx + 1
+
+        self.start_ns[idx] = start_ns
+        self.end_ns[idx] = end_ns
+        if generation_start_ns is not None:
+            self.generation_start_ns[idx] = generation_start_ns
+
+        handlers = self._tag_handlers
+        for tag, value in record_metrics.items():
+            handler = handlers.get(tag)
+            if handler is None:
+                handler = self._resolve_tag_handler(tag, value)
+                if handler is None:
+                    continue
+                handlers[tag] = handler
+            handler(idx, value)
+
+    def _resolve_tag_handler(
+        self, tag: str, value: Any
+    ) -> Callable[[int, Any], None] | None:
+        """First-sighting type dispatch: pick a setter closure for ``tag``.
+
+        Bound on first record only; subsequent records reuse the cached
+        closure. Returns ``None`` for unsupported value types so ``ingest``
+        can skip the tag without re-dispatching.
+        """
+        if isinstance(value, list):
+            backend = self._ensure_ragged_column(tag)
+            return _make_list_handler(backend)
+        if isinstance(value, str):
+            col = self._ensure_string_column(tag)
+            return _make_string_handler(col)
+        if isinstance(value, (int, float)):
+            col = self._ensure_numeric_column(tag)
+            return _make_numeric_handler(col, tag, self._sums, self._counts)
+        return None
+
+    def ingest_metadata(
+        self,
+        idx: int,
+        metadata_numeric: dict[str, float | None],
+        metadata_string: dict[str, str | None],
+        *,
+        metadata_bool: dict[str, bool | None] | None = None,
+        metadata_categorical: dict[str, str | None] | None = None,
+    ) -> None:
+        """Write per-record metadata to slot `idx`.
+
+        Metadata columns are kept separate from metric columns so that
+        _compute_results() does not treat them as metrics. Caller picks the
+        storage type per field based on cardinality + semantics:
+
+        - ``metadata_numeric``: float64 (NaN missing) — high-resolution numbers.
+        - ``metadata_string``: list[str|None] — high-cardinality strings (UUIDs).
+        - ``metadata_bool``: uint8 with sentinel 255 — saves 8x vs float64.
+        - ``metadata_categorical``: int32 + per-tag interning table — saves
+          ~25x vs raw strings even at full cardinality, much more on
+          low-cardinality fields like ``worker_id``.
+        """
+        if idx >= self._capacity:
+            self._grow(idx)
+
+        for tag, num_value in metadata_numeric.items():
+            if num_value is not None:
+                self._ensure_metadata_numeric_column(tag)[idx] = float(num_value)
+
+        for tag, str_value in metadata_string.items():
+            self._ensure_metadata_string_column(tag)[idx] = str_value
+
+        if metadata_bool:
+            self._ingest_bool_metadata(idx, metadata_bool)
+        if metadata_categorical:
+            self._ingest_categorical_metadata(idx, metadata_categorical)
+
+    def _ingest_bool_metadata(self, idx: int, values: dict[str, bool | None]) -> None:
+        for tag, bool_value in values.items():
+            if bool_value is not None:
+                self._ensure_metadata_bool_column(tag)[idx] = 1 if bool_value else 0
+
+    def _ingest_categorical_metadata(
+        self, idx: int, values: dict[str, str | None]
+    ) -> None:
+        for tag, cat_value in values.items():
+            if cat_value is None:
+                continue
+            # Order matters: ensure the column (which seeds the per-tag
+            # categories table) BEFORE interning, since Python evaluates
+            # the RHS before the LHS in chained subscript assignments.
+            col = self._ensure_metadata_categorical_column(tag)
+            col[idx] = self._intern_category(tag, cat_value)
+
+    def _grow(self, min_idx: int) -> None:
+        """Double capacity until min_idx fits. Numeric column reallocation
+        invalidates ``_tag_handlers`` (cached setter closures held old array
+        refs); list/string columns grow in place. Grow runs ~log2(N) times
+        so handler-rebuild overhead is negligible.
+        """
+        new_cap = self._capacity
+        while new_cap <= min_idx:
+            new_cap *= 2
+
+        for attr in ("start_ns", "end_ns", "generation_start_ns"):
+            old = getattr(self, attr)
+            new = np.full(new_cap, np.nan, dtype=np.float64)
+            new[: self._capacity] = old[: self._capacity]
+            setattr(self, attr, new)
+
+        for tag, old in self._numeric.items():
+            new = np.full(new_cap, np.nan, dtype=np.float64)
+            new[: self._capacity] = old[: self._capacity]
+            self._numeric[tag] = new
+
+        for tag, old in self._string.items():
+            old.extend([None] * (new_cap - self._capacity))
+            self._string[tag] = old
+
+        for tag, old in self._metadata_numeric.items():
+            new = np.full(new_cap, np.nan, dtype=np.float64)
+            new[: self._capacity] = old[: self._capacity]
+            self._metadata_numeric[tag] = new
+
+        for tag, old in self._metadata_string.items():
+            old.extend([None] * (new_cap - self._capacity))
+            self._metadata_string[tag] = old
+
+        for tag, old in self._metadata_bool.items():
+            new = np.full(new_cap, _BOOL_MISSING, dtype=np.uint8)
+            new[: self._capacity] = old[: self._capacity]
+            self._metadata_bool[tag] = new
+
+        for tag, old in self._metadata_categorical.items():
+            new = np.full(new_cap, _CATEGORICAL_MISSING, dtype=np.int32)
+            new[: self._capacity] = old[: self._capacity]
+            self._metadata_categorical[tag] = new
+
+        # Numeric metric columns were reallocated; cached setter closures
+        # captured the old array references. Drop them so the next ingest
+        # rebuilds them against the new arrays.
+        self._tag_handlers.clear()
+
+        self._capacity = new_cap
+
+    def _ensure_numeric_column(self, tag: str) -> NDArray[np.float64]:
+        col = self._numeric.get(tag)
+        if col is None:
+            col = np.full(self._capacity, np.nan, dtype=np.float64)
+            self._numeric[tag] = col
+            self._sums[tag] = 0.0
+            self._counts[tag] = 0
+        return col
+
+    def _ensure_string_column(self, tag: str) -> list[str | None]:
+        col = self._string.get(tag)
+        if col is None:
+            col = [None] * self._capacity
+            self._string[tag] = col
+        return col
+
+    def _ensure_ragged_column(self, tag: str) -> ListMetricBackendT:
+        ragged = self._ragged.get(tag)
+        if ragged is None:
+            ragged = self._list_backend_cls()
+            self._ragged[tag] = ragged
+        return ragged
+
+    def _ensure_metadata_numeric_column(self, tag: str) -> NDArray[np.float64]:
+        col = self._metadata_numeric.get(tag)
+        if col is None:
+            col = np.full(self._capacity, np.nan, dtype=np.float64)
+            self._metadata_numeric[tag] = col
+        return col
+
+    def _ensure_metadata_string_column(self, tag: str) -> list[str | None]:
+        col = self._metadata_string.get(tag)
+        if col is None:
+            col = [None] * self._capacity
+            self._metadata_string[tag] = col
+        return col
+
+    def _ensure_metadata_bool_column(self, tag: str) -> NDArray[np.uint8]:
+        col = self._metadata_bool.get(tag)
+        if col is None:
+            col = np.full(self._capacity, _BOOL_MISSING, dtype=np.uint8)
+            self._metadata_bool[tag] = col
+        return col
+
+    def _ensure_metadata_categorical_column(self, tag: str) -> NDArray[np.int32]:
+        col = self._metadata_categorical.get(tag)
+        if col is None:
+            col = np.full(self._capacity, _CATEGORICAL_MISSING, dtype=np.int32)
+            self._metadata_categorical[tag] = col
+            self._metadata_categories[tag] = {}
+        return col
+
+    def _intern_category(self, tag: str, value: str) -> int:
+        """Look up or insert ``value`` in the per-tag category table; return the int32 code."""
+        table = self._metadata_categories[tag]
+        code = table.get(value)
+        if code is None:
+            code = len(table)
+            table[value] = code
+        return code
diff --git a/src/aiperf/metrics/list_metric_aggregation.py b/src/aiperf/metrics/list_metric_aggregation.py
index 141ef31b3..cc2ef2012 100644
--- a/src/aiperf/metrics/list_metric_aggregation.py
+++ b/src/aiperf/metrics/list_metric_aggregation.py
@@ -5,6 +5,7 @@
 Used by :class:`aiperf.post_processors.metric_results_processor.MetricResultsProcessor`
 when a ``MetricType.RECORD`` metric arrives with a list value (today only
 ``inter_chunk_latency``, where each request contributes a list of inter-chunk
+
 gap durations). At 1 M-request ramp scale the exact storage —
 ``records x (chunks-1) x 8 B`` would dwarf the records-manager pod's
 memory budget. T-digest bounds it to a few KB regardless of sample count.
@@ -22,7 +23,7 @@
   suffer catastrophic cancellation.
 - ``p1``..``p99`` are approximate via t-digest.
 
-Implements the :class:`aiperf.metrics.metric_dicts.MetricAggregator` protocol.
+Implements :class:`aiperf.common.accumulator_protocols.MetricSeriesProtocol`.
 """
 
 from __future__ import annotations
@@ -37,13 +38,21 @@
 from aiperf.common.models import MetricResult
 from aiperf.common.types import MetricTagT
 
+__all__ = ["TDigestListMetricAggregator"]
+
 
 class TDigestListMetricAggregator:
     """Bounded-memory aggregator backed by a t-digest sketch.
 
-    Conforms to :class:`aiperf.metrics.metric_dicts.MetricAggregator`.
+    Conforms to :class:`aiperf.common.accumulator_protocols.MetricSeriesProtocol`
+    plus the :class:`aiperf.metrics.column_store.ColumnStore` list-backend
+    contract: ``add_for_record(idx, values)`` ingest entry point and a
+    ``SUPPORTS_PER_RECORD_REPLAY`` capability flag that downstream sweep
+    helpers gate ICL-aware curves on.
     """
 
+    SUPPORTS_PER_RECORD_REPLAY = False
+
     def __init__(self) -> None:
         self._td = TDigest(compression=Environment.METRICS.TDIGEST_COMPRESSION)
         self._count: int = 0
@@ -58,11 +67,15 @@ def __init__(self) -> None:
 
     @property
     def sum(self) -> float:
-        """Exact running sum of all samples — for the :class:`MetricAggregator`
-        protocol so derived-sum metrics can compute uniformly across this
-        and :class:`MetricArray`."""
+        """Exact running sum of all samples — for the
+        :class:`MetricSeriesProtocol` so derived-sum metrics can compute
+        uniformly across this and :class:`MetricArray`."""
         return self._sum
 
+    def __len__(self) -> int:
+        """Return the number of observed samples."""
+        return self._count
+
     def append(self, value: int | float) -> None:
         """Add a single sample."""
         v = float(value)
@@ -108,6 +121,15 @@ def extend(self, values: Iterable[int | float]) -> None:
         self._min = batch_min if self._min is None else min(self._min, batch_min)
         self._max = batch_max if self._max is None else max(self._max, batch_max)
 
+    def add_for_record(self, idx: int, values: list[float]) -> None:  # noqa: ARG002 - idx unused
+        """Record-keyed ingest entry point shared with :class:`RaggedSeries`.
+
+        ``idx`` is ignored: the t-digest is a global sketch with no per-record
+        structure. The accumulator routes every list-valued metric through this
+        method regardless of backend, which is why the signature has to match.
+        """
+        self.extend(values)
+
     def to_result(self, tag: MetricTagT, header: str, unit: str) -> MetricResult:
         """Return a :class:`MetricResult` with the same field set as
         ``MetricArray.to_result``. Percentiles come from the t-digest;
diff --git a/src/aiperf/metrics/metric_dicts.py b/src/aiperf/metrics/metric_dicts.py
index 9f08a2af1..3b77c17b0 100644
--- a/src/aiperf/metrics/metric_dicts.py
+++ b/src/aiperf/metrics/metric_dicts.py
@@ -1,9 +1,10 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
-from typing import TYPE_CHECKING, Generic, Protocol, TypeVar, runtime_checkable
+from typing import TYPE_CHECKING, Generic, TypeVar
 
 import numpy as np
 
+from aiperf.common.accumulator_protocols import MetricSeriesProtocol
 from aiperf.common.aiperf_logger import AIPerfLogger
 from aiperf.common.enums import (
     MetricDictValueTypeT,
@@ -23,20 +24,20 @@
     from aiperf.metrics.metric_registry import MetricRegistry
 
 
-@runtime_checkable
-class MetricAggregator(Protocol):
-    """Run-level aggregator that produces a :class:`MetricResult`.
-
-    Implemented by :class:`MetricArray` (exact, ``np.ndarray``-backed) and
-    :class:`aiperf.metrics.list_metric_aggregation.TDigestListMetricAggregator`
-    (bounded-memory t-digest sketch). Both maintain an exact running ``sum``
-    so derived-sum metrics work uniformly across them.
-    """
-
-    @property
-    def sum(self) -> int | float: ...
-
-    def to_result(self, tag: MetricTagT, header: str, unit: str) -> MetricResult: ...
+__all__ = [
+    "BaseMetricDict",
+    "MetricAggregator",
+    "MetricArray",
+    "MetricDictValueTypeVarT",
+    "MetricRecordDict",
+    "MetricResultsDict",
+    "MetricSeriesProtocol",
+]
+
+# Back-compat alias: ``MetricAggregator`` is the historical name for
+# :class:`MetricSeriesProtocol`. Both point at the same Protocol; the alias
+# keeps existing imports working.
+MetricAggregator = MetricSeriesProtocol
 
 
 MetricDictValueTypeVarT = TypeVar(
diff --git a/src/aiperf/metrics/ragged_series.py b/src/aiperf/metrics/ragged_series.py
new file mode 100644
index 000000000..107958caf
--- /dev/null
+++ b/src/aiperf/metrics/ragged_series.py
@@ -0,0 +1,107 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""Storage for list-valued per-record metrics (e.g., ICL/ITL)."""
+
+from __future__ import annotations
+
+import numpy as np
+from numpy.typing import NDArray
+
+from aiperf.common.growable_array import GrowableArray
+
+
+class RaggedSeries:
+    """Storage for list-valued per-record metrics (e.g., ICL).
+
+    Uses offsets array for O(1) per-record lookup, efficient bulk retrieval
+    via boolean mask on record indices, and vectorized grouped operations
+    (e.g., per-request cumulative sums for ICL-aware throughput sweeps).
+
+    ``SUPPORTS_PER_RECORD_REPLAY = True`` advertises that this backend retains
+    enough per-request structure to drive ICL-aware sweep curves in
+    ``accumulator_sweeps``. The alternative t-digest backend sets it ``False``
+    and the sweep helpers degrade to their request-level fallbacks.
+    """
+
+    SUPPORTS_PER_RECORD_REPLAY = True
+
+    __slots__ = ("_values", "_record_indices", "_offsets", "_offsets_capacity")
+
+    def __init__(
+        self, initial_capacity: int = 1024, offsets_capacity: int = 256
+    ) -> None:
+        self._values = GrowableArray(
+            initial_capacity=initial_capacity, dtype=np.float64
+        )
+        self._record_indices = GrowableArray(
+            initial_capacity=initial_capacity, dtype=np.int32
+        )
+        # Per-session_num start offset into _values. -1 means absent.
+        self._offsets = np.full(offsets_capacity, -1, dtype=np.int64)
+        self._offsets_capacity = offsets_capacity
+
+    @property
+    def values(self) -> NDArray[np.float64]:
+        """All concatenated values."""
+        return self._values.data
+
+    @property
+    def record_indices(self) -> NDArray[np.int32]:
+        """Session_num per value."""
+        return self._record_indices.data
+
+    @property
+    def offsets(self) -> NDArray[np.int64]:
+        """Per-session_num start offset. -1 if absent."""
+        return self._offsets[: self._offsets_capacity]
+
+    def extend(self, idx: int, values: list[float]) -> None:
+        """Append values for session_num ``idx``."""
+        n = len(values)
+        if n == 0:
+            return
+        if idx >= self._offsets_capacity:
+            self._grow_offsets(idx)
+        self._offsets[idx] = len(self._values)
+        val_arr = np.asarray(values, dtype=np.float64)
+        idx_arr = np.full(n, idx, dtype=np.int32)
+        self._values.extend(val_arr)
+        self._record_indices.extend(idx_arr)
+
+    def add_for_record(self, idx: int, values: list[float]) -> None:
+        """Unified backend contract — same signature on the t-digest backend."""
+        self.extend(idx, values)
+
+    def get_values_for_mask(
+        self, record_mask: NDArray[np.bool_]
+    ) -> NDArray[np.float64]:
+        """Return values whose record is selected by the boolean mask."""
+        if len(self._record_indices) == 0:
+            return np.zeros(0, dtype=np.float64)
+        value_mask = record_mask[self._record_indices.data]
+        return self._values.data[value_mask]
+
+    def grouped_cumsum(self) -> NDArray[np.float64]:
+        """Compute per-request cumulative sum across the flat values array.
+
+        Uses offsets to reset at request boundaries — fully vectorized, no Python loops.
+        This is the foundation of ICL-aware throughput sweeps.
+        """
+        if len(self._values) == 0:
+            return np.zeros(0, dtype=np.float64)
+
+        global_cs = np.cumsum(self._values.data)
+        rec_idx = self._record_indices.data
+        request_offsets = self._offsets[rec_idx]
+        start_cs = np.where(request_offsets > 0, global_cs[request_offsets - 1], 0.0)
+        return global_cs - start_cs
+
+    def _grow_offsets(self, min_idx: int) -> None:
+        """Grow offsets array to accommodate min_idx."""
+        new_cap = self._offsets_capacity
+        while new_cap <= min_idx:
+            new_cap *= 2
+        new_offsets = np.full(new_cap, -1, dtype=np.int64)
+        new_offsets[: self._offsets_capacity] = self._offsets[: self._offsets_capacity]
+        self._offsets = new_offsets
+        self._offsets_capacity = new_cap
diff --git a/tests/unit/common/test_accumulator_protocols.py b/tests/unit/common/test_accumulator_protocols.py
new file mode 100644
index 000000000..6b502a8d3
--- /dev/null
+++ b/tests/unit/common/test_accumulator_protocols.py
@@ -0,0 +1,252 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any, ClassVar
+
+import numpy as np
+import pytest
+from numpy.typing import NDArray
+
+from aiperf.common.accumulator_protocols import (
+    AccumulatorProtocol,
+    AccumulatorResult,
+    AnalyzerProtocol,
+    ExportContext,
+    StreamExporterProtocol,
+    SummaryContext,
+)
+
+# SummaryContext dict is structurally a plain dict at runtime, so plain
+# string keys are sufficient to exercise the protocol and dataclass surface.
+_METRIC_RESULTS_KEY = "metric_results"
+_GPU_TELEMETRY_KEY = "gpu_telemetry"
+
+# ---------------------------------------------------------------------------
+# Stub AccumulatorResult implementation
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class StubResult:
+    values: list[int]
+
+    def to_json(self) -> list[int]:
+        return self.values
+
+    def to_csv(self) -> list[dict[str, Any]]:
+        return [{"value": v} for v in self.values]
+
+
+# ---------------------------------------------------------------------------
+# Stub implementations for isinstance checks
+# ---------------------------------------------------------------------------
+
+
+class StubAccumulator:
+    async def process_record(self, record: Any) -> None:
+        pass
+
+    def query_time_range(self, start_ns: int, end_ns: int) -> NDArray[np.bool_]:
+        return np.array([], dtype=bool)
+
+    async def summarize(self, ctx: SummaryContext | None = None) -> StubResult:
+        return StubResult(values=[])
+
+    async def export_results(self, ctx: ExportContext) -> StubResult:
+        return StubResult(values=[])
+
+
+class StubAnalyzer:
+    required_accumulators: ClassVar[set[str]] = set()
+    summary_dependencies: ClassVar[list[str]] = []
+
+    async def summarize(self, ctx: SummaryContext) -> Any:
+        return []
+
+
+class StubStreamExporter:
+    async def process_record(self, record: Any) -> None:
+        pass
+
+    async def finalize(self) -> None:
+        pass
+
+    def get_export_info(self) -> Any:
+        return None
+
+
+class NotAnAccumulator:
+    """Missing required methods — should NOT satisfy any protocol."""
+
+    pass
+
+
+# ---------------------------------------------------------------------------
+# Protocol isinstance checks
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "instance, protocol, expected",
+    [
+        pytest.param(
+            StubAccumulator(), AccumulatorProtocol, True, id="accumulator-matches"
+        ),
+        pytest.param(StubAnalyzer(), AnalyzerProtocol, True, id="analyzer-matches"),
+        pytest.param(
+            StubStreamExporter(),
+            StreamExporterProtocol,
+            True,
+            id="stream-exporter-matches",
+        ),
+        pytest.param(
+            NotAnAccumulator(), AccumulatorProtocol, False, id="not-accumulator"
+        ),
+        pytest.param(NotAnAccumulator(), AnalyzerProtocol, False, id="not-analyzer"),
+        pytest.param(
+            NotAnAccumulator(), StreamExporterProtocol, False, id="not-stream-exporter"
+        ),
+        pytest.param(
+            StubStreamExporter(),
+            AccumulatorProtocol,
+            False,
+            id="exporter-is-not-accumulator",
+        ),
+        # StreamExporterProtocol now uses finalize() instead of summarize(),
+        # so accumulators no longer structurally match the exporter protocol.
+        pytest.param(
+            StubAccumulator(),
+            StreamExporterProtocol,
+            False,
+            id="accumulator-does-not-match-exporter",
+        ),
+    ],
+)
+def test_protocol_isinstance_check(
+    instance: object, protocol: type, expected: bool
+) -> None:
+    assert isinstance(instance, protocol) is expected
+
+
+def test_protocols_are_unambiguous() -> None:
+    """Accumulators and stream exporters are now fully distinct.
+
+    StreamExporterProtocol uses finalize() while AccumulatorProtocol uses
+    summarize(), so there is no structural overlap between the two protocols.
+    """
+    acc = StubAccumulator()
+    exp = StubStreamExporter()
+
+    assert isinstance(acc, AccumulatorProtocol) is True
+    assert isinstance(acc, StreamExporterProtocol) is False
+    assert isinstance(exp, AccumulatorProtocol) is False
+    assert isinstance(exp, StreamExporterProtocol) is True
+
+
+# ---------------------------------------------------------------------------
+# AccumulatorResult protocol tests
+# ---------------------------------------------------------------------------
+
+
+class TestAccumulatorResult:
+    def test_stub_result_satisfies_protocol(self) -> None:
+        result = StubResult(values=[1, 2, 3])
+        assert isinstance(result, AccumulatorResult)
+
+    def test_to_json(self) -> None:
+        result = StubResult(values=[1, 2, 3])
+        assert result.to_json() == [1, 2, 3]
+
+    def test_to_csv(self) -> None:
+        result = StubResult(values=[10, 20])
+        assert result.to_csv() == [{"value": 10}, {"value": 20}]
+
+    def test_missing_to_json_does_not_satisfy(self) -> None:
+        class NoJson:
+            def to_csv(self) -> list[dict[str, Any]]:
+                return []
+
+        assert not isinstance(NoJson(), AccumulatorResult)
+
+    def test_missing_to_csv_does_not_satisfy(self) -> None:
+        class NoCsv:
+            def to_json(self) -> Any:
+                return {}
+
+        assert not isinstance(NoCsv(), AccumulatorResult)
+
+    def test_plain_object_does_not_satisfy(self) -> None:
+        assert not isinstance(object(), AccumulatorResult)
+
+
+# ---------------------------------------------------------------------------
+# SummaryContext tests
+# ---------------------------------------------------------------------------
+
+
+class TestSummaryContext:
+    def test_default_construction(self) -> None:
+        ctx = SummaryContext()
+        assert ctx.accumulators == {}
+        assert ctx.accumulator_outputs == {}
+        assert ctx.start_ns == 0
+        assert ctx.end_ns == 0
+        assert ctx.cancelled is False
+
+    def test_get_accumulator_present(self) -> None:
+        sentinel = object()
+        ctx = SummaryContext(accumulators={_METRIC_RESULTS_KEY: sentinel})
+        assert ctx.get_accumulator(_METRIC_RESULTS_KEY) is sentinel
+
+    def test_get_accumulator_missing(self) -> None:
+        ctx = SummaryContext()
+        assert ctx.get_accumulator(_METRIC_RESULTS_KEY) is None
+
+    def test_get_output_present(self) -> None:
+        sentinel = object()
+        ctx = SummaryContext(accumulator_outputs={_GPU_TELEMETRY_KEY: sentinel})
+        assert ctx.get_output(_GPU_TELEMETRY_KEY) is sentinel
+
+    def test_get_output_missing(self) -> None:
+        ctx = SummaryContext()
+        assert ctx.get_output(_GPU_TELEMETRY_KEY) is None
+
+    def test_cancelled_flag(self) -> None:
+        ctx = SummaryContext(cancelled=True)
+        assert ctx.cancelled is True
+
+    def test_time_range(self) -> None:
+        ctx = SummaryContext(start_ns=1_000_000, end_ns=2_000_000)
+        assert ctx.start_ns == 1_000_000
+        assert ctx.end_ns == 2_000_000
+
+    def test_accumulator_outputs_mutable(self) -> None:
+        ctx = SummaryContext()
+        ctx.accumulator_outputs[_METRIC_RESULTS_KEY] = [1, 2, 3]
+        assert ctx.get_output(_METRIC_RESULTS_KEY) == [1, 2, 3]
+
+
+# ---------------------------------------------------------------------------
+# ExportContext tests
+# ---------------------------------------------------------------------------
+
+
+class TestExportContext:
+    def test_default_construction(self) -> None:
+        ctx = ExportContext()
+        assert ctx.start_ns is None
+        assert ctx.end_ns is None
+        assert ctx.error_summary is None
+        assert ctx.cancelled is False
+
+    def test_cancelled_flag(self) -> None:
+        ctx = ExportContext(cancelled=True)
+        assert ctx.cancelled is True
+
+    def test_with_time_range(self) -> None:
+        ctx = ExportContext(start_ns=1_000, end_ns=2_000)
+        assert ctx.start_ns == 1_000
+        assert ctx.end_ns == 2_000
diff --git a/tests/unit/metrics/test_column_store.py b/tests/unit/metrics/test_column_store.py
new file mode 100644
index 000000000..06121a252
--- /dev/null
+++ b/tests/unit/metrics/test_column_store.py
@@ -0,0 +1,440 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""Tests for ColumnStore — session-indexed columnar metric storage."""
+
+from __future__ import annotations
+
+import numpy as np
+import pytest
+
+from aiperf.metrics.column_store import (
+    _BOOL_MISSING,
+    _CATEGORICAL_MISSING,
+    ColumnStore,
+)
+from aiperf.metrics.ragged_series import RaggedSeries
+
+
+def _make_store(initial_capacity: int = 8) -> ColumnStore:
+    # Pin the ragged backend to keep tests independent of the env flag.
+    return ColumnStore(initial_capacity=initial_capacity, list_backend_cls=RaggedSeries)
+
+
+def test_init_empty_count_and_columns():
+    store = _make_store()
+    assert store.count == 0
+    assert store.numeric_tags() == []
+    assert store.ragged_tags() == []
+
+
+def test_init_timestamp_columns_filled_with_nan():
+    store = _make_store(initial_capacity=4)
+    assert np.all(np.isnan(store.start_ns))
+    assert np.all(np.isnan(store.end_ns))
+    assert np.all(np.isnan(store.generation_start_ns))
+
+
+def test_ingest_writes_numeric_value_and_timestamps():
+    store = _make_store()
+    store.ingest(
+        0,
+        record_metrics={"latency_ns": 100.0},
+        start_ns=10.0,
+        end_ns=20.0,
+        generation_start_ns=15.0,
+    )
+    assert store.count == 1
+    assert store.numeric("latency_ns")[0] == 100.0
+    assert store.start_ns[0] == 10.0
+    assert store.end_ns[0] == 20.0
+    assert store.generation_start_ns[0] == 15.0
+
+
+def test_ingest_running_sum_invariant_across_records():
+    store = _make_store()
+    for i, val in enumerate([1.0, 2.0, 3.0, 4.5]):
+        store.ingest(
+            i,
+            record_metrics={"x": val},
+            start_ns=float(i),
+            end_ns=float(i) + 1.0,
+            generation_start_ns=None,
+        )
+    assert store.numeric_count("x") == 4
+    assert store.numeric_sum("x") == pytest.approx(1.0 + 2.0 + 3.0 + 4.5)
+    np.testing.assert_array_equal(store.numeric("x"), [1.0, 2.0, 3.0, 4.5])
+
+
+def test_ingest_running_sum_accepts_int_without_float_cast():
+    """Verifies the post-83cb85017 form: no Python-level float() cast.
+
+    numpy's __setitem__ + dict += both auto-coerce int -> float64.
+    """
+    store = _make_store()
+    store.ingest(
+        0, record_metrics={"i": 5}, start_ns=0.0, end_ns=1.0, generation_start_ns=None
+    )
+    store.ingest(
+        1, record_metrics={"i": 7}, start_ns=1.0, end_ns=2.0, generation_start_ns=None
+    )
+    assert store.numeric_sum("i") == 12.0
+    assert store.numeric_count("i") == 2
+    assert store.numeric("i").dtype == np.float64
+
+
+def test_numeric_returns_nan_for_unknown_tag():
+    store = _make_store()
+    store.ingest(
+        0, record_metrics={"x": 1.0}, start_ns=0.0, end_ns=1.0, generation_start_ns=None
+    )
+    out = store.numeric("does_not_exist")
+    assert out.shape == (1,)
+    assert np.all(np.isnan(out))
+
+
+def test_numeric_sum_unknown_tag_returns_zero():
+    store = _make_store()
+    assert store.numeric_sum("missing") == 0.0
+    assert store.numeric_count("missing") == 0
+
+
+def test_ingest_string_value():
+    store = _make_store()
+    store.ingest(
+        0,
+        record_metrics={"name": "alice"},
+        start_ns=0,
+        end_ns=1,
+        generation_start_ns=None,
+    )
+    store.ingest(
+        2,
+        record_metrics={"name": "bob"},
+        start_ns=2,
+        end_ns=3,
+        generation_start_ns=None,
+    )
+    col = store.string("name")
+    assert col[0] == "alice"
+    assert col[1] is None  # uningested slot
+    assert col[2] == "bob"
+
+
+def test_ingest_list_value_routes_to_ragged_backend():
+    store = _make_store()
+    store.ingest(
+        0,
+        record_metrics={"icl": [1.0, 2.0, 3.0]},
+        start_ns=0,
+        end_ns=10,
+        generation_start_ns=None,
+    )
+    store.ingest(
+        1,
+        record_metrics={"icl": [4.0]},
+        start_ns=10,
+        end_ns=20,
+        generation_start_ns=None,
+    )
+    backend = store.ragged("icl")
+    assert isinstance(backend, RaggedSeries)
+    np.testing.assert_array_equal(backend.values, [1.0, 2.0, 3.0, 4.0])
+    np.testing.assert_array_equal(backend.record_indices, [0, 0, 0, 1])
+
+
+def test_ingest_out_of_order_count_tracks_max_idx_plus_one():
+    store = _make_store()
+    store.ingest(
+        5, record_metrics={"x": 1.0}, start_ns=0, end_ns=1, generation_start_ns=None
+    )
+    store.ingest(
+        2, record_metrics={"x": 2.0}, start_ns=0, end_ns=1, generation_start_ns=None
+    )
+    assert store.count == 6
+    # Slot 0..1 unfilled
+    col = store.numeric("x")
+    assert np.isnan(col[0])
+    assert np.isnan(col[1])
+    assert col[2] == 2.0
+    assert col[5] == 1.0
+
+
+def test_ingest_grows_capacity_when_idx_exceeds_initial():
+    store = _make_store(initial_capacity=4)
+    # Force grow: idx=10 needs cap >= 16
+    store.ingest(
+        10, record_metrics={"x": 1.0}, start_ns=0, end_ns=1, generation_start_ns=None
+    )
+    assert store.count == 11
+    assert store.numeric("x")[10] == 1.0
+    assert store.numeric_sum("x") == 1.0
+
+
+def test_grow_preserves_existing_numeric_values():
+    store = _make_store(initial_capacity=4)
+    for i in range(3):
+        store.ingest(
+            i,
+            record_metrics={"x": float(i + 1)},
+            start_ns=float(i),
+            end_ns=float(i) + 1,
+            generation_start_ns=None,
+        )
+    store.ingest(
+        20, record_metrics={"x": 99.0}, start_ns=20, end_ns=21, generation_start_ns=None
+    )
+    col = store.numeric("x")
+    assert col[0] == 1.0
+    assert col[1] == 2.0
+    assert col[2] == 3.0
+    assert col[20] == 99.0
+    # Running sum survives reallocation
+    assert store.numeric_sum("x") == 1.0 + 2.0 + 3.0 + 99.0
+    assert store.numeric_count("x") == 4
+
+
+def test_grow_invalidates_tag_handlers():
+    """After _grow, cached numeric closures point at the OLD array; they must
+    be cleared so the next ingest rebinds against the new buffer."""
+    store = _make_store(initial_capacity=4)
+    store.ingest(
+        0, record_metrics={"x": 1.0}, start_ns=0, end_ns=1, generation_start_ns=None
+    )
+    # Resolve handler for 'x' so it lands in the cache.
+    assert "x" in store._tag_handlers
+    store.ingest(
+        20, record_metrics={"x": 2.0}, start_ns=20, end_ns=21, generation_start_ns=None
+    )
+    # Post-grow, ingest should still work — handler rebuilt against new array.
+    assert store.numeric("x")[20] == 2.0
+    assert store.numeric_sum("x") == 3.0
+
+
+def test_ingest_metadata_numeric_and_string():
+    store = _make_store()
+    # Metadata accessors slice on _count, which only bumps on ingest(); ingest a
+    # placeholder record first so the metadata reads return a populated row.
+    store.ingest(0, record_metrics={}, start_ns=0, end_ns=1, generation_start_ns=None)
+    store.ingest_metadata(
+        0,
+        metadata_numeric={"latency_offset_ns": 5.0},
+        metadata_string={"worker_id": "w-0"},
+    )
+    assert store.metadata_numeric("latency_offset_ns")[0] == 5.0
+    assert store.metadata_string("worker_id")[0] == "w-0"
+
+
+def test_metadata_numeric_does_not_appear_in_metric_columns():
+    """Metadata columns must NOT be visible to numeric_tags() (which feeds metric compute)."""
+    store = _make_store()
+    store.ingest_metadata(
+        0,
+        metadata_numeric={"meta_only": 1.0},
+        metadata_string={},
+    )
+    assert "meta_only" not in store.numeric_tags()
+
+
+def test_metadata_bool_encoding():
+    store = _make_store()
+    # _count bumps on ingest() only — write placeholder records so the metadata
+    # accessors expose populated rows.
+    store.ingest(0, record_metrics={}, start_ns=0, end_ns=1, generation_start_ns=None)
+    store.ingest(1, record_metrics={}, start_ns=1, end_ns=2, generation_start_ns=None)
+    store.ingest_metadata(
+        0,
+        metadata_numeric={},
+        metadata_string={},
+        metadata_bool={"is_streaming": True},
+    )
+    store.ingest_metadata(
+        1,
+        metadata_numeric={},
+        metadata_string={},
+        metadata_bool={"is_streaming": False},
+    )
+    col = store.metadata_bool("is_streaming")
+    assert col[0] == 1
+    assert col[1] == 0
+    # Slot 2 unfilled (capacity grew implicitly? no — count is 2 here)
+
+
+def test_metadata_bool_missing_sentinel_for_unfilled_slot():
+    store = _make_store()
+    store.ingest(
+        0, record_metrics={"x": 1.0}, start_ns=0, end_ns=1, generation_start_ns=None
+    )
+    store.ingest(
+        2, record_metrics={"x": 1.0}, start_ns=0, end_ns=1, generation_start_ns=None
+    )
+    store.ingest_metadata(0, {}, {}, metadata_bool={"flag": True})
+    store.ingest_metadata(2, {}, {}, metadata_bool={"flag": False})
+    col = store.metadata_bool("flag")
+    assert col[0] == 1
+    assert col[1] == _BOOL_MISSING
+    assert col[2] == 0
+
+
+def test_metadata_categorical_intern_and_lookup():
+    store = _make_store()
+    store.ingest(
+        0, record_metrics={"x": 1.0}, start_ns=0, end_ns=1, generation_start_ns=None
+    )
+    store.ingest(
+        1, record_metrics={"x": 2.0}, start_ns=0, end_ns=1, generation_start_ns=None
+    )
+    store.ingest(
+        2, record_metrics={"x": 3.0}, start_ns=0, end_ns=1, generation_start_ns=None
+    )
+    store.ingest_metadata(0, {}, {}, metadata_categorical={"corr": "conv-A"})
+    store.ingest_metadata(1, {}, {}, metadata_categorical={"corr": "conv-B"})
+    store.ingest_metadata(2, {}, {}, metadata_categorical={"corr": "conv-A"})
+
+    codes = store.metadata_categorical("corr")
+    # First-seen "conv-A" -> 0; "conv-B" -> 1; repeat "conv-A" -> 0
+    assert codes[0] == 0
+    assert codes[1] == 1
+    assert codes[2] == 0
+
+    strings = store.metadata_category_strings("corr")
+    assert strings[0] == "conv-A"
+    assert strings[1] == "conv-B"
+
+
+def test_metadata_categorical_missing_sentinel_for_uningested():
+    store = _make_store()
+    store.ingest(
+        0, record_metrics={"x": 1.0}, start_ns=0, end_ns=1, generation_start_ns=None
+    )
+    store.ingest(
+        1, record_metrics={"x": 2.0}, start_ns=0, end_ns=1, generation_start_ns=None
+    )
+    store.ingest_metadata(0, {}, {}, metadata_categorical={"corr": "v0"})
+    codes = store.metadata_categorical("corr")
+    assert codes[0] == 0
+    assert codes[1] == _CATEGORICAL_MISSING
+
+
+def test_unique_categorical_values_lists_seen_strings():
+    store = _make_store()
+    store.ingest(
+        0, record_metrics={"x": 1.0}, start_ns=0, end_ns=1, generation_start_ns=None
+    )
+    store.ingest(
+        1, record_metrics={"x": 2.0}, start_ns=0, end_ns=1, generation_start_ns=None
+    )
+    store.ingest_metadata(0, {}, {}, metadata_categorical={"g": "a"})
+    store.ingest_metadata(1, {}, {}, metadata_categorical={"g": "b"})
+    assert set(store.unique_categorical_values("g")) == {"a", "b"}
+
+
+def test_mask_for_categorical_selects_matching_records():
+    store = _make_store()
+    for i in range(3):
+        store.ingest(
+            i,
+            record_metrics={"x": float(i)},
+            start_ns=0,
+            end_ns=1,
+            generation_start_ns=None,
+        )
+    store.ingest_metadata(0, {}, {}, metadata_categorical={"g": "alpha"})
+    store.ingest_metadata(1, {}, {}, metadata_categorical={"g": "beta"})
+    store.ingest_metadata(2, {}, {}, metadata_categorical={"g": "alpha"})
+
+    mask = store.mask_for_categorical("g", "alpha")
+    np.testing.assert_array_equal(mask, [True, False, True])
+
+
+def test_mask_for_categorical_unknown_value_returns_all_false():
+    store = _make_store()
+    store.ingest(
+        0, record_metrics={"x": 1.0}, start_ns=0, end_ns=1, generation_start_ns=None
+    )
+    store.ingest_metadata(0, {}, {}, metadata_categorical={"g": "a"})
+    mask = store.mask_for_categorical("g", "never_seen")
+    assert mask.shape == (1,)
+    assert not mask.any()
+
+
+def test_mask_for_categorical_unknown_tag_returns_all_false():
+    store = _make_store()
+    store.ingest(
+        0, record_metrics={"x": 1.0}, start_ns=0, end_ns=1, generation_start_ns=None
+    )
+    mask = store.mask_for_categorical("never_indexed", "v")
+    assert mask.shape == (1,)
+    assert not mask.any()
+
+
+def test_query_time_range_selects_overlapping_records():
+    store = _make_store()
+    # Record 0: [0, 100], Record 1: [50, 200], Record 2: [300, 400]
+    store.ingest(
+        0, record_metrics={}, start_ns=0.0, end_ns=100.0, generation_start_ns=None
+    )
+    store.ingest(
+        1, record_metrics={}, start_ns=50.0, end_ns=200.0, generation_start_ns=None
+    )
+    store.ingest(
+        2, record_metrics={}, start_ns=300.0, end_ns=400.0, generation_start_ns=None
+    )
+
+    # Query window [75, 250] overlaps records 0 (end=100>=75) and 1 (50..200), not 2.
+    mask = store.query_time_range(75.0, 250.0)
+    np.testing.assert_array_equal(mask, [True, True, False])
+
+
+def test_query_time_range_excludes_unfilled_slots():
+    store = _make_store()
+    store.ingest(
+        0, record_metrics={}, start_ns=10.0, end_ns=20.0, generation_start_ns=None
+    )
+    store.ingest(
+        2, record_metrics={}, start_ns=30.0, end_ns=40.0, generation_start_ns=None
+    )
+    # Slot 1 has NaN start_ns/end_ns — must NOT match any window.
+    mask = store.query_time_range(0.0, 100.0)
+    assert mask[0]
+    assert not mask[1]
+    assert mask[2]
+
+
+def test_query_time_range_empty_store_returns_empty_mask():
+    store = _make_store()
+    mask = store.query_time_range(0.0, 100.0)
+    assert mask.shape == (0,)
+    assert mask.dtype == np.bool_
+
+
+def test_ingest_mixed_numeric_string_list_in_one_record():
+    store = _make_store()
+    store.ingest(
+        0,
+        record_metrics={
+            "lat_ns": 100.0,
+            "model": "gpt-4",
+            "icl": [1.0, 2.0],
+        },
+        start_ns=0,
+        end_ns=10,
+        generation_start_ns=5,
+    )
+    assert store.numeric("lat_ns")[0] == 100.0
+    assert store.string("model")[0] == "gpt-4"
+    np.testing.assert_array_equal(store.ragged("icl").values, [1.0, 2.0])
+
+
+def test_ingest_skips_unsupported_value_types():
+    store = _make_store()
+    # dict is neither numeric, str, nor list — should be silently skipped.
+    store.ingest(
+        0,
+        record_metrics={"weird": {"k": "v"}, "ok": 5.0},
+        start_ns=0,
+        end_ns=1,
+        generation_start_ns=None,
+    )
+    assert store.numeric("ok")[0] == 5.0
+    assert "weird" not in store.numeric_tags()
+    assert "weird" not in store.ragged_tags()
diff --git a/tests/unit/metrics/test_ragged_series.py b/tests/unit/metrics/test_ragged_series.py
new file mode 100644
index 000000000..b22e43056
--- /dev/null
+++ b/tests/unit/metrics/test_ragged_series.py
@@ -0,0 +1,125 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""Tests for RaggedSeries — list-valued per-record metric storage."""
+
+from __future__ import annotations
+
+import numpy as np
+import pytest
+from pytest import param
+
+from aiperf.metrics.ragged_series import RaggedSeries
+
+
+def test_init_empty_state_zero_values():
+    series = RaggedSeries(initial_capacity=8, offsets_capacity=4)
+    assert len(series.values) == 0
+    assert len(series.record_indices) == 0
+    assert (series.offsets == -1).all()
+
+
+def test_extend_records_offsets_and_values():
+    series = RaggedSeries(initial_capacity=8, offsets_capacity=4)
+    series.extend(0, [1.0, 2.0, 3.0])
+    series.extend(2, [10.0])
+
+    np.testing.assert_array_equal(series.values, [1.0, 2.0, 3.0, 10.0])
+    np.testing.assert_array_equal(series.record_indices, [0, 0, 0, 2])
+    assert series.offsets[0] == 0
+    assert series.offsets[1] == -1  # absent
+    assert series.offsets[2] == 3
+
+
+def test_extend_empty_list_is_noop():
+    series = RaggedSeries(initial_capacity=4, offsets_capacity=4)
+    series.extend(0, [])
+    assert len(series.values) == 0
+    assert series.offsets[0] == -1
+
+
+def test_add_for_record_alias_matches_extend():
+    series_a = RaggedSeries()
+    series_b = RaggedSeries()
+    series_a.extend(1, [4.0, 5.0])
+    series_b.add_for_record(1, [4.0, 5.0])
+    np.testing.assert_array_equal(series_a.values, series_b.values)
+    np.testing.assert_array_equal(series_a.record_indices, series_b.record_indices)
+
+
+def test_extend_grows_offsets_when_idx_exceeds_capacity():
+    series = RaggedSeries(initial_capacity=8, offsets_capacity=4)
+    # idx >= 4 must trigger doubling. Push to idx=10 (requires capacity 16).
+    series.extend(10, [7.0])
+    assert series.offsets.shape[0] >= 11
+    assert series.offsets[10] == 0
+    # Earlier slots preserved as -1
+    assert (series.offsets[:10] == -1).all()
+
+
+def test_get_values_for_mask_selects_records():
+    series = RaggedSeries(initial_capacity=8, offsets_capacity=4)
+    series.extend(0, [1.0, 2.0])
+    series.extend(1, [3.0])
+    series.extend(2, [4.0, 5.0, 6.0])
+
+    mask = np.array([True, False, True])
+    selected = series.get_values_for_mask(mask)
+    np.testing.assert_array_equal(np.sort(selected), [1.0, 2.0, 4.0, 5.0, 6.0])
+
+
+def test_get_values_for_mask_empty_returns_empty():
+    series = RaggedSeries()
+    out = series.get_values_for_mask(np.zeros(0, dtype=bool))
+    assert out.shape == (0,)
+    assert out.dtype == np.float64
+
+
+def test_grouped_cumsum_resets_at_request_boundaries():
+    series = RaggedSeries(initial_capacity=8, offsets_capacity=4)
+    series.extend(0, [1.0, 2.0, 3.0])
+    series.extend(1, [10.0, 20.0])
+
+    cs = series.grouped_cumsum()
+    # Within record 0: 1, 1+2, 1+2+3
+    # Within record 1: 10, 10+20 (NOT continuing global)
+    np.testing.assert_array_equal(cs, [1.0, 3.0, 6.0, 10.0, 30.0])
+
+
+def test_grouped_cumsum_first_record_at_offset_zero():
+    series = RaggedSeries(initial_capacity=4, offsets_capacity=4)
+    series.extend(0, [5.0, 7.0])
+    cs = series.grouped_cumsum()
+    np.testing.assert_array_equal(cs, [5.0, 12.0])
+
+
+def test_grouped_cumsum_empty_returns_empty():
+    series = RaggedSeries()
+    cs = series.grouped_cumsum()
+    assert cs.shape == (0,)
+    assert cs.dtype == np.float64
+
+
+@pytest.mark.parametrize(
+    "extends",
+    [
+        param([(0, [1.0]), (1, [2.0]), (2, [3.0])], id="three_singletons"),
+        param([(0, [1.0, 2.0, 3.0])], id="single_record_three_values"),
+        param([(5, [4.0]), (6, [5.0])], id="sparse_record_indices"),
+    ],
+)
+def test_offsets_track_first_value_position(extends):
+    series = RaggedSeries(initial_capacity=8, offsets_capacity=8)
+    expected_offsets: dict[int, int] = {}
+    running_len = 0
+    for idx, vals in extends:
+        if vals:
+            expected_offsets[idx] = running_len
+            running_len += len(vals)
+        series.extend(idx, vals)
+
+    for idx, off in expected_offsets.items():
+        assert series.offsets[idx] == off
+
+
+def test_supports_per_record_replay_flag_true():
+    assert RaggedSeries.SUPPORTS_PER_RECORD_REPLAY is True
diff --git a/tools/ergonomics_baseline.json b/tools/ergonomics_baseline.json
index 077e9eb7a..ff950fecf 100644
--- a/tools/ergonomics_baseline.json
+++ b/tools/ergonomics_baseline.json
@@ -196,6 +196,11 @@
       "src/aiperf/gpu_telemetry/manager.py",
       ""
     ],
+    [
+      "file-size",
+      "src/aiperf/metrics/column_store.py",
+      ""
+    ],
     [
       "file-size",
       "src/aiperf/metrics/types/http_trace_metrics.py",