
Rename stream_id to state_id #842

Open · wants to merge 1 commit into base: `quent/single-partition-groupby`
20 changes: 8 additions & 12 deletions quixstreams/app.py
```diff
@@ -994,15 +994,13 @@ def _on_assign(self, _, topic_partitions: List[TopicPartition]):
                     )
                 committed_offsets[tp.partition][tp.topic] = tp.offset
 
-            # Match the assigned TP with a stream ID via DataFrameRegistry
+            # Match the assigned TP with a state ID via DataFrameRegistry
             for tp in non_changelog_tps:
-                stream_ids = self._dataframe_registry.get_stream_ids(
-                    topic_name=tp.topic
-                )
-                # Assign store partitions for the given stream ids
-                for stream_id in stream_ids:
+                state_ids = self._dataframe_registry.get_state_ids(topic_name=tp.topic)
+                # Assign store partitions for the given state ids
+                for state_id in state_ids:
                     self._state_manager.on_partition_assign(
-                        stream_id=stream_id,
+                        state_id=state_id,
                         partition=tp.partition,
                         committed_offsets=committed_offsets[tp.partition],
                     )
@@ -1044,12 +1042,10 @@ def _revoke_state_partitions(self, topic_partitions: List[TopicPartition]):
         ]
         for tp in non_changelog_tps:
             if self._state_manager.stores:
-                stream_ids = self._dataframe_registry.get_stream_ids(
-                    topic_name=tp.topic
-                )
-                for stream_id in stream_ids:
+                state_ids = self._dataframe_registry.get_state_ids(topic_name=tp.topic)
+                for state_id in state_ids:
                     self._state_manager.on_partition_revoke(
-                        stream_id=stream_id, partition=tp.partition
+                        state_id=state_id, partition=tp.partition
                    )
 
     def _setup_signal_handlers(self):
```
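Both hunks fan out each assigned or revoked topic partition to every state id registered for that topic. A minimal standalone sketch of that fan-out, with a plain dict standing in for `DataFrameRegistry` and `StateStoreManager` (all topic and state-id names here are illustrative, not the library's):

```python
# Illustrative model of the assignment fan-out: one assigned (topic, partition)
# yields one store partition per state id registered for that topic.
topics_to_state_ids: dict[str, list[str]] = {
    "orders": ["orders", "orders--groupby--user_id"],
}

def on_partition_assign(assigned: list[tuple[str, int]]) -> list[tuple[str, int]]:
    store_partitions = []
    for topic, partition in assigned:
        for state_id in topics_to_state_ids.get(topic, []):
            store_partitions.append((state_id, partition))
    return store_partitions

print(on_partition_assign([("orders", 0), ("orders", 1)]))
# [('orders', 0), ('orders--groupby--user_id', 0),
#  ('orders', 1), ('orders--groupby--user_id', 1)]
```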
18 changes: 7 additions & 11 deletions quixstreams/checkpointing/checkpoint.py
```diff
@@ -148,28 +148,26 @@ def __init__(
         self._producer.begin_transaction()
 
     def get_store_transaction(
-        self, stream_id: str, partition: int, store_name: str = DEFAULT_STATE_STORE_NAME
+        self, state_id: str, partition: int, store_name: str = DEFAULT_STATE_STORE_NAME
     ) -> PartitionTransaction:
         """
         Get a PartitionTransaction for the given store, topic and partition.
 
         It will return already started transaction if there's one.
 
-        :param stream_id: stream id
+        :param state_id: state id
         :param partition: partition number
         :param store_name: store name
         :return: instance of `PartitionTransaction`
         """
-        transaction = self._store_transactions.get((stream_id, partition, store_name))
+        transaction = self._store_transactions.get((state_id, partition, store_name))
         if transaction is not None:
             return transaction
 
-        store = self._state_manager.get_store(
-            stream_id=stream_id, store_name=store_name
-        )
+        store = self._state_manager.get_store(state_id=state_id, store_name=store_name)
         transaction = store.start_partition_transaction(partition=partition)
 
-        self._store_transactions[(stream_id, partition, store_name)] = transaction
+        self._store_transactions[(state_id, partition, store_name)] = transaction
         return transaction
 
     def close(self):
@@ -227,13 +225,11 @@ def commit(self):
 
         # Step 2. Produce the changelogs
         for (
-            stream_id,
+            state_id,
             partition,
             store_name,
         ), transaction in self._store_transactions.items():
-            topics = self._dataframe_registry.get_topics_for_stream_id(
-                stream_id=stream_id
-            )
+            topics = self._dataframe_registry.get_topics_for_state_id(state_id=state_id)
             processed_offsets = {
                 topic: offset
                 for (topic, partition_), offset in self._tp_offsets.items()
```
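The rename changes the first element of the transaction cache key, so transactions are now memoized per `(state_id, partition, store_name)` for the lifetime of a checkpoint. A simplified sketch of that memoization pattern (standalone, not the real `Checkpoint` class; the store object is a stand-in):

```python
# One open transaction is kept per (state_id, partition, store_name) key;
# repeated lookups within a checkpoint return the same object.
from typing import Any

_store_transactions: dict[tuple[str, int, str], Any] = {}

def get_store_transaction(state_id: str, partition: int, store_name: str = "default") -> Any:
    key = (state_id, partition, store_name)
    transaction = _store_transactions.get(key)
    if transaction is not None:
        return transaction
    transaction = object()  # stands in for store.start_partition_transaction(...)
    _store_transactions[key] = transaction
    return transaction

first = get_store_transaction("orders", partition=0)
second = get_store_transaction("orders", partition=0)
assert first is second  # the same transaction is reused within the checkpoint
```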
43 changes: 22 additions & 21 deletions quixstreams/dataframe/dataframe.py
```diff
@@ -135,7 +135,7 @@ def __init__(
         registry: DataFrameRegistry,
         processing_context: ProcessingContext,
         stream: Optional[Stream] = None,
-        stream_id: Optional[str] = None,
+        state_id: Optional[str] = None,
     ):
         if not topics:
             raise ValueError("At least one Topic must be passed")
@@ -146,15 +146,15 @@
         )
 
         self._stream: Stream = stream or Stream()
-        self._stream_id: str = stream_id or topic_manager.stream_id_from_topics(
+        self._state_id: str = state_id or topic_manager.state_id_from_topics(
             self.topics
         )
         self._topic_manager = topic_manager
         self._registry = registry
         self._processing_context = processing_context
         self._producer = processing_context.producer
-        self._registry.register_stream_id(
-            stream_id=self.stream_id, topic_names=[t.name for t in self._topics]
+        self._registry.register_state_id(
+            state_id=self.state_id, topic_names=[t.name for t in self._topics]
         )
 
     @property
@@ -166,20 +166,20 @@ def stream(self) -> Stream:
         return self._stream
 
     @property
-    def stream_id(self) -> str:
+    def state_id(self) -> str:
         """
         An identifier of the data stream this StreamingDataFrame
         manipulates in the application.
 
         It is used as a common prefix for state stores and group-by topics.
-        A new `stream_id` is set when StreamingDataFrames are merged via `.merge()`
+        A new `state_id` is set when StreamingDataFrames are merged via `.merge()`
         or grouped via `.group_by()`.
 
-        StreamingDataFrames with different `stream_id` cannot access the same state stores.
+        StreamingDataFrames with different `state_id` cannot access the same state stores.
 
-        By default, a topic name or a combination of topic names are used as `stream_id`.
+        By default, a topic name or a combination of topic names are used as `state_id`.
         """
-        return self._stream_id
+        return self._state_id
 
     @property
     def topics(self) -> tuple[Topic, ...]:
@@ -286,7 +286,7 @@ def func(d: dict, state: State):
             stateful_func = _as_stateful(
                 func=with_metadata_func,
                 processing_context=self._processing_context,
-                stream_id=self.stream_id,
+                state_id=self.state_id,
             )
             stream = self.stream.add_apply(stateful_func, expand=expand, metadata=True)  # type: ignore[call-overload]
         else:
@@ -395,7 +395,7 @@ def func(values: list, state: State):
             stateful_func = _as_stateful(
                 func=with_metadata_func,
                 processing_context=self._processing_context,
-                stream_id=self.stream_id,
+                state_id=self.state_id,
             )
             return self._add_update(stateful_func, metadata=True)
         else:
@@ -497,7 +497,7 @@ def func(d: dict, state: State):
             stateful_func = _as_stateful(
                 func=with_metadata_func,
                 processing_context=self._processing_context,
-                stream_id=self.stream_id,
+                state_id=self.state_id,
            )
             stream = self.stream.add_filter(stateful_func, metadata=True)
         else:
@@ -603,7 +603,7 @@ def func(d: dict, state: State):
 
         groupby_topic = self._topic_manager.repartition_topic(
             operation=operation,
-            stream_id=self.stream_id,
+            state_id=self.state_id,
             config=repartition_config,
             key_serializer=key_serializer,
             value_serializer=value_serializer,
@@ -631,7 +631,7 @@ def _callback(value, _, timestamp, headers):
         stream = self.stream.add_transform(_callback, expand=False)
 
         groupby_sdf = self.__dataframe_clone__(
-            stream=stream, stream_id=f"{self.stream_id}--groupby--{operation}"
+            stream=stream, state_id=f"{self.state_id}--groupby--{operation}"
         )
         self._registry.register_groupby(
             source_sdf=self, new_sdf=groupby_sdf, register_new_root=False
@@ -1683,15 +1683,15 @@ def _add_update(
 
     def _register_store(self):
         """
-        Register the default store for the current stream_id in StateStoreManager.
+        Register the default store for the current state_id in StateStoreManager.
         """
         self.ensure_topics_copartitioned()
 
         # Generate a changelog topic config based on the underlying topics.
         changelog_topic_config = self._topic_manager.derive_topic_config(self._topics)
 
         self._processing_context.state_manager.register_store(
-            stream_id=self.stream_id, changelog_config=changelog_topic_config
+            state_id=self.state_id, changelog_config=changelog_topic_config
         )
 
     def _groupby_key(
@@ -1711,21 +1711,22 @@ def __dataframe_clone__(
         self,
         *topics: Topic,
         stream: Optional[Stream] = None,
-        stream_id: Optional[str] = None,
+        state_id: Optional[str] = None,
     ) -> "StreamingDataFrame":
         """
         Clone the StreamingDataFrame with a new `stream`, `topics`,
-        and optional `stream_id` parameters.
+        and optional `state_id` parameters.
 
         :param topics: one or more `Topic` objects
         :param stream: instance of `Stream`, optional.
+        :param state_id: str, optional.
         :return: a new `StreamingDataFrame`.
         """
 
         clone = self.__class__(
             *(topics or self._topics),
             stream=stream,
-            stream_id=stream_id,
+            state_id=state_id,
             processing_context=self._processing_context,
             topic_manager=self._topic_manager,
             registry=self._registry,
@@ -1840,13 +1841,13 @@ def wrapper(
 def _as_stateful(
     func: Callable[[Any, Any, int, Any, State], T],
     processing_context: ProcessingContext,
-    stream_id: str,
+    state_id: str,
 ) -> Callable[[Any, Any, int, Any], T]:
     @functools.wraps(func)
     def wrapper(value: Any, key: Any, timestamp: int, headers: Any) -> Any:
         ctx = message_context()
         transaction = processing_context.checkpoint.get_store_transaction(
-            stream_id=stream_id,
+            state_id=state_id,
             partition=ctx.partition,
         )
         # Pass a State object with an interface limited to the key updates only
```
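For users, the visible effect is the renamed `StreamingDataFrame.state_id` property and its derivation through `group_by()`. A usage sketch, assuming this PR lands as-is; the broker address and topic name are placeholders, and the derived id follows the `f"{state_id}--groupby--{operation}"` pattern from the hunk above:

```python
from quixstreams import Application

app = Application(broker_address="localhost:9092")
sdf = app.dataframe(topic=app.topic("orders"))
print(sdf.state_id)  # by default, the topic name: "orders"

# group_by() derives a new state id, so the grouped SDF uses separate stores:
sdf = sdf.group_by("user_id", name="by_user")
print(sdf.state_id)  # "orders--groupby--by_user"
```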
36 changes: 18 additions & 18 deletions quixstreams/dataframe/registry.py
```diff
@@ -24,8 +24,8 @@ def __init__(self) -> None:
         self._registry: dict[str, Stream] = {}
         self._topics: list[Topic] = []
         self._repartition_origins: set[str] = set()
-        self._topics_to_stream_ids: dict[str, set[str]] = {}
-        self._stream_ids_to_topics: dict[str, set[str]] = {}
+        self._topics_to_state_ids: dict[str, set[str]] = {}
+        self._state_ids_to_topics: dict[str, set[str]] = {}
 
     @property
     def consumer_topics(self) -> list[Topic]:
@@ -71,19 +71,19 @@ def register_groupby(
         :param source_sdf: the SDF used by `sdf.group_by()`
         :param new_sdf: the SDF generated by `sdf.group_by()`.
         """
-        if source_sdf.stream_id in self._repartition_origins:
+        if source_sdf.state_id in self._repartition_origins:
             raise GroupByNestingLimit(
                 "Subsequent (nested) `SDF.group_by()` operations are not allowed."
             )
 
-        if new_sdf.stream_id in self._repartition_origins:
+        if new_sdf.state_id in self._repartition_origins:
             raise GroupByDuplicate(
                 "A `SDF.group_by()` operation appears to be the same as another, "
                 "either from using the same column or name parameter; "
                 "adjust by setting a unique name with `SDF.group_by(name=<NAME>)` "
             )
 
-        self._repartition_origins.add(new_sdf.stream_id)
+        self._repartition_origins.add(new_sdf.state_id)
 
         if register_new_root:
             try:
@@ -113,34 +113,34 @@ def compose_all(
             executors[topic] = root_executors[root_stream]
         return executors
 
-    def register_stream_id(self, stream_id: str, topic_names: list[str]):
+    def register_state_id(self, state_id: str, topic_names: list[str]):
         """
-        Register a mapping between the stream_id and topic names.
+        Register a mapping between the state_id and topic names.
         This mapping is later used to match topics to state stores
         during assignment and commits.
 
-        The same stream id can be registered multiple times.
-        :param stream_id: stream id of StreamingDataFrame
+        The same state id can be registered multiple times.
+        :param state_id: state id of StreamingDataFrame
         :param topic_names: list of topics to map the stream id with
         """
         for topic_name in topic_names:
-            self._topics_to_stream_ids.setdefault(topic_name, set()).add(stream_id)
-            self._stream_ids_to_topics.setdefault(stream_id, set()).add(topic_name)
+            self._topics_to_state_ids.setdefault(topic_name, set()).add(state_id)
+            self._state_ids_to_topics.setdefault(state_id, set()).add(topic_name)
 
-    def get_stream_ids(self, topic_name: str) -> list[str]:
+    def get_state_ids(self, topic_name: str) -> list[str]:
         """
-        Get a list of stream ids for the given topic name
+        Get a list of state ids for the given topic name
 
         :param topic_name: a name of the topic
-        :return: a list of stream ids
+        :return: a list of state ids
         """
-        return list(self._topics_to_stream_ids[topic_name])
+        return list(self._topics_to_state_ids[topic_name])
 
-    def get_topics_for_stream_id(self, stream_id: str) -> list[str]:
+    def get_topics_for_state_id(self, state_id: str) -> list[str]:
         """
         Get a list of topics for the given stream id.
 
-        :param stream_id: stream id
+        :param state_id: state id
         :return: a list of topic names
         """
-        return list(self._stream_ids_to_topics[stream_id])
+        return list(self._state_ids_to_topics[state_id])
```
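The registry keeps the mapping bidirectional because one topic can feed several state ids and one state id can span several topics. A standalone sketch mirroring the `setdefault` logic from the diff (the id and topic names are made up):

```python
# Two mirrored indexes: topic -> state ids, and state id -> topics.
topics_to_state_ids: dict[str, set[str]] = {}
state_ids_to_topics: dict[str, set[str]] = {}

def register_state_id(state_id: str, topic_names: list[str]) -> None:
    for topic_name in topic_names:
        topics_to_state_ids.setdefault(topic_name, set()).add(state_id)
        state_ids_to_topics.setdefault(state_id, set()).add(topic_name)

# A merged SDF spanning two topics registers one state id for both:
register_state_id("orders-merged", ["orders-eu", "orders-us"])
assert topics_to_state_ids["orders-eu"] == {"orders-merged"}
assert state_ids_to_topics["orders-merged"] == {"orders-eu", "orders-us"}
```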
8 changes: 4 additions & 4 deletions quixstreams/dataframe/windows/base.py
```diff
@@ -74,7 +74,7 @@ def register_store(self) -> None:
         # Create a config for the changelog topic based on the underlying SDF topics
         changelog_config = TopicManager.derive_topic_config(self._dataframe.topics)
         self._dataframe.processing_context.state_manager.register_windowed_store(
-            stream_id=self._dataframe.stream_id,
+            state_id=self._dataframe.state_id,
             store_name=self._name,
             changelog_config=changelog_config,
         )
@@ -88,7 +88,7 @@ def _apply_window(
 
         windowed_func = _as_windowed(
             func=func,
-            stream_id=self._dataframe.stream_id,
+            state_id=self._dataframe.state_id,
             processing_context=self._dataframe.processing_context,
             store_name=name,
         )
@@ -400,7 +400,7 @@ def _as_windowed(
     func: TransformRecordCallbackExpandedWindowed,
     processing_context: "ProcessingContext",
     store_name: str,
-    stream_id: str,
+    state_id: str,
 ) -> TransformExpandedCallback:
     @functools.wraps(func)
     def wrapper(
@@ -410,7 +410,7 @@ def wrapper(
         transaction = cast(
             WindowedPartitionTransaction,
             processing_context.checkpoint.get_store_transaction(
-                stream_id=stream_id, partition=ctx.partition, store_name=store_name
+                state_id=state_id, partition=ctx.partition, store_name=store_name
             ),
         )
         if key is None:
```
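Windowed stores keep the same keying scheme after the rename: windows defined on one SDF share its `state_id` but are distinguished by `store_name`, so each window definition gets its own store. A sketch of that keying (standalone, all names illustrative):

```python
# One state id can own several windowed stores, one per window definition.
registered_stores: dict[str, set[str]] = {}

def register_windowed_store(state_id: str, store_name: str) -> None:
    registered_stores.setdefault(state_id, set()).add(store_name)

register_windowed_store("orders", store_name="tumbling_window_60000_sum")
register_windowed_store("orders", store_name="hopping_window_60000_10000_count")
print(registered_stores)  # set order may vary:
# {'orders': {'tumbling_window_60000_sum', 'hopping_window_60000_10000_count'}}
```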