Skip to content

Commit dd1a36b

Browse files
Committed (change requested by review).
Parent: 1ae852b · Commit: dd1a36b

File tree

2 files changed

+10
-6
lines changed

2 files changed

+10
-6
lines changed

quixstreams/dataframe/registry.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ def register_groupby(
 79   79       Register a "groupby" SDF, which is one generated with `SDF.group_by()`.
 80   80       :param source_sdf: the SDF used by `sdf.group_by()`
 81   81       :param new_sdf: the SDF generated by `sdf.group_by()`.
      82+      :param register_new_root: whether to register the new SDF as a root SDF.
 82   83       """
 83   84       if source_sdf.stream_id in self._repartition_origins:
 84   85           raise GroupByNestingLimit(
@@ -87,7 +88,7 @@ def register_groupby(
 87   88
 88   89       if new_sdf.stream_id in self._repartition_origins:
 89   90           raise GroupByDuplicate(
 90      -            "A `SDF.group_by()` operation appears to be the same as another, "
      91+             "An `SDF.group_by()` operation appears to be the same as another, "
 91   92              "either from using the same column or name parameter; "
 92   93              "adjust by setting a unique name with `SDF.group_by(name=<NAME>)` "
 93   94          )
@@ -99,7 +100,7 @@ def register_groupby(
 99  100          self.register_root(new_sdf)
100  101      except StreamingDataFrameDuplicate:
101  102          raise GroupByDuplicate(
102      -            "A `SDF.group_by()` operation appears to be the same as another, "
     103+             "An `SDF.group_by()` operation appears to be the same as another, "
103  104             "either from using the same column or name parameter; "
104  105             "adjust by setting a unique name with `SDF.group_by(name=<NAME>)` "
105  106         )

tests/test_quixstreams/test_dataframe/test_dataframe.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -1682,10 +1682,11 @@ def test_group_by_column(
1682  1682      sdf = sdf.group_by(col)
1683  1683      sdf[col] = col_update
1684  1684
1685      -     groupby_topic = sdf.topics[0]
1686  1685      if num_partitions == 1:
      1686+         groupby_topic = topic
1687  1687          assert sdf_registry.consumer_topics == [topic]
1688  1688      else:
      1689+         groupby_topic = sdf.topics[0]
1689  1690          assert sdf_registry.consumer_topics == [topic, groupby_topic]
1690  1691          assert groupby_topic.name.startswith("repartition__")
1691  1692
@@ -1702,7 +1703,7 @@ def test_group_by_column(
1702  1703      if num_partitions == 1:
1703  1704          post_groupby_branch_result = pre_groupby_branch_result
1704  1705      else:
1705      -        with internal_producer_factory(auto_offset_reset="earliest") as consumer:
      1706+         with internal_consumer_factory(auto_offset_reset="earliest") as consumer:
1706  1707              consumer.subscribe([groupby_topic])
1707  1708              consumed_row = consumer.poll_row(timeout=5.0)
1708  1709
@@ -1773,10 +1774,11 @@ def test_group_by_column_with_name(
1773  1774      sdf = sdf.group_by(col, name=op_name)
1774  1775      sdf[col] = col_update
1775  1776
1776      -     groupby_topic = sdf.topics[0]
1777  1777      if num_partitions == 1:
      1778+         groupby_topic = topic
1778  1779          assert sdf_registry.consumer_topics == [topic]
1779  1780      else:
      1781+         groupby_topic = sdf.topics[0]
1780  1782          assert sdf_registry.consumer_topics == [topic, groupby_topic]
1781  1783          assert groupby_topic.name.startswith("repartition__")
1782  1784

@@ -1864,10 +1866,11 @@ def test_group_by_func(
1864  1866      sdf = sdf.group_by(lambda v: v[col], name=op_name)
1865  1867      sdf[col] = col_update
1866  1868
1867      -     groupby_topic = sdf.topics[0]
1868  1869      if num_partitions == 1:
      1870+         groupby_topic = topic
1869  1871          assert sdf_registry.consumer_topics == [topic]
1870  1872      else:
      1873+         groupby_topic = sdf.topics[0]
1871  1874          assert sdf_registry.consumer_topics == [topic, groupby_topic]
1872  1875          assert groupby_topic.name.startswith("repartition__")
1873  1876

0 commit comments

Comments (0)