Add smoke tests and TODOs for fixing and testing output line-counting logic

sid-rl · sid-rl · commit eaeffaf2772d · 2025-11-17T17:26:59.000-08:00
diff --git a/src/runloop_api_client/sdk/async_execution_result.py b/src/runloop_api_client/sdk/async_execution_result.py
@@ -60,6 +60,12 @@ def _count_non_empty_lines(self, text: str) -> int:
 
     def _get_last_n_lines(self, text: str, n: int) -> str:
         """Extract the last N lines from text."""
+        # TODO: Fix inconsistency - _count_non_empty_lines counts non-empty lines but
+        # _get_last_n_lines returns N lines (may include empty ones). This means
+        # num_lines=50 might return fewer than 50 non-empty lines. Should either:
+        # 1. Make _get_last_n_lines return N non-empty lines, OR
+        # 2. Make _count_non_empty_lines count all lines
+        # This affects both Python and TypeScript SDKs - fix together.
         if n <= 0 or not text:
             return ""
         # Remove trailing newlines before splitting and slicing
diff --git a/src/runloop_api_client/sdk/execution_result.py b/src/runloop_api_client/sdk/execution_result.py
@@ -67,6 +67,12 @@ def _count_non_empty_lines(self, text: str) -> int:
 
     def _get_last_n_lines(self, text: str, n: int) -> str:
         """Extract the last N lines from text."""
+        # TODO: Fix inconsistency - _count_non_empty_lines counts non-empty lines but
+        # _get_last_n_lines returns N lines (may include empty ones). This means
+        # num_lines=50 might return fewer than 50 non-empty lines. Should either:
+        # 1. Make _get_last_n_lines return N non-empty lines, OR
+        # 2. Make _count_non_empty_lines count all lines
+        # This affects both Python and TypeScript SDKs - fix together.
         if n <= 0 or not text:
             return ""
         # Remove trailing newlines before splitting and slicing
diff --git a/tests/smoketests/sdk/test_async_devbox.py b/tests/smoketests/sdk/test_async_devbox.py
@@ -614,3 +614,71 @@ async def test_snapshot_disk_async(self, async_sdk_client: AsyncRunloopSDK) -> N
                 await snapshot.delete()
         finally:
             await devbox.shutdown()
+
+
+class TestAsyncDevboxExecutionPagination:
+    """Test stdout/stderr pagination and streaming functionality."""
+
+    @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+    async def test_exec_with_large_stdout_streaming(self, shared_devbox: AsyncDevbox) -> None:
+        """Test that large stdout output is fully captured via streaming when truncated."""
+        # Generate 1000 numbered lines of output
+        result = await shared_devbox.cmd.exec(
+            command='for i in $(seq 1 1000); do echo "Line $i with some content to make it realistic"; done',
+        )
+
+        assert result.exit_code == 0
+        stdout = await result.stdout()
+        lines = stdout.strip().split("\n")
+
+        # Verify we got all 1000 lines
+        assert len(lines) == 1000, f"Expected 1000 lines, got {len(lines)}"
+
+        # Anchor on the trailing space: the bare substring "Line 1" also occurs in "Line 10", "Line 100", ...
+        assert lines[0].startswith("Line 1 "), f"Unexpected first line: {lines[0]!r}"
+        assert lines[-1].startswith("Line 1000 "), f"Unexpected last line: {lines[-1]!r}"
+
+    @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+    async def test_exec_with_large_stderr_streaming(self, shared_devbox: AsyncDevbox) -> None:
+        """Test that large stderr output is fully captured via streaming when truncated."""
+        # Generate 1000 numbered lines of stderr output
+        result = await shared_devbox.cmd.exec(
+            command='for i in $(seq 1 1000); do echo "Error line $i" >&2; done',
+        )
+
+        assert result.exit_code == 0
+        stderr = await result.stderr()
+        lines = stderr.strip().split("\n")
+
+        # Verify we got all 1000 lines
+        assert len(lines) == 1000, f"Expected 1000 lines, got {len(lines)}"
+
+        # Exact match: the substring "Error line 1" also occurs in "Error line 10", "Error line 100", ...
+        assert lines[0].rstrip() == "Error line 1", f"Unexpected first line: {lines[0]!r}"
+        assert lines[-1].rstrip() == "Error line 1000", f"Unexpected last line: {lines[-1]!r}"
+
+    @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+    async def test_exec_with_truncated_stdout_num_lines(self, shared_devbox: AsyncDevbox) -> None:
+        """Check that ``num_lines=50`` yields the last 50 lines of a 2000-line stdout."""
+        # Produce 2000 numbered lines so the requested tail is easy to identify
+        execution = await shared_devbox.cmd.exec(
+            command='for i in $(seq 1 2000); do echo "Line $i"; done',
+        )
+
+        assert execution.exit_code == 0
+
+        # Ask only for the final 50 lines
+        tail = await execution.stdout(num_lines=50)
+        tail_lines = tail.strip().split("\n")
+
+        # The tail must contain exactly the requested number of lines
+        assert len(tail_lines) == 50, f"Expected 50 lines, got {len(tail_lines)}"
+
+        # Lines 1951..2000 are the last 50 of the 2000 generated
+        assert "Line 1951" in tail_lines[0]
+        assert "Line 2000" in tail_lines[-1]
+
+    # TODO: Add test_exec_stdout_line_counting test once empty line logic is fixed.
+    # Currently there's an inconsistency where _count_non_empty_lines counts non-empty
+    # lines but _get_last_n_lines returns N lines (including empty ones). This affects
+    # both Python and TypeScript SDKs and needs to be fixed together.
diff --git a/tests/smoketests/sdk/test_devbox.py b/tests/smoketests/sdk/test_devbox.py
@@ -609,3 +609,71 @@ def test_snapshot_disk_async(self, sdk_client: RunloopSDK) -> None:
                 snapshot.delete()
         finally:
             devbox.shutdown()
+
+
+class TestDevboxExecutionPagination:
+    """Test stdout/stderr pagination and streaming functionality."""
+
+    @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+    def test_exec_with_large_stdout_streaming(self, shared_devbox: Devbox) -> None:
+        """Test that large stdout output is fully captured via streaming when truncated."""
+        # Generate 1000 numbered lines of output
+        result = shared_devbox.cmd.exec(
+            command='for i in $(seq 1 1000); do echo "Line $i with some content to make it realistic"; done',
+        )
+
+        assert result.exit_code == 0
+        stdout = result.stdout()
+        lines = stdout.strip().split("\n")
+
+        # Verify we got all 1000 lines
+        assert len(lines) == 1000, f"Expected 1000 lines, got {len(lines)}"
+
+        # Anchor on the trailing space: the bare substring "Line 1" also occurs in "Line 10", "Line 100", ...
+        assert lines[0].startswith("Line 1 "), f"Unexpected first line: {lines[0]!r}"
+        assert lines[-1].startswith("Line 1000 "), f"Unexpected last line: {lines[-1]!r}"
+
+    @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+    def test_exec_with_large_stderr_streaming(self, shared_devbox: Devbox) -> None:
+        """Test that large stderr output is fully captured via streaming when truncated."""
+        # Generate 1000 numbered lines of stderr output
+        result = shared_devbox.cmd.exec(
+            command='for i in $(seq 1 1000); do echo "Error line $i" >&2; done',
+        )
+
+        assert result.exit_code == 0
+        stderr = result.stderr()
+        lines = stderr.strip().split("\n")
+
+        # Verify we got all 1000 lines
+        assert len(lines) == 1000, f"Expected 1000 lines, got {len(lines)}"
+
+        # Exact match: the substring "Error line 1" also occurs in "Error line 10", "Error line 100", ...
+        assert lines[0].rstrip() == "Error line 1", f"Unexpected first line: {lines[0]!r}"
+        assert lines[-1].rstrip() == "Error line 1000", f"Unexpected last line: {lines[-1]!r}"
+
+    @pytest.mark.timeout(TWO_MINUTE_TIMEOUT)
+    def test_exec_with_truncated_stdout_num_lines(self, shared_devbox: Devbox) -> None:
+        """Check that ``num_lines=50`` yields the last 50 lines of a 2000-line stdout."""
+        # Produce 2000 numbered lines so the requested tail is easy to identify
+        execution = shared_devbox.cmd.exec(
+            command='for i in $(seq 1 2000); do echo "Line $i"; done',
+        )
+
+        assert execution.exit_code == 0
+
+        # Ask only for the final 50 lines
+        tail = execution.stdout(num_lines=50)
+        tail_lines = tail.strip().split("\n")
+
+        # The tail must contain exactly the requested number of lines
+        assert len(tail_lines) == 50, f"Expected 50 lines, got {len(tail_lines)}"
+
+        # Lines 1951..2000 are the last 50 of the 2000 generated
+        assert "Line 1951" in tail_lines[0]
+        assert "Line 2000" in tail_lines[-1]
+
+    # TODO: Add test_exec_stdout_line_counting test once empty line logic is fixed.
+    # Currently there's an inconsistency where _count_non_empty_lines counts non-empty
+    # lines but _get_last_n_lines returns N lines (including empty ones). This affects
+    # both Python and TypeScript SDKs and needs to be fixed together.