diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..b31bcbf --- /dev/null +++ b/.dockerignore @@ -0,0 +1,10 @@ +.git +.github +tests +.venv +**/__pycache__ +*.pyc +.pytest_cache +*.md +!README.md +agent-transcripts diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..91552f9 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Метаданные и пакет (зависимости из pyproject.toml) +COPY pyproject.toml README.md LICENSE ./ +COPY src ./src +COPY config.yaml ./config.yaml + +RUN pip install --no-cache-dir --upgrade pip \ + && pip install --no-cache-dir . + +RUN mkdir -p /app/data + +ENV PYTHONUNBUFFERED=1 + +EXPOSE 8000 + +CMD ["deepseek-cursor-proxy", "--config", "/app/config.yaml"] diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000..095be35 --- /dev/null +++ b/config.yaml @@ -0,0 +1,29 @@ +# Конфиг для запуска в Docker (см. Dockerfile / docker-compose.yml). +# Ключ API DeepSeek задаётся в Cursor (Custom model → API Key); прокси пробрасывает Authorization на upstream. + +base_url: https://api.deepseek.com +model: deepseek-v4-pro +thinking: enabled +reasoning_effort: high + +# Слушаем все интерфейсы внутри контейнера; с хоста: http://127.0.0.1:4012/v1 +host: 0.0.0.0 +port: 8000 +request_timeout: 600 +max_request_body_bytes: 41943040 +ngrok: false + +display_reasoning: true +collapsible_reasoning: true + +missing_reasoning_strategy: recover +reasoning_content_path: /app/data/reasoning_content.sqlite3 + +# Часто советуют в гайдах для обхода 400 при thinking + tools; в текущей версии прокси +# ремонт истории и стриминг встроены — эти поля оставлены для совместимости с инструкциями. +cache_reasoning: true +stream: true +enable_prompt_caching: true + +verbose: false +cors: false diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..1e4362e --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,15 @@ +services: + reasoner-proxy: + build: . + container_name: deepseek_reasoner_proxy + ports: + - "4012:8000" + environment: + # Не используется самим прокси: ключ задаётся в Cursor. Удобно для .env / документации. + - DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY:-} + volumes: + - deepseek_proxy_data:/app/data + restart: unless-stopped + +volumes: + deepseek_proxy_data: diff --git a/src/deepseek_cursor_proxy/config.py b/src/deepseek_cursor_proxy/config.py index 8c10e22..0eb3dd1 100644 --- a/src/deepseek_cursor_proxy/config.py +++ b/src/deepseek_cursor_proxy/config.py @@ -25,8 +25,8 @@ DEFAULT_COLLAPSIBLE_REASONING = True DEFAULT_NGROK = True DEFAULT_VERBOSE = False -DEFAULT_REQUEST_TIMEOUT = 300.0 -DEFAULT_MAX_REQUEST_BODY_BYTES = 20 * 1024 * 1024 +DEFAULT_REQUEST_TIMEOUT = 600.0 +DEFAULT_MAX_REQUEST_BODY_BYTES = 40 * 1024 * 1024 DEFAULT_CORS = False DEFAULT_MISSING_REASONING_STRATEGY = "recover" DEFAULT_REASONING_CACHE_MAX_AGE_SECONDS = 30 * 24 * 60 * 60 diff --git a/src/deepseek_cursor_proxy/server.py b/src/deepseek_cursor_proxy/server.py index cf96820..f55ca22 100644 --- a/src/deepseek_cursor_proxy/server.py +++ b/src/deepseek_cursor_proxy/server.py @@ -230,46 +230,99 @@ def do_POST(self) -> None: headers=upstream_headers, body_bytes=upstream_body, ) - request = Request( - upstream_url, - data=upstream_body, - method="POST", - headers=upstream_headers, - ) log_send_summary(prepared) - try: - if self.config.verbose: - LOG.info("forwarding to %s", upstream_url) - response = urlopen(request, timeout=self.config.request_timeout) - except HTTPError as exc: - LOG.warning( - "request failed upstream_status=%s stream=%s elapsed_ms=%s", - exc.code, - bool(prepared.payload.get("stream")), - elapsed_ms(started), - ) - self._send_upstream_error(exc, trace=trace) - self._finish_trace( - trace, - "upstream_error", - http_status=exc.code, - stream=bool(prepared.payload.get("stream")), - ) - return - except URLError as exc: - LOG.warning( - "upstream request failed elapsed_ms=%s reason=%s", - elapsed_ms(started), - exc.reason, - ) - self._send_json( - 502, - {"error": {"message": f"Upstream request failed: {exc.reason}"}}, - trace=trace, - ) - self._finish_trace(trace, "upstream_error", http_status=502) + MAX_RETRIES = 2 # 1 initial + 2 retries = 3 total attempts + RETRY_BACKOFF_SECONDS = [2.0, 4.0] # backoff between attempts + + response = None + last_error: HTTPError | URLError | None = None + for attempt in range(MAX_RETRIES + 1): + try: + if self.config.verbose: + LOG.info( + "forwarding to %s (attempt %s/%s)", + upstream_url, + attempt + 1, + MAX_RETRIES + 1, + ) + request = Request( + upstream_url, + data=upstream_body, + method="POST", + headers=upstream_headers, + ) + response = urlopen(request, timeout=self.config.request_timeout) + break + except HTTPError as exc: + last_error = exc + if exc.code is not None and 500 <= exc.code < 600: + if attempt < MAX_RETRIES: + delay = RETRY_BACKOFF_SECONDS[attempt] + LOG.warning( + ( + "upstream returned %s, retrying in %.1fs " + "(attempt %s/%s) elapsed_ms=%s" + ), + exc.code, + delay, + attempt + 1, + MAX_RETRIES + 1, + elapsed_ms(started), + ) + time.sleep(delay) + continue + break + except URLError as exc: + last_error = exc + if attempt < MAX_RETRIES: + delay = RETRY_BACKOFF_SECONDS[attempt] + LOG.warning( + ( + "upstream request failed, retrying in %.1fs " + "(attempt %s/%s) reason=%s elapsed_ms=%s" + ), + delay, + attempt + 1, + MAX_RETRIES + 1, + exc.reason, + elapsed_ms(started), + ) + time.sleep(delay) + continue + break + + if response is None: + if isinstance(last_error, HTTPError): + exc = last_error + LOG.warning( + "request failed upstream_status=%s stream=%s elapsed_ms=%s", + exc.code, + bool(prepared.payload.get("stream")), + elapsed_ms(started), + ) + self._send_upstream_error(exc, trace=trace) + self._finish_trace( + trace, + "upstream_error", + http_status=exc.code, + stream=bool(prepared.payload.get("stream")), + ) + else: + exc = last_error or URLError("unknown upstream error") + reason_str = getattr(exc, "reason", str(exc)) if exc else "unknown" + LOG.warning( + "upstream request failed elapsed_ms=%s reason=%s", + elapsed_ms(started), + reason_str, + ) + self._send_json( + 502, + {"error": {"message": f"Upstream request failed: {reason_str}"}}, + trace=trace, + ) + self._finish_trace(trace, "upstream_error", http_status=502) return with response: diff --git a/src/deepseek_cursor_proxy/streaming.py b/src/deepseek_cursor_proxy/streaming.py index 8d6ee95..33a6f45 100644 --- a/src/deepseek_cursor_proxy/streaming.py +++ b/src/deepseek_cursor_proxy/streaming.py @@ -7,10 +7,11 @@ from .reasoning_store import ReasoningStore -THINKING_BLOCK_START = "\n" -THINKING_BLOCK_END = "\n\n\n" -COLLAPSIBLE_THINKING_BLOCK_START = "
\nThinking\n\n" -COLLAPSIBLE_THINKING_BLOCK_END = "\n
\n\n" +THINKING_BLOCK_START = "> 💭 " +THINKING_BLOCK_END = "\n\n" +COLLAPSIBLE_THINKING_BLOCK_START = "> 💭 " +COLLAPSIBLE_THINKING_BLOCK_END = "\n\n" +THINKING_BLOCK_CONTINUE = "" @dataclass @@ -220,6 +221,7 @@ def __init__(self, collapsible: bool = True) -> None: self._block_start = ( COLLAPSIBLE_THINKING_BLOCK_START if collapsible else THINKING_BLOCK_START ) + self._block_continue = THINKING_BLOCK_CONTINUE self._block_end = ( COLLAPSIBLE_THINKING_BLOCK_END if collapsible else THINKING_BLOCK_END ) @@ -239,44 +241,47 @@ def rewrite_chunk(self, chunk: dict[str, Any]) -> None: delta = {} raw_choice["delta"] = delta - mirrored_parts: list[str] = [] - reasoning_content = delta.get("reasoning_content") - if isinstance(reasoning_content, str) and reasoning_content: + rc = delta.get("reasoning_content") + rc_str = rc if isinstance(rc, str) and rc else "" + + if rc_str: + delta.pop("reasoning_content", None) if index not in self._open_choices: - mirrored_parts.append(self._block_start) + delta["content"] = self._block_start + rc_str self._open_choices.add(index) - mirrored_parts.append(reasoning_content) + else: + delta["content"] = self._block_continue + rc_str + continue existing_content = delta.get("content") - should_close = index in self._open_choices and ( - bool(existing_content) - or bool(delta.get("tool_calls")) - or raw_choice.get("finish_reason") is not None - ) - if should_close: - mirrored_parts.append(self._block_end) - self._open_choices.discard(index) + has_real_content = isinstance(existing_content, str) and existing_content != "" + has_tool_calls = bool(delta.get("tool_calls")) + has_finish = raw_choice.get("finish_reason") is not None - if not mirrored_parts: - continue - if isinstance(existing_content, str): - mirrored_parts.append(existing_content) - delta["content"] = "".join(mirrored_parts) + if index in self._open_choices and ( + has_real_content or has_tool_calls or has_finish + ): + content_part = existing_content if isinstance(existing_content, str) else "" + delta["content"] = self._block_end + content_part + self._open_choices.discard(index) def flush_chunk(self, model: str) -> dict[str, Any] | None: - if not self._open_choices: - return None - - choices = [ - { - "index": index, - "delta": {"content": self._block_end}, - "finish_reason": None, - } - for index in sorted(self._open_choices) - ] + """Close any open thinking blockquotes when the stream ends.""" + choices: list[dict[str, Any]] = [] + + for index in sorted(self._open_choices): + choices.append( + { + "index": index, + "delta": {"content": self._block_end}, + "finish_reason": None, + } + ) self._open_choices.clear() + if not choices: + return None + chunk: dict[str, Any] = { "id": self._last_chunk_metadata.get("id", "chatcmpl-reasoning-close"), "object": self._last_chunk_metadata.get("object", "chat.completion.chunk"), @@ -298,11 +303,11 @@ def fold_reasoning_into_content( response_payload: dict[str, Any], collapsible: bool, ) -> None: - """Mirror `reasoning_content` into the visible `content` field for - non-streaming responses, matching the streaming `
` layout.""" + """Mirror reasoning_content into content as a Markdown blockquote.""" block_start = ( COLLAPSIBLE_THINKING_BLOCK_START if collapsible else THINKING_BLOCK_START ) + block_continue = THINKING_BLOCK_CONTINUE block_end = COLLAPSIBLE_THINKING_BLOCK_END if collapsible else THINKING_BLOCK_END choices = response_payload.get("choices") if not isinstance(choices, list): @@ -317,9 +322,11 @@ def fold_reasoning_into_content( if not isinstance(reasoning, str) or not reasoning: continue content = message.get("content") + formatted_reasoning = block_start + reasoning.replace( + "\n", "\n" + block_continue + ) message["content"] = ( - block_start - + reasoning + formatted_reasoning + block_end + (content if isinstance(content, str) else "") ) diff --git a/src/deepseek_cursor_proxy/transform.py b/src/deepseek_cursor_proxy/transform.py index dd576e5..03b264e 100644 --- a/src/deepseek_cursor_proxy/transform.py +++ b/src/deepseek_cursor_proxy/transform.py @@ -95,6 +95,8 @@ re.IGNORECASE | re.VERBOSE, ) +CURSOR_BLOCKQUOTE_THINKING_PREFIX_RE = re.compile(r"\A(?:>[^\n]*\n)+\n*") + RECOVERY_NOTICE_TEXT = "[deepseek-cursor-proxy] Refreshed reasoning_content history." RECOVERY_NOTICE_CONTENT = f"{RECOVERY_NOTICE_TEXT}\n\n" RECOVERY_SYSTEM_CONTENT = ( @@ -160,7 +162,9 @@ def extract_text_content(content: Any) -> str | None: def strip_cursor_thinking_blocks(content: str) -> str: - return CURSOR_THINKING_BLOCK_RE.sub("", content).lstrip("\r\n") + content = CURSOR_THINKING_BLOCK_RE.sub("", content) + content = CURSOR_BLOCKQUOTE_THINKING_PREFIX_RE.sub("", content) + return content.lstrip("\r\n") def normalize_tool_call(tool_call: Any) -> dict[str, Any]: diff --git a/tests/test_protocol.py b/tests/test_protocol.py index e763356..1e5351f 100644 --- a/tests/test_protocol.py +++ b/tests/test_protocol.py @@ -977,13 +977,11 @@ def test_streaming_response_mirrors_reasoning_into_details_block(self) -> None: if line.startswith("data: {") ] self.assertEqual( - chunks[0]["choices"][0]["delta"]["content"], - "
\nThinking\n\nNeed ", - ) - self.assertEqual( - chunks[2]["choices"][0]["delta"]["content"], - "\n
\n\nFinal.", + chunks[0]["choices"][0]["delta"], + {"role": "assistant", "content": "> 💭 Need "}, ) + self.assertEqual(chunks[1]["choices"][0]["delta"], {"content": "> context."}) + self.assertEqual(chunks[2]["choices"][0]["delta"]["content"], "\n\nFinal.") class NonStreamingDisplayTests(_StrictUpstreamCase): @@ -1007,7 +1005,7 @@ def test_non_streaming_response_mirrors_reasoning_into_details_block( content = response["choices"][0]["message"]["content"] self.assertEqual( content, - f"
\nThinking\n\n{THINKING_1_1}\n
\n\n", + f"> 💭 {THINKING_1_1}\n\n", ) @@ -1354,9 +1352,8 @@ def test_tool_reasoning_is_cached_before_done(self) -> None: ) response.read() self.assertEqual(status, 200, payload) - self.assertEqual( - payload["choices"][0]["message"]["content"].split("
")[-1], - "\n\nfollow-up accepted", + self.assertTrue( + payload["choices"][0]["message"]["content"].endswith("follow-up accepted") ) diff --git a/tests/test_streaming.py b/tests/test_streaming.py index 9f594f7..b3fe84a 100644 --- a/tests/test_streaming.py +++ b/tests/test_streaming.py @@ -329,12 +329,9 @@ def test_mirrors_reasoning_content_into_details_content(self) -> None: reasoning_delta = reasoning_chunk["choices"][0]["delta"] answer_delta = answer_chunk["choices"][0]["delta"] - self.assertEqual(reasoning_delta["reasoning_content"], "Need context.") - self.assertEqual( - reasoning_delta["content"], - "
\nThinking\n\nNeed context.", - ) - self.assertEqual(answer_delta["content"], "\n
\n\nFinal answer.") + self.assertEqual(reasoning_delta["content"], "> 💭 Need context.") + self.assertNotIn("reasoning_content", reasoning_delta) + self.assertEqual(answer_delta["content"], "\n\nFinal answer.") def test_can_mirror_reasoning_content_into_legacy_think_content(self) -> None: adapter = CursorReasoningDisplayAdapter(collapsible=False) @@ -361,11 +358,10 @@ def test_can_mirror_reasoning_content_into_legacy_think_content(self) -> None: adapter.rewrite_chunk(answer_chunk) self.assertEqual( - reasoning_chunk["choices"][0]["delta"]["content"], "\nNeed context." + reasoning_chunk["choices"][0]["delta"]["content"], "> 💭 Need context." ) self.assertEqual( - answer_chunk["choices"][0]["delta"]["content"], - "\n\n\nFinal answer.", + answer_chunk["choices"][0]["delta"]["content"], "\n\nFinal answer." ) def test_closes_thinking_block_before_tool_calls(self) -> None: @@ -400,9 +396,8 @@ def test_closes_thinking_block_before_tool_calls(self) -> None: adapter.rewrite_chunk(tool_chunk) - self.assertEqual( - tool_chunk["choices"][0]["delta"]["content"], "\n\n\n" - ) + self.assertEqual(tool_chunk["choices"][0]["delta"]["content"], "\n\n") + self.assertIn("tool_calls", tool_chunk["choices"][0]["delta"]) def test_flush_chunk_closes_unfinished_thinking_block_at_done(self) -> None: adapter = CursorReasoningDisplayAdapter() @@ -425,16 +420,41 @@ def test_flush_chunk_closes_unfinished_thinking_block_at_done(self) -> None: self.assertIsNotNone(closing_chunk) assert closing_chunk is not None self.assertEqual(closing_chunk["model"], "deepseek-v4-pro") - self.assertEqual( - closing_chunk["choices"][0]["delta"]["content"], "\n\n\n" - ) + self.assertEqual(closing_chunk["choices"][0]["delta"]["content"], "\n\n") self.assertIsNone(adapter.flush_chunk("deepseek-v4-pro")) + def test_streams_reasoning_as_blockquote_then_separates_content(self) -> None: + adapter = CursorReasoningDisplayAdapter() + expected_contents = ["> 💭 Let", "> me", "> think"] + for i, fragment in enumerate(("Let", " me", " think")): + chunk = { + "choices": [ + { + "index": 0, + "delta": {"reasoning_content": fragment}, + "finish_reason": None, + } + ] + } + adapter.rewrite_chunk(chunk) + self.assertEqual(chunk["choices"][0]["delta"]["content"], expected_contents[i]) + + answer = { + "choices": [ + { + "index": 0, + "delta": {"content": "Answer."}, + "finish_reason": None, + } + ] + } + adapter.rewrite_chunk(answer) + self.assertEqual(answer["choices"][0]["delta"]["content"], "\n\nAnswer.") + class FoldReasoningTests(unittest.TestCase): def test_fold_reasoning_into_non_streaming_content(self) -> None: - """Non-streaming responses mirror reasoning_content into a visible -
block, matching the streaming layout.""" + """Non-streaming responses mirror reasoning_content into blockquotes.""" payload = { "choices": [ { @@ -450,7 +470,26 @@ def test_fold_reasoning_into_non_streaming_content(self) -> None: fold_reasoning_into_content(payload, collapsible=True) self.assertEqual( payload["choices"][0]["message"]["content"], - "
\nThinking\n\nthinking\n
\n\nanswer", + "> 💭 thinking\n\nanswer", + ) + + def test_fold_reasoning_multiline_uses_continue_prefix(self) -> None: + payload = { + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "answer", + "reasoning_content": "line1\nline2", + }, + } + ] + } + fold_reasoning_into_content(payload, collapsible=True) + self.assertEqual( + payload["choices"][0]["message"]["content"], + "> 💭 line1\n> line2\n\nanswer", ) def test_fold_reasoning_skips_empty_reasoning(self) -> None: diff --git a/tests/test_trace.py b/tests/test_trace.py index bf6db0e..19b6d06 100644 --- a/tests/test_trace.py +++ b/tests/test_trace.py @@ -284,9 +284,9 @@ def test_captures_streaming_replay_chunks(self) -> None: "reasoning_content", trace["upstream"]["stream"]["chunks"][0]["line"], ) - self.assertIn( - "
", trace["cursor_response"]["stream"]["chunks"][0]["line"] - ) + cursor_stream = trace["cursor_response"]["stream"]["chunks"] + cursor_text = "\n".join(chunk["line"] for chunk in cursor_stream) + self.assertIn("> 💭", cursor_text) def test_captures_recovery_diagnostics(self) -> None: """A request that triggers cold-cache recovery records the recovery diff --git a/tests/test_transform.py b/tests/test_transform.py index 72e4556..ff0cd01 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -70,6 +70,10 @@ def test_strip_cursor_thinking_blocks_removes_details_and_think(self) -> None: strip_cursor_thinking_blocks("\nplan\n\n\nanswer"), "answer", ) + self.assertEqual( + strip_cursor_thinking_blocks("> 💭 plan\n> more\n\nanswer"), + "answer", + ) def test_strip_cursor_thinking_blocks_preserves_unrelated_details(self) -> None: kept = "
Diff\nrelevant\n
"