diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..b31bcbf
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,10 @@
+.git
+.github
+tests
+.venv
+**/__pycache__
+*.pyc
+.pytest_cache
+*.md
+!README.md
+agent-transcripts
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..91552f9
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,19 @@
+FROM python:3.11-slim
+
+WORKDIR /app
+
+# Metadata and the package itself (dependencies come from pyproject.toml)
+COPY pyproject.toml README.md LICENSE ./
+COPY src ./src
+COPY config.yaml ./config.yaml
+
+RUN pip install --no-cache-dir --upgrade pip \
+ && pip install --no-cache-dir .
+
+RUN mkdir -p /app/data
+
+ENV PYTHONUNBUFFERED=1
+
+EXPOSE 8000
+
+CMD ["deepseek-cursor-proxy", "--config", "/app/config.yaml"]
diff --git a/config.yaml b/config.yaml
new file mode 100644
index 0000000..095be35
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,29 @@
+# Config for running in Docker (see Dockerfile / docker-compose.yml).
+# The DeepSeek API key is set in Cursor (Custom model → API Key); the proxy forwards Authorization to the upstream.
+
+base_url: https://api.deepseek.com
+model: deepseek-v4-pro
+thinking: enabled
+reasoning_effort: high
+
+# Listen on all interfaces inside the container; from the host: http://127.0.0.1:4012/v1
+host: 0.0.0.0
+port: 8000
+request_timeout: 600
+max_request_body_bytes: 41943040
+ngrok: false
+
+display_reasoning: true
+collapsible_reasoning: true
+
+missing_reasoning_strategy: recover
+reasoning_content_path: /app/data/reasoning_content.sqlite3
+
+# Guides often recommend these to work around 400 errors with thinking + tools; in the current
+# proxy, history repair and streaming are built in — these fields are kept for compatibility.
+cache_reasoning: true
+stream: true
+enable_prompt_caching: true
+
+verbose: false
+cors: false
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..1e4362e
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,15 @@
+services:
+ reasoner-proxy:
+ build: .
+ container_name: deepseek_reasoner_proxy
+ ports:
+ - "4012:8000"
+ environment:
+      # Not used by the proxy itself: the key is set in Cursor. Handy for .env / documentation.
+ - DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY:-}
+ volumes:
+ - deepseek_proxy_data:/app/data
+ restart: unless-stopped
+
+volumes:
+ deepseek_proxy_data:
diff --git a/src/deepseek_cursor_proxy/config.py b/src/deepseek_cursor_proxy/config.py
index 8c10e22..0eb3dd1 100644
--- a/src/deepseek_cursor_proxy/config.py
+++ b/src/deepseek_cursor_proxy/config.py
@@ -25,8 +25,8 @@
DEFAULT_COLLAPSIBLE_REASONING = True
DEFAULT_NGROK = True
DEFAULT_VERBOSE = False
-DEFAULT_REQUEST_TIMEOUT = 300.0
-DEFAULT_MAX_REQUEST_BODY_BYTES = 20 * 1024 * 1024
+DEFAULT_REQUEST_TIMEOUT = 600.0
+DEFAULT_MAX_REQUEST_BODY_BYTES = 40 * 1024 * 1024
DEFAULT_CORS = False
DEFAULT_MISSING_REASONING_STRATEGY = "recover"
DEFAULT_REASONING_CACHE_MAX_AGE_SECONDS = 30 * 24 * 60 * 60
diff --git a/src/deepseek_cursor_proxy/server.py b/src/deepseek_cursor_proxy/server.py
index cf96820..f55ca22 100644
--- a/src/deepseek_cursor_proxy/server.py
+++ b/src/deepseek_cursor_proxy/server.py
@@ -230,46 +230,99 @@ def do_POST(self) -> None:
headers=upstream_headers,
body_bytes=upstream_body,
)
- request = Request(
- upstream_url,
- data=upstream_body,
- method="POST",
- headers=upstream_headers,
- )
log_send_summary(prepared)
- try:
- if self.config.verbose:
- LOG.info("forwarding to %s", upstream_url)
- response = urlopen(request, timeout=self.config.request_timeout)
- except HTTPError as exc:
- LOG.warning(
- "request failed upstream_status=%s stream=%s elapsed_ms=%s",
- exc.code,
- bool(prepared.payload.get("stream")),
- elapsed_ms(started),
- )
- self._send_upstream_error(exc, trace=trace)
- self._finish_trace(
- trace,
- "upstream_error",
- http_status=exc.code,
- stream=bool(prepared.payload.get("stream")),
- )
- return
- except URLError as exc:
- LOG.warning(
- "upstream request failed elapsed_ms=%s reason=%s",
- elapsed_ms(started),
- exc.reason,
- )
- self._send_json(
- 502,
- {"error": {"message": f"Upstream request failed: {exc.reason}"}},
- trace=trace,
- )
- self._finish_trace(trace, "upstream_error", http_status=502)
+ MAX_RETRIES = 2 # 1 initial + 2 retries = 3 total attempts
+ RETRY_BACKOFF_SECONDS = [2.0, 4.0] # backoff between attempts
+
+ response = None
+ last_error: HTTPError | URLError | None = None
+ for attempt in range(MAX_RETRIES + 1):
+ try:
+ if self.config.verbose:
+ LOG.info(
+ "forwarding to %s (attempt %s/%s)",
+ upstream_url,
+ attempt + 1,
+ MAX_RETRIES + 1,
+ )
+ request = Request(
+ upstream_url,
+ data=upstream_body,
+ method="POST",
+ headers=upstream_headers,
+ )
+ response = urlopen(request, timeout=self.config.request_timeout)
+ break
+ except HTTPError as exc:
+ last_error = exc
+ if exc.code is not None and 500 <= exc.code < 600:
+ if attempt < MAX_RETRIES:
+ delay = RETRY_BACKOFF_SECONDS[attempt]
+ LOG.warning(
+ (
+ "upstream returned %s, retrying in %.1fs "
+ "(attempt %s/%s) elapsed_ms=%s"
+ ),
+ exc.code,
+ delay,
+ attempt + 1,
+ MAX_RETRIES + 1,
+ elapsed_ms(started),
+ )
+ time.sleep(delay)
+ continue
+ break
+ except URLError as exc:
+ last_error = exc
+ if attempt < MAX_RETRIES:
+ delay = RETRY_BACKOFF_SECONDS[attempt]
+ LOG.warning(
+ (
+ "upstream request failed, retrying in %.1fs "
+ "(attempt %s/%s) reason=%s elapsed_ms=%s"
+ ),
+ delay,
+ attempt + 1,
+ MAX_RETRIES + 1,
+ exc.reason,
+ elapsed_ms(started),
+ )
+ time.sleep(delay)
+ continue
+ break
+
+ if response is None:
+ if isinstance(last_error, HTTPError):
+ exc = last_error
+ LOG.warning(
+ "request failed upstream_status=%s stream=%s elapsed_ms=%s",
+ exc.code,
+ bool(prepared.payload.get("stream")),
+ elapsed_ms(started),
+ )
+ self._send_upstream_error(exc, trace=trace)
+ self._finish_trace(
+ trace,
+ "upstream_error",
+ http_status=exc.code,
+ stream=bool(prepared.payload.get("stream")),
+ )
+ else:
+ exc = last_error or URLError("unknown upstream error")
+ reason_str = getattr(exc, "reason", str(exc)) if exc else "unknown"
+ LOG.warning(
+ "upstream request failed elapsed_ms=%s reason=%s",
+ elapsed_ms(started),
+ reason_str,
+ )
+ self._send_json(
+ 502,
+ {"error": {"message": f"Upstream request failed: {reason_str}"}},
+ trace=trace,
+ )
+ self._finish_trace(trace, "upstream_error", http_status=502)
return
with response:
diff --git a/src/deepseek_cursor_proxy/streaming.py b/src/deepseek_cursor_proxy/streaming.py
index 8d6ee95..33a6f45 100644
--- a/src/deepseek_cursor_proxy/streaming.py
+++ b/src/deepseek_cursor_proxy/streaming.py
@@ -7,10 +7,11 @@
from .reasoning_store import ReasoningStore
-THINKING_BLOCK_START = "<think>\n"
-THINKING_BLOCK_END = "\n</think>\n\n"
-COLLAPSIBLE_THINKING_BLOCK_START = "<details>\n<summary>Thinking</summary>\n\n"
-COLLAPSIBLE_THINKING_BLOCK_END = "\n</details>\n\n"
+THINKING_BLOCK_START = "> 💭 "
+THINKING_BLOCK_END = "\n\n"
+COLLAPSIBLE_THINKING_BLOCK_START = "> 💭 "
+COLLAPSIBLE_THINKING_BLOCK_END = "\n\n"
+THINKING_BLOCK_CONTINUE = ""
@dataclass
@@ -220,6 +221,7 @@ def __init__(self, collapsible: bool = True) -> None:
self._block_start = (
COLLAPSIBLE_THINKING_BLOCK_START if collapsible else THINKING_BLOCK_START
)
+ self._block_continue = THINKING_BLOCK_CONTINUE
self._block_end = (
COLLAPSIBLE_THINKING_BLOCK_END if collapsible else THINKING_BLOCK_END
)
@@ -239,44 +241,47 @@ def rewrite_chunk(self, chunk: dict[str, Any]) -> None:
delta = {}
raw_choice["delta"] = delta
- mirrored_parts: list[str] = []
- reasoning_content = delta.get("reasoning_content")
- if isinstance(reasoning_content, str) and reasoning_content:
+ rc = delta.get("reasoning_content")
+ rc_str = rc if isinstance(rc, str) and rc else ""
+
+ if rc_str:
+ delta.pop("reasoning_content", None)
if index not in self._open_choices:
- mirrored_parts.append(self._block_start)
+ delta["content"] = self._block_start + rc_str
self._open_choices.add(index)
- mirrored_parts.append(reasoning_content)
+ else:
+ delta["content"] = self._block_continue + rc_str
+ continue
existing_content = delta.get("content")
- should_close = index in self._open_choices and (
- bool(existing_content)
- or bool(delta.get("tool_calls"))
- or raw_choice.get("finish_reason") is not None
- )
- if should_close:
- mirrored_parts.append(self._block_end)
- self._open_choices.discard(index)
+ has_real_content = isinstance(existing_content, str) and existing_content != ""
+ has_tool_calls = bool(delta.get("tool_calls"))
+ has_finish = raw_choice.get("finish_reason") is not None
- if not mirrored_parts:
- continue
- if isinstance(existing_content, str):
- mirrored_parts.append(existing_content)
- delta["content"] = "".join(mirrored_parts)
+ if index in self._open_choices and (
+ has_real_content or has_tool_calls or has_finish
+ ):
+ content_part = existing_content if isinstance(existing_content, str) else ""
+ delta["content"] = self._block_end + content_part
+ self._open_choices.discard(index)
def flush_chunk(self, model: str) -> dict[str, Any] | None:
- if not self._open_choices:
- return None
-
- choices = [
- {
- "index": index,
- "delta": {"content": self._block_end},
- "finish_reason": None,
- }
- for index in sorted(self._open_choices)
- ]
+ """Close any open thinking blockquotes when the stream ends."""
+ choices: list[dict[str, Any]] = []
+
+ for index in sorted(self._open_choices):
+ choices.append(
+ {
+ "index": index,
+ "delta": {"content": self._block_end},
+ "finish_reason": None,
+ }
+ )
self._open_choices.clear()
+ if not choices:
+ return None
+
chunk: dict[str, Any] = {
"id": self._last_chunk_metadata.get("id", "chatcmpl-reasoning-close"),
"object": self._last_chunk_metadata.get("object", "chat.completion.chunk"),
@@ -298,11 +303,11 @@ def fold_reasoning_into_content(
response_payload: dict[str, Any],
collapsible: bool,
) -> None:
-    """Mirror `reasoning_content` into the visible `content` field for
-    non-streaming responses, matching the streaming `<details>` layout."""
+ """Mirror reasoning_content into content as a Markdown blockquote."""
block_start = (
COLLAPSIBLE_THINKING_BLOCK_START if collapsible else THINKING_BLOCK_START
)
+ block_continue = THINKING_BLOCK_CONTINUE
block_end = COLLAPSIBLE_THINKING_BLOCK_END if collapsible else THINKING_BLOCK_END
choices = response_payload.get("choices")
if not isinstance(choices, list):
@@ -317,9 +322,11 @@ def fold_reasoning_into_content(
if not isinstance(reasoning, str) or not reasoning:
continue
content = message.get("content")
+ formatted_reasoning = block_start + reasoning.replace(
+ "\n", "\n" + block_continue
+ )
message["content"] = (
- block_start
- + reasoning
+ formatted_reasoning
+ block_end
+ (content if isinstance(content, str) else "")
)
diff --git a/src/deepseek_cursor_proxy/transform.py b/src/deepseek_cursor_proxy/transform.py
index dd576e5..03b264e 100644
--- a/src/deepseek_cursor_proxy/transform.py
+++ b/src/deepseek_cursor_proxy/transform.py
@@ -95,6 +95,8 @@
re.IGNORECASE | re.VERBOSE,
)
+CURSOR_BLOCKQUOTE_THINKING_PREFIX_RE = re.compile(r"\A(?:>[^\n]*\n)+\n*")
+
RECOVERY_NOTICE_TEXT = "[deepseek-cursor-proxy] Refreshed reasoning_content history."
RECOVERY_NOTICE_CONTENT = f"{RECOVERY_NOTICE_TEXT}\n\n"
RECOVERY_SYSTEM_CONTENT = (
@@ -160,7 +162,9 @@ def extract_text_content(content: Any) -> str | None:
def strip_cursor_thinking_blocks(content: str) -> str:
- return CURSOR_THINKING_BLOCK_RE.sub("", content).lstrip("\r\n")
+ content = CURSOR_THINKING_BLOCK_RE.sub("", content)
+ content = CURSOR_BLOCKQUOTE_THINKING_PREFIX_RE.sub("", content)
+ return content.lstrip("\r\n")
def normalize_tool_call(tool_call: Any) -> dict[str, Any]:
diff --git a/tests/test_protocol.py b/tests/test_protocol.py
index e763356..1e5351f 100644
--- a/tests/test_protocol.py
+++ b/tests/test_protocol.py
@@ -977,13 +977,11 @@ def test_streaming_response_mirrors_reasoning_into_details_block(self) -> None:
if line.startswith("data: {")
]
self.assertEqual(
- chunks[0]["choices"][0]["delta"]["content"],
-            "<details>\n<summary>Thinking</summary>\n\nNeed ",
- )
- self.assertEqual(
- chunks[2]["choices"][0]["delta"]["content"],
-            "\n</details>\n\nFinal.",
+ chunks[0]["choices"][0]["delta"],
+ {"role": "assistant", "content": "> 💭 Need "},
)
+ self.assertEqual(chunks[1]["choices"][0]["delta"], {"content": "> context."})
+ self.assertEqual(chunks[2]["choices"][0]["delta"]["content"], "\n\nFinal.")
class NonStreamingDisplayTests(_StrictUpstreamCase):
@@ -1007,7 +1005,7 @@ def test_non_streaming_response_mirrors_reasoning_into_details_block(
content = response["choices"][0]["message"]["content"]
self.assertEqual(
content,
-            f"<details>\n<summary>Thinking</summary>\n\n{THINKING_1_1}\n</details>\n\n",
+ f"> 💭 {THINKING_1_1}\n\n",
)
@@ -1354,9 +1352,8 @@ def test_tool_reasoning_is_cached_before_done(self) -> None:
)
response.read()
self.assertEqual(status, 200, payload)
- self.assertEqual(
- payload["choices"][0]["message"]["content"].split(" ")[-1],
- "\n\nfollow-up accepted",
+ self.assertTrue(
+ payload["choices"][0]["message"]["content"].endswith("follow-up accepted")
)
diff --git a/tests/test_streaming.py b/tests/test_streaming.py
index 9f594f7..b3fe84a 100644
--- a/tests/test_streaming.py
+++ b/tests/test_streaming.py
@@ -329,12 +329,9 @@ def test_mirrors_reasoning_content_into_details_content(self) -> None:
reasoning_delta = reasoning_chunk["choices"][0]["delta"]
answer_delta = answer_chunk["choices"][0]["delta"]
- self.assertEqual(reasoning_delta["reasoning_content"], "Need context.")
- self.assertEqual(
- reasoning_delta["content"],
-            "<details>\n<summary>Thinking</summary>\n\nNeed context.",
- )
-        self.assertEqual(answer_delta["content"], "\n</details>\n\nFinal answer.")
+ self.assertEqual(reasoning_delta["content"], "> 💭 Need context.")
+ self.assertNotIn("reasoning_content", reasoning_delta)
+ self.assertEqual(answer_delta["content"], "\n\nFinal answer.")
def test_can_mirror_reasoning_content_into_legacy_think_content(self) -> None:
adapter = CursorReasoningDisplayAdapter(collapsible=False)
@@ -361,11 +358,10 @@ def test_can_mirror_reasoning_content_into_legacy_think_content(self) -> None:
adapter.rewrite_chunk(answer_chunk)
self.assertEqual(
-            reasoning_chunk["choices"][0]["delta"]["content"], "<think>\nNeed context."
+ reasoning_chunk["choices"][0]["delta"]["content"], "> 💭 Need context."
)
self.assertEqual(
- answer_chunk["choices"][0]["delta"]["content"],
-            "\n</think>\n\nFinal answer.",
+ answer_chunk["choices"][0]["delta"]["content"], "\n\nFinal answer."
)
def test_closes_thinking_block_before_tool_calls(self) -> None:
@@ -400,9 +396,8 @@ def test_closes_thinking_block_before_tool_calls(self) -> None:
adapter.rewrite_chunk(tool_chunk)
- self.assertEqual(
-            tool_chunk["choices"][0]["delta"]["content"], "\n</details>\n\n"
- )
+ self.assertEqual(tool_chunk["choices"][0]["delta"]["content"], "\n\n")
+ self.assertIn("tool_calls", tool_chunk["choices"][0]["delta"])
def test_flush_chunk_closes_unfinished_thinking_block_at_done(self) -> None:
adapter = CursorReasoningDisplayAdapter()
@@ -425,16 +420,41 @@ def test_flush_chunk_closes_unfinished_thinking_block_at_done(self) -> None:
self.assertIsNotNone(closing_chunk)
assert closing_chunk is not None
self.assertEqual(closing_chunk["model"], "deepseek-v4-pro")
- self.assertEqual(
-            closing_chunk["choices"][0]["delta"]["content"], "\n</details>\n\n"
- )
+ self.assertEqual(closing_chunk["choices"][0]["delta"]["content"], "\n\n")
self.assertIsNone(adapter.flush_chunk("deepseek-v4-pro"))
+ def test_streams_reasoning_as_blockquote_then_separates_content(self) -> None:
+ adapter = CursorReasoningDisplayAdapter()
+ expected_contents = ["> 💭 Let", "> me", "> think"]
+ for i, fragment in enumerate(("Let", " me", " think")):
+ chunk = {
+ "choices": [
+ {
+ "index": 0,
+ "delta": {"reasoning_content": fragment},
+ "finish_reason": None,
+ }
+ ]
+ }
+ adapter.rewrite_chunk(chunk)
+ self.assertEqual(chunk["choices"][0]["delta"]["content"], expected_contents[i])
+
+ answer = {
+ "choices": [
+ {
+ "index": 0,
+ "delta": {"content": "Answer."},
+ "finish_reason": None,
+ }
+ ]
+ }
+ adapter.rewrite_chunk(answer)
+ self.assertEqual(answer["choices"][0]["delta"]["content"], "\n\nAnswer.")
+
class FoldReasoningTests(unittest.TestCase):
def test_fold_reasoning_into_non_streaming_content(self) -> None:
- """Non-streaming responses mirror reasoning_content into a visible
- block, matching the streaming layout."""
+ """Non-streaming responses mirror reasoning_content into blockquotes."""
payload = {
"choices": [
{
@@ -450,7 +470,26 @@ def test_fold_reasoning_into_non_streaming_content(self) -> None:
fold_reasoning_into_content(payload, collapsible=True)
self.assertEqual(
payload["choices"][0]["message"]["content"],
-            "<details>\n<summary>Thinking</summary>\n\nthinking\n</details>\n\nanswer",
+ "> 💭 thinking\n\nanswer",
+ )
+
+ def test_fold_reasoning_multiline_uses_continue_prefix(self) -> None:
+ payload = {
+ "choices": [
+ {
+ "index": 0,
+ "message": {
+ "role": "assistant",
+ "content": "answer",
+ "reasoning_content": "line1\nline2",
+ },
+ }
+ ]
+ }
+ fold_reasoning_into_content(payload, collapsible=True)
+ self.assertEqual(
+ payload["choices"][0]["message"]["content"],
+ "> 💭 line1\n> line2\n\nanswer",
)
def test_fold_reasoning_skips_empty_reasoning(self) -> None:
diff --git a/tests/test_trace.py b/tests/test_trace.py
index bf6db0e..19b6d06 100644
--- a/tests/test_trace.py
+++ b/tests/test_trace.py
@@ -284,9 +284,9 @@ def test_captures_streaming_replay_chunks(self) -> None:
"reasoning_content",
trace["upstream"]["stream"]["chunks"][0]["line"],
)
- self.assertIn(
-            "<details>", trace["cursor_response"]["stream"]["chunks"][0]["line"]
- )
+ cursor_stream = trace["cursor_response"]["stream"]["chunks"]
+ cursor_text = "\n".join(chunk["line"] for chunk in cursor_stream)
+ self.assertIn("> 💭", cursor_text)
def test_captures_recovery_diagnostics(self) -> None:
"""A request that triggers cold-cache recovery records the recovery
diff --git a/tests/test_transform.py b/tests/test_transform.py
index 72e4556..ff0cd01 100644
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@@ -70,6 +70,10 @@ def test_strip_cursor_thinking_blocks_removes_details_and_think(self) -> None:
             strip_cursor_thinking_blocks("<think>\nplan\n</think>\n\nanswer"),
"answer",
)
+ self.assertEqual(
+ strip_cursor_thinking_blocks("> 💭 plan\n> more\n\nanswer"),
+ "answer",
+ )
def test_strip_cursor_thinking_blocks_preserves_unrelated_details(self) -> None:
kept = "Diff
\nrelevant\n "