Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Keep the Docker build context minimal: VCS data, tests, caches,
# and docs (except README.md, which pyproject.toml references) are excluded.
.git
.github
tests
.venv
**/__pycache__
*.pyc
.pytest_cache
*.md
!README.md
agent-transcripts
19 changes: 19 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
FROM python:3.11-slim

WORKDIR /app

# Package metadata and sources (dependencies are resolved from pyproject.toml).
# README.md and LICENSE are copied because pyproject.toml references them.
COPY pyproject.toml README.md LICENSE ./
COPY src ./src
COPY config.yaml ./config.yaml

RUN pip install --no-cache-dir --upgrade pip \
&& pip install --no-cache-dir .

# Writable directory for runtime state (see reasoning_content_path in config.yaml).
RUN mkdir -p /app/data

# Flush stdout/stderr immediately so container logs appear in real time.
ENV PYTHONUNBUFFERED=1

EXPOSE 8000

CMD ["deepseek-cursor-proxy", "--config", "/app/config.yaml"]
29 changes: 29 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Config for running in Docker (see Dockerfile / docker-compose.yml).
# The DeepSeek API key is set in Cursor (Custom model → API Key); the proxy
# forwards the Authorization header to the upstream as-is.

base_url: https://api.deepseek.com
model: deepseek-v4-pro
thinking: enabled
reasoning_effort: high

# Listen on all interfaces inside the container; from the host: http://127.0.0.1:4012/v1
host: 0.0.0.0
port: 8000
request_timeout: 600
max_request_body_bytes: 41943040  # 40 MiB
ngrok: false

display_reasoning: true
collapsible_reasoning: true

missing_reasoning_strategy: recover
reasoning_content_path: /app/data/reasoning_content.sqlite3

# Often recommended in guides as a workaround for 400 errors with thinking + tools;
# in the current proxy version history repair and streaming are built in — these
# fields are kept only for compatibility with those instructions.
cache_reasoning: true
stream: true
enable_prompt_caching: true

verbose: false
cors: false
15 changes: 15 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
services:
  reasoner-proxy:
    build: .
    container_name: deepseek_reasoner_proxy
    ports:
      - "4012:8000"
    environment:
      # Not used by the proxy itself: the key is set in Cursor. Kept here as a
      # convenience for .env files / documentation.
      - DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY:-}
    volumes:
      - deepseek_proxy_data:/app/data
    restart: unless-stopped

volumes:
  deepseek_proxy_data:
4 changes: 2 additions & 2 deletions src/deepseek_cursor_proxy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
DEFAULT_COLLAPSIBLE_REASONING = True
DEFAULT_NGROK = True
DEFAULT_VERBOSE = False
DEFAULT_REQUEST_TIMEOUT = 300.0
DEFAULT_MAX_REQUEST_BODY_BYTES = 20 * 1024 * 1024
DEFAULT_REQUEST_TIMEOUT = 600.0
DEFAULT_MAX_REQUEST_BODY_BYTES = 40 * 1024 * 1024
DEFAULT_CORS = False
DEFAULT_MISSING_REASONING_STRATEGY = "recover"
DEFAULT_REASONING_CACHE_MAX_AGE_SECONDS = 30 * 24 * 60 * 60
Expand Down
127 changes: 90 additions & 37 deletions src/deepseek_cursor_proxy/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,46 +230,99 @@ def do_POST(self) -> None:
headers=upstream_headers,
body_bytes=upstream_body,
)
request = Request(
upstream_url,
data=upstream_body,
method="POST",
headers=upstream_headers,
)

log_send_summary(prepared)

try:
if self.config.verbose:
LOG.info("forwarding to %s", upstream_url)
response = urlopen(request, timeout=self.config.request_timeout)
except HTTPError as exc:
LOG.warning(
"request failed upstream_status=%s stream=%s elapsed_ms=%s",
exc.code,
bool(prepared.payload.get("stream")),
elapsed_ms(started),
)
self._send_upstream_error(exc, trace=trace)
self._finish_trace(
trace,
"upstream_error",
http_status=exc.code,
stream=bool(prepared.payload.get("stream")),
)
return
except URLError as exc:
LOG.warning(
"upstream request failed elapsed_ms=%s reason=%s",
elapsed_ms(started),
exc.reason,
)
self._send_json(
502,
{"error": {"message": f"Upstream request failed: {exc.reason}"}},
trace=trace,
)
self._finish_trace(trace, "upstream_error", http_status=502)
MAX_RETRIES = 2 # 1 initial + 2 retries = 3 total attempts
RETRY_BACKOFF_SECONDS = [2.0, 4.0] # backoff between attempts

response = None
last_error: HTTPError | URLError | None = None
for attempt in range(MAX_RETRIES + 1):
try:
if self.config.verbose:
LOG.info(
"forwarding to %s (attempt %s/%s)",
upstream_url,
attempt + 1,
MAX_RETRIES + 1,
)
request = Request(
upstream_url,
data=upstream_body,
method="POST",
headers=upstream_headers,
)
response = urlopen(request, timeout=self.config.request_timeout)
break
except HTTPError as exc:
last_error = exc
if exc.code is not None and 500 <= exc.code < 600:
if attempt < MAX_RETRIES:
delay = RETRY_BACKOFF_SECONDS[attempt]
LOG.warning(
(
"upstream returned %s, retrying in %.1fs "
"(attempt %s/%s) elapsed_ms=%s"
),
exc.code,
delay,
attempt + 1,
MAX_RETRIES + 1,
elapsed_ms(started),
)
time.sleep(delay)
continue
break
except URLError as exc:
last_error = exc
if attempt < MAX_RETRIES:
delay = RETRY_BACKOFF_SECONDS[attempt]
LOG.warning(
(
"upstream request failed, retrying in %.1fs "
"(attempt %s/%s) reason=%s elapsed_ms=%s"
),
delay,
attempt + 1,
MAX_RETRIES + 1,
exc.reason,
elapsed_ms(started),
)
time.sleep(delay)
continue
break

if response is None:
if isinstance(last_error, HTTPError):
exc = last_error
LOG.warning(
"request failed upstream_status=%s stream=%s elapsed_ms=%s",
exc.code,
bool(prepared.payload.get("stream")),
elapsed_ms(started),
)
self._send_upstream_error(exc, trace=trace)
self._finish_trace(
trace,
"upstream_error",
http_status=exc.code,
stream=bool(prepared.payload.get("stream")),
)
else:
exc = last_error or URLError("unknown upstream error")
reason_str = getattr(exc, "reason", str(exc)) if exc else "unknown"
LOG.warning(
"upstream request failed elapsed_ms=%s reason=%s",
elapsed_ms(started),
reason_str,
)
self._send_json(
502,
{"error": {"message": f"Upstream request failed: {reason_str}"}},
trace=trace,
)
self._finish_trace(trace, "upstream_error", http_status=502)
return

with response:
Expand Down
81 changes: 44 additions & 37 deletions src/deepseek_cursor_proxy/streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@
from .reasoning_store import ReasoningStore


THINKING_BLOCK_START = "<think>\n"
THINKING_BLOCK_END = "\n</think>\n\n"
COLLAPSIBLE_THINKING_BLOCK_START = "<details>\n<summary>Thinking</summary>\n\n"
COLLAPSIBLE_THINKING_BLOCK_END = "\n</details>\n\n"
THINKING_BLOCK_START = "> 💭 "
THINKING_BLOCK_END = "\n\n"
COLLAPSIBLE_THINKING_BLOCK_START = "> 💭 "
COLLAPSIBLE_THINKING_BLOCK_END = "\n\n"
THINKING_BLOCK_CONTINUE = ""


@dataclass
Expand Down Expand Up @@ -220,6 +221,7 @@ def __init__(self, collapsible: bool = True) -> None:
self._block_start = (
COLLAPSIBLE_THINKING_BLOCK_START if collapsible else THINKING_BLOCK_START
)
self._block_continue = THINKING_BLOCK_CONTINUE
self._block_end = (
COLLAPSIBLE_THINKING_BLOCK_END if collapsible else THINKING_BLOCK_END
)
Expand All @@ -239,44 +241,47 @@ def rewrite_chunk(self, chunk: dict[str, Any]) -> None:
delta = {}
raw_choice["delta"] = delta

mirrored_parts: list[str] = []
reasoning_content = delta.get("reasoning_content")
if isinstance(reasoning_content, str) and reasoning_content:
rc = delta.get("reasoning_content")
rc_str = rc if isinstance(rc, str) and rc else ""

if rc_str:
delta.pop("reasoning_content", None)
if index not in self._open_choices:
mirrored_parts.append(self._block_start)
delta["content"] = self._block_start + rc_str
self._open_choices.add(index)
mirrored_parts.append(reasoning_content)
else:
delta["content"] = self._block_continue + rc_str
continue

existing_content = delta.get("content")
should_close = index in self._open_choices and (
bool(existing_content)
or bool(delta.get("tool_calls"))
or raw_choice.get("finish_reason") is not None
)
if should_close:
mirrored_parts.append(self._block_end)
self._open_choices.discard(index)
has_real_content = isinstance(existing_content, str) and existing_content != ""
has_tool_calls = bool(delta.get("tool_calls"))
has_finish = raw_choice.get("finish_reason") is not None

if not mirrored_parts:
continue
if isinstance(existing_content, str):
mirrored_parts.append(existing_content)
delta["content"] = "".join(mirrored_parts)
if index in self._open_choices and (
has_real_content or has_tool_calls or has_finish
):
content_part = existing_content if isinstance(existing_content, str) else ""
delta["content"] = self._block_end + content_part
self._open_choices.discard(index)

def flush_chunk(self, model: str) -> dict[str, Any] | None:
if not self._open_choices:
return None

choices = [
{
"index": index,
"delta": {"content": self._block_end},
"finish_reason": None,
}
for index in sorted(self._open_choices)
]
"""Close any open thinking blockquotes when the stream ends."""
choices: list[dict[str, Any]] = []

for index in sorted(self._open_choices):
choices.append(
{
"index": index,
"delta": {"content": self._block_end},
"finish_reason": None,
}
)
self._open_choices.clear()

if not choices:
return None

chunk: dict[str, Any] = {
"id": self._last_chunk_metadata.get("id", "chatcmpl-reasoning-close"),
"object": self._last_chunk_metadata.get("object", "chat.completion.chunk"),
Expand All @@ -298,11 +303,11 @@ def fold_reasoning_into_content(
response_payload: dict[str, Any],
collapsible: bool,
) -> None:
"""Mirror `reasoning_content` into the visible `content` field for
non-streaming responses, matching the streaming `<details>` layout."""
"""Mirror reasoning_content into content as a Markdown blockquote."""
block_start = (
COLLAPSIBLE_THINKING_BLOCK_START if collapsible else THINKING_BLOCK_START
)
block_continue = THINKING_BLOCK_CONTINUE
block_end = COLLAPSIBLE_THINKING_BLOCK_END if collapsible else THINKING_BLOCK_END
choices = response_payload.get("choices")
if not isinstance(choices, list):
Expand All @@ -317,9 +322,11 @@ def fold_reasoning_into_content(
if not isinstance(reasoning, str) or not reasoning:
continue
content = message.get("content")
formatted_reasoning = block_start + reasoning.replace(
"\n", "\n" + block_continue
)
message["content"] = (
block_start
+ reasoning
formatted_reasoning
+ block_end
+ (content if isinstance(content, str) else "")
)
6 changes: 5 additions & 1 deletion src/deepseek_cursor_proxy/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@
re.IGNORECASE | re.VERBOSE,
)

CURSOR_BLOCKQUOTE_THINKING_PREFIX_RE = re.compile(r"\A(?:>[^\n]*\n)+\n*")

RECOVERY_NOTICE_TEXT = "[deepseek-cursor-proxy] Refreshed reasoning_content history."
RECOVERY_NOTICE_CONTENT = f"{RECOVERY_NOTICE_TEXT}\n\n"
RECOVERY_SYSTEM_CONTENT = (
Expand Down Expand Up @@ -160,7 +162,9 @@ def extract_text_content(content: Any) -> str | None:


def strip_cursor_thinking_blocks(content: str) -> str:
    """Strip proxy-injected thinking markup from a message's visible text.

    Removes every span matched by ``CURSOR_THINKING_BLOCK_RE`` (presumably the
    thinking blocks this proxy mirrors into content — defined earlier in this
    module; verify against its pattern), then a leading Markdown blockquote
    prefix (one or more ``>``-prefixed lines plus trailing blank lines, per
    ``CURSOR_BLOCKQUOTE_THINKING_PREFIX_RE``), and finally trims leading
    newlines so the remaining content starts cleanly.

    The captured merge left the pre-change single-line ``return`` above the new
    two-step body, making the blockquote stripping unreachable dead code; only
    the new implementation is kept here.
    """
    content = CURSOR_THINKING_BLOCK_RE.sub("", content)
    content = CURSOR_BLOCKQUOTE_THINKING_PREFIX_RE.sub("", content)
    return content.lstrip("\r\n")


def normalize_tool_call(tool_call: Any) -> dict[str, Any]:
Expand Down
Loading
Loading