Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Keep the Docker build context minimal: VCS data, tests, caches,
# and docs (except README.md, which pyproject.toml references) are excluded.
.git
.github
tests
.venv
**/__pycache__
*.pyc
.pytest_cache
*.md
!README.md
agent-transcripts
19 changes: 19 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
FROM python:3.11-slim

WORKDIR /app

# Package metadata and sources (dependencies are resolved from pyproject.toml).
# README.md and LICENSE are copied because pyproject.toml references them.
COPY pyproject.toml README.md LICENSE ./
COPY src ./src
COPY config.yaml ./config.yaml

RUN pip install --no-cache-dir --upgrade pip \
&& pip install --no-cache-dir .

# Writable directory for runtime state (see reasoning_content_path in config.yaml).
RUN mkdir -p /app/data

# Flush stdout/stderr immediately so container logs appear in real time.
ENV PYTHONUNBUFFERED=1

EXPOSE 8000

CMD ["deepseek-cursor-proxy", "--config", "/app/config.yaml"]
29 changes: 29 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Config for running in Docker (see Dockerfile / docker-compose.yml).
# The DeepSeek API key is set in Cursor (Custom model → API Key); the proxy
# forwards the Authorization header to the upstream as-is.

base_url: https://api.deepseek.com
model: deepseek-v4-pro
thinking: enabled
reasoning_effort: high

# Listen on all interfaces inside the container; from the host: http://127.0.0.1:4012/v1
host: 0.0.0.0
port: 8000
request_timeout: 600
max_request_body_bytes: 41943040  # 40 MiB
ngrok: false

display_reasoning: true
collapsible_reasoning: true

missing_reasoning_strategy: recover
reasoning_content_path: /app/data/reasoning_content.sqlite3

# Often recommended in guides as a workaround for 400 errors with thinking + tools;
# in the current proxy version history repair and streaming are built in — these
# fields are kept only for compatibility with those instructions.
cache_reasoning: true
stream: true
enable_prompt_caching: true

verbose: false
cors: false
15 changes: 15 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
services:
  reasoner-proxy:
    build: .
    container_name: deepseek_reasoner_proxy
    ports:
      - "4012:8000"
    environment:
      # Not used by the proxy itself: the key is set in Cursor. Kept here as a
      # convenience for .env files / documentation.
      - DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY:-}
    volumes:
      - deepseek_proxy_data:/app/data
    restart: unless-stopped

volumes:
  deepseek_proxy_data:
4 changes: 2 additions & 2 deletions src/deepseek_cursor_proxy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
DEFAULT_COLLAPSIBLE_REASONING = True
DEFAULT_NGROK = True
DEFAULT_VERBOSE = False
DEFAULT_REQUEST_TIMEOUT = 300.0
DEFAULT_MAX_REQUEST_BODY_BYTES = 20 * 1024 * 1024
DEFAULT_REQUEST_TIMEOUT = 600.0
DEFAULT_MAX_REQUEST_BODY_BYTES = 40 * 1024 * 1024
DEFAULT_CORS = False
DEFAULT_MISSING_REASONING_STRATEGY = "recover"
DEFAULT_REASONING_CACHE_MAX_AGE_SECONDS = 30 * 24 * 60 * 60
Expand Down
127 changes: 90 additions & 37 deletions src/deepseek_cursor_proxy/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,46 +230,99 @@ def do_POST(self) -> None:
headers=upstream_headers,
body_bytes=upstream_body,
)
request = Request(
upstream_url,
data=upstream_body,
method="POST",
headers=upstream_headers,
)

log_send_summary(prepared)

try:
if self.config.verbose:
LOG.info("forwarding to %s", upstream_url)
response = urlopen(request, timeout=self.config.request_timeout)
except HTTPError as exc:
LOG.warning(
"request failed upstream_status=%s stream=%s elapsed_ms=%s",
exc.code,
bool(prepared.payload.get("stream")),
elapsed_ms(started),
)
self._send_upstream_error(exc, trace=trace)
self._finish_trace(
trace,
"upstream_error",
http_status=exc.code,
stream=bool(prepared.payload.get("stream")),
)
return
except URLError as exc:
LOG.warning(
"upstream request failed elapsed_ms=%s reason=%s",
elapsed_ms(started),
exc.reason,
)
self._send_json(
502,
{"error": {"message": f"Upstream request failed: {exc.reason}"}},
trace=trace,
)
self._finish_trace(trace, "upstream_error", http_status=502)
MAX_RETRIES = 2 # 1 initial + 2 retries = 3 total attempts
RETRY_BACKOFF_SECONDS = [2.0, 4.0] # backoff between attempts

response = None
last_error: HTTPError | URLError | None = None
for attempt in range(MAX_RETRIES + 1):
try:
if self.config.verbose:
LOG.info(
"forwarding to %s (attempt %s/%s)",
upstream_url,
attempt + 1,
MAX_RETRIES + 1,
)
request = Request(
upstream_url,
data=upstream_body,
method="POST",
headers=upstream_headers,
)
response = urlopen(request, timeout=self.config.request_timeout)
break
except HTTPError as exc:
last_error = exc
if exc.code is not None and 500 <= exc.code < 600:
if attempt < MAX_RETRIES:
delay = RETRY_BACKOFF_SECONDS[attempt]
LOG.warning(
(
"upstream returned %s, retrying in %.1fs "
"(attempt %s/%s) elapsed_ms=%s"
),
exc.code,
delay,
attempt + 1,
MAX_RETRIES + 1,
elapsed_ms(started),
)
time.sleep(delay)
continue
break
except URLError as exc:
last_error = exc
if attempt < MAX_RETRIES:
delay = RETRY_BACKOFF_SECONDS[attempt]
LOG.warning(
(
"upstream request failed, retrying in %.1fs "
"(attempt %s/%s) reason=%s elapsed_ms=%s"
),
delay,
attempt + 1,
MAX_RETRIES + 1,
exc.reason,
elapsed_ms(started),
)
time.sleep(delay)
continue
break

if response is None:
if isinstance(last_error, HTTPError):
exc = last_error
LOG.warning(
"request failed upstream_status=%s stream=%s elapsed_ms=%s",
exc.code,
bool(prepared.payload.get("stream")),
elapsed_ms(started),
)
self._send_upstream_error(exc, trace=trace)
self._finish_trace(
trace,
"upstream_error",
http_status=exc.code,
stream=bool(prepared.payload.get("stream")),
)
else:
exc = last_error or URLError("unknown upstream error")
reason_str = getattr(exc, "reason", str(exc)) if exc else "unknown"
LOG.warning(
"upstream request failed elapsed_ms=%s reason=%s",
elapsed_ms(started),
reason_str,
)
self._send_json(
502,
{"error": {"message": f"Upstream request failed: {reason_str}"}},
trace=trace,
)
self._finish_trace(trace, "upstream_error", http_status=502)
return

with response:
Expand Down
81 changes: 44 additions & 37 deletions src/deepseek_cursor_proxy/streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@
from .reasoning_store import ReasoningStore


THINKING_BLOCK_START = "<think>\n"
THINKING_BLOCK_END = "\n</think>\n\n"
COLLAPSIBLE_THINKING_BLOCK_START = "<details>\n<summary>Thinking</summary>\n\n"
COLLAPSIBLE_THINKING_BLOCK_END = "\n</details>\n\n"
THINKING_BLOCK_START = "> 💭 "
THINKING_BLOCK_END = "\n\n"
COLLAPSIBLE_THINKING_BLOCK_START = "> 💭 "
COLLAPSIBLE_THINKING_BLOCK_END = "\n\n"
THINKING_BLOCK_CONTINUE = ""


@dataclass
Expand Down Expand Up @@ -220,6 +221,7 @@ def __init__(self, collapsible: bool = True) -> None:
self._block_start = (
COLLAPSIBLE_THINKING_BLOCK_START if collapsible else THINKING_BLOCK_START
)
self._block_continue = THINKING_BLOCK_CONTINUE
self._block_end = (
COLLAPSIBLE_THINKING_BLOCK_END if collapsible else THINKING_BLOCK_END
)
Expand All @@ -239,44 +241,47 @@ def rewrite_chunk(self, chunk: dict[str, Any]) -> None:
delta = {}
raw_choice["delta"] = delta

mirrored_parts: list[str] = []
reasoning_content = delta.get("reasoning_content")
if isinstance(reasoning_content, str) and reasoning_content:
rc = delta.get("reasoning_content")
rc_str = rc if isinstance(rc, str) and rc else ""

if rc_str:
delta.pop("reasoning_content", None)
if index not in self._open_choices:
mirrored_parts.append(self._block_start)
delta["content"] = self._block_start + rc_str
self._open_choices.add(index)
mirrored_parts.append(reasoning_content)
else:
delta["content"] = self._block_continue + rc_str
continue

existing_content = delta.get("content")
should_close = index in self._open_choices and (
bool(existing_content)
or bool(delta.get("tool_calls"))
or raw_choice.get("finish_reason") is not None
)
if should_close:
mirrored_parts.append(self._block_end)
self._open_choices.discard(index)
has_real_content = isinstance(existing_content, str) and existing_content != ""
has_tool_calls = bool(delta.get("tool_calls"))
has_finish = raw_choice.get("finish_reason") is not None

if not mirrored_parts:
continue
if isinstance(existing_content, str):
mirrored_parts.append(existing_content)
delta["content"] = "".join(mirrored_parts)
if index in self._open_choices and (
has_real_content or has_tool_calls or has_finish
):
content_part = existing_content if isinstance(existing_content, str) else ""
delta["content"] = self._block_end + content_part
self._open_choices.discard(index)

def flush_chunk(self, model: str) -> dict[str, Any] | None:
if not self._open_choices:
return None

choices = [
{
"index": index,
"delta": {"content": self._block_end},
"finish_reason": None,
}
for index in sorted(self._open_choices)
]
"""Close any open thinking blockquotes when the stream ends."""
choices: list[dict[str, Any]] = []

for index in sorted(self._open_choices):
choices.append(
{
"index": index,
"delta": {"content": self._block_end},
"finish_reason": None,
}
)
self._open_choices.clear()

if not choices:
return None

chunk: dict[str, Any] = {
"id": self._last_chunk_metadata.get("id", "chatcmpl-reasoning-close"),
"object": self._last_chunk_metadata.get("object", "chat.completion.chunk"),
Expand All @@ -298,11 +303,11 @@ def fold_reasoning_into_content(
response_payload: dict[str, Any],
collapsible: bool,
) -> None:
"""Mirror `reasoning_content` into the visible `content` field for
non-streaming responses, matching the streaming `<details>` layout."""
"""Mirror reasoning_content into content as a Markdown blockquote."""
block_start = (
COLLAPSIBLE_THINKING_BLOCK_START if collapsible else THINKING_BLOCK_START
)
block_continue = THINKING_BLOCK_CONTINUE
block_end = COLLAPSIBLE_THINKING_BLOCK_END if collapsible else THINKING_BLOCK_END
choices = response_payload.get("choices")
if not isinstance(choices, list):
Expand All @@ -317,9 +322,11 @@ def fold_reasoning_into_content(
if not isinstance(reasoning, str) or not reasoning:
continue
content = message.get("content")
formatted_reasoning = block_start + reasoning.replace(
"\n", "\n" + block_continue
)
message["content"] = (
block_start
+ reasoning
formatted_reasoning
+ block_end
+ (content if isinstance(content, str) else "")
)
6 changes: 5 additions & 1 deletion src/deepseek_cursor_proxy/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@
re.IGNORECASE | re.VERBOSE,
)

CURSOR_BLOCKQUOTE_THINKING_PREFIX_RE = re.compile(r"\A(?:>[^\n]*\n)+\n*")

RECOVERY_NOTICE_TEXT = "[deepseek-cursor-proxy] Refreshed reasoning_content history."
RECOVERY_NOTICE_CONTENT = f"{RECOVERY_NOTICE_TEXT}\n\n"
RECOVERY_SYSTEM_CONTENT = (
Expand Down Expand Up @@ -160,7 +162,9 @@ def extract_text_content(content: Any) -> str | None:


def strip_cursor_thinking_blocks(content: str) -> str:
    """Strip proxy-injected thinking markup from a message's visible text.

    Removes every span matched by ``CURSOR_THINKING_BLOCK_RE`` (presumably the
    thinking blocks this proxy mirrors into content — defined earlier in this
    module; verify against its pattern), then a leading Markdown blockquote
    prefix (one or more ``>``-prefixed lines plus trailing blank lines, per
    ``CURSOR_BLOCKQUOTE_THINKING_PREFIX_RE``), and finally trims leading
    newlines so the remaining content starts cleanly.

    The captured merge left the pre-change single-line ``return`` above the new
    two-step body, making the blockquote stripping unreachable dead code; only
    the new implementation is kept here.
    """
    content = CURSOR_THINKING_BLOCK_RE.sub("", content)
    content = CURSOR_BLOCKQUOTE_THINKING_PREFIX_RE.sub("", content)
    return content.lstrip("\r\n")


def normalize_tool_call(tool_call: Any) -> dict[str, Any]:
Expand Down
Loading
Loading