diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..b31bcbf
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,10 @@
+.git
+.github
+tests
+.venv
+**/__pycache__
+*.pyc
+.pytest_cache
+*.md
+!README.md
+agent-transcripts
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..91552f9
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,19 @@
+FROM python:3.11-slim
+
+WORKDIR /app
+
+# Метаданные и пакет (зависимости из pyproject.toml)
+COPY pyproject.toml README.md LICENSE ./
+COPY src ./src
+COPY config.yaml ./config.yaml
+
+RUN pip install --no-cache-dir --upgrade pip \
+    && pip install --no-cache-dir .
+
+RUN mkdir -p /app/data
+
+ENV PYTHONUNBUFFERED=1
+
+EXPOSE 8000
+
+CMD ["deepseek-cursor-proxy", "--config", "/app/config.yaml"]
diff --git a/config.yaml b/config.yaml
new file mode 100644
index 0000000..095be35
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,29 @@
+# Конфиг для запуска в Docker (см. Dockerfile / docker-compose.yml).
+# Ключ API DeepSeek задаётся в Cursor (Custom model → API Key); прокси пробрасывает Authorization на upstream.
+
+base_url: https://api.deepseek.com
+model: deepseek-v4-pro
+thinking: enabled
+reasoning_effort: high
+
+# Слушаем все интерфейсы внутри контейнера; с хоста: http://127.0.0.1:4012/v1
+host: 0.0.0.0
+port: 8000
+request_timeout: 600
+max_request_body_bytes: 41943040
+ngrok: false
+
+display_reasoning: true
+collapsible_reasoning: true
+
+missing_reasoning_strategy: recover
+reasoning_content_path: /app/data/reasoning_content.sqlite3
+
+# Часто советуют в гайдах для обхода 400 при thinking + tools; в текущей версии прокси
+# ремонт истории и стриминг встроены — эти поля оставлены для совместимости с инструкциями.
+cache_reasoning: true
+stream: true
+enable_prompt_caching: true
+
+verbose: false
+cors: false
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..1e4362e
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,15 @@
+services:
+  reasoner-proxy:
+    build: .
+    container_name: deepseek_reasoner_proxy
+    ports:
+      - "4012:8000"
+    environment:
+      # Не используется самим прокси: ключ задаётся в Cursor. Удобно для .env / документации.
+      - DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY:-}
+    volumes:
+      - deepseek_proxy_data:/app/data
+    restart: unless-stopped
+
+volumes:
+  deepseek_proxy_data:
diff --git a/src/deepseek_cursor_proxy/config.py b/src/deepseek_cursor_proxy/config.py
index 8c10e22..0eb3dd1 100644
--- a/src/deepseek_cursor_proxy/config.py
+++ b/src/deepseek_cursor_proxy/config.py
@@ -25,8 +25,8 @@
 DEFAULT_COLLAPSIBLE_REASONING = True
 DEFAULT_NGROK = True
 DEFAULT_VERBOSE = False
-DEFAULT_REQUEST_TIMEOUT = 300.0
-DEFAULT_MAX_REQUEST_BODY_BYTES = 20 * 1024 * 1024
+DEFAULT_REQUEST_TIMEOUT = 600.0
+DEFAULT_MAX_REQUEST_BODY_BYTES = 40 * 1024 * 1024
 DEFAULT_CORS = False
 DEFAULT_MISSING_REASONING_STRATEGY = "recover"
 DEFAULT_REASONING_CACHE_MAX_AGE_SECONDS = 30 * 24 * 60 * 60
diff --git a/src/deepseek_cursor_proxy/server.py b/src/deepseek_cursor_proxy/server.py
index cf96820..f55ca22 100644
--- a/src/deepseek_cursor_proxy/server.py
+++ b/src/deepseek_cursor_proxy/server.py
@@ -230,46 +230,99 @@ def do_POST(self) -> None:
                 headers=upstream_headers,
                 body_bytes=upstream_body,
             )
-        request = Request(
-            upstream_url,
-            data=upstream_body,
-            method="POST",
-            headers=upstream_headers,
-        )
 
         log_send_summary(prepared)
 
-        try:
-            if self.config.verbose:
-                LOG.info("forwarding to %s", upstream_url)
-            response = urlopen(request, timeout=self.config.request_timeout)
-        except HTTPError as exc:
-            LOG.warning(
-                "request failed upstream_status=%s stream=%s elapsed_ms=%s",
-                exc.code,
-                bool(prepared.payload.get("stream")),
-                elapsed_ms(started),
-            )
-            self._send_upstream_error(exc, trace=trace)
-            self._finish_trace(
-                trace,
-                "upstream_error",
-                http_status=exc.code,
-                stream=bool(prepared.payload.get("stream")),
-            )
-            return
-        except URLError as exc:
-            LOG.warning(
-                "upstream request failed elapsed_ms=%s reason=%s",
-                elapsed_ms(started),
-                exc.reason,
-            )
-            self._send_json(
-                502,
-                {"error": {"message": f"Upstream request failed: {exc.reason}"}},
-                trace=trace,
-            )
-            self._finish_trace(trace, "upstream_error", http_status=502)
+        MAX_RETRIES = 2  # 1 initial + 2 retries = 3 total attempts
+        RETRY_BACKOFF_SECONDS = [2.0, 4.0]  # backoff between attempts
+
+        response = None
+        last_error: HTTPError | URLError | None = None
+        for attempt in range(MAX_RETRIES + 1):
+            try:
+                if self.config.verbose:
+                    LOG.info(
+                        "forwarding to %s (attempt %s/%s)",
+                        upstream_url,
+                        attempt + 1,
+                        MAX_RETRIES + 1,
+                    )
+                request = Request(
+                    upstream_url,
+                    data=upstream_body,
+                    method="POST",
+                    headers=upstream_headers,
+                )
+                response = urlopen(request, timeout=self.config.request_timeout)
+                break
+            except HTTPError as exc:
+                last_error = exc
+                if exc.code is not None and 500 <= exc.code < 600:
+                    if attempt < MAX_RETRIES:
+                        delay = RETRY_BACKOFF_SECONDS[attempt]
+                        LOG.warning(
+                            (
+                                "upstream returned %s, retrying in %.1fs "
+                                "(attempt %s/%s) elapsed_ms=%s"
+                            ),
+                            exc.code,
+                            delay,
+                            attempt + 1,
+                            MAX_RETRIES + 1,
+                            elapsed_ms(started),
+                        )
+                        time.sleep(delay)
+                        continue
+                break
+            except URLError as exc:
+                last_error = exc
+                if attempt < MAX_RETRIES:
+                    delay = RETRY_BACKOFF_SECONDS[attempt]
+                    LOG.warning(
+                        (
+                            "upstream request failed, retrying in %.1fs "
+                            "(attempt %s/%s) reason=%s elapsed_ms=%s"
+                        ),
+                        delay,
+                        attempt + 1,
+                        MAX_RETRIES + 1,
+                        exc.reason,
+                        elapsed_ms(started),
+                    )
+                    time.sleep(delay)
+                    continue
+                break
+
+        if response is None:
+            if isinstance(last_error, HTTPError):
+                exc = last_error
+                LOG.warning(
+                    "request failed upstream_status=%s stream=%s elapsed_ms=%s",
+                    exc.code,
+                    bool(prepared.payload.get("stream")),
+                    elapsed_ms(started),
+                )
+                self._send_upstream_error(exc, trace=trace)
+                self._finish_trace(
+                    trace,
+                    "upstream_error",
+                    http_status=exc.code,
+                    stream=bool(prepared.payload.get("stream")),
+                )
+            else:
+                exc = last_error or URLError("unknown upstream error")
+                reason_str = getattr(exc, "reason", str(exc)) if exc else "unknown"
+                LOG.warning(
+                    "upstream request failed elapsed_ms=%s reason=%s",
+                    elapsed_ms(started),
+                    reason_str,
+                )
+                self._send_json(
+                    502,
+                    {"error": {"message": f"Upstream request failed: {reason_str}"}},
+                    trace=trace,
+                )
+                self._finish_trace(trace, "upstream_error", http_status=502)
             return
 
         with response:
diff --git a/src/deepseek_cursor_proxy/streaming.py b/src/deepseek_cursor_proxy/streaming.py
index 8d6ee95..33a6f45 100644
--- a/src/deepseek_cursor_proxy/streaming.py
+++ b/src/deepseek_cursor_proxy/streaming.py
@@ -7,10 +7,11 @@
 from .reasoning_store import ReasoningStore
 
 
-THINKING_BLOCK_START = "<think>\n"
-THINKING_BLOCK_END = "\n</think>\n\n"
-COLLAPSIBLE_THINKING_BLOCK_START = "<details>\n<summary>Thinking</summary>\n\n"
-COLLAPSIBLE_THINKING_BLOCK_END = "\n</details>\n\n"
+THINKING_BLOCK_START = "> 💭 "
+THINKING_BLOCK_END = "\n\n"
+COLLAPSIBLE_THINKING_BLOCK_START = "> 💭 "
+COLLAPSIBLE_THINKING_BLOCK_END = "\n\n"
+THINKING_BLOCK_CONTINUE = ""
 
 
 @dataclass
@@ -220,6 +221,7 @@ def __init__(self, collapsible: bool = True) -> None:
         self._block_start = (
             COLLAPSIBLE_THINKING_BLOCK_START if collapsible else THINKING_BLOCK_START
         )
+        self._block_continue = THINKING_BLOCK_CONTINUE
         self._block_end = (
             COLLAPSIBLE_THINKING_BLOCK_END if collapsible else THINKING_BLOCK_END
         )
@@ -239,44 +241,47 @@ def rewrite_chunk(self, chunk: dict[str, Any]) -> None:
                 delta = {}
                 raw_choice["delta"] = delta
 
-            mirrored_parts: list[str] = []
-            reasoning_content = delta.get("reasoning_content")
-            if isinstance(reasoning_content, str) and reasoning_content:
+            rc = delta.get("reasoning_content")
+            rc_str = rc if isinstance(rc, str) and rc else ""
+
+            if rc_str:
+                delta.pop("reasoning_content", None)
                 if index not in self._open_choices:
-                    mirrored_parts.append(self._block_start)
+                    delta["content"] = self._block_start + rc_str
                     self._open_choices.add(index)
-                mirrored_parts.append(reasoning_content)
+                else:
+                    delta["content"] = self._block_continue + rc_str
+                continue
 
             existing_content = delta.get("content")
-            should_close = index in self._open_choices and (
-                bool(existing_content)
-                or bool(delta.get("tool_calls"))
-                or raw_choice.get("finish_reason") is not None
-            )
-            if should_close:
-                mirrored_parts.append(self._block_end)
-                self._open_choices.discard(index)
+            has_real_content = isinstance(existing_content, str) and existing_content != ""
+            has_tool_calls = bool(delta.get("tool_calls"))
+            has_finish = raw_choice.get("finish_reason") is not None
 
-            if not mirrored_parts:
-                continue
-            if isinstance(existing_content, str):
-                mirrored_parts.append(existing_content)
-            delta["content"] = "".join(mirrored_parts)
+            if index in self._open_choices and (
+                has_real_content or has_tool_calls or has_finish
+            ):
+                content_part = existing_content if isinstance(existing_content, str) else ""
+                delta["content"] = self._block_end + content_part
+                self._open_choices.discard(index)
 
     def flush_chunk(self, model: str) -> dict[str, Any] | None:
-        if not self._open_choices:
-            return None
-
-        choices = [
-            {
-                "index": index,
-                "delta": {"content": self._block_end},
-                "finish_reason": None,
-            }
-            for index in sorted(self._open_choices)
-        ]
+        """Close any open thinking blockquotes when the stream ends."""
+        choices: list[dict[str, Any]] = []
+
+        for index in sorted(self._open_choices):
+            choices.append(
+                {
+                    "index": index,
+                    "delta": {"content": self._block_end},
+                    "finish_reason": None,
+                }
+            )
         self._open_choices.clear()
 
+        if not choices:
+            return None
+
         chunk: dict[str, Any] = {
             "id": self._last_chunk_metadata.get("id", "chatcmpl-reasoning-close"),
             "object": self._last_chunk_metadata.get("object", "chat.completion.chunk"),
@@ -298,11 +303,11 @@ def fold_reasoning_into_content(
     response_payload: dict[str, Any],
     collapsible: bool,
 ) -> None:
-    """Mirror `reasoning_content` into the visible `content` field for
-    non-streaming responses, matching the streaming `<details>` layout."""
+    """Mirror reasoning_content into content as a Markdown blockquote."""
     block_start = (
         COLLAPSIBLE_THINKING_BLOCK_START if collapsible else THINKING_BLOCK_START
     )
+    block_continue = THINKING_BLOCK_CONTINUE
     block_end = COLLAPSIBLE_THINKING_BLOCK_END if collapsible else THINKING_BLOCK_END
     choices = response_payload.get("choices")
     if not isinstance(choices, list):
@@ -317,9 +322,11 @@ def fold_reasoning_into_content(
         if not isinstance(reasoning, str) or not reasoning:
             continue
         content = message.get("content")
+        formatted_reasoning = block_start + reasoning.replace(
+            "\n", "\n" + block_continue
+        )
         message["content"] = (
-            block_start
-            + reasoning
+            formatted_reasoning
             + block_end
             + (content if isinstance(content, str) else "")
         )
diff --git a/src/deepseek_cursor_proxy/transform.py b/src/deepseek_cursor_proxy/transform.py
index dd576e5..03b264e 100644
--- a/src/deepseek_cursor_proxy/transform.py
+++ b/src/deepseek_cursor_proxy/transform.py
@@ -95,6 +95,8 @@
     re.IGNORECASE | re.VERBOSE,
 )
 
+CURSOR_BLOCKQUOTE_THINKING_PREFIX_RE = re.compile(r"\A(?:>[^\n]*\n)+\n*")
+
 RECOVERY_NOTICE_TEXT = "[deepseek-cursor-proxy] Refreshed reasoning_content history."
 RECOVERY_NOTICE_CONTENT = f"{RECOVERY_NOTICE_TEXT}\n\n"
 RECOVERY_SYSTEM_CONTENT = (
@@ -160,7 +162,9 @@ def extract_text_content(content: Any) -> str | None:
 
 
 def strip_cursor_thinking_blocks(content: str) -> str:
-    return CURSOR_THINKING_BLOCK_RE.sub("", content).lstrip("\r\n")
+    content = CURSOR_THINKING_BLOCK_RE.sub("", content)
+    content = CURSOR_BLOCKQUOTE_THINKING_PREFIX_RE.sub("", content)
+    return content.lstrip("\r\n")
 
 
 def normalize_tool_call(tool_call: Any) -> dict[str, Any]:
diff --git a/tests/test_protocol.py b/tests/test_protocol.py
index e763356..1e5351f 100644
--- a/tests/test_protocol.py
+++ b/tests/test_protocol.py
@@ -977,13 +977,11 @@ def test_streaming_response_mirrors_reasoning_into_details_block(self) -> None:
             if line.startswith("data: {")
         ]
         self.assertEqual(
-            chunks[0]["choices"][0]["delta"]["content"],
-            "<details>\n<summary>Thinking</summary>\n\nNeed ",
-        )
-        self.assertEqual(
-            chunks[2]["choices"][0]["delta"]["content"],
-            "\n</details>\n\nFinal.",
+            chunks[0]["choices"][0]["delta"],
+            {"role": "assistant", "content": "> 💭 Need "},
         )
+        self.assertEqual(chunks[1]["choices"][0]["delta"], {"content": "> context."})
+        self.assertEqual(chunks[2]["choices"][0]["delta"]["content"], "\n\nFinal.")
 
 
 class NonStreamingDisplayTests(_StrictUpstreamCase):
@@ -1007,7 +1005,7 @@ def test_non_streaming_response_mirrors_reasoning_into_details_block(
         content = response["choices"][0]["message"]["content"]
         self.assertEqual(
             content,
-            f"<details>\n<summary>Thinking</summary>\n\n{THINKING_1_1}\n</details>\n\n",
+            f"> 💭 {THINKING_1_1}\n\n",
         )
 
 
@@ -1354,9 +1352,8 @@ def test_tool_reasoning_is_cached_before_done(self) -> None:
             )
             response.read()
         self.assertEqual(status, 200, payload)
-        self.assertEqual(
-            payload["choices"][0]["message"]["content"].split("</details>")[-1],
-            "\n\nfollow-up accepted",
+        self.assertTrue(
+            payload["choices"][0]["message"]["content"].endswith("follow-up accepted")
         )
 
 
diff --git a/tests/test_streaming.py b/tests/test_streaming.py
index 9f594f7..b3fe84a 100644
--- a/tests/test_streaming.py
+++ b/tests/test_streaming.py
@@ -329,12 +329,9 @@ def test_mirrors_reasoning_content_into_details_content(self) -> None:
 
         reasoning_delta = reasoning_chunk["choices"][0]["delta"]
         answer_delta = answer_chunk["choices"][0]["delta"]
-        self.assertEqual(reasoning_delta["reasoning_content"], "Need context.")
-        self.assertEqual(
-            reasoning_delta["content"],
-            "<details>\n<summary>Thinking</summary>\n\nNeed context.",
-        )
-        self.assertEqual(answer_delta["content"], "\n</details>\n\nFinal answer.")
+        self.assertEqual(reasoning_delta["content"], "> 💭 Need context.")
+        self.assertNotIn("reasoning_content", reasoning_delta)
+        self.assertEqual(answer_delta["content"], "\n\nFinal answer.")
 
     def test_can_mirror_reasoning_content_into_legacy_think_content(self) -> None:
         adapter = CursorReasoningDisplayAdapter(collapsible=False)
@@ -361,11 +358,10 @@ def test_can_mirror_reasoning_content_into_legacy_think_content(self) -> None:
         adapter.rewrite_chunk(answer_chunk)
 
         self.assertEqual(
-            reasoning_chunk["choices"][0]["delta"]["content"], "<think>\nNeed context."
+            reasoning_chunk["choices"][0]["delta"]["content"], "> 💭 Need context."
         )
         self.assertEqual(
-            answer_chunk["choices"][0]["delta"]["content"],
-            "\n</think>\n\nFinal answer.",
+            answer_chunk["choices"][0]["delta"]["content"], "\n\nFinal answer."
         )
 
     def test_closes_thinking_block_before_tool_calls(self) -> None:
@@ -400,9 +396,8 @@ def test_closes_thinking_block_before_tool_calls(self) -> None:
 
         adapter.rewrite_chunk(tool_chunk)
 
-        self.assertEqual(
-            tool_chunk["choices"][0]["delta"]["content"], "\n</details>\n\n"
-        )
+        self.assertEqual(tool_chunk["choices"][0]["delta"]["content"], "\n\n")
+        self.assertIn("tool_calls", tool_chunk["choices"][0]["delta"])
 
     def test_flush_chunk_closes_unfinished_thinking_block_at_done(self) -> None:
         adapter = CursorReasoningDisplayAdapter()
@@ -425,16 +420,41 @@ def test_flush_chunk_closes_unfinished_thinking_block_at_done(self) -> None:
         self.assertIsNotNone(closing_chunk)
         assert closing_chunk is not None
         self.assertEqual(closing_chunk["model"], "deepseek-v4-pro")
-        self.assertEqual(
-            closing_chunk["choices"][0]["delta"]["content"], "\n</details>\n\n"
-        )
+        self.assertEqual(closing_chunk["choices"][0]["delta"]["content"], "\n\n")
         self.assertIsNone(adapter.flush_chunk("deepseek-v4-pro"))
 
+    def test_streams_reasoning_as_blockquote_then_separates_content(self) -> None:
+        adapter = CursorReasoningDisplayAdapter()
+        expected_contents = ["> 💭 Let", ">  me", ">  think"]
+        for i, fragment in enumerate(("Let", " me", " think")):
+            chunk = {
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": {"reasoning_content": fragment},
+                        "finish_reason": None,
+                    }
+                ]
+            }
+            adapter.rewrite_chunk(chunk)
+            self.assertEqual(chunk["choices"][0]["delta"]["content"], expected_contents[i])
+
+        answer = {
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": {"content": "Answer."},
+                    "finish_reason": None,
+                }
+            ]
+        }
+        adapter.rewrite_chunk(answer)
+        self.assertEqual(answer["choices"][0]["delta"]["content"], "\n\nAnswer.")
+
 
 class FoldReasoningTests(unittest.TestCase):
     def test_fold_reasoning_into_non_streaming_content(self) -> None:
-        """Non-streaming responses mirror reasoning_content into a visible
-        <details> block, matching the streaming layout."""
+        """Non-streaming responses mirror reasoning_content into blockquotes."""
         payload = {
             "choices": [
                 {
@@ -450,7 +470,26 @@ def test_fold_reasoning_into_non_streaming_content(self) -> None:
         fold_reasoning_into_content(payload, collapsible=True)
         self.assertEqual(
             payload["choices"][0]["message"]["content"],
-            "<details>\n<summary>Thinking</summary>\n\nthinking\n</details>\n\nanswer",
+            "> 💭 thinking\n\nanswer",
+        )
+
+    def test_fold_reasoning_multiline_uses_continue_prefix(self) -> None:
+        payload = {
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {
+                        "role": "assistant",
+                        "content": "answer",
+                        "reasoning_content": "line1\nline2",
+                    },
+                }
+            ]
+        }
+        fold_reasoning_into_content(payload, collapsible=True)
+        self.assertEqual(
+            payload["choices"][0]["message"]["content"],
+            "> 💭 line1\n> line2\n\nanswer",
         )
 
     def test_fold_reasoning_skips_empty_reasoning(self) -> None:
diff --git a/tests/test_trace.py b/tests/test_trace.py
index bf6db0e..19b6d06 100644
--- a/tests/test_trace.py
+++ b/tests/test_trace.py
@@ -284,9 +284,9 @@ def test_captures_streaming_replay_chunks(self) -> None:
             "reasoning_content",
             trace["upstream"]["stream"]["chunks"][0]["line"],
         )
-        self.assertIn(
-            "<details>", trace["cursor_response"]["stream"]["chunks"][0]["line"]
-        )
+        cursor_stream = trace["cursor_response"]["stream"]["chunks"]
+        cursor_text = "\n".join(chunk["line"] for chunk in cursor_stream)
+        self.assertIn("> 💭", cursor_text)
 
     def test_captures_recovery_diagnostics(self) -> None:
         """A request that triggers cold-cache recovery records the recovery
diff --git a/tests/test_transform.py b/tests/test_transform.py
index 72e4556..ff0cd01 100644
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@@ -70,6 +70,10 @@ def test_strip_cursor_thinking_blocks_removes_details_and_think(self) -> None:
             strip_cursor_thinking_blocks("<think>\nplan\n</think>\n\nanswer"),
             "answer",
         )
+        self.assertEqual(
+            strip_cursor_thinking_blocks("> 💭 plan\n> more\n\nanswer"),
+            "answer",
+        )
 
     def test_strip_cursor_thinking_blocks_preserves_unrelated_details(self) -> None:
         kept = "<details><summary>Diff</summary>\nrelevant\n</details>"