Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/security.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ jobs:
run: |
grep -v '@ git+' src/server/requirements.txt > /tmp/requirements-server-audit.txt
uvx pip-audit==2.9.0 --strict \
--ignore-vuln PYSEC-2026-161 \
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need this line? I experimented locally and found that bumping fastapi to >=0.135.0 removes the need for this ignore entirely. (The doc addition says the ignore is required because of vllm-omni, which is not installed in the server environment.)

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed by removing the line and bumping the FastAPI version bound.

-r /tmp/requirements-server-audit.txt
- name: Run pip-audit (worker CPU)
# Drop ``@ git+`` deps before auditing — not on PyPI, no CVE feed.
Expand All @@ -105,6 +106,7 @@ jobs:
--ignore-vuln GHSA-vfmq-68hx-4jfw \
--ignore-vuln GHSA-j7w6-vpvq-j3gm \
--ignore-vuln GHSA-98h9-4798-4q5v \
--ignore-vuln GHSA-7wx4-6vff-v64p \
--ignore-vuln PYSEC-2025-189 \
--ignore-vuln PYSEC-2025-190 \
--ignore-vuln PYSEC-2025-191 \
Expand Down Expand Up @@ -152,6 +154,7 @@ jobs:
--ignore-vuln GHSA-83vm-p52w-f9pw \
--ignore-vuln GHSA-j7w6-vpvq-j3gm \
--ignore-vuln GHSA-98h9-4798-4q5v \
--ignore-vuln GHSA-7wx4-6vff-v64p \
--ignore-vuln PYSEC-2025-189 \
--ignore-vuln PYSEC-2025-190 \
--ignore-vuln PYSEC-2025-191 \
Expand All @@ -176,4 +179,5 @@ jobs:
--ignore-vuln PYSEC-2024-277 \
--ignore-vuln PYSEC-2025-222 \
--ignore-vuln PYSEC-2024-274 \
--ignore-vuln PYSEC-2026-161 \
-r src/worker/requirements/requirements.gpu.txt
2 changes: 2 additions & 0 deletions docs/CODE_STYLE.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ as `--ignore-vuln` flags in `.github/workflows/security.yml`.
| GHSA-w8v5-vhqr-4h9v | diskcache | (none) | upstream unmaintained, no fixed version published |
| GHSA-j7w6-vpvq-j3gm | diffusers | 0.38.0 | fix requires safetensors>=0.8.0rc0 pre-release; uv lock won't pick up pre-releases without explicit opt-in |
| GHSA-98h9-4798-4q5v | diffusers | 0.38.0 | same blocker as GHSA-j7w6-vpvq-j3gm — both fixed in 0.38.0 |
| GHSA-7wx4-6vff-v64p | diffusers | 0.38.0 | same blocker as GHSA-j7w6-vpvq-j3gm — fixed in 0.38.0 |
| PYSEC-2025-189 | torch | (none) | no fix version published |
| PYSEC-2025-190 | torch | (none) | same |
| PYSEC-2025-191 | torch | (none) | same |
Expand All @@ -141,6 +142,7 @@ as `--ignore-vuln` flags in `.github/workflows/security.yml`.
| PYSEC-2024-277 | joblib | (none) | no fix version published |
| PYSEC-2025-222 | vllm | (none) | no fix version published; held by vllm-omni 0.18 pin |
| PYSEC-2024-274 | gradio | (none) | no fix version published; vllm-omni 0.18 pins gradio==5.50 |
| PYSEC-2026-161 | starlette | 1.0.1 | gradio 5.50 caps starlette<1.0 (transitive via vllm-omni 0.18) |

When a blocker lifts (e.g. transformers 5 ↔ vllm 0.19 line stabilizes),
drop the corresponding `--ignore-vuln` flag from the workflow and the
Expand Down
15 changes: 10 additions & 5 deletions src/server/supervisor/supervisor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

import asyncio
import logging
import multiprocessing as mp
import os
import signal
from multiprocessing.process import BaseProcess
from multiprocessing.queues import Queue as MPQueue
from queue import Empty as QueueEmpty

Expand All @@ -18,7 +18,12 @@
WorkerManagementConfig,
)
from ..hooks import PrincipalContext
from ..utils.concurrent import TaskReceiver, TaskSender, create_task_channel
from ..utils.concurrent import (
MP_CTX,
TaskReceiver,
TaskSender,
create_task_channel,
)

_CMD_TIMEOUT = 120.0
_NODE_ID_HANDSHAKE_TIMEOUT = 30.0
Expand All @@ -42,10 +47,10 @@ def __init__(
self._worker_management = worker_management
self._logging_config = logging_config
self._logger = logger
self._process: mp.Process | None = None
self._process: BaseProcess | None = None
self._cmd_sender: TaskSender[CommandMessage, CommandResponse] | None = None
self._cmd_receiver: TaskReceiver[CommandMessage, CommandResponse] | None = None
self._node_id_queue: MPQueue[str] = mp.Queue(maxsize=1)
self._node_id_queue: MPQueue[str] = MP_CTX.Queue(maxsize=1)
self._node_id: str | None = None

@property
Expand All @@ -66,7 +71,7 @@ async def start(self, system_principal: PrincipalContext) -> None:
return

self._cmd_sender, self._cmd_receiver = create_task_channel()
self._process = mp.Process(
self._process = MP_CTX.Process(
target=_run_supervisor,
kwargs={
"identity": self._identity,
Expand Down
11 changes: 9 additions & 2 deletions src/server/utils/concurrent.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@
from collections.abc import Iterable
from multiprocessing.queues import Queue as MPQueue

# Shared multiprocessing context for FlowMesh child processes. Spawn (not fork) because
# the parent opens Redis-over-TLS connections during lifespan startup, which initialises
# OpenSSL state; OpenSSL is not fork-safe and the child can deadlock in
# `ssl.SSLContext.__new__` when it builds its own connections. All IPC primitives shared
# between parent and child must be created from this same context.
MP_CTX = mp.get_context("spawn")

type TaskIDType = str


Expand Down Expand Up @@ -90,8 +97,8 @@ def send_result(self, task_id: TaskIDType, result: R) -> None:


def create_task_channel[T, R]() -> tuple[TaskSender[T, R], TaskReceiver[T, R]]:
send_q: MPQueue[tuple[TaskIDType, T | Sentinel]] = mp.Queue()
recv_q: MPQueue[tuple[TaskIDType, R | Sentinel]] = mp.Queue()
send_q: MPQueue[tuple[TaskIDType, T | Sentinel]] = MP_CTX.Queue()
recv_q: MPQueue[tuple[TaskIDType, R | Sentinel]] = MP_CTX.Queue()
sender = TaskSender(send_q, recv_q)
receiver = TaskReceiver(send_q, recv_q)
return sender, receiver
Loading