Skip to content

Commit 496e4e3

Browse files
authored
Merge pull request #9 from Infinirc/dev
[Feature] add ARM64 support for Docker images and configurable worker image
2 parents 5541d42 + 5164dab commit 496e4e3

17 files changed

Lines changed: 1566 additions & 554 deletions

File tree

.github/workflows/docker.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ jobs:
4444
- name: Checkout code
4545
uses: actions/checkout@v4
4646

47+
- name: Set up QEMU
48+
uses: docker/setup-qemu-action@v3
49+
4750
- name: Set up Docker Buildx
4851
uses: docker/setup-buildx-action@v3
4952

@@ -79,7 +82,7 @@ jobs:
7982
labels: ${{ steps.meta.outputs.labels }}
8083
cache-from: type=gha
8184
cache-to: type=gha,mode=max
82-
platforms: linux/amd64
85+
platforms: linux/amd64,linux/arm64
8386

8487
# Notify on completion
8588
notify:

backend/Dockerfile

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,11 @@ WORKDIR /app
2121

2222
# Install docker CLI for local worker spawn feature
2323
# Using Docker 27.x for API version 1.47 compatibility
24-
RUN apt-get update && apt-get install -y --no-install-recommends \
25-
curl \
26-
&& curl -fsSL https://download.docker.com/linux/static/stable/x86_64/docker-27.4.1.tgz | tar xz --strip-components=1 -C /usr/local/bin docker/docker \
24+
# Auto-detect architecture (x86_64 or aarch64)
25+
RUN apt-get update && apt-get install -y --no-install-recommends curl \
26+
&& ARCH=$(uname -m) \
27+
&& if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then DOCKER_ARCH="aarch64"; else DOCKER_ARCH="x86_64"; fi \
28+
&& curl -fsSL "https://download.docker.com/linux/static/stable/${DOCKER_ARCH}/docker-27.4.1.tgz" | tar xz --strip-components=1 -C /usr/local/bin docker/docker \
2729
&& rm -rf /var/lib/apt/lists/*
2830

2931
# Copy installed packages from builder

backend/app/api/workers.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,9 @@ def _generate_docker_command(token: str, name: str, backend_url: str) -> str:
531531
532532
Command is single-line for cross-platform compatibility (Linux/Mac/Windows).
533533
"""
534+
from app.config import get_settings
535+
536+
settings = get_settings()
534537
return (
535538
f"docker run -d --name lmstack-worker --restart unless-stopped "
536539
f"--network host --gpus all --privileged "
@@ -540,7 +543,7 @@ def _generate_docker_command(token: str, name: str, backend_url: str) -> str:
540543
f"-e BACKEND_URL={backend_url} "
541544
f"-e WORKER_NAME={name} "
542545
f"-e REGISTRATION_TOKEN={token} "
543-
f"infinirc/lmstack-worker:latest"
546+
f"{settings.worker_image}"
544547
)
545548

546549

backend/app/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ class Settings(BaseSettings):
3232
# Worker settings
3333
worker_heartbeat_interval: int = 10 # seconds between status checks
3434
worker_timeout: int = 30 # seconds to consider worker offline
35+
worker_image: str = "infinirc/lmstack-worker:latest" # Docker image for local worker
3536

3637
# vLLM defaults
3738
vllm_default_image: str = "vllm/vllm-openai:latest"

backend/app/database.py

Lines changed: 84 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -37,85 +37,98 @@ async def get_db() -> AsyncSession:
3737
await session.close()
3838

3939

40+
def _get_column_type_sql(column) -> str:
    """Convert a SQLAlchemy column's type to a SQLite column type string.

    Args:
        column: Object exposing a ``type`` attribute that holds a SQLAlchemy
            type instance (e.g. a ``Column``).

    Returns:
        One of ``INTEGER``, ``TEXT``, ``BOOLEAN``, ``FLOAT``, ``DATETIME``,
        ``JSON`` or ``VARCHAR(n)``; ``TEXT`` when the type is not recognised.
    """
    from sqlalchemy import JSON, Boolean, DateTime, Float, Integer, String, Text

    col_type = column.type

    # TypeDecorator wrappers are not subclasses of the type they wrap; they keep
    # the wrapped type instance on ``impl``, so classify that instead.
    wrapped = getattr(col_type, "impl", None)
    if wrapped is not None and not isinstance(wrapped, type):
        col_type = wrapped

    # isinstance (rather than exact class / class-name matching) so subclasses
    # such as BigInteger or project-specific types are classified by what they
    # are. Text must be tested before String because Text subclasses String.
    fixed_names = (
        (Integer, "INTEGER"),
        (Text, "TEXT"),
        (Boolean, "BOOLEAN"),
        (Float, "FLOAT"),
        (DateTime, "DATETIME"),
        (JSON, "JSON"),
    )
    for sa_type, sql_name in fixed_names:
        if isinstance(col_type, sa_type):
            return sql_name

    if isinstance(col_type, String):
        length = getattr(col_type, "length", None)
        return f"VARCHAR({length})" if length else "VARCHAR(255)"

    # Default fallback for anything unrecognised
    return "TEXT"
64+
65+
4066
def _sqlite_default_clause(column) -> str:
    """Return a ``" DEFAULT <literal>"`` clause for *column*, or ``""`` if none applies.

    Only plain scalar defaults (str, bool, int, float) are emitted, because
    SQLite's ``ADD COLUMN`` needs a constant default. Callable defaults (for
    example timestamp factories) and non-scalar values are skipped: baking one
    call result or a Python repr into the DDL would be wrong or invalid SQL.
    """
    default = column.default
    # Not every default object carries ``arg`` (e.g. sequences), so read it defensively.
    value = getattr(default, "arg", None) if default is not None else None
    if value is None or callable(value):
        return ""
    # bool must be tested before int because bool is a subclass of int.
    if isinstance(value, bool):
        return f" DEFAULT {1 if value else 0}"
    if isinstance(value, int):
        return f" DEFAULT {int(value)}"
    if isinstance(value, float):
        return f" DEFAULT {float(value)}"
    if isinstance(value, str):
        # Double embedded single quotes so the literal cannot break the statement.
        escaped = value.replace("'", "''")
        return f" DEFAULT '{escaped}'"
    return ""


async def _run_migrations(conn):
    """Auto-detect and add missing columns by comparing models with the database schema.

    For every table registered on ``Base.metadata`` that already exists in the
    SQLite database, each model column absent from ``PRAGMA table_info`` is
    added with ``ALTER TABLE ... ADD COLUMN``. Tables that do not exist yet are
    skipped. Failures are logged as warnings and never raised, so one column
    that cannot be added does not block the rest.

    Args:
        conn: Async connection whose ``execute`` is awaited for each statement.
    """
    from sqlalchemy import text

    def quoted(identifier: str) -> str:
        """Double-quote an identifier so reserved words and odd names stay valid SQL."""
        return '"' + identifier.replace('"', '""') + '"'

    async def get_table_columns(table_name: str) -> set[str]:
        """Get all column names from a database table."""
        result = await conn.execute(text(f"PRAGMA table_info({quoted(table_name)})"))
        return {row[1] for row in result.fetchall()}

    async def table_exists(table_name: str) -> bool:
        """Check if a table exists in the database."""
        result = await conn.execute(
            text("SELECT name FROM sqlite_master WHERE type='table' AND name=:name"),
            {"name": table_name},
        )
        return result.fetchone() is not None

    # Iterate through all tables defined in models
    for table_name, table in Base.metadata.tables.items():
        # Skip if table doesn't exist yet (will be created by create_all)
        if not await table_exists(table_name):
            continue

        try:
            existing_columns = await get_table_columns(table_name)
        except Exception as e:
            # Without the current schema every column would look missing, so
            # skip the table instead of issuing ALTERs that are bound to fail.
            logger.warning(f"Auto-migration: could not inspect table {table_name}: {e}")
            continue

        for column in table.columns:
            if column.name in existing_columns:
                continue

            col_type = _get_column_type_sql(column)
            default_clause = _sqlite_default_clause(column)
            # SQLite only accepts NOT NULL on an added column when a non-NULL
            # default is supplied, so the constraint is kept only in that case.
            not_null_clause = " NOT NULL" if default_clause and column.nullable is False else ""

            sql = (
                f"ALTER TABLE {quoted(table_name)} ADD COLUMN {quoted(column.name)} "
                f"{col_type}{default_clause}{not_null_clause}"
            )

            logger.info(f"Auto-migration: Adding '{column.name}' column to {table_name}...")
            try:
                await conn.execute(text(sql))
                logger.info(f"Column '{column.name}' added to {table_name}!")
            except Exception as e:
                logger.warning(f"Failed to add column {column.name} to {table_name}: {e}")
115124

116125

117126
async def init_db():
118127
"""Initialize database tables and run migrations"""
128+
# Import all models to register them with Base.metadata
129+
# This ensures all tables are created by create_all()
130+
import app.models # noqa: F401
131+
119132
try:
120133
async with engine.begin() as conn:
121134
await conn.run_sync(Base.metadata.create_all)

backend/app/services/deployment_sync.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,69 @@ async def check_with_semaphore(deployment: Deployment):
122122

123123
return stats
124124

125+
def _is_native_deployment(self, deployment: Deployment) -> bool:
    """Tell whether a deployment runs as a native Mac process instead of in Docker.

    A deployment counts as native when any of these holds:
      * its container_id starts with "native-" (e.g. "native-123");
      * its backend is "mlx" or "llama_cpp";
      * its backend is Ollama and its worker reports ``is_mac``.
    """
    container_id = deployment.container_id
    if container_id and container_id.startswith("native-"):
        return True

    backend = deployment.backend

    # Backends that only ever run natively on Mac
    if backend in {"mlx", "llama_cpp"}:
        return True

    # Ollama is native only when it is hosted on a Mac worker
    if backend != BackendType.OLLAMA.value:
        return False

    worker = deployment.worker
    return bool(worker and worker.is_mac)
142+
143+
async def _check_native_deployment(self, deployment: Deployment) -> str:
    """Probe the API of a native Mac deployment and update its status.

    Native deployments are processes rather than Docker containers, so the
    only signal available is whether the API endpoint answers.

    Returns:
        "skipped" when the worker is not online (status left untouched),
        "running_verified" when the API responds, and "api_not_ready" when it
        does not respond or the check itself raises.
    """
    try:
        worker = deployment.worker

        # An offline worker may simply be reconnecting: leave the status alone
        # and let a later health-check pass retry.
        if worker.status != "online":
            logger.info(
                f"Native deployment {deployment.name}: worker offline, "
                "keeping current status (may be reconnecting)"
            )
            return "skipped"

        # No container name exists for a native process, hence the trailing None.
        responding = await self._check_api_health(
            worker.address,
            deployment.port,
            deployment.backend,
            None,
        )

        if not responding:
            # The process may have died or not started yet; STARTING (not
            # ERROR) keeps the deployment eligible for a retry.
            deployment.status = DeploymentStatus.STARTING.value
            deployment.status_message = "Native process not responding. Waiting for recovery..."
            logger.info(f"Native deployment {deployment.name}: API not responding, waiting...")
            return "api_not_ready"

        running = DeploymentStatus.RUNNING.value
        if deployment.status != running:
            deployment.status = running
            deployment.status_message = "Model ready (native process verified)"
        logger.info(f"Native deployment {deployment.name}: healthy")
        return "running_verified"

    except Exception as e:
        logger.error(f"Error checking native deployment {deployment.name}: {e}")
        deployment.status = DeploymentStatus.STARTING.value
        deployment.status_message = f"Checking status: {e}"
        return "api_not_ready"
187+
125188
async def _check_and_update_deployment(self, deployment: Deployment, db) -> str:
126189
"""Check a single deployment and update its status.
127190
@@ -134,6 +197,10 @@ async def _check_and_update_deployment(self, deployment: Deployment, db) -> str:
134197
logger.warning(f"Deployment {deployment.id} has no worker, skipping")
135198
return "skipped"
136199

200+
# Check if this is a native deployment (Mac without Docker)
201+
if self._is_native_deployment(deployment):
202+
return await self._check_native_deployment(deployment)
203+
137204
if not deployment.container_id:
138205
# If deployment is still starting, skip it
139206
if deployment.status == DeploymentStatus.STARTING.value:

backend/app/services/local_worker.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,12 +205,24 @@ def spawn_docker_worker(
205205
backend_url: str,
206206
registration_token: str,
207207
container_name: str = "lmstack-worker",
208+
worker_image: str | None = None,
208209
) -> dict:
209210
"""Spawn a Docker worker container on the local machine.
210211
212+
Args:
213+
worker_name: Name for the worker
214+
backend_url: URL of the backend server
215+
registration_token: Token for worker registration
216+
container_name: Name for the Docker container
217+
worker_image: Docker image to use (defaults to settings.worker_image)
218+
211219
Returns:
212220
dict with keys: success, message, container_id (if success)
213221
"""
222+
from app.config import get_settings
223+
224+
settings = get_settings()
225+
image = worker_image or settings.worker_image
214226
# On Mac, ensure Ollama is running with external access before starting Docker
215227
if platform.system() == "Darwin":
216228
logger.info("Mac detected, ensuring Ollama is running with external access...")
@@ -270,11 +282,11 @@ def spawn_docker_worker(
270282
f"WORKER_NAME={worker_name}",
271283
"-e",
272284
f"REGISTRATION_TOKEN={registration_token}",
273-
"infinirc/lmstack-worker:latest",
285+
image,
274286
]
275287

276288
try:
277-
logger.info(f"Spawning Docker worker: {worker_name}")
289+
logger.info(f"Spawning Docker worker: {worker_name} with image {image}")
278290
result = subprocess.run(
279291
cmd,
280292
capture_output=True,

backend/pyproject.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ dependencies = [
1919
"httpx>=0.26.0",
2020
"docker>=7.0.0",
2121
"python-multipart>=0.0.6",
22+
"python-jose[cryptography]>=3.3.0",
23+
"email-validator>=2.0.0",
24+
"psutil>=5.9.0",
25+
"optuna>=3.5.0",
26+
"openai>=1.0.0",
2227
]
2328

2429
[project.optional-dependencies]

backend/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@ python-jose[cryptography]>=3.3.0
1111
email-validator>=2.0.0
1212
psutil>=5.9.0
1313
optuna>=3.5.0
14+
openai>=1.0.0

0 commit comments

Comments
 (0)