Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ jobs:
args: "--target x86_64-apple-darwin"
python-version: "3.12"
backend: "pytorch"
# - platform: 'ubuntu-22.04'
# args: ''
# python-version: '3.12'
# backend: 'pytorch'
- platform: "ubuntu-22.04"
args: ""
python-version: "3.12"
backend: "pytorch"
- platform: "windows-latest"
args: ""
python-version: "3.12"
Expand Down
12 changes: 6 additions & 6 deletions backend/backends/pytorch_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,7 @@ def _generate_sync():
"small": "openai/whisper-small",
"medium": "openai/whisper-medium",
"large": "openai/whisper-large-v3",
"turbo": "openai/whisper-large-v3-turbo",
}


Expand Down Expand Up @@ -591,21 +592,20 @@ def _transcribe_sync():
)
inputs = inputs.to(self.device)

# Set language if provided
forced_decoder_ids = None
# Generate transcription
# If language is provided, force it; otherwise let Whisper auto-detect
generate_kwargs = {}
if language:
# Support all languages from frontend: en, zh, ja, ko, de, fr, ru, pt, es, it
# Whisper supports these and many more
forced_decoder_ids = self.processor.get_decoder_prompt_ids(
language=language,
task="transcribe",
)
generate_kwargs["forced_decoder_ids"] = forced_decoder_ids

# Generate transcription
with torch.no_grad():
predicted_ids = self.model.generate(
inputs["input_features"],
forced_decoder_ids=forced_decoder_ids,
**generate_kwargs,
)

# Decode
Expand Down
41 changes: 37 additions & 4 deletions backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,25 @@
import asyncio
import uvicorn
import argparse
import torch
import tempfile
import io
from pathlib import Path
import uuid
import asyncio
import signal
import os

# Set HSA_OVERRIDE_GFX_VERSION for AMD GPUs that aren't officially listed in ROCm
# (e.g., RX 6600 is gfx1032 which maps to gfx1030 target)
# This must be set BEFORE any torch.cuda calls
if not os.environ.get("HSA_OVERRIDE_GFX_VERSION"):
os.environ["HSA_OVERRIDE_GFX_VERSION"] = "10.3.0"

# Suppress noisy MIOpen workspace warnings on AMD GPUs
if not os.environ.get("MIOPEN_LOG_LEVEL"):
os.environ["MIOPEN_LOG_LEVEL"] = "4"

import torch
from urllib.parse import quote


Expand Down Expand Up @@ -1022,11 +1033,12 @@ async def transcribe_audio(
# Transcribe
whisper_model = transcribe.get_whisper_model()

# Check if Whisper model is downloaded (uses default size "base")
# Check if Whisper model is downloaded
model_size = whisper_model.model_size
# Map model sizes to HF repo IDs (whisper-large needs -v3 suffix)
# Map model sizes to HF repo IDs (some need special suffixes)
whisper_hf_repos = {
"large": "openai/whisper-large-v3",
"turbo": "openai/whisper-large-v3-turbo",
}
model_name = whisper_hf_repos.get(model_size, f"openai/whisper-{model_size}")

Expand Down Expand Up @@ -1490,6 +1502,13 @@ def check_chatterbox_loaded():
"model_size": "large",
"check_loaded": lambda: check_whisper_loaded("large"),
},
{
"model_name": "whisper-turbo",
"display_name": "Whisper Turbo",
"hf_repo_id": "openai/whisper-large-v3-turbo",
"model_size": "turbo",
"check_loaded": lambda: check_whisper_loaded("turbo"),
},
]

# Build a mapping of model_name -> hf_repo_id so we can check if shared repos are downloading
Expand Down Expand Up @@ -1684,6 +1703,10 @@ async def trigger_model_download(request: models.ModelDownloadRequest):
"model_size": "large",
"load_func": lambda: transcribe.get_whisper_model().load_model("large"),
},
"whisper-turbo": {
"model_size": "turbo",
"load_func": lambda: transcribe.get_whisper_model().load_model("turbo"),
},
}

if request.model_name not in model_configs:
Expand Down Expand Up @@ -1810,6 +1833,11 @@ async def delete_model(model_name: str):
"model_size": "large",
"model_type": "whisper",
},
"whisper-turbo": {
"hf_repo_id": "openai/whisper-large-v3-turbo",
"model_size": "turbo",
"model_type": "whisper",
},
}

if model_name not in model_configs:
Expand Down Expand Up @@ -2044,7 +2072,12 @@ def _get_gpu_status() -> str:
"""Get GPU availability status."""
backend_type = get_backend_type()
if torch.cuda.is_available():
return f"CUDA ({torch.cuda.get_device_name(0)})"
device_name = torch.cuda.get_device_name(0)
# Check if this is ROCm (AMD) or CUDA (NVIDIA)
is_rocm = hasattr(torch.version, 'hip') and torch.version.hip is not None
if is_rocm:
return f"ROCm ({device_name})"
return f"CUDA ({device_name})"
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
return "MPS (Apple Silicon)"
elif backend_type == "mlx":
Expand Down
Loading