Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions api/src/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class Settings(BaseSettings):
)
use_gpu: bool = True # Whether to use GPU acceleration if available
device_type: str | None = (
None # Will be auto-detected if None, can be "cuda", "mps", or "cpu"
None # Will be auto-detected if None, can be "cuda", "mps", "xpu", or "cpu"
)
allow_local_voice_saving: bool = (
False # Whether to allow saving combined voices locally
Expand Down Expand Up @@ -59,7 +59,7 @@ class Settings(BaseSettings):
cors_enabled: bool = True # Whether to enable CORS

# Temp File Settings for WEB Ui
temp_file_dir: str = "api/temp_files" # Directory for temporary audio files (relative to project root)
temp_file_dir: str = "/tmp/kokoro_temp" # Directory for temporary audio files
max_temp_dir_size_mb: int = 2048 # Maximum size of temp directory (2GB)
max_temp_dir_age_hours: int = 1 # Remove temp files older than 1 hour
max_temp_dir_count: int = 3 # Maximum number of temp files to keep
Expand All @@ -80,6 +80,16 @@ def get_device(self) -> str:
return "mps"
elif torch.cuda.is_available():
return "cuda"

# Check for Intel GPU (XPU)
try:
import intel_extension_for_pytorch as ipex

if hasattr(torch, "xpu") and torch.xpu.is_available():
return "xpu"
except ImportError:
pass

return "cpu"


Expand Down
23 changes: 23 additions & 0 deletions api/src/inference/kokoro_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ async def load_model(self, path: str) -> None:
self._model = self._model.to(torch.device("mps"))
elif self._device == "cuda":
self._model = self._model.cuda()
elif self._device == "xpu":
import intel_extension_for_pytorch as ipex
self._model = self._model.to("xpu")
else:
self._model = self._model.cpu()

Expand Down Expand Up @@ -334,6 +337,13 @@ def _check_memory(self) -> bool:
if self._device == "cuda":
memory_gb = torch.cuda.memory_allocated() / 1e9
return memory_gb > model_config.pytorch_gpu.memory_threshold
elif self._device == "xpu":
try:
import intel_extension_for_pytorch as ipex
memory_gb = torch.xpu.memory_allocated() / 1e9
return memory_gb > model_config.pytorch_gpu.memory_threshold
except (ImportError, AttributeError):
pass
# MPS doesn't provide memory management APIs
return False

Expand All @@ -346,6 +356,13 @@ def _clear_memory(self) -> None:
# Empty cache if available (future-proofing)
if hasattr(torch.mps, "empty_cache"):
torch.mps.empty_cache()
elif self._device == "xpu":
try:
import intel_extension_for_pytorch as ipex
torch.xpu.empty_cache()
torch.xpu.synchronize()
except (ImportError, AttributeError):
pass

def unload(self) -> None:
"""Unload model and free resources."""
Expand All @@ -358,6 +375,12 @@ def unload(self) -> None:
if torch.cuda.is_available():
torch.cuda.empty_cache()
torch.cuda.synchronize()
try:
if hasattr(torch, "xpu") and torch.xpu.is_available():
torch.xpu.empty_cache()
torch.xpu.synchronize()
except Exception:
pass

@property
def is_loaded(self) -> bool:
Expand Down
2 changes: 1 addition & 1 deletion api/src/inference/model_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(self, config: Optional[ModelConfig] = None):

def _determine_device(self) -> str:
    """Return the torch device string to run inference on.

    Delegates to ``settings.get_device()``, which auto-detects the best
    available backend ("cuda", "mps", "xpu") and falls back to "cpu",
    honouring the ``use_gpu`` setting.

    Returns:
        Device identifier string suitable for ``torch.device(...)``.
    """
    # NOTE: the old hard-coded `"cuda" if settings.use_gpu else "cpu"`
    # line was superseded by centralized detection in Settings.get_device();
    # keeping both left an unreachable duplicate return, removed here.
    return settings.get_device()

async def initialize(self) -> None:
"""Initialize Kokoro V1 backend."""
Expand Down
2 changes: 2 additions & 0 deletions api/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ async def lifespan(app: FastAPI):
startup_msg += "\nUsing Apple Metal Performance Shaders (MPS)"
elif device == "cuda":
startup_msg += f"\nCUDA: {torch.cuda.is_available()}"
elif device == "xpu":
startup_msg += f"\nXPU: {torch.xpu.is_available()}"
else:
startup_msg += "\nRunning on CPU"
startup_msg += f"\n{voicepack_count} voice packs loaded"
Expand Down
43 changes: 43 additions & 0 deletions docker/intel/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Kokoro-FastAPI: Intel GPU (XPU) Setup

This directory contains the configuration to run Kokoro-FastAPI leveraging Intel GPUs (Arc, Data Center, and Integrated Graphics) through the Intel Extension for PyTorch (IPEX).

## Features
- **Intel XPU Acceleration**: Uses IPEX 2.5.10 + PyTorch 2.5.1 for optimized inference.
- **Automated Driver Stack**: Integrated with Intel's reference driver installation script.
- **Secure Non-Root User**: Runs as a standard `appuser` with correct GPU group permissions.

## Requirements
- **Host Drivers**: Ensure your host has the Intel GPU drivers installed (Compute Runtime, Level Zero).
- **Docker**: Version 20.10+ with `device_cgroup_rules` support.

## Usage

### 1. Build and Start
Run the following command from the project root:
```bash
docker compose -f docker/intel/docker-compose.yml up --build
```

### 2. Verify GPU Access
Check the container logs for XPU registration messages or run:
```bash
docker compose -f docker/intel/docker-compose.yml exec kokoro-tts python -c "import torch; import intel_extension_for_pytorch; print(f'XPU Available: {torch.xpu.is_available()}')"
```

## Technical Details
- **Base Image**: Ubuntu 24.04 (required for glibc compatibility).
- **Versions**:
- Python: 3.12
- Torch: 2.5.1
- IPEX: 2.5.10+xpu
- **Groups**: The container uses a default `RENDER_GID=992`. If your host's `render` group has a different ID, pass it as a build argument:
```bash
docker compose -f docker/intel/docker-compose.yml build --build-arg RENDER_GID=$(stat -c '%g' /dev/dri/renderD128)
```

## Troubleshooting
If you see `RuntimeError: Native API failed`, check:
1. That `/dev/dri` is correctly mapped.
2. That your host user has permissions to access `/dev/dri` (usually by being in the `video` or `render` groups).
3. That the `RENDER_GID` build argument matches your host's GID.
14 changes: 14 additions & 0 deletions docker/intel/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
# Container entrypoint: optionally downloads the model, then starts the
# FastAPI server via uv/uvicorn.  Permissions are expected to be handled
# by the Dockerfile/Compose (container runs as 'appuser').
set -e

# Optionally fetch model weights before starting the server.
if [ "$DOWNLOAD_MODEL" = "true" ]; then
    echo "Downloading model..."
    python download_model.py --output api/src/models/v1_0
fi

# Fail fast if DEVICE is unset/empty: previously the unquoted expansion
# made the word vanish, so '--extra' would silently consume '--no-sync'
# as its argument instead of a device extra.
if [ -z "$DEVICE" ]; then
    echo "Error: DEVICE environment variable must be set (e.g. cpu, gpu, intel)." >&2
    exit 1
fi

echo "Starting Application..."
# 'exec' replaces the shell so uvicorn receives signals (e.g. SIGTERM) directly.
exec uv run --extra "$DEVICE" --no-sync python -m uvicorn api.src.main:app --host 0.0.0.0 --port 8880 --log-level debug
99 changes: 99 additions & 0 deletions docker/intel/install_ubuntu_gpu_drivers.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/bin/bash
#
# Copyright (c) 2021 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Install a pinned Intel GPU (XPU) user-space driver stack inside the image.
# INSTALL_DRIVER_VERSION selects one of the known compute-runtime releases
# below; each branch downloads the matching compute-runtime / IGC .deb
# packages from the Intel GitHub releases and installs them with dpkg.
# The fallback (*) branch instead installs from a local /drivers/<version>
# directory exposed as an ad-hoc apt repository.
set -e

if [ -z "$INSTALL_DRIVER_VERSION" ]; then
echo "Error: INSTALL_DRIVER_VERSION cannot be empty."
exit 1
fi

# NOTE(review): every line of the case construct below ends in '\', so the
# whole thing parses as ONE command line — do not insert comments inside it,
# they would comment out the remainder of the joined line.
apt-get update && apt-get install -y libnuma1 ocl-icd-libopencl1 --no-install-recommends && rm -rf /var/lib/apt/lists/* && \
case $INSTALL_DRIVER_VERSION in \
"24.26.30049") \
mkdir /tmp/gpu_deps && cd /tmp/gpu_deps ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/24.26.30049.6/intel-level-zero-gpu_1.3.30049.6_amd64.deb ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/24.26.30049.6/intel-opencl-icd_24.26.30049.6_amd64.deb ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/24.26.30049.6/libigdgmm12_22.3.20_amd64.deb ; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17193.4/intel-igc-core_1.0.17193.4_amd64.deb ; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17193.4/intel-igc-opencl_1.0.17193.4_amd64.deb ; \
dpkg -i *.deb && rm -Rf /tmp/gpu_deps ; \
;; \
"24.39.31294") \
mkdir /tmp/gpu_deps && cd /tmp/gpu_deps ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-level-zero-gpu_1.6.31294.12_amd64.deb ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-opencl-icd_24.39.31294.12_amd64.deb ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/libigdgmm12_22.5.2_amd64.deb ; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-core_1.0.17791.9_amd64.deb ; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-opencl_1.0.17791.9_amd64.deb ; \
dpkg -i *.deb && rm -Rf /tmp/gpu_deps ; \
;; \
"24.52.32224") \
mkdir /tmp/gpu_deps && cd /tmp/gpu_deps ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-level-zero-gpu_1.6.32224.5_amd64.deb ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-opencl-icd_24.52.32224.5_amd64.deb ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/libigdgmm12_22.5.5_amd64.deb ; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/v2.5.6/intel-igc-core-2_2.5.6+18417_amd64.deb ; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/v2.5.6/intel-igc-opencl-2_2.5.6+18417_amd64.deb ; \
dpkg -i *.deb && rm -Rf /tmp/gpu_deps ; \
;; \
"25.31.34666") \
mkdir /tmp/gpu_deps && cd /tmp/gpu_deps ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.31.34666.3/libze-intel-gpu1_25.31.34666.3-0_amd64.deb; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.31.34666.3/intel-opencl-icd_25.31.34666.3-0_amd64.deb; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.31.34666.3/libigdgmm12_22.8.1_amd64.deb; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.31.34666.3/intel-ocloc_25.31.34666.3-0_amd64.deb; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/v2.16.0/intel-igc-core-2_2.16.0+19683_amd64.deb; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/v2.16.0/intel-igc-opencl-2_2.16.0+19683_amd64.deb; \
dpkg -i *.deb && rm -Rf /tmp/gpu_deps ; \
;; \
"25.35.35096") \
mkdir /tmp/gpu_deps && cd /tmp/gpu_deps ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.35.35096.9/libze-intel-gpu1_25.35.35096.9-0_amd64.deb; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.35.35096.9/intel-opencl-icd_25.35.35096.9-0_amd64.deb; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.35.35096.9/libigdgmm12_22.8.1_amd64.deb; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.35.35096.9/intel-ocloc_25.35.35096.9-0_amd64.deb; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/v2.18.5/intel-igc-core-2_2.18.5+19820_amd64.deb; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/v2.18.5/intel-igc-opencl-2_2.18.5+19820_amd64.deb; \
dpkg -i *.deb && rm -Rf /tmp/gpu_deps ; \
;; \
"25.48.36300") \
mkdir /tmp/gpu_deps && cd /tmp/gpu_deps ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.48.36300.8/libze-intel-gpu1_25.48.36300.8-0_amd64.deb; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.48.36300.8/intel-opencl-icd_25.48.36300.8-0_amd64.deb; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.48.36300.8/libigdgmm12_22.8.2_amd64.deb; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.48.36300.8/intel-ocloc_25.48.36300.8-0_amd64.deb; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/v2.24.8/intel-igc-core-2_2.24.8+20344_amd64.deb; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/v2.24.8/intel-igc-opencl-2_2.24.8+20344_amd64.deb; \
dpkg -i *.deb && rm -Rf /tmp/gpu_deps ; \
;; \
*) \
dpkg -P intel-gmmlib intel-igc-core intel-igc-opencl intel-level-zero-gpu intel-ocloc intel-opencl intel-opencl-icd && \
apt-get update && apt-get -y --no-install-recommends install dpkg-dev && rm -rf /var/lib/apt/lists/* && \
cd /drivers/${INSTALL_DRIVER_VERSION} && \
dpkg-scanpackages . > Packages && \
cd - ; \
echo "deb [trusted=yes arch=amd64] file:/drivers/${INSTALL_DRIVER_VERSION} ./" > /etc/apt/sources.list.d/intel-graphics-${INSTALL_DRIVER_VERSION}.list ; \
apt-get update && \
apt-get install -y --no-install-recommends \
intel-opencl-icd \
intel-level-zero-gpu level-zero \
intel-media-va-driver-non-free libmfx1 && \
rm -rf /var/lib/apt/lists/* ; \
esac

# Final cleanup: drop apt caches and any leftover temp files to keep the
# image layer small.
apt-get clean && rm -rf /var/lib/apt/lists/* && rm -rf /tmp/*
18 changes: 18 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ dependencies = [
"phonemizer-fork>=3.3.2",
"av>=14.2.0",
"text2num>=2.5.1",
"sentencepiece>=0.2.0",
"protobuf>=5.29.1",
]

[project.optional-dependencies]
Expand All @@ -49,6 +51,10 @@ rocm = [
"torch==2.8.0+rocm6.4",
"pytorch-triton-rocm>=3.2.0",
]
intel = [
"torch==2.5.1",
"intel-extension-for-pytorch==2.5.10+xpu",
]
test = [
"pytest==8.3.5",
"pytest-cov==6.0.0",
Expand All @@ -64,6 +70,7 @@ conflicts = [
{ extra = "cpu" },
{ extra = "gpu" },
{ extra = "rocm" },
{ extra = "intel" },
],
]
override-dependencies = [
Expand All @@ -75,10 +82,14 @@ torch = [
{ index = "pytorch-cpu", extra = "cpu" },
{ index = "pytorch-cuda", extra = "gpu" },
{ index = "pytorch-rocm", extra = "rocm" },
{ index = "pytorch-intel", extra = "intel" },
]
pytorch-triton-rocm = [
{ index = "pytorch-rocm", extra = "rocm" },
]
intel-extension-for-pytorch = [
{ index = "pytorch-intel", extra = "intel" },
]

[[tool.uv.index]]
name = "pytorch-cpu"
Expand All @@ -95,6 +106,13 @@ name = "pytorch-rocm"
url = "https://download.pytorch.org/whl/rocm6.4"
explicit = true

[[tool.uv.index]]
name = "pytorch-intel"
url = "https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
explicit = true



[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
Expand Down