Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions api/src/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class Settings(BaseSettings):
)
use_gpu: bool = True # Whether to use GPU acceleration if available
device_type: str | None = (
None # Will be auto-detected if None, can be "cuda", "mps", or "cpu"
None # Will be auto-detected if None, can be "cuda", "mps", "xpu", or "cpu"
)
allow_local_voice_saving: bool = (
False # Whether to allow saving combined voices locally
Expand Down Expand Up @@ -59,7 +59,7 @@ class Settings(BaseSettings):
cors_enabled: bool = True # Whether to enable CORS

# Temp File Settings for WEB Ui
temp_file_dir: str = "api/temp_files" # Directory for temporary audio files (relative to project root)
temp_file_dir: str = "/tmp/kokoro_temp" # Directory for temporary audio files
max_temp_dir_size_mb: int = 2048 # Maximum size of temp directory (2GB)
max_temp_dir_age_hours: int = 1 # Remove temp files older than 1 hour
max_temp_dir_count: int = 3 # Maximum number of temp files to keep
Expand All @@ -80,6 +80,16 @@ def get_device(self) -> str:
return "mps"
elif torch.cuda.is_available():
return "cuda"

# Check for Intel GPU (XPU)
try:
import intel_extension_for_pytorch as ipex

if hasattr(torch, "xpu") and torch.xpu.is_available():
return "xpu"
except ImportError:
pass

return "cpu"


Expand Down
23 changes: 23 additions & 0 deletions api/src/inference/kokoro_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ async def load_model(self, path: str) -> None:
self._model = self._model.to(torch.device("mps"))
elif self._device == "cuda":
self._model = self._model.cuda()
elif self._device == "xpu":
import intel_extension_for_pytorch as ipex
self._model = self._model.to("xpu")
else:
self._model = self._model.cpu()

Expand Down Expand Up @@ -334,6 +337,13 @@ def _check_memory(self) -> bool:
if self._device == "cuda":
memory_gb = torch.cuda.memory_allocated() / 1e9
return memory_gb > model_config.pytorch_gpu.memory_threshold
elif self._device == "xpu":
try:
import intel_extension_for_pytorch as ipex
memory_gb = torch.xpu.memory_allocated() / 1e9
return memory_gb > model_config.pytorch_gpu.memory_threshold
except (ImportError, AttributeError):
pass
# MPS doesn't provide memory management APIs
return False

Expand All @@ -346,6 +356,13 @@ def _clear_memory(self) -> None:
# Empty cache if available (future-proofing)
if hasattr(torch.mps, "empty_cache"):
torch.mps.empty_cache()
elif self._device == "xpu":
try:
import intel_extension_for_pytorch as ipex
torch.xpu.empty_cache()
torch.xpu.synchronize()
except (ImportError, AttributeError):
pass

def unload(self) -> None:
"""Unload model and free resources."""
Expand All @@ -358,6 +375,12 @@ def unload(self) -> None:
if torch.cuda.is_available():
torch.cuda.empty_cache()
torch.cuda.synchronize()
try:
if hasattr(torch, "xpu") and torch.xpu.is_available():
torch.xpu.empty_cache()
torch.xpu.synchronize()
except Exception:
pass

@property
def is_loaded(self) -> bool:
Expand Down
2 changes: 1 addition & 1 deletion api/src/inference/model_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(self, config: Optional[ModelConfig] = None):

def _determine_device(self) -> str:
    """Return the torch device string to run inference on.

    Delegates to ``settings.get_device()``, which auto-detects the best
    available backend ("cuda", "mps", "xpu") and falls back to "cpu",
    honouring the ``use_gpu`` setting.

    Returns:
        Device identifier string suitable for ``torch.device(...)``.
    """
    # NOTE: the old hard-coded `"cuda" if settings.use_gpu else "cpu"`
    # line was superseded by centralized detection in Settings.get_device();
    # keeping both left an unreachable duplicate return, removed here.
    return settings.get_device()

async def initialize(self) -> None:
"""Initialize Kokoro V1 backend."""
Expand Down
2 changes: 2 additions & 0 deletions api/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ async def lifespan(app: FastAPI):
startup_msg += "\nUsing Apple Metal Performance Shaders (MPS)"
elif device == "cuda":
startup_msg += f"\nCUDA: {torch.cuda.is_available()}"
elif device == "xpu":
startup_msg += f"\nXPU: {torch.xpu.is_available()}"
else:
startup_msg += "\nRunning on CPU"
startup_msg += f"\n{voicepack_count} voice packs loaded"
Expand Down
43 changes: 43 additions & 0 deletions docker/intel/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Kokoro-FastAPI: Intel GPU (XPU) Setup

This directory contains the configuration to run Kokoro-FastAPI leveraging Intel GPUs (Arc, Data Center, and Integrated Graphics) through the Intel Extension for PyTorch (IPEX).

## Features
- **Intel XPU Acceleration**: Uses IPEX 2.5.10 + PyTorch 2.5.1 for optimized inference.
- **Automated Driver Stack**: Integrated with Intel's reference driver installation script.
- **Secure Non-Root User**: Runs as a standard `appuser` with correct GPU group permissions.

## Requirements
- **Host Drivers**: Ensure your host has the Intel GPU drivers installed (Compute Runtime, Level Zero).
- **Docker**: Version 20.10+ with `device_cgroup_rules` support.

## Usage

### 1. Build and Start
Run the following command from the project root:
```bash
docker compose -f docker/intel/docker-compose.yml up --build
```

### 2. Verify GPU Access
Check the container logs for XPU registration messages or run:
```bash
docker compose -f docker/intel/docker-compose.yml exec kokoro-tts python -c "import torch; import intel_extension_for_pytorch; print(f'XPU Available: {torch.xpu.is_available()}')"
```

## Technical Details
- **Base Image**: Ubuntu 24.04 (required for glibc compatibility).
- **Versions**:
- Python: 3.12
- Torch: 2.5.1
- IPEX: 2.5.10+xpu
- **Groups**: The container uses a default `RENDER_GID=992`. If your host's `render` group has a different ID, pass it as a build argument:
```bash
docker compose -f docker/intel/docker-compose.yml build --build-arg RENDER_GID=$(stat -c '%g' /dev/dri/renderD128)
```

## Troubleshooting
If you see `RuntimeError: Native API failed`, check:
1. That `/dev/dri` is correctly mapped.
2. That your host user has permissions to access `/dev/dri` (usually by being in the `video` or `render` groups).
3. That the `RENDER_GID` build argument matches your host's GID.
14 changes: 14 additions & 0 deletions docker/intel/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
# Container entrypoint: optionally downloads the model, then starts the
# FastAPI server via uv/uvicorn.  Permissions are expected to be handled
# by the Dockerfile/Compose (container runs as 'appuser').
set -e

# Optionally fetch model weights before starting the server.
if [ "$DOWNLOAD_MODEL" = "true" ]; then
    echo "Downloading model..."
    python download_model.py --output api/src/models/v1_0
fi

# Fail fast if DEVICE is unset/empty: previously the unquoted expansion
# made the word vanish, so '--extra' would silently consume '--no-sync'
# as its argument instead of a device extra.
if [ -z "$DEVICE" ]; then
    echo "Error: DEVICE environment variable must be set (e.g. cpu, gpu, intel)." >&2
    exit 1
fi

echo "Starting Application..."
# 'exec' replaces the shell so uvicorn receives signals (e.g. SIGTERM) directly.
exec uv run --extra "$DEVICE" --no-sync python -m uvicorn api.src.main:app --host 0.0.0.0 --port 8880 --log-level debug
99 changes: 99 additions & 0 deletions docker/intel/install_ubuntu_gpu_drivers.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/bin/bash
#
# Copyright (c) 2021 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Install a pinned Intel GPU (XPU) user-space driver stack inside the image.
# INSTALL_DRIVER_VERSION selects one of the known compute-runtime releases
# below; each branch downloads the matching compute-runtime / IGC .deb
# packages from the Intel GitHub releases and installs them with dpkg.
# The fallback (*) branch instead installs from a local /drivers/<version>
# directory exposed as an ad-hoc apt repository.
set -e

if [ -z "$INSTALL_DRIVER_VERSION" ]; then
echo "Error: INSTALL_DRIVER_VERSION cannot be empty."
exit 1
fi

# NOTE(review): every line of the case construct below ends in '\', so the
# whole thing parses as ONE command line — do not insert comments inside it,
# they would comment out the remainder of the joined line.
apt-get update && apt-get install -y libnuma1 ocl-icd-libopencl1 --no-install-recommends && rm -rf /var/lib/apt/lists/* && \
case $INSTALL_DRIVER_VERSION in \
"24.26.30049") \
mkdir /tmp/gpu_deps && cd /tmp/gpu_deps ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/24.26.30049.6/intel-level-zero-gpu_1.3.30049.6_amd64.deb ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/24.26.30049.6/intel-opencl-icd_24.26.30049.6_amd64.deb ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/24.26.30049.6/libigdgmm12_22.3.20_amd64.deb ; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17193.4/intel-igc-core_1.0.17193.4_amd64.deb ; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17193.4/intel-igc-opencl_1.0.17193.4_amd64.deb ; \
dpkg -i *.deb && rm -Rf /tmp/gpu_deps ; \
;; \
"24.39.31294") \
mkdir /tmp/gpu_deps && cd /tmp/gpu_deps ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-level-zero-gpu_1.6.31294.12_amd64.deb ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-opencl-icd_24.39.31294.12_amd64.deb ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/libigdgmm12_22.5.2_amd64.deb ; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-core_1.0.17791.9_amd64.deb ; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-opencl_1.0.17791.9_amd64.deb ; \
dpkg -i *.deb && rm -Rf /tmp/gpu_deps ; \
;; \
"24.52.32224") \
mkdir /tmp/gpu_deps && cd /tmp/gpu_deps ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-level-zero-gpu_1.6.32224.5_amd64.deb ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-opencl-icd_24.52.32224.5_amd64.deb ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/libigdgmm12_22.5.5_amd64.deb ; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/v2.5.6/intel-igc-core-2_2.5.6+18417_amd64.deb ; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/v2.5.6/intel-igc-opencl-2_2.5.6+18417_amd64.deb ; \
dpkg -i *.deb && rm -Rf /tmp/gpu_deps ; \
;; \
"25.31.34666") \
mkdir /tmp/gpu_deps && cd /tmp/gpu_deps ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.31.34666.3/libze-intel-gpu1_25.31.34666.3-0_amd64.deb; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.31.34666.3/intel-opencl-icd_25.31.34666.3-0_amd64.deb; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.31.34666.3/libigdgmm12_22.8.1_amd64.deb; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.31.34666.3/intel-ocloc_25.31.34666.3-0_amd64.deb; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/v2.16.0/intel-igc-core-2_2.16.0+19683_amd64.deb; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/v2.16.0/intel-igc-opencl-2_2.16.0+19683_amd64.deb; \
dpkg -i *.deb && rm -Rf /tmp/gpu_deps ; \
;; \
"25.35.35096") \
mkdir /tmp/gpu_deps && cd /tmp/gpu_deps ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.35.35096.9/libze-intel-gpu1_25.35.35096.9-0_amd64.deb; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.35.35096.9/intel-opencl-icd_25.35.35096.9-0_amd64.deb; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.35.35096.9/libigdgmm12_22.8.1_amd64.deb; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.35.35096.9/intel-ocloc_25.35.35096.9-0_amd64.deb; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/v2.18.5/intel-igc-core-2_2.18.5+19820_amd64.deb; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/v2.18.5/intel-igc-opencl-2_2.18.5+19820_amd64.deb; \
dpkg -i *.deb && rm -Rf /tmp/gpu_deps ; \
;; \
"25.48.36300") \
mkdir /tmp/gpu_deps && cd /tmp/gpu_deps ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.48.36300.8/libze-intel-gpu1_25.48.36300.8-0_amd64.deb; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.48.36300.8/intel-opencl-icd_25.48.36300.8-0_amd64.deb; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.48.36300.8/libigdgmm12_22.8.2_amd64.deb; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/25.48.36300.8/intel-ocloc_25.48.36300.8-0_amd64.deb; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/v2.24.8/intel-igc-core-2_2.24.8+20344_amd64.deb; \
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/v2.24.8/intel-igc-opencl-2_2.24.8+20344_amd64.deb; \
dpkg -i *.deb && rm -Rf /tmp/gpu_deps ; \
;; \
*) \
dpkg -P intel-gmmlib intel-igc-core intel-igc-opencl intel-level-zero-gpu intel-ocloc intel-opencl intel-opencl-icd && \
apt-get update && apt-get -y --no-install-recommends install dpkg-dev && rm -rf /var/lib/apt/lists/* && \
cd /drivers/${INSTALL_DRIVER_VERSION} && \
dpkg-scanpackages . > Packages && \
cd - ; \
echo "deb [trusted=yes arch=amd64] file:/drivers/${INSTALL_DRIVER_VERSION} ./" > /etc/apt/sources.list.d/intel-graphics-${INSTALL_DRIVER_VERSION}.list ; \
apt-get update && \
apt-get install -y --no-install-recommends \
intel-opencl-icd \
intel-level-zero-gpu level-zero \
intel-media-va-driver-non-free libmfx1 && \
rm -rf /var/lib/apt/lists/* ; \
esac

# Final cleanup: drop apt caches and any leftover temp files to keep the
# image layer small.
apt-get clean && rm -rf /var/lib/apt/lists/* && rm -rf /tmp/*
18 changes: 18 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ dependencies = [
"phonemizer-fork>=3.3.2",
"av>=14.2.0",
"text2num>=2.5.1",
"sentencepiece>=0.2.0",
"protobuf>=5.29.1",
]

[project.optional-dependencies]
Expand All @@ -49,6 +51,10 @@ rocm = [
"torch==2.8.0+rocm6.4",
"pytorch-triton-rocm>=3.2.0",
]
intel = [
"torch==2.5.1",
"intel-extension-for-pytorch==2.5.10+xpu",
]
test = [
"pytest==8.3.5",
"pytest-cov==6.0.0",
Expand All @@ -64,6 +70,7 @@ conflicts = [
{ extra = "cpu" },
{ extra = "gpu" },
{ extra = "rocm" },
{ extra = "intel" },
],
]
override-dependencies = [
Expand All @@ -75,10 +82,14 @@ torch = [
{ index = "pytorch-cpu", extra = "cpu" },
{ index = "pytorch-cuda", extra = "gpu" },
{ index = "pytorch-rocm", extra = "rocm" },
{ index = "pytorch-intel", extra = "intel" },
]
pytorch-triton-rocm = [
{ index = "pytorch-rocm", extra = "rocm" },
]
intel-extension-for-pytorch = [
{ index = "pytorch-intel", extra = "intel" },
]

[[tool.uv.index]]
name = "pytorch-cpu"
Expand All @@ -95,6 +106,13 @@ name = "pytorch-rocm"
url = "https://download.pytorch.org/whl/rocm6.4"
explicit = true

[[tool.uv.index]]
name = "pytorch-intel"
url = "https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
explicit = true



[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
Expand Down