Skip to content

Release

Release #240

Workflow file for this run

# Builds llama.cpp packages for several backends and, on scheduled or
# explicitly requested runs, publishes them as a GitHub release.
name: Release

on:
  # Runs at minute 0 of every 6th hour (four times a day).
  schedule:
    - cron: "0 */6 * * *"

  # Manual trigger; the caller must say whether a release should be created.
  workflow_dispatch:
    inputs:
      create_release:
        description: "Create new release"
        required: true
        type: boolean

  # Pull requests only validate the build: the release job is skipped for
  # them, so nothing gets published.
  pull_request:
    paths:
      # llama.cpp sources are cloned from upstream by the build jobs, and the
      # get-tag-name action is consumed through a pinned @lemonade ref, so the
      # only file whose changes can alter a PR run is this workflow itself.
      - ".github/workflows/release.yml"

# github.head_ref is only populated for pull_request events, so PR runs share
# one group per ref (a newer run cancels the older one) while every other run
# falls back to its own run_id and is never cancelled by a sibling.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

env:
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  # Extra CMake switches that build steps append via env.CMAKE_ARGS.
  CMAKE_ARGS: "-DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON"
jobs:
# Linux x64 HIP/ROCm build of upstream llama.cpp, packaged as a .tar.gz whose
# entries are rewritten to sit under llama-<tag>/.
ubuntu-22-rocm:
  runs-on: ubuntu-22.04
  strategy:
    matrix:
      include:
        - ROCM_VERSION: "7.13.0"
          gpu_targets: "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1010;gfx1011;gfx1012;gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036;gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1152;gfx1200;gfx1201"
          build: x64
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
      with:
        fetch-depth: 0
        repository: ggml-org/llama.cpp

    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: ubuntu-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}
        evict-old-files: 1d

    - name: Dependencies
      id: depends
      run: |
        # Refresh the package index before installing: the index baked into
        # the runner image can be stale, which makes installs fail on
        # packages that have since been superseded on the mirror.
        sudo apt-get update
        sudo apt-get install -y build-essential git cmake wget

    # Installs ROCm from AMD's apt repository. Only selected for ROCm 7.2.1,
    # which the matrix above does not currently contain.
    - name: Setup Legacy ROCm
      if: matrix.ROCM_VERSION == '7.2.1'
      id: legacy_env
      run: |
        sudo mkdir --parents --mode=0755 /etc/apt/keyrings
        wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
          gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
        sudo tee /etc/apt/sources.list.d/rocm.list << EOF
        deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${{ matrix.ROCM_VERSION }} jammy main
        EOF
        sudo tee /etc/apt/preferences.d/rocm-pin-600 << EOF
        Package: *
        Pin: release o=repo.radeon.com
        Pin-Priority: 600
        EOF
        sudo apt update
        sudo apt-get install -y libssl-dev rocm-hip-sdk

    # Every other ROCm version is taken from a TheRock tarball unpacked into
    # ./install; later steps find it through the exported environment.
    - name: Setup TheRock
      if: matrix.ROCM_VERSION != '7.2.1'
      id: therock_env
      run: |
        wget https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx1151-${{ matrix.ROCM_VERSION }}.tar.gz
        mkdir install
        tar -xf *.tar.gz -C install
        export ROCM_PATH=$(pwd)/install
        # Quoted so the values reach the env file verbatim even if a path
        # ever contains whitespace.
        echo "ROCM_PATH=$ROCM_PATH" >> "$GITHUB_ENV"
        echo "PATH=$PATH:$ROCM_PATH/bin" >> "$GITHUB_ENV"
        echo "LD_LIBRARY_PATH=$ROCM_PATH/lib:$ROCM_PATH/llvm/lib:$ROCM_PATH/lib/rocprofiler-systems" >> "$GITHUB_ENV"

    - name: Build with native CMake HIP support
      id: cmake_build
      run: |
        cmake -B build -S . \
          -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
          -DCMAKE_BUILD_TYPE=Release \
          -DGGML_BACKEND_DL=ON \
          -DGGML_NATIVE=OFF \
          -DCMAKE_INSTALL_RPATH='$ORIGIN' \
          -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
          -DGGML_CPU_ALL_VARIANTS=ON \
          -DGPU_TARGETS="${{ matrix.gpu_targets }}" \
          -DGGML_HIP=ON \
          -DHIP_PLATFORM=amd \
          -DGGML_HIP_ROCWMMA_FATTN=ON \
          -DGGML_OPENMP=ON \
          ${{ env.CMAKE_ARGS }}
        cmake --build build --config Release -j $(nproc)

    - name: Determine tag name
      id: tag
      uses: lemonade-sdk/llama.cpp/.github/actions/get-tag-name@lemonade

    # Keeps only major.minor of the ROCm version for the artifact name.
    - name: Get ROCm short version
      run: echo "ROCM_VERSION_SHORT=$(echo '${{ matrix.ROCM_VERSION }}' | cut -d '.' -f 1,2)" >> $GITHUB_ENV

    - name: Pack artifacts
      id: pack_artifacts
      run: |
        cp LICENSE ./build/bin/
        tar -czvf llama-bin-ubuntu-rocm-${{ env.ROCM_VERSION_SHORT }}-${{ matrix.build }}.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .

    - name: Upload artifacts
      uses: actions/upload-artifact@v6
      with:
        path: llama-bin-ubuntu-rocm-${{ env.ROCM_VERSION_SHORT }}-${{ matrix.build }}.tar.gz
        name: llama-bin-ubuntu-rocm-${{ env.ROCM_VERSION_SHORT }}-${{ matrix.build }}.tar.gz
# Linux x64 CUDA build: one .tar.xz per compute capability, with the CUDA
# runtime libraries copied next to the binaries.
ubuntu-22-cuda:
  runs-on: ubuntu-22.04
  strategy:
    fail-fast: false
    matrix:
      # Pull requests build a single representative architecture because the
      # packaging steps are the same for every sm_*; scheduled and manually
      # dispatched runs build the whole list.
      sm: ${{ github.event_name == 'pull_request' && fromJSON('["sm_89"]') || fromJSON('["sm_75", "sm_80", "sm_86", "sm_89", "sm_90", "sm_100", "sm_120", "sm_121"]') }}
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
      with:
        fetch-depth: 0
        repository: ggml-org/llama.cpp

    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: ubuntu-cuda-${{ matrix.sm }}
        evict-old-files: 1d

    # Best-effort removal of preinstalled packages; failures are ignored.
    - name: Free disk space
      run: |
        sudo apt-get remove -y '^aspnetcore-.*' '^dotnet-.*' '^llvm-.*' 'php.*' 'ruby.*' \
          google-cloud-cli azure-cli google-chrome-stable firefox powershell 2>/dev/null || true
        sudo apt-get autoremove -y
        df -h

    - name: Install CUDA Toolkit
      run: |
        wget -q https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
        sudo dpkg -i cuda-keyring_1.1-1_all.deb
        sudo apt-get update
        sudo apt-get install -y cuda-toolkit-12-9 cmake ninja-build patchelf

    - name: Set CUDA environment
      run: |
        echo "CUDA_PATH=/usr/local/cuda" >> "$GITHUB_ENV"
        echo "/usr/local/cuda/bin" >> "$GITHUB_PATH"
        echo "LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH:-}" >> "$GITHUB_ENV"

    - name: Build
      run: |
        # CMake wants the bare number, so drop the "sm_" prefix (sm_89 -> 89).
        sm="${{ matrix.sm }}"
        cmake -B build -S . \
          -DGGML_CUDA=ON \
          -DCMAKE_CUDA_ARCHITECTURES="${sm#sm_}" \
          -DBUILD_SHARED_LIBS=ON \
          -DGGML_NATIVE=OFF \
          -DGGML_BACKEND_DL=ON \
          -DGGML_OPENMP=OFF \
          -DGGML_STATIC=OFF \
          -DLLAMA_BUILD_BORINGSSL=ON \
          -DCMAKE_BUILD_TYPE=Release \
          ${{ env.CMAKE_ARGS }}
        cmake --build build --config Release -j $(nproc)

    - name: Bundle CUDA runtime libraries
      run: |
        cuda_lib=/usr/local/cuda/lib64
        # -a keeps the symlink chain (libX.so -> libX.so.N -> ...) intact.
        for lib in libcudart libcublas libcublasLt libcurand libnvJitLink; do
          cp -av ${cuda_lib}/${lib}.so* build/bin/
        done

    - name: Set RPATH for portable distribution
      run: |
        for f in build/bin/*; do
          # Only regular files; symlinks are skipped.
          if [ ! -f "$f" ] || [ -L "$f" ]; then
            continue
          fi
          file "$f" | grep -q 'ELF' || continue
          patchelf --set-rpath '$ORIGIN' "$f"
        done

    - name: Validate CUDA package contents
      run: |
        shopt -s nullglob

        # 1) Every bundled runtime library must be present.
        required_libs=(
          libcudart.so
          libcublas.so
          libcublasLt.so
          libcurand.so
          libnvJitLink.so
        )
        for lib in "${required_libs[@]}"; do
          matches=(build/bin/${lib}*)
          if [ ${#matches[@]} -eq 0 ]; then
            echo "::error::Missing required CUDA runtime library matching ${lib}*"
            exit 1
          fi
        done

        # 2) Smoke test: prefer llama-cli, then llama-server, then any
        #    executable llama-* file.
        smoke_bin=""
        for candidate in build/bin/llama-cli build/bin/llama-server; do
          if [ -x "$candidate" ]; then
            smoke_bin=$candidate
            break
          fi
        done
        if [ -z "$smoke_bin" ]; then
          smoke_bin=$(find build/bin -maxdepth 1 -type f -name 'llama-*' -perm -111 | head -n 1)
        fi
        if [ -z "$smoke_bin" ]; then
          echo "::error::No llama executable found for smoke testing"
          exit 1
        fi
        "$smoke_bin" --version >/dev/null

        # 3) Every ELF file must carry RPATH $ORIGIN and resolve all of its
        #    dependencies; libcuda.so.1 is the one tolerated exception.
        for f in build/bin/*; do
          if [ ! -f "$f" ] || [ -L "$f" ]; then
            continue
          fi
          file "$f" | grep -q 'ELF' || continue
          rpath=$(patchelf --print-rpath "$f")
          if [ "$rpath" != '$ORIGIN' ]; then
            echo "::error::Unexpected RPATH '$rpath' for $f"
            exit 1
          fi
          missing=$(ldd "$f" | awk '/=> not found/ && $1 != "libcuda.so.1" { print }')
          if [ -n "$missing" ]; then
            echo "::error::Unresolved runtime dependencies for $f"
            echo "$missing"
            exit 1
          fi
        done

    - name: Determine tag name
      id: tag
      uses: lemonade-sdk/llama.cpp/.github/actions/get-tag-name@lemonade

    - name: Pack artifacts
      id: pack_artifacts
      run: |
        cp LICENSE ./build/bin/
        # The archive should unpack into one llama-<tag>/ folder, like the
        # ROCm Linux tarball. The folder is staged on disk instead of being
        # produced with tar --transform, because that rewrite depends on tar
        # keeping the leading ./ on member names and therefore differs
        # between tar versions/runners.
        pkgdir="llama-${{ steps.tag.outputs.name }}"
        mkdir -p "$pkgdir"
        cp -a build/bin/. "$pkgdir/"
        tar -cJf llama-ubuntu-cuda-${{ matrix.sm }}-x64.tar.xz "$pkgdir"

    - name: Upload artifacts
      uses: actions/upload-artifact@v6
      with:
        path: llama-ubuntu-cuda-${{ matrix.sm }}-x64.tar.xz
        name: llama-ubuntu-cuda-${{ matrix.sm }}-x64.tar.xz
# Linux arm64 CUDA build: same steps as the x64 CUDA job, on an arm runner and
# with NVIDIA's sbsa package repository.
ubuntu-22-cuda-arm64:
  runs-on: ubuntu-22.04-arm
  strategy:
    fail-fast: false
    matrix:
      # Pull requests build a single representative architecture because the
      # packaging steps are the same for every sm_*; scheduled and manually
      # dispatched runs build the whole list.
      sm: ${{ github.event_name == 'pull_request' && fromJSON('["sm_89"]') || fromJSON('["sm_75", "sm_80", "sm_86", "sm_89", "sm_90", "sm_100", "sm_120", "sm_121"]') }}
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
      with:
        fetch-depth: 0
        repository: ggml-org/llama.cpp

    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: ubuntu-cuda-arm64-${{ matrix.sm }}
        evict-old-files: 1d

    # Best-effort removal of preinstalled packages; failures are ignored.
    - name: Free disk space
      run: |
        sudo apt-get remove -y '^aspnetcore-.*' '^dotnet-.*' '^llvm-.*' 'php.*' 'ruby.*' \
          google-cloud-cli azure-cli google-chrome-stable firefox powershell 2>/dev/null || true
        sudo apt-get autoremove -y
        df -h

    - name: Install CUDA Toolkit
      run: |
        wget -q https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/sbsa/cuda-keyring_1.1-1_all.deb
        sudo dpkg -i cuda-keyring_1.1-1_all.deb
        sudo apt-get update
        sudo apt-get install -y cuda-toolkit-12-9 cmake ninja-build patchelf

    - name: Set CUDA environment
      run: |
        echo "CUDA_PATH=/usr/local/cuda" >> "$GITHUB_ENV"
        echo "/usr/local/cuda/bin" >> "$GITHUB_PATH"
        echo "LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH:-}" >> "$GITHUB_ENV"

    - name: Build
      run: |
        # CMake wants the bare number, so drop the "sm_" prefix (sm_89 -> 89).
        sm="${{ matrix.sm }}"
        cmake -B build -S . \
          -DGGML_CUDA=ON \
          -DCMAKE_CUDA_ARCHITECTURES="${sm#sm_}" \
          -DBUILD_SHARED_LIBS=ON \
          -DGGML_NATIVE=OFF \
          -DGGML_BACKEND_DL=ON \
          -DGGML_OPENMP=OFF \
          -DGGML_STATIC=OFF \
          -DLLAMA_BUILD_BORINGSSL=ON \
          -DCMAKE_BUILD_TYPE=Release \
          ${{ env.CMAKE_ARGS }}
        cmake --build build --config Release -j $(nproc)

    - name: Bundle CUDA runtime libraries
      run: |
        cuda_lib=/usr/local/cuda/lib64
        # -a keeps the symlink chain (libX.so -> libX.so.N -> ...) intact.
        for lib in libcudart libcublas libcublasLt libcurand libnvJitLink; do
          cp -av ${cuda_lib}/${lib}.so* build/bin/
        done

    - name: Set RPATH for portable distribution
      run: |
        for f in build/bin/*; do
          # Only regular files; symlinks are skipped.
          if [ ! -f "$f" ] || [ -L "$f" ]; then
            continue
          fi
          file "$f" | grep -q 'ELF' || continue
          patchelf --set-rpath '$ORIGIN' "$f"
        done

    - name: Validate CUDA package contents
      run: |
        shopt -s nullglob

        # 1) Every bundled runtime library must be present.
        required_libs=(
          libcudart.so
          libcublas.so
          libcublasLt.so
          libcurand.so
          libnvJitLink.so
        )
        for lib in "${required_libs[@]}"; do
          matches=(build/bin/${lib}*)
          if [ ${#matches[@]} -eq 0 ]; then
            echo "::error::Missing required CUDA runtime library matching ${lib}*"
            exit 1
          fi
        done

        # 2) Smoke test: prefer llama-cli, then llama-server, then any
        #    executable llama-* file.
        smoke_bin=""
        for candidate in build/bin/llama-cli build/bin/llama-server; do
          if [ -x "$candidate" ]; then
            smoke_bin=$candidate
            break
          fi
        done
        if [ -z "$smoke_bin" ]; then
          smoke_bin=$(find build/bin -maxdepth 1 -type f -name 'llama-*' -perm -111 | head -n 1)
        fi
        if [ -z "$smoke_bin" ]; then
          echo "::error::No llama executable found for smoke testing"
          exit 1
        fi
        "$smoke_bin" --version >/dev/null

        # 3) Every ELF file must carry RPATH $ORIGIN and resolve all of its
        #    dependencies; libcuda.so.1 is the one tolerated exception.
        for f in build/bin/*; do
          if [ ! -f "$f" ] || [ -L "$f" ]; then
            continue
          fi
          file "$f" | grep -q 'ELF' || continue
          rpath=$(patchelf --print-rpath "$f")
          if [ "$rpath" != '$ORIGIN' ]; then
            echo "::error::Unexpected RPATH '$rpath' for $f"
            exit 1
          fi
          missing=$(ldd "$f" | awk '/=> not found/ && $1 != "libcuda.so.1" { print }')
          if [ -n "$missing" ]; then
            echo "::error::Unresolved runtime dependencies for $f"
            echo "$missing"
            exit 1
          fi
        done

    - name: Determine tag name
      id: tag
      uses: lemonade-sdk/llama.cpp/.github/actions/get-tag-name@lemonade

    - name: Pack artifacts
      id: pack_artifacts
      run: |
        cp LICENSE ./build/bin/
        # The archive should unpack into one llama-<tag>/ folder, like the
        # ROCm Linux tarball. The folder is staged on disk instead of being
        # produced with tar --transform, because that rewrite depends on tar
        # keeping the leading ./ on member names and therefore differs
        # between tar versions/runners.
        pkgdir="llama-${{ steps.tag.outputs.name }}"
        mkdir -p "$pkgdir"
        cp -a build/bin/. "$pkgdir/"
        tar -cJf llama-ubuntu-cuda-${{ matrix.sm }}-arm64.tar.xz "$pkgdir"

    - name: Upload artifacts
      uses: actions/upload-artifact@v6
      with:
        path: llama-ubuntu-cuda-${{ matrix.sm }}-arm64.tar.xz
        name: llama-ubuntu-cuda-${{ matrix.sm }}-arm64.tar.xz
# Linux x64 OpenVINO build, packaged as a .tar.gz with the OpenVINO runtime
# (and its bundled TBB, when present) copied next to the binaries.
ubuntu-22-openvino:
  runs-on: ubuntu-22.04
  env:
    OPENVINO_VERSION_MAJOR: "2026.0"
    OPENVINO_VERSION_FULL: "2026.0.0.20965.c6d6a13a886"
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
      with:
        fetch-depth: 0
        repository: ggml-org/llama.cpp

    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: ubuntu-openvino
        evict-old-files: 1d

    - name: Dependencies
      run: |
        # Refresh the package index before installing: the index baked into
        # the runner image can be stale, which makes installs fail on
        # packages that have since been superseded on the mirror.
        sudo apt-get update
        sudo apt-get install -y build-essential cmake ninja-build patchelf \
          python3-pip libtbb12 \
          ocl-icd-opencl-dev opencl-headers opencl-clhpp-headers intel-opencl-icd

    - name: Setup OpenVINO
      uses: lemonade-sdk/llama.cpp/.github/actions/linux-setup-openvino@lemonade
      with:
        path: ${{ github.workspace }}/openvino
        version_major: ${{ env.OPENVINO_VERSION_MAJOR }}
        version_full: ${{ env.OPENVINO_VERSION_FULL }}

    - name: Install OpenVINO dependencies
      run: |
        chmod +x ${{ github.workspace }}/openvino/install_dependencies/install_openvino_dependencies.sh
        # "Y" answers the installer's confirmation prompt.
        echo "Y" | sudo -E ${{ github.workspace }}/openvino/install_dependencies/install_openvino_dependencies.sh

    - name: Build
      run: |
        source ${{ github.workspace }}/openvino/setupvars.sh
        cmake -B build -S . \
          -DGGML_OPENVINO=ON \
          -DBUILD_SHARED_LIBS=ON \
          -DGGML_NATIVE=OFF \
          -DGGML_BACKEND_DL=ON \
          -DGGML_OPENMP=OFF \
          -DGGML_STATIC=OFF \
          -DLLAMA_BUILD_BORINGSSL=ON \
          -DCMAKE_BUILD_TYPE=Release \
          ${{ env.CMAKE_ARGS }}
        cmake --build build --config Release -j $(nproc)

    - name: Bundle OpenVINO runtime libraries
      run: |
        OV_LIBDIR="${{ github.workspace }}/openvino/runtime/lib/intel64"
        # Core runtime and all plugins
        cp -av "$OV_LIBDIR"/libopenvino*.so* build/bin/
        # TBB threading library bundled with OpenVINO (skip libtbbbind — needs libhwloc)
        TBB_DIR="${{ github.workspace }}/openvino/runtime/3rdparty/tbb/lib"
        if [ -d "$TBB_DIR" ]; then
          cp -av "$TBB_DIR"/libtbb.so* build/bin/
          cp -av "$TBB_DIR"/libtbbmalloc.so* build/bin/
        fi

    - name: Set RPATH for portable distribution
      run: |
        for f in build/bin/*; do
          # Only regular files; symlinks are skipped.
          [ -f "$f" ] && ! [ -L "$f" ] || continue
          if file "$f" | grep -q 'ELF'; then
            patchelf --set-rpath '$ORIGIN' "$f"
          fi
        done

    - name: Validate OpenVINO package contents
      run: |
        shopt -s nullglob

        # 1) The core OpenVINO library must have been bundled.
        required_libs=(libopenvino.so)
        for lib in "${required_libs[@]}"; do
          matches=(build/bin/${lib}*)
          if [ ${#matches[@]} -eq 0 ]; then
            echo "::error::Missing required OpenVINO runtime library matching ${lib}*"
            exit 1
          fi
        done

        # 2) Smoke test: llama-server if present, else any executable llama-*.
        smoke_bin=""
        if [ -x build/bin/llama-server ]; then
          smoke_bin=build/bin/llama-server
        else
          smoke_bin=$(find build/bin -maxdepth 1 -type f -name 'llama-*' -perm -111 | head -n 1)
        fi
        if [ -z "$smoke_bin" ]; then
          echo "::error::No llama executable found for smoke testing"
          exit 1
        fi
        "$smoke_bin" --version >/dev/null

        # 3) Every ELF file must carry RPATH $ORIGIN and resolve its
        #    dependencies.
        for f in build/bin/*; do
          [ -f "$f" ] && ! [ -L "$f" ] || continue
          if ! file "$f" | grep -q 'ELF'; then
            continue
          fi
          rpath=$(patchelf --print-rpath "$f")
          if [ "$rpath" != '$ORIGIN' ]; then
            echo "::error::Unexpected RPATH '$rpath' for $f"
            exit 1
          fi
          # libOpenCL is optional (Intel GPU acceleration); skip it
          missing=$(ldd "$f" | awk '/=> not found/ && $1 != "libOpenCL.so.1" { print }')
          if [ -n "$missing" ]; then
            echo "::error::Unresolved runtime dependencies for $f"
            echo "$missing"
            exit 1
          fi
        done

    - name: Determine tag name
      id: tag
      uses: lemonade-sdk/llama.cpp/.github/actions/get-tag-name@lemonade

    - name: Pack artifacts
      run: |
        cp LICENSE ./build/bin/
        tar -czvf llama-bin-ubuntu-openvino-${{ env.OPENVINO_VERSION_MAJOR }}-x64.tar.gz \
          --transform "s,./,llama-${{ steps.tag.outputs.name }}/," \
          -C ./build/bin .

    - name: Upload artifacts
      uses: actions/upload-artifact@v6
      with:
        path: llama-bin-ubuntu-openvino-${{ env.OPENVINO_VERSION_MAJOR }}-x64.tar.gz
        name: llama-bin-ubuntu-openvino-${{ env.OPENVINO_VERSION_MAJOR }}-x64.tar.gz
# Windows CPU build. Its zip is also consumed by the windows-cuda job and by
# the release job, both of which look it up by artifact name.
windows-cpu:
  runs-on: windows-2025
  strategy:
    matrix:
      include:
        - arch: x64
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
      with:
        fetch-depth: 0
        repository: ggml-org/llama.cpp

    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: windows-latest-cpu-${{ matrix.arch }}
        variant: ccache
        evict-old-files: 1d

    - name: Install Ninja
      run: |
        choco install ninja

    # vcvarsall.bat sets up the MSVC environment for the selected target
    # before CMake configures with the repository's LLVM toolchain file.
    - name: Build
      shell: cmd
      run: |
        call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'x64' && 'x64' || 'amd64_arm64' }}
        cmake -S . -B build -G "Ninja Multi-Config" ^
          -D CMAKE_TOOLCHAIN_FILE=cmake/${{ matrix.arch }}-windows-llvm.cmake ^
          -DLLAMA_BUILD_BORINGSSL=ON ^
          -DGGML_NATIVE=OFF ^
          -DGGML_BACKEND_DL=ON ^
          -DGGML_CPU_ALL_VARIANTS=${{ matrix.arch == 'x64' && 'ON' || 'OFF' }} ^
          -DGGML_OPENMP=ON ^
          ${{ env.CMAKE_ARGS }}
        cmake --build build --config Release

    - name: Pack artifacts
      id: pack_artifacts
      shell: pwsh
      run: |
        # The OpenMP runtime DLL is named after the CPU architecture.
        $archSuffix = if ('${{ matrix.arch }}' -eq 'x64') { 'x86_64' } else { 'aarch64' }

        # Prefer the MSVC redist folders that vswhere reports; otherwise fall
        # back to scanning the default Visual Studio install locations.
        $vswhere = Join-Path ${env:ProgramFiles(x86)} "Microsoft Visual Studio\Installer\vswhere.exe"
        $searchRoots = @()
        if (Test-Path $vswhere) {
          $searchRoots += & $vswhere -all -products * -property installationPath 2>$null |
            ForEach-Object { Join-Path $_ "VC\Redist\MSVC" } |
            Where-Object { Test-Path $_ }
        }
        if (-not $searchRoots) {
          $searchRoots = @("C:\Program Files\Microsoft Visual Studio", "${env:ProgramFiles(x86)}\Microsoft Visual Studio")
        }

        # Take the match whose full path sorts last among the
        # debug_nonredist\<arch> hits.
        $ompDll = $searchRoots |
          ForEach-Object { Get-ChildItem $_ -Recurse -File -Filter "libomp140.$archSuffix.dll" -ErrorAction SilentlyContinue } |
          Where-Object { $_.FullName -like "*\debug_nonredist\${{ matrix.arch }}\*" } |
          Sort-Object FullName -Descending |
          Select-Object -First 1
        if (-not $ompDll) { throw "Could not locate libomp140.$archSuffix.dll under: $($searchRoots -join ', ')" }

        Copy-Item $ompDll.FullName .\build\bin\Release\
        7z a -snl llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*

    - name: Upload artifacts
      uses: actions/upload-artifact@v6
      with:
        path: llama-bin-win-cpu-${{ matrix.arch }}.zip
        name: llama-bin-win-cpu-${{ matrix.arch }}.zip
# Windows x64 HIP/ROCm build, using a cached TheRock tarball extracted to
# C:\TheRock\build and packaged as a .zip.
windows-rocm:
  runs-on: windows-2022
  strategy:
    matrix:
      include:
        - ROCM_VERSION: "7.13.0"
          gpu_targets: "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1010;gfx1011;gfx1012;gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036;gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1152;gfx1200;gfx1201"
          build: x64
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
      with:
        fetch-depth: 0
        repository: ggml-org/llama.cpp

    # The extracted ROCm tree is cached per version and OS so the download
    # below only happens on a cache miss.
    - name: Cache ROCm Installation
      id: cache-rocm
      uses: actions/cache@v4
      with:
        path: C:\TheRock\build
        key: rocm-${{ matrix.ROCM_VERSION }}-gfx1151-${{ runner.os }}

    # Pinned to the same ccache-action release as the other jobs in this
    # workflow (this job previously used v1.2.16).
    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: windows-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}
        evict-old-files: 1d

    - name: Install ROCm
      if: steps.cache-rocm.outputs.cache-hit != 'true'
      run: |
        $ErrorActionPreference = "Stop"
        write-host "Downloading AMD ROCm ${{ matrix.ROCM_VERSION }} tarball"
        Invoke-WebRequest -Uri "https://repo.amd.com/rocm/tarball/therock-dist-windows-gfx1151-${{ matrix.ROCM_VERSION }}.tar.gz" -OutFile "${env:RUNNER_TEMP}\rocm.tar.gz"
        write-host "Extracting ROCm tarball"
        mkdir C:\TheRock\build -Force
        tar -xzf "${env:RUNNER_TEMP}\rocm.tar.gz" -C C:\TheRock\build --strip-components=1
        write-host "Completed ROCm extraction"

    # Exports the HIP/LLVM locations and puts both bin folders on PATH for the
    # following steps.
    - name: Setup ROCm Environment
      run: |
        $rocmPath = "C:\TheRock\build"
        echo "HIP_PATH=$rocmPath" >> $env:GITHUB_ENV
        echo "HIP_DEVICE_LIB_PATH=$rocmPath\lib\llvm\amdgcn\bitcode" >> $env:GITHUB_ENV
        echo "HIP_PLATFORM=amd" >> $env:GITHUB_ENV
        echo "LLVM_PATH=$rocmPath\lib\llvm" >> $env:GITHUB_ENV
        echo "$rocmPath\bin" >> $env:GITHUB_PATH
        echo "$rocmPath\lib\llvm\bin" >> $env:GITHUB_PATH

    # NOTE(review): unlike the other build jobs in this workflow, this
    # configure call does not append env.CMAKE_ARGS — confirm that is
    # intentional before changing it.
    - name: Build
      run: |
        mkdir build
        cd build
        cmake .. `
          -G "Unix Makefiles" `
          -DCMAKE_PREFIX_PATH="${env:HIP_PATH}" `
          -DCMAKE_BUILD_TYPE=Release `
          -DGGML_BACKEND_DL=ON `
          -DGGML_NATIVE=OFF `
          -DGGML_CPU=ON `
          -DGGML_CPU_ALL_VARIANTS=ON `
          -DGGML_HIP=ON `
          -DCMAKE_C_COMPILER="${env:HIP_PATH}\lib\llvm\bin\clang.exe" `
          -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\lib\llvm\bin\clang++.exe" `
          -DCMAKE_C_FLAGS="-Wno-error=incompatible-pointer-types" `
          -DCMAKE_HIP_COMPILER="${env:HIP_PATH}\lib\llvm\bin\clang.exe" `
          -DHIP_PATH="${env:HIP_PATH}" `
          -DGGML_HIP_ROCWMMA_FATTN=ON `
          -DAMDGPU_TARGETS="${{ matrix.gpu_targets }}"
        cmake --build . --config Release --parallel ${env:NUMBER_OF_PROCESSORS}

    # Fails the job when no ggml-hip*.dll exists in build\bin, so a build that
    # finished without producing the HIP backend is not packaged.
    - name: Verify HIP backend was built
      run: |
        $hipDll = Get-ChildItem -Path build\bin -Filter "ggml-hip*.dll" -ErrorAction SilentlyContinue
        if (-not $hipDll) {
          Write-Host "##[error]ggml-hip*.dll was NOT produced. The HIP backend silently failed to build."
          Write-Host "Contents of build\bin:"
          Get-ChildItem build\bin | Format-Table -AutoSize
          exit 1
        }
        Write-Host "HIP backend artifact found:"
        $hipDll | Format-Table FullName, Length -AutoSize

    - name: Determine tag name
      id: tag
      uses: lemonade-sdk/llama.cpp/.github/actions/get-tag-name@lemonade

    # Keeps only major.minor of the ROCm version for the artifact name.
    - name: Get ROCm short version
      run: |
        $rocmVersionShort = ('${{ matrix.ROCM_VERSION }}'.Split('.')[0..1] -join '.')
        echo "ROCM_VERSION_SHORT=$rocmVersionShort" >> $env:GITHUB_ENV

    - name: Pack artifacts
      run: |
        cp "LICENSE" "build\bin\"
        7z a -snl llama-bin-win-rocm-${{ env.ROCM_VERSION_SHORT }}-${{ matrix.build }}.zip .\build\bin\*

    - name: Upload artifacts
      uses: actions/upload-artifact@v6
      with:
        path: llama-bin-win-rocm-${{ env.ROCM_VERSION_SHORT }}-${{ matrix.build }}.zip
        name: llama-bin-win-rocm-${{ env.ROCM_VERSION_SHORT }}-${{ matrix.build }}.zip
# Windows x64 CUDA build: one .7z per compute capability. It waits for
# windows-cpu because the packaging step overlays that job's zip onto the
# CUDA build output.
windows-cuda:
  runs-on: windows-2022
  needs:
    - windows-cpu
  strategy:
    fail-fast: false
    matrix:
      # Pull requests build a single representative architecture because the
      # packaging steps are the same for every sm_*; scheduled and manually
      # dispatched runs build the whole list.
      sm: ${{ github.event_name == 'pull_request' && fromJSON('["sm_89"]') || fromJSON('["sm_75", "sm_80", "sm_86", "sm_89", "sm_90", "sm_100", "sm_120", "sm_121"]') }}
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
      with:
        fetch-depth: 0
        repository: ggml-org/llama.cpp

    - name: Install CUDA Toolkit
      uses: Jimver/cuda-toolkit@v0.2.35
      with:
        cuda: "12.9.0"
        method: network
        sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "curand", "nvjitlink", "thrust", "visual_studio_integration"]'

    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: windows-cuda-${{ matrix.sm }}
        variant: ccache
        evict-old-files: 1d

    - name: Install Ninja
      run: choco install ninja

    # %sm:sm_=% removes the "sm_" prefix, leaving the bare architecture number
    # that CMAKE_CUDA_ARCHITECTURES expects.
    - name: Build
      shell: cmd
      run: |
        call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
        set sm=${{ matrix.sm }}
        set cmake_arch=%sm:sm_=%
        cmake -S . -B build -G "Ninja Multi-Config" ^
          -DGGML_CUDA=ON ^
          -DCMAKE_CUDA_ARCHITECTURES=%cmake_arch% ^
          -DGGML_NATIVE=OFF ^
          -DGGML_BACKEND_DL=ON ^
          -DLLAMA_BUILD_BORINGSSL=ON ^
          ${{ env.CMAKE_ARGS }}
        cmake --build build --config Release

    - name: Download CPU backend artifact
      uses: actions/download-artifact@v7
      with:
        name: llama-bin-win-cpu-x64.zip
        path: .\cpu-artifact

    # Stages the final package in build\bin\Release, verifies it, smoke-tests
    # one executable from inside that folder, then archives it.
    - name: Pack artifacts
      run: |
        $releaseDir = '.\build\bin\Release'
        $cpuArchive = '.\cpu-artifact\llama-bin-win-cpu-x64.zip'
        $cudaBin = Join-Path $env:CUDA_PATH 'bin'
        $cpuExtractDir = Join-Path $env:RUNNER_TEMP "cpu-backend-${{ matrix.sm }}"

        # 1) Overlay the CPU backend package onto the CUDA build output.
        if (-not (Test-Path $cpuArchive)) {
          throw "Missing required CPU backend artifact: $cpuArchive"
        }
        if (Test-Path $cpuExtractDir) {
          Remove-Item $cpuExtractDir -Recurse -Force
        }
        New-Item -ItemType Directory -Path $cpuExtractDir | Out-Null
        Expand-Archive -Path $cpuArchive -DestinationPath $cpuExtractDir -Force
        Copy-Item (Join-Path $cpuExtractDir '*') $releaseDir -Recurse -Force

        # 2) Copy one DLL per CUDA runtime pattern (the name that sorts last).
        $runtimeDllPatterns = @(
          'cudart64_*.dll',
          'cublas64_*.dll',
          'cublasLt64_*.dll',
          'curand64_*.dll',
          'nvJitLink_*.dll'
        )
        foreach ($pattern in $runtimeDllPatterns) {
          $dll = Get-ChildItem -Path $cudaBin -Filter $pattern | Sort-Object Name -Descending | Select-Object -First 1
          if (-not $dll) {
            throw "Missing CUDA runtime DLL matching $pattern in $cudaBin"
          }
          Copy-Item $dll.FullName $releaseDir
        }
        Copy-Item LICENSE $releaseDir

        # 3) Check the staged layout: CUDA backend DLL plus every runtime DLL.
        $cudaDll = Get-ChildItem -Path $releaseDir -Filter "ggml-cuda*.dll" -ErrorAction SilentlyContinue
        if (-not $cudaDll) {
          Write-Host "##[error]ggml-cuda*.dll was NOT produced in the final package layout."
          Write-Host "Contents of ${releaseDir}:"
          Get-ChildItem $releaseDir | Format-Table -AutoSize
          exit 1
        }
        foreach ($pattern in $runtimeDllPatterns) {
          $dll = Get-ChildItem -Path $releaseDir -Filter $pattern | Select-Object -First 1
          if (-not $dll) {
            throw "Missing staged CUDA runtime DLL matching $pattern in $releaseDir"
          }
        }

        # 4) Smoke test: llama-cli.exe, then llama-server.exe, then the first
        #    llama-*.exe by name.
        $smokeExe = $null
        foreach ($name in @('llama-cli.exe', 'llama-server.exe')) {
          $candidate = Get-ChildItem -Path $releaseDir -Filter $name -ErrorAction SilentlyContinue | Select-Object -First 1
          if ($candidate) {
            $smokeExe = $candidate
            break
          }
        }
        if (-not $smokeExe) {
          $smokeExe = Get-ChildItem -Path $releaseDir -Filter 'llama-*.exe' -ErrorAction SilentlyContinue | Sort-Object Name | Select-Object -First 1
        }
        if (-not $smokeExe) {
          throw "No llama executable found for staged Windows CUDA smoke test"
        }
        Push-Location $releaseDir
        try {
          & ".\$($smokeExe.Name)" --version | Out-Null
          if ($LASTEXITCODE -ne 0) {
            throw "Smoke test failed for $($smokeExe.Name) with exit code $LASTEXITCODE"
          }
        }
        finally {
          Pop-Location
        }

        # 5) Archive the staged folder.
        7z a -snl llama-windows-cuda-${{ matrix.sm }}-x64.7z "$releaseDir\\*"

    - name: Upload artifacts
      uses: actions/upload-artifact@v6
      with:
        path: llama-windows-cuda-${{ matrix.sm }}-x64.7z
        name: llama-windows-cuda-${{ matrix.sm }}-x64.7z
# Publishes every build artifact as assets of the release for the computed
# tag. Runs on schedule, or on manual dispatch with create_release enabled;
# pull_request runs never reach it.
release:
  if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }}
  # Narrow GITHUB_TOKEN grant for this job only, see
  # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
  permissions:
    contents: write  # for creating release
  runs-on: ubuntu-slim
  needs:
    - windows-cpu
    - windows-rocm
    - windows-cuda
    - ubuntu-22-rocm
    - ubuntu-22-cuda
    - ubuntu-22-cuda-arm64
    - ubuntu-22-openvino
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
      with:
        fetch-depth: 0
        repository: ggml-org/llama.cpp

    - name: Determine tag name
      id: tag
      uses: lemonade-sdk/llama.cpp/.github/actions/get-tag-name@lemonade

    # No artifact name is given, so all artifacts of the run are fetched;
    # merge-multiple places them together in ./artifact.
    - name: Download artifacts
      id: download-artifact
      uses: actions/download-artifact@v7
      with:
        path: ./artifact
        merge-multiple: true

    # Injects the CPU backend into the other Windows zips, then moves every
    # archive into ./release under a name that carries the tag.
    - name: Move artifacts
      id: move_artifacts
      run: |
        mkdir -p release

        echo "Adding CPU backend files to Windows ZIP archives..."
        for arch in x64; do
          cpu_zip="artifact/llama-bin-win-cpu-${arch}.zip"
          if [ ! -f "$cpu_zip" ]; then
            echo "::error::Missing required CPU artifact $cpu_zip"
            exit 1
          fi
          temp_dir=$(mktemp -d)
          echo "Extracting CPU backend for $arch..."
          unzip "$cpu_zip" -d "$temp_dir"
          echo "Adding CPU files to $arch zips..."
          for target_zip in artifact/llama-bin-win-*-${arch}.zip; do
            # The CPU zip matches the glob too; never merge it into itself.
            if [[ "$target_zip" == "$cpu_zip" ]]; then
              continue
            fi
            echo "Adding CPU backend to $(basename "$target_zip")"
            # zip runs from inside temp_dir, so the target must be absolute.
            realpath_target_zip=$(realpath "$target_zip")
            (cd "$temp_dir" && zip -r "$realpath_target_zip" .)
          done
          rm -rf "$temp_dir"
        done

        # Each loop below renames llama-<rest>.<ext> to llama-<tag>-<rest>.<ext>.
        echo "Renaming and moving zips to release..."
        for zip_file in artifact/llama-bin-win-*.zip; do
          base_name=$(basename "$zip_file" .zip)
          zip_name="llama-${{ steps.tag.outputs.name }}-${base_name#llama-}.zip"
          echo "Moving $zip_file to release/$zip_name"
          mv "$zip_file" "release/$zip_name"
        done

        echo "Renaming and moving tar.gz files to release..."
        for tar_file in artifact/*.tar.gz; do
          base_name=$(basename "$tar_file" .tar.gz)
          tar_name="llama-${{ steps.tag.outputs.name }}-${base_name#llama-}.tar.gz"
          echo "Moving $tar_file to release/$tar_name"
          mv "$tar_file" "release/$tar_name"
        done

        echo "Renaming and moving CUDA tar.xz artifacts to release..."
        for tar_file in artifact/llama-ubuntu-cuda-*.tar.xz; do
          # An unmatched glob stays literal; skip it instead of failing.
          [ -f "$tar_file" ] || continue
          base_name=$(basename "$tar_file" .tar.xz)
          tar_name="llama-${{ steps.tag.outputs.name }}-${base_name#llama-}.tar.xz"
          echo "Moving $tar_file to release/$tar_name"
          mv "$tar_file" "release/$tar_name"
        done

        echo "Renaming and moving CUDA .7z artifacts to release..."
        for z_file in artifact/llama-windows-cuda-*.7z; do
          [ -f "$z_file" ] || continue
          base_name=$(basename "$z_file" .7z)
          z_name="llama-${{ steps.tag.outputs.name }}-${base_name#llama-}.7z"
          echo "Moving $z_file to release/$z_name"
          mv "$z_file" "release/$z_name"
        done

    # Scheduled runs get a fixed "Nightly release" line; other runs use the
    # subject of the newest commit in the checked-out repository.
    - name: Determine release summary
      id: release_summary
      run: |
        if [[ "${{ github.event_name }}" == "schedule" ]]; then
          echo "value=Nightly release for ${{ github.sha }}" >> "$GITHUB_OUTPUT"
        else
          echo "value=$(git log -1 --pretty=%s)" >> "$GITHUB_OUTPUT"
        fi

    # Refuse to create a release when there is nothing to attach to it.
    - name: Check release artifacts
      run: |
        files=$(find ./release -maxdepth 1 \( -name '*.zip' -o -name '*.tar.gz' -o -name '*.tar.xz' -o -name '*.7z' \) 2>/dev/null | wc -l)
        if [ "$files" -eq 0 ]; then
          echo "No release artifacts found in ./release — aborting before creating a release."
          exit 1
        fi
        echo "Found $files artifact(s) ready to upload."

    # Looks the release up by tag and creates it only when it is missing, so
    # the step is idempotent: after a run that created the release but died
    # while uploading (e.g. a transient GitHub outage), a re-run reuses the
    # same release and the upload step can fill in whatever is absent.
    # NOTE(review): the ROCm "7.13" and OpenVINO "2026.0" strings in the body
    # are hard-coded and must track the versions used by the build jobs.
    - name: Create or get release
      id: release
      uses: actions/github-script@v8
      env:
        TAG_NAME: ${{ steps.tag.outputs.name }}
        RELEASE_SUMMARY: ${{ steps.release_summary.outputs.value }}
      with:
        github-token: ${{ secrets.GITHUB_TOKEN }}
        script: |
          const tag = process.env.TAG_NAME;
          const { owner, repo } = context.repo;
          let release;
          try {
            release = (await github.rest.repos.getReleaseByTag({ owner, repo, tag })).data;
            core.info(`Reusing existing release ${tag} (id ${release.id})`);
          } catch (e) {
            // Anything other than "not found" is a real failure.
            if (e.status !== 404) throw e;
            const body = [
              '<details open>',
              '',
              process.env.RELEASE_SUMMARY,
              '',
              '</details>',
              '',
              '**Linux:**',
              `- [Ubuntu x64 (ROCm 7.13)](https://github.com/${owner}/${repo}/releases/download/${tag}/llama-${tag}-bin-ubuntu-rocm-7.13-x64.tar.gz)`,
              '- Ubuntu x64 (CUDA): `llama-' + tag + '-ubuntu-cuda-sm_XX-x64.tar.xz` (replace XX with your GPU compute capability)',
              '- Ubuntu arm64 (CUDA): `llama-' + tag + '-ubuntu-cuda-sm_XX-arm64.tar.xz` (replace XX with your GPU compute capability)',
              `- [Ubuntu x64 (OpenVINO 2026.0)](https://github.com/${owner}/${repo}/releases/download/${tag}/llama-${tag}-bin-ubuntu-openvino-2026.0-x64.tar.gz)`,
              '',
              '**Windows:**',
              `- [Windows x64 (ROCm 7.13)](https://github.com/${owner}/${repo}/releases/download/${tag}/llama-${tag}-bin-win-rocm-7.13-x64.zip)`,
              '- Windows x64 (CUDA): `llama-' + tag + '-windows-cuda-sm_XX-x64.7z` (replace XX with your GPU compute capability)',
            ].join('\n');
            try {
              release = (await github.rest.repos.createRelease({ owner, repo, tag_name: tag, body })).data;
              core.info(`Created release ${tag} (id ${release.id})`);
            } catch (createError) {
              // A 422 on create is handled by fetching the release again,
              // i.e. it is treated as "someone else created it meanwhile".
              if (createError.status !== 422) throw createError;
              release = (await github.rest.repos.getReleaseByTag({ owner, repo, tag })).data;
              core.info(`Reusing concurrently created release ${tag} (id ${release.id})`);
            }
          }
          core.setOutput('id', release.id);

    # Uploads every archive in ./release that the release does not already
    # hold in a completed state.
    - name: Upload release
      id: upload_release
      uses: actions/github-script@v8
      env:
        RELEASE_ID: ${{ steps.release.outputs.id }}
      with:
        github-token: ${{secrets.GITHUB_TOKEN}}
        script: |
          const fs = require('fs');
          const { owner, repo } = context.repo;
          const release_id = Number(process.env.RELEASE_ID);
          const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
          // Runs fn up to `attempts` times. Errors without a status, with a
          // 5xx status, or with 429 count as transient and are retried after
          // an exponentially growing delay capped at 30 s; anything else is
          // rethrown at once. This keeps a momentary GitHub blip from
          // aborting the upload and leaving the release half populated.
          async function withRetry(label, fn, attempts = 5) {
            for (let i = 1; ; i++) {
              try {
                return await fn();
              } catch (e) {
                const transient = !e.status || e.status >= 500 || e.status === 429;
                if (i >= attempts || !transient) throw e;
                const delay = Math.min(30000, 1000 * 2 ** (i - 1));
                core.warning(`${label} failed (attempt ${i}/${attempts}): ${e.message}. Retrying in ${delay}ms`);
                await sleep(delay);
              }
            }
          }
          // Assets left over from an earlier partial run. Those in state
          // 'uploaded' are recorded so they can be skipped; assets in any
          // other state are deleted so they can be uploaded again.
          const existing = new Set();
          for (const a of await github.paginate(github.rest.repos.listReleaseAssets, { owner, repo, release_id })) {
            if (a.state === 'uploaded') {
              existing.add(a.name);
            } else {
              core.warning(`deleting incomplete asset ${a.name} (${a.state})`);
              await github.rest.repos.deleteReleaseAsset({ owner, repo, asset_id: a.id });
            }
          }
          const files = fs.readdirSync('./release').filter((f) =>
            f.endsWith('.zip') || f.endsWith('.tar.gz') || f.endsWith('.tar.xz') || f.endsWith('.7z'));
          let uploaded = 0;
          let skipped = 0;
          for (const file of files) {
            if (existing.has(file)) {
              core.info(`skipping ${file} (already uploaded)`);
              skipped++;
              continue;
            }
            const data = fs.readFileSync(`./release/${file}`);
            await withRetry(`upload ${file}`, () =>
              github.rest.repos.uploadReleaseAsset({ owner, repo, release_id, name: file, data })
                .catch(async (e) => {
                  // Only a 422 gets special handling: look for an asset with
                  // the same name. A completed one is accepted as success; an
                  // incomplete one is deleted and a status-less Error is
                  // thrown so that withRetry attempts the upload again.
                  if (e.status !== 422) throw e;
                  const assets = await github.paginate(github.rest.repos.listReleaseAssets, { owner, repo, release_id });
                  const asset = assets.find((a) => a.name === file);
                  if (asset?.state === 'uploaded') {
                    core.info(`accepting ${file} (already uploaded)`);
                    return;
                  }
                  if (asset) {
                    core.warning(`deleting conflicting asset ${file} (${asset.state})`);
                    await github.rest.repos.deleteReleaseAsset({ owner, repo, asset_id: asset.id });
                    throw new Error(`retrying upload for ${file} after deleting conflicting asset`);
                  }
                  throw e;
                }));
            core.info(`uploaded ${file}`);
            uploaded++;
          }
          core.info(`Done: ${uploaded} uploaded, ${skipped} already present, ${files.length} total.`);