Release #240

Workflow file for this run

.github/workflows/release.yml at ea05f50

	# Release workflow: builds llama.cpp binaries for several GPU/CPU backends on
	# Linux and Windows and, on schedule or explicit request, publishes them as a
	# GitHub release.
	name: Release

	on:
	  schedule:
	    # Five-field cron expression: minute 0 of every 6th hour.
	    - cron: '0 */6 * * *' # every 6 hours (4 times daily)
	  workflow_dispatch: # allows manual triggering
	    inputs:
	      create_release:
	        description: 'Create new release'
	        required: true
	        type: boolean
	  pull_request: # validate the release build on PRs; the release job is skipped so nothing is published
	    paths:
	      # The build jobs clone llama.cpp source from upstream and the get-tag-name
	      # action is consumed via a pinned @lemonade ref, so only changes to this
	      # workflow itself can affect the outcome of a PR run. Scope PR runs to it.
	      - '.github/workflows/release.yml'

	concurrency:
	  # When github.head_ref is set the group is keyed on github.ref, otherwise on
	  # github.run_id (a per-run value), so only runs sharing a ref cancel each other.
	  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
	  cancel-in-progress: true

	env:
	  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
	  # Common CMake options, appended to the configure command by every job that
	  # references env.CMAKE_ARGS.
	  CMAKE_ARGS: "-DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON"

	jobs:
	ubuntu-22-rocm:
	runs-on: ubuntu-22.04

	strategy:
	matrix:
	include:
	- ROCM_VERSION: "7.13.0"
	gpu_targets: "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1010;gfx1011;gfx1012;gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036;gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1152;gfx1200;gfx1201"
	build: x64

	steps:
	- name: Clone
	id: checkout
	uses: actions/checkout@v6
	with:
	fetch-depth: 0
	repository: 'ggml-org/llama.cpp'

	- name: ccache
	uses: ggml-org/ccache-action@v1.2.21
	with:
	key: ubuntu-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}
	evict-old-files: 1d

	- name: Dependencies
	id: depends
	run: \|
	sudo apt install -y build-essential git cmake wget

	- name: Setup Legacy ROCm
	if: matrix.ROCM_VERSION == '7.2.1'
	id: legacy_env
	run: \|
	sudo mkdir --parents --mode=0755 /etc/apt/keyrings
	wget https://repo.radeon.com/rocm/rocm.gpg.key -O - \| \
	gpg --dearmor \| sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null

	sudo tee /etc/apt/sources.list.d/rocm.list << EOF
	deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${{ matrix.ROCM_VERSION }} jammy main
	EOF

	sudo tee /etc/apt/preferences.d/rocm-pin-600 << EOF
	Package: *
	Pin: release o=repo.radeon.com
	Pin-Priority: 600
	EOF

	sudo apt update
	sudo apt-get install -y libssl-dev rocm-hip-sdk

	- name: Setup TheRock
	if: matrix.ROCM_VERSION != '7.2.1'
	id: therock_env
	run: \|
	wget https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx1151-${{ matrix.ROCM_VERSION }}.tar.gz
	mkdir install
	tar -xf *.tar.gz -C install
	export ROCM_PATH=$(pwd)/install
	echo ROCM_PATH=$ROCM_PATH >> $GITHUB_ENV
	echo PATH=$PATH:$ROCM_PATH/bin >> $GITHUB_ENV
	echo LD_LIBRARY_PATH=$ROCM_PATH/lib:$ROCM_PATH/llvm/lib:$ROCM_PATH/lib/rocprofiler-systems >> $GITHUB_ENV

	- name: Build with native CMake HIP support
	id: cmake_build
	run: \|
	cmake -B build -S . \
	-DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
	-DCMAKE_BUILD_TYPE=Release \
	-DGGML_BACKEND_DL=ON \
	-DGGML_NATIVE=OFF \
	-DCMAKE_INSTALL_RPATH='$ORIGIN' \
	-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
	-DGGML_CPU_ALL_VARIANTS=ON \
	-DGPU_TARGETS="${{ matrix.gpu_targets }}" \
	-DGGML_HIP=ON \
	-DHIP_PLATFORM=amd \
	-DGGML_HIP_ROCWMMA_FATTN=ON \
	-DGGML_OPENMP=ON \
	${{ env.CMAKE_ARGS }}
	cmake --build build --config Release -j $(nproc)

	- name: Determine tag name
	id: tag
	uses: lemonade-sdk/llama.cpp/.github/actions/get-tag-name@lemonade

	- name: Get ROCm short version
	run: echo "ROCM_VERSION_SHORT=$(echo '${{ matrix.ROCM_VERSION }}' \| cut -d '.' -f 1,2)" >> $GITHUB_ENV

	- name: Pack artifacts
	id: pack_artifacts
	run: \|
	cp LICENSE ./build/bin/
	tar -czvf llama-bin-ubuntu-rocm-${{ env.ROCM_VERSION_SHORT }}-${{ matrix.build }}.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .

	- name: Upload artifacts
	uses: actions/upload-artifact@v6
	with:
	path: llama-bin-ubuntu-rocm-${{ env.ROCM_VERSION_SHORT }}-${{ matrix.build }}.tar.gz
	name: llama-bin-ubuntu-rocm-${{ env.ROCM_VERSION_SHORT }}-${{ matrix.build }}.tar.gz

	ubuntu-22-cuda:
	runs-on: ubuntu-22.04

	strategy:
	fail-fast: false
	matrix:
	# On PRs only build one representative arch (packaging logic is identical
	# across all sm_*); build the full matrix on schedule/dispatch.
	sm: ${{ github.event_name == 'pull_request' && fromJSON('["sm_89"]') \|\| fromJSON('["sm_75", "sm_80", "sm_86", "sm_89", "sm_90", "sm_100", "sm_120", "sm_121"]') }}

	steps:
	- name: Clone
	id: checkout
	uses: actions/checkout@v6
	with:
	fetch-depth: 0
	repository: 'ggml-org/llama.cpp'

	- name: ccache
	uses: ggml-org/ccache-action@v1.2.21
	with:
	key: ubuntu-cuda-${{ matrix.sm }}
	evict-old-files: 1d

	- name: Free disk space
	run: \|
	sudo apt-get remove -y '^aspnetcore-.' '^dotnet-.' '^llvm-.' 'php.' 'ruby.*' \
	google-cloud-cli azure-cli google-chrome-stable firefox powershell 2>/dev/null \|\| true
	sudo apt-get autoremove -y
	df -h

	- name: Install CUDA Toolkit
	run: \|
	wget -q https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
	sudo dpkg -i cuda-keyring_1.1-1_all.deb
	sudo apt-get update
	sudo apt-get install -y cuda-toolkit-12-9 cmake ninja-build patchelf

	- name: Set CUDA environment
	run: \|
	echo "CUDA_PATH=/usr/local/cuda" >> "$GITHUB_ENV"
	echo "/usr/local/cuda/bin" >> "$GITHUB_PATH"
	echo "LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH:-}" >> "$GITHUB_ENV"

	- name: Build
	run: \|
	cmake_arch="${{ matrix.sm }}"
	cmake_arch="${cmake_arch#sm_}"
	cmake -B build -S . \
	-DGGML_CUDA=ON \
	-DCMAKE_CUDA_ARCHITECTURES="${cmake_arch}" \
	-DBUILD_SHARED_LIBS=ON \
	-DGGML_NATIVE=OFF \
	-DGGML_BACKEND_DL=ON \
	-DGGML_OPENMP=OFF \
	-DGGML_STATIC=OFF \
	-DLLAMA_BUILD_BORINGSSL=ON \
	-DCMAKE_BUILD_TYPE=Release \
	${{ env.CMAKE_ARGS }}
	cmake --build build --config Release -j $(nproc)

	- name: Bundle CUDA runtime libraries
	run: \|
	cuda_lib=/usr/local/cuda/lib64
	cp -av ${cuda_lib}/libcudart.so* build/bin/
	cp -av ${cuda_lib}/libcublas.so* build/bin/
	cp -av ${cuda_lib}/libcublasLt.so* build/bin/
	cp -av ${cuda_lib}/libcurand.so* build/bin/
	cp -av ${cuda_lib}/libnvJitLink.so* build/bin/

	- name: Set RPATH for portable distribution
	run: \|
	for f in build/bin/*; do
	[ -f "$f" ] && ! [ -L "$f" ] \|\| continue
	if file "$f" \| grep -q 'ELF'; then
	patchelf --set-rpath '$ORIGIN' "$f"
	fi
	done

	- name: Validate CUDA package contents
	run: \|
	shopt -s nullglob

	required_libs=(
	libcudart.so
	libcublas.so
	libcublasLt.so
	libcurand.so
	libnvJitLink.so
	)
	for lib in "${required_libs[@]}"; do
	matches=(build/bin/${lib}*)
	if [ ${#matches[@]} -eq 0 ]; then
	echo "::error::Missing required CUDA runtime library matching ${lib}*"
	exit 1
	fi
	done

	smoke_bin=""
	if [ -x build/bin/llama-cli ]; then
	smoke_bin=build/bin/llama-cli
	elif [ -x build/bin/llama-server ]; then
	smoke_bin=build/bin/llama-server
	else
	smoke_bin=$(find build/bin -maxdepth 1 -type f -name 'llama-*' -perm -111 \| head -n 1)
	fi

	if [ -z "$smoke_bin" ]; then
	echo "::error::No llama executable found for smoke testing"
	exit 1
	fi

	"$smoke_bin" --version >/dev/null

	for f in build/bin/*; do
	[ -f "$f" ] && ! [ -L "$f" ] \|\| continue
	if ! file "$f" \| grep -q 'ELF'; then
	continue
	fi

	rpath=$(patchelf --print-rpath "$f")
	if [ "$rpath" != '$ORIGIN' ]; then
	echo "::error::Unexpected RPATH '$rpath' for $f"
	exit 1
	fi

	missing=$(ldd "$f" \| awk '/=> not found/ && $1 != "libcuda.so.1" { print }')
	if [ -n "$missing" ]; then
	echo "::error::Unresolved runtime dependencies for $f"
	echo "$missing"
	exit 1
	fi
	done

	- name: Determine tag name
	id: tag
	uses: lemonade-sdk/llama.cpp/.github/actions/get-tag-name@lemonade

	- name: Pack artifacts
	id: pack_artifacts
	run: \|
	cp LICENSE ./build/bin/
	# Stage into a versioned top-level directory so extraction lands in a
	# single llama-<tag>/ folder, matching the ROCm Linux tarball layout.
	# Build the directory explicitly rather than via tar --transform, which
	# only rewrites paths when tar preserves the leading ./ on members and
	# so behaves differently across tar versions/runners.
	pkgdir="llama-${{ steps.tag.outputs.name }}"
	mkdir -p "$pkgdir"
	cp -a build/bin/. "$pkgdir/"
	tar -cJf llama-ubuntu-cuda-${{ matrix.sm }}-x64.tar.xz "$pkgdir"

	- name: Upload artifacts
	uses: actions/upload-artifact@v6
	with:
	path: llama-ubuntu-cuda-${{ matrix.sm }}-x64.tar.xz
	name: llama-ubuntu-cuda-${{ matrix.sm }}-x64.tar.xz

	ubuntu-22-cuda-arm64:
	runs-on: ubuntu-22.04-arm

	strategy:
	fail-fast: false
	matrix:
	# On PRs only build one representative arch (packaging logic is identical
	# across all sm_*); build the full matrix on schedule/dispatch.
	sm: ${{ github.event_name == 'pull_request' && fromJSON('["sm_89"]') \|\| fromJSON('["sm_75", "sm_80", "sm_86", "sm_89", "sm_90", "sm_100", "sm_120", "sm_121"]') }}

	steps:
	- name: Clone
	id: checkout
	uses: actions/checkout@v6
	with:
	fetch-depth: 0
	repository: 'ggml-org/llama.cpp'

	- name: ccache
	uses: ggml-org/ccache-action@v1.2.21
	with:
	key: ubuntu-cuda-arm64-${{ matrix.sm }}
	evict-old-files: 1d

	- name: Free disk space
	run: \|
	sudo apt-get remove -y '^aspnetcore-.' '^dotnet-.' '^llvm-.' 'php.' 'ruby.*' \
	google-cloud-cli azure-cli google-chrome-stable firefox powershell 2>/dev/null \|\| true
	sudo apt-get autoremove -y
	df -h

	- name: Install CUDA Toolkit
	run: \|
	wget -q https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/sbsa/cuda-keyring_1.1-1_all.deb
	sudo dpkg -i cuda-keyring_1.1-1_all.deb
	sudo apt-get update
	sudo apt-get install -y cuda-toolkit-12-9 cmake ninja-build patchelf

	- name: Set CUDA environment
	run: \|
	echo "CUDA_PATH=/usr/local/cuda" >> "$GITHUB_ENV"
	echo "/usr/local/cuda/bin" >> "$GITHUB_PATH"
	echo "LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH:-}" >> "$GITHUB_ENV"

	- name: Build
	run: \|
	cmake_arch="${{ matrix.sm }}"
	cmake_arch="${cmake_arch#sm_}"
	cmake -B build -S . \
	-DGGML_CUDA=ON \
	-DCMAKE_CUDA_ARCHITECTURES="${cmake_arch}" \
	-DBUILD_SHARED_LIBS=ON \
	-DGGML_NATIVE=OFF \
	-DGGML_BACKEND_DL=ON \
	-DGGML_OPENMP=OFF \
	-DGGML_STATIC=OFF \
	-DLLAMA_BUILD_BORINGSSL=ON \
	-DCMAKE_BUILD_TYPE=Release \
	${{ env.CMAKE_ARGS }}
	cmake --build build --config Release -j $(nproc)

	- name: Bundle CUDA runtime libraries
	run: \|
	cuda_lib=/usr/local/cuda/lib64
	cp -av ${cuda_lib}/libcudart.so* build/bin/
	cp -av ${cuda_lib}/libcublas.so* build/bin/
	cp -av ${cuda_lib}/libcublasLt.so* build/bin/
	cp -av ${cuda_lib}/libcurand.so* build/bin/
	cp -av ${cuda_lib}/libnvJitLink.so* build/bin/

	- name: Set RPATH for portable distribution
	run: \|
	for f in build/bin/*; do
	[ -f "$f" ] && ! [ -L "$f" ] \|\| continue
	if file "$f" \| grep -q 'ELF'; then
	patchelf --set-rpath '$ORIGIN' "$f"
	fi
	done

	- name: Validate CUDA package contents
	run: \|
	shopt -s nullglob

	required_libs=(
	libcudart.so
	libcublas.so
	libcublasLt.so
	libcurand.so
	libnvJitLink.so
	)
	for lib in "${required_libs[@]}"; do
	matches=(build/bin/${lib}*)
	if [ ${#matches[@]} -eq 0 ]; then
	echo "::error::Missing required CUDA runtime library matching ${lib}*"
	exit 1
	fi
	done

	smoke_bin=""
	if [ -x build/bin/llama-cli ]; then
	smoke_bin=build/bin/llama-cli
	elif [ -x build/bin/llama-server ]; then
	smoke_bin=build/bin/llama-server
	else
	smoke_bin=$(find build/bin -maxdepth 1 -type f -name 'llama-*' -perm -111 \| head -n 1)
	fi

	if [ -z "$smoke_bin" ]; then
	echo "::error::No llama executable found for smoke testing"
	exit 1
	fi

	"$smoke_bin" --version >/dev/null

	for f in build/bin/*; do
	[ -f "$f" ] && ! [ -L "$f" ] \|\| continue
	if ! file "$f" \| grep -q 'ELF'; then
	continue
	fi

	rpath=$(patchelf --print-rpath "$f")
	if [ "$rpath" != '$ORIGIN' ]; then
	echo "::error::Unexpected RPATH '$rpath' for $f"
	exit 1
	fi

	missing=$(ldd "$f" \| awk '/=> not found/ && $1 != "libcuda.so.1" { print }')
	if [ -n "$missing" ]; then
	echo "::error::Unresolved runtime dependencies for $f"
	echo "$missing"
	exit 1
	fi
	done

	- name: Determine tag name
	id: tag
	uses: lemonade-sdk/llama.cpp/.github/actions/get-tag-name@lemonade

	- name: Pack artifacts
	id: pack_artifacts
	run: \|
	cp LICENSE ./build/bin/
	# Stage into a versioned top-level directory so extraction lands in a
	# single llama-<tag>/ folder, matching the ROCm Linux tarball layout.
	# Build the directory explicitly rather than via tar --transform, which
	# only rewrites paths when tar preserves the leading ./ on members and
	# so behaves differently across tar versions/runners.
	pkgdir="llama-${{ steps.tag.outputs.name }}"
	mkdir -p "$pkgdir"
	cp -a build/bin/. "$pkgdir/"
	tar -cJf llama-ubuntu-cuda-${{ matrix.sm }}-arm64.tar.xz "$pkgdir"

	- name: Upload artifacts
	uses: actions/upload-artifact@v6
	with:
	path: llama-ubuntu-cuda-${{ matrix.sm }}-arm64.tar.xz
	name: llama-ubuntu-cuda-${{ matrix.sm }}-arm64.tar.xz

	ubuntu-22-openvino:
	runs-on: ubuntu-22.04

	env:
	OPENVINO_VERSION_MAJOR: "2026.0"
	OPENVINO_VERSION_FULL: "2026.0.0.20965.c6d6a13a886"

	steps:
	- name: Clone
	id: checkout
	uses: actions/checkout@v6
	with:
	fetch-depth: 0
	repository: 'ggml-org/llama.cpp'

	- name: ccache
	uses: ggml-org/ccache-action@v1.2.21
	with:
	key: ubuntu-openvino
	evict-old-files: 1d

	- name: Dependencies
	run: \|
	sudo apt-get install -y build-essential cmake ninja-build patchelf \
	python3-pip libtbb12 \
	ocl-icd-opencl-dev opencl-headers opencl-clhpp-headers intel-opencl-icd

	- name: Setup OpenVINO
	uses: lemonade-sdk/llama.cpp/.github/actions/linux-setup-openvino@lemonade
	with:
	path: ${{ github.workspace }}/openvino
	version_major: ${{ env.OPENVINO_VERSION_MAJOR }}
	version_full: ${{ env.OPENVINO_VERSION_FULL }}

	- name: Install OpenVINO dependencies
	run: \|
	chmod +x ${{ github.workspace }}/openvino/install_dependencies/install_openvino_dependencies.sh
	echo "Y" \| sudo -E ${{ github.workspace }}/openvino/install_dependencies/install_openvino_dependencies.sh

	- name: Build
	run: \|
	source ${{ github.workspace }}/openvino/setupvars.sh
	cmake -B build -S . \
	-DGGML_OPENVINO=ON \
	-DBUILD_SHARED_LIBS=ON \
	-DGGML_NATIVE=OFF \
	-DGGML_BACKEND_DL=ON \
	-DGGML_OPENMP=OFF \
	-DGGML_STATIC=OFF \
	-DLLAMA_BUILD_BORINGSSL=ON \
	-DCMAKE_BUILD_TYPE=Release \
	${{ env.CMAKE_ARGS }}
	cmake --build build --config Release -j $(nproc)

	- name: Bundle OpenVINO runtime libraries
	run: \|
	OV_LIBDIR="${{ github.workspace }}/openvino/runtime/lib/intel64"

	# Core runtime and all plugins
	cp -av "$OV_LIBDIR"/libopenvino.so build/bin/

	# TBB threading library bundled with OpenVINO (skip libtbbbind — needs libhwloc)
	TBB_DIR="${{ github.workspace }}/openvino/runtime/3rdparty/tbb/lib"
	if [ -d "$TBB_DIR" ]; then
	cp -av "$TBB_DIR"/libtbb.so* build/bin/
	cp -av "$TBB_DIR"/libtbbmalloc.so* build/bin/
	fi

	- name: Set RPATH for portable distribution
	run: \|
	for f in build/bin/*; do
	[ -f "$f" ] && ! [ -L "$f" ] \|\| continue
	if file "$f" \| grep -q 'ELF'; then
	patchelf --set-rpath '$ORIGIN' "$f"
	fi
	done

	- name: Validate OpenVINO package contents
	run: \|
	shopt -s nullglob

	required_libs=(libopenvino.so)
	for lib in "${required_libs[@]}"; do
	matches=(build/bin/${lib}*)
	if [ ${#matches[@]} -eq 0 ]; then
	echo "::error::Missing required OpenVINO runtime library matching ${lib}*"
	exit 1
	fi
	done

	smoke_bin=""
	if [ -x build/bin/llama-server ]; then
	smoke_bin=build/bin/llama-server
	else
	smoke_bin=$(find build/bin -maxdepth 1 -type f -name 'llama-*' -perm -111 \| head -n 1)
	fi
	if [ -z "$smoke_bin" ]; then
	echo "::error::No llama executable found for smoke testing"
	exit 1
	fi
	"$smoke_bin" --version >/dev/null

	for f in build/bin/*; do
	[ -f "$f" ] && ! [ -L "$f" ] \|\| continue
	if ! file "$f" \| grep -q 'ELF'; then
	continue
	fi

	rpath=$(patchelf --print-rpath "$f")
	if [ "$rpath" != '$ORIGIN' ]; then
	echo "::error::Unexpected RPATH '$rpath' for $f"
	exit 1
	fi

	# libOpenCL is optional (Intel GPU acceleration); skip it
	missing=$(ldd "$f" \| awk '/=> not found/ && $1 != "libOpenCL.so.1" { print }')
	if [ -n "$missing" ]; then
	echo "::error::Unresolved runtime dependencies for $f"
	echo "$missing"
	exit 1
	fi
	done

	- name: Determine tag name
	id: tag
	uses: lemonade-sdk/llama.cpp/.github/actions/get-tag-name@lemonade

	- name: Pack artifacts
	run: \|
	cp LICENSE ./build/bin/
	tar -czvf llama-bin-ubuntu-openvino-${{ env.OPENVINO_VERSION_MAJOR }}-x64.tar.gz \
	--transform "s,./,llama-${{ steps.tag.outputs.name }}/," \
	-C ./build/bin .

	- name: Upload artifacts
	uses: actions/upload-artifact@v6
	with:
	path: llama-bin-ubuntu-openvino-${{ env.OPENVINO_VERSION_MAJOR }}-x64.tar.gz
	name: llama-bin-ubuntu-openvino-${{ env.OPENVINO_VERSION_MAJOR }}-x64.tar.gz

	windows-cpu:
	runs-on: windows-2025

	strategy:
	matrix:
	include:
	- arch: 'x64'

	steps:
	- name: Clone
	id: checkout
	uses: actions/checkout@v6
	with:
	fetch-depth: 0
	repository: 'ggml-org/llama.cpp'

	- name: ccache
	uses: ggml-org/ccache-action@v1.2.21
	with:
	key: windows-latest-cpu-${{ matrix.arch }}
	variant: ccache
	evict-old-files: 1d

	- name: Install Ninja
	run: \|
	choco install ninja

	- name: Build
	shell: cmd
	run: \|
	call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'x64' && 'x64' \|\| 'amd64_arm64' }}
	cmake -S . -B build -G "Ninja Multi-Config" ^
	-D CMAKE_TOOLCHAIN_FILE=cmake/${{ matrix.arch }}-windows-llvm.cmake ^
	-DLLAMA_BUILD_BORINGSSL=ON ^
	-DGGML_NATIVE=OFF ^
	-DGGML_BACKEND_DL=ON ^
	-DGGML_CPU_ALL_VARIANTS=${{ matrix.arch == 'x64' && 'ON' \|\| 'OFF' }} ^
	-DGGML_OPENMP=ON ^
	${{ env.CMAKE_ARGS }}
	cmake --build build --config Release

	- name: Pack artifacts
	id: pack_artifacts
	shell: pwsh
	run: \|
	$archSuffix = if ('${{ matrix.arch }}' -eq 'x64') { 'x86_64' } else { 'aarch64' }
	$vswhere = Join-Path ${env:ProgramFiles(x86)} "Microsoft Visual Studio\Installer\vswhere.exe"
	$searchRoots = @()
	if (Test-Path $vswhere) {
	$searchRoots += & $vswhere -all -products * -property installationPath 2>$null \|
	ForEach-Object { Join-Path $_ "VC\Redist\MSVC" } \|
	Where-Object { Test-Path $_ }
	}
	if (-not $searchRoots) {
	$searchRoots = @("C:\Program Files\Microsoft Visual Studio", "${env:ProgramFiles(x86)}\Microsoft Visual Studio")
	}
	$ompDll = $searchRoots \|
	ForEach-Object { Get-ChildItem $_ -Recurse -File -Filter "libomp140.$archSuffix.dll" -ErrorAction SilentlyContinue } \|
	Where-Object { $_.FullName -like "\debug_nonredist\${{ matrix.arch }}\" } \|
	Sort-Object FullName -Descending \|
	Select-Object -First 1
	if (-not $ompDll) { throw "Could not locate libomp140.$archSuffix.dll under: $($searchRoots -join ', ')" }
	Copy-Item $ompDll.FullName .\build\bin\Release\
	7z a -snl llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*

	- name: Upload artifacts
	uses: actions/upload-artifact@v6
	with:
	path: llama-bin-win-cpu-${{ matrix.arch }}.zip
	name: llama-bin-win-cpu-${{ matrix.arch }}.zip

	windows-rocm:
	runs-on: windows-2022

	strategy:
	matrix:
	include:
	- ROCM_VERSION: "7.13.0"
	gpu_targets: "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1010;gfx1011;gfx1012;gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036;gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1152;gfx1200;gfx1201"
	build: x64

	steps:
	- name: Clone
	id: checkout
	uses: actions/checkout@v6
	with:
	fetch-depth: 0
	repository: 'ggml-org/llama.cpp'

	- name: Cache ROCm Installation
	id: cache-rocm
	uses: actions/cache@v4
	with:
	path: C:\TheRock\build
	key: rocm-${{ matrix.ROCM_VERSION }}-gfx1151-${{ runner.os }}

	- name: ccache
	uses: ggml-org/ccache-action@v1.2.16
	with:
	key: windows-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}
	evict-old-files: 1d

	- name: Install ROCm
	if: steps.cache-rocm.outputs.cache-hit != 'true'
	run: \|
	$ErrorActionPreference = "Stop"
	write-host "Downloading AMD ROCm ${{ matrix.ROCM_VERSION }} tarball"
	Invoke-WebRequest -Uri "https://repo.amd.com/rocm/tarball/therock-dist-windows-gfx1151-${{ matrix.ROCM_VERSION }}.tar.gz" -OutFile "${env:RUNNER_TEMP}\rocm.tar.gz"
	write-host "Extracting ROCm tarball"
	mkdir C:\TheRock\build -Force
	tar -xzf "${env:RUNNER_TEMP}\rocm.tar.gz" -C C:\TheRock\build --strip-components=1
	write-host "Completed ROCm extraction"

	- name: Setup ROCm Environment
	run: \|
	$rocmPath = "C:\TheRock\build"
	echo "HIP_PATH=$rocmPath" >> $env:GITHUB_ENV
	echo "HIP_DEVICE_LIB_PATH=$rocmPath\lib\llvm\amdgcn\bitcode" >> $env:GITHUB_ENV
	echo "HIP_PLATFORM=amd" >> $env:GITHUB_ENV
	echo "LLVM_PATH=$rocmPath\lib\llvm" >> $env:GITHUB_ENV
	echo "$rocmPath\bin" >> $env:GITHUB_PATH
	echo "$rocmPath\lib\llvm\bin" >> $env:GITHUB_PATH

	- name: Build
	run: \|
	mkdir build
	cd build
	cmake .. `
	-G "Unix Makefiles" `
	-DCMAKE_PREFIX_PATH="${env:HIP_PATH}" `
	-DCMAKE_BUILD_TYPE=Release `
	-DGGML_BACKEND_DL=ON `
	-DGGML_NATIVE=OFF `
	-DGGML_CPU=ON `
	-DGGML_CPU_ALL_VARIANTS=ON `
	-DGGML_HIP=ON `
	-DCMAKE_C_COMPILER="${env:HIP_PATH}\lib\llvm\bin\clang.exe" `
	-DCMAKE_CXX_COMPILER="${env:HIP_PATH}\lib\llvm\bin\clang++.exe" `
	-DCMAKE_C_FLAGS="-Wno-error=incompatible-pointer-types" `
	-DCMAKE_HIP_COMPILER="${env:HIP_PATH}\lib\llvm\bin\clang.exe" `
	-DHIP_PATH="${env:HIP_PATH}" `
	-DGGML_HIP_ROCWMMA_FATTN=ON `
	-DAMDGPU_TARGETS="${{ matrix.gpu_targets }}"
	cmake --build . --config Release --parallel ${env:NUMBER_OF_PROCESSORS}

	- name: Verify HIP backend was built
	run: \|
	$hipDll = Get-ChildItem -Path build\bin -Filter "ggml-hip*.dll" -ErrorAction SilentlyContinue
	if (-not $hipDll) {
	Write-Host "##[error]ggml-hip*.dll was NOT produced. The HIP backend silently failed to build."
	Write-Host "Contents of build\bin:"
	Get-ChildItem build\bin \| Format-Table -AutoSize
	exit 1
	}
	Write-Host "HIP backend artifact found:"
	$hipDll \| Format-Table FullName, Length -AutoSize

	- name: Determine tag name
	id: tag
	uses: lemonade-sdk/llama.cpp/.github/actions/get-tag-name@lemonade

	- name: Get ROCm short version
	run: \|
	$rocmVersionShort = ('${{ matrix.ROCM_VERSION }}'.Split('.')[0..1] -join '.')
	echo "ROCM_VERSION_SHORT=$rocmVersionShort" >> $env:GITHUB_ENV

	- name: Pack artifacts
	run: \|
	cp "LICENSE" "build\bin\"
	7z a -snl llama-bin-win-rocm-${{ env.ROCM_VERSION_SHORT }}-${{ matrix.build }}.zip .\build\bin\*

	- name: Upload artifacts
	uses: actions/upload-artifact@v6
	with:
	path: llama-bin-win-rocm-${{ env.ROCM_VERSION_SHORT }}-${{ matrix.build }}.zip
	name: llama-bin-win-rocm-${{ env.ROCM_VERSION_SHORT }}-${{ matrix.build }}.zip

	windows-cuda:
	runs-on: windows-2022
	needs:
	- windows-cpu

	strategy:
	fail-fast: false
	matrix:
	# On PRs only build one representative arch (packaging logic is identical
	# across all sm_*); build the full matrix on schedule/dispatch.
	sm: ${{ github.event_name == 'pull_request' && fromJSON('["sm_89"]') \|\| fromJSON('["sm_75", "sm_80", "sm_86", "sm_89", "sm_90", "sm_100", "sm_120", "sm_121"]') }}

	steps:
	- name: Clone
	id: checkout
	uses: actions/checkout@v6
	with:
	fetch-depth: 0
	repository: 'ggml-org/llama.cpp'

	- name: Install CUDA Toolkit
	uses: Jimver/cuda-toolkit@v0.2.35
	with:
	cuda: '12.9.0'
	method: 'network'
	sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "curand", "nvjitlink", "thrust", "visual_studio_integration"]'

	- name: ccache
	uses: ggml-org/ccache-action@v1.2.21
	with:
	key: windows-cuda-${{ matrix.sm }}
	variant: ccache
	evict-old-files: 1d

	- name: Install Ninja
	run: choco install ninja

	- name: Build
	shell: cmd
	run: \|
	call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
	set sm=${{ matrix.sm }}
	set cmake_arch=%sm:sm_=%
	cmake -S . -B build -G "Ninja Multi-Config" ^
	-DGGML_CUDA=ON ^
	-DCMAKE_CUDA_ARCHITECTURES=%cmake_arch% ^
	-DGGML_NATIVE=OFF ^
	-DGGML_BACKEND_DL=ON ^
	-DLLAMA_BUILD_BORINGSSL=ON ^
	${{ env.CMAKE_ARGS }}
	cmake --build build --config Release

	- name: Download CPU backend artifact
	uses: actions/download-artifact@v7
	with:
	name: llama-bin-win-cpu-x64.zip
	path: .\cpu-artifact

	- name: Pack artifacts
	run: \|
	$releaseDir = '.\build\bin\Release'
	$cpuArchive = '.\cpu-artifact\llama-bin-win-cpu-x64.zip'
	$cudaBin = Join-Path $env:CUDA_PATH 'bin'
	$cpuExtractDir = Join-Path $env:RUNNER_TEMP "cpu-backend-${{ matrix.sm }}"

	if (-not (Test-Path $cpuArchive)) {
	throw "Missing required CPU backend artifact: $cpuArchive"
	}
	if (Test-Path $cpuExtractDir) {
	Remove-Item $cpuExtractDir -Recurse -Force
	}
	New-Item -ItemType Directory -Path $cpuExtractDir \| Out-Null
	Expand-Archive -Path $cpuArchive -DestinationPath $cpuExtractDir -Force
	Copy-Item (Join-Path $cpuExtractDir '*') $releaseDir -Recurse -Force

	$runtimeDllPatterns = @(
	'cudart64_*.dll',
	'cublas64_*.dll',
	'cublasLt64_*.dll',
	'curand64_*.dll',
	'nvJitLink_*.dll'
	)
	foreach ($pattern in $runtimeDllPatterns) {
	$dll = Get-ChildItem -Path $cudaBin -Filter $pattern \| Sort-Object Name -Descending \| Select-Object -First 1
	if (-not $dll) {
	throw "Missing CUDA runtime DLL matching $pattern in $cudaBin"
	}
	Copy-Item $dll.FullName $releaseDir
	}
	Copy-Item LICENSE $releaseDir

	$cudaDll = Get-ChildItem -Path $releaseDir -Filter "ggml-cuda*.dll" -ErrorAction SilentlyContinue
	if (-not $cudaDll) {
	Write-Host "##[error]ggml-cuda*.dll was NOT produced in the final package layout."
	Write-Host "Contents of ${releaseDir}:"
	Get-ChildItem $releaseDir \| Format-Table -AutoSize
	exit 1
	}

	foreach ($pattern in $runtimeDllPatterns) {
	$dll = Get-ChildItem -Path $releaseDir -Filter $pattern \| Select-Object -First 1
	if (-not $dll) {
	throw "Missing staged CUDA runtime DLL matching $pattern in $releaseDir"
	}
	}

	$smokeExe = $null
	foreach ($name in @('llama-cli.exe', 'llama-server.exe')) {
	$candidate = Get-ChildItem -Path $releaseDir -Filter $name -ErrorAction SilentlyContinue \| Select-Object -First 1
	if ($candidate) {
	$smokeExe = $candidate
	break
	}
	}
	if (-not $smokeExe) {
	$smokeExe = Get-ChildItem -Path $releaseDir -Filter 'llama-*.exe' -ErrorAction SilentlyContinue \| Sort-Object Name \| Select-Object -First 1
	}
	if (-not $smokeExe) {
	throw "No llama executable found for staged Windows CUDA smoke test"
	}

	Push-Location $releaseDir
	try {
	& ".\$($smokeExe.Name)" --version \| Out-Null
	if ($LASTEXITCODE -ne 0) {
	throw "Smoke test failed for $($smokeExe.Name) with exit code $LASTEXITCODE"
	}
	}
	finally {
	Pop-Location
	}

	7z a -snl llama-windows-cuda-${{ matrix.sm }}-x64.7z "$releaseDir\\*"

	- name: Upload artifacts
	uses: actions/upload-artifact@v6
	with:
	path: llama-windows-cuda-${{ matrix.sm }}-x64.7z
	name: llama-windows-cuda-${{ matrix.sm }}-x64.7z

	release:
	if: ${{ github.event_name == 'schedule' \|\| github.event.inputs.create_release == 'true' }}

	# Fine-grant permission
	# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
	permissions:
	contents: write # for creating release

	runs-on: ubuntu-slim

	needs:
	- windows-cpu
	- windows-rocm
	- windows-cuda
	- ubuntu-22-rocm
	- ubuntu-22-cuda
	- ubuntu-22-cuda-arm64
	- ubuntu-22-openvino

	steps:
	- name: Clone
	id: checkout
	uses: actions/checkout@v6
	with:
	fetch-depth: 0
	repository: 'ggml-org/llama.cpp'

	- name: Determine tag name
	id: tag
	uses: lemonade-sdk/llama.cpp/.github/actions/get-tag-name@lemonade

	- name: Download artifacts
	id: download-artifact
	uses: actions/download-artifact@v7
	with:
	path: ./artifact
	merge-multiple: true

	- name: Move artifacts
	id: move_artifacts
	run: \|
	mkdir -p release

	echo "Adding CPU backend files to Windows ZIP archives..."
	for arch in x64; do
	cpu_zip="artifact/llama-bin-win-cpu-${arch}.zip"
	if [ ! -f "$cpu_zip" ]; then
	echo "::error::Missing required CPU artifact $cpu_zip"
	exit 1
	fi
	temp_dir=$(mktemp -d)
	echo "Extracting CPU backend for $arch..."
	unzip "$cpu_zip" -d "$temp_dir"

	echo "Adding CPU files to $arch zips..."
	for target_zip in artifact/llama-bin-win-*-${arch}.zip; do
	if [[ "$target_zip" == "$cpu_zip" ]]; then
	continue
	fi
	echo "Adding CPU backend to $(basename "$target_zip")"
	realpath_target_zip=$(realpath "$target_zip")
	(cd "$temp_dir" && zip -r "$realpath_target_zip" .)
	done

	rm -rf "$temp_dir"
	done

	echo "Renaming and moving zips to release..."
	for zip_file in artifact/llama-bin-win-*.zip; do
	base_name=$(basename "$zip_file" .zip)
	zip_name="llama-${{ steps.tag.outputs.name }}-${base_name#llama-}.zip"
	echo "Moving $zip_file to release/$zip_name"
	mv "$zip_file" "release/$zip_name"
	done

	echo "Renaming and moving tar.gz files to release..."
	for tar_file in artifact/*.tar.gz; do
	base_name=$(basename "$tar_file" .tar.gz)
	tar_name="llama-${{ steps.tag.outputs.name }}-${base_name#llama-}.tar.gz"
	echo "Moving $tar_file to release/$tar_name"
	mv "$tar_file" "release/$tar_name"
	done

	echo "Renaming and moving CUDA tar.xz artifacts to release..."
	for tar_file in artifact/llama-ubuntu-cuda-*.tar.xz; do
	[ -f "$tar_file" ] \|\| continue
	base_name=$(basename "$tar_file" .tar.xz)
	tar_name="llama-${{ steps.tag.outputs.name }}-${base_name#llama-}.tar.xz"
	echo "Moving $tar_file to release/$tar_name"
	mv "$tar_file" "release/$tar_name"
	done

	echo "Renaming and moving CUDA .7z artifacts to release..."
	for z_file in artifact/llama-windows-cuda-*.7z; do
	[ -f "$z_file" ] \|\| continue
	base_name=$(basename "$z_file" .7z)
	z_name="llama-${{ steps.tag.outputs.name }}-${base_name#llama-}.7z"
	echo "Moving $z_file to release/$z_name"
	mv "$z_file" "release/$z_name"
	done

	- name: Determine release summary
	id: release_summary
	run: \|
	if [[ "${{ github.event_name }}" == "schedule" ]]; then
	echo "value=Nightly release for ${{ github.sha }}" >> "$GITHUB_OUTPUT"
	else
	echo "value=$(git log -1 --pretty=%s)" >> "$GITHUB_OUTPUT"
	fi

	- name: Check release artifacts
	run: \|
	files=$(find ./release -maxdepth 1 $ -name '.zip' -o -name '.tar.gz' -o -name '.tar.xz' -o -name '.7z' $ 2>/dev/null \| wc -l)
	if [ "$files" -eq 0 ]; then
	echo "No release artifacts found in ./release — aborting before creating a release."
	exit 1
	fi
	echo "Found $files artifact(s) ready to upload."

	# Get the release for this tag, creating it if it does not exist yet.
	# This is idempotent: if a previous run created the release but failed
	# partway through uploading (e.g. a transient GitHub outage), re-running
	# reuses the existing release so the upload step below can fill the gaps.
	- name: Create or get release
	id: release
	uses: actions/github-script@v8
	env:
	TAG_NAME: ${{ steps.tag.outputs.name }}
	RELEASE_SUMMARY: ${{ steps.release_summary.outputs.value }}
	with:
	github-token: ${{ secrets.GITHUB_TOKEN }}
	script: \|
	const tag = process.env.TAG_NAME;
	const { owner, repo } = context.repo;
	let release;
	try {
	release = (await github.rest.repos.getReleaseByTag({ owner, repo, tag })).data;
	core.info(`Reusing existing release ${tag} (id ${release.id})`);
	} catch (e) {
	if (e.status !== 404) throw e;
	const body = [
	'<details open>',
	'',
	process.env.RELEASE_SUMMARY,
	'',
	'</details>',
	'',
	'Linux:',
	`- [Ubuntu x64 (ROCm 7.13)](https://github.com/${owner}/${repo}/releases/download/${tag}/llama-${tag}-bin-ubuntu-rocm-7.13-x64.tar.gz)`,
	'- Ubuntu x64 (CUDA): `llama-' + tag + '-ubuntu-cuda-sm_XX-x64.tar.xz` (replace XX with your GPU compute capability)',
	'- Ubuntu arm64 (CUDA): `llama-' + tag + '-ubuntu-cuda-sm_XX-arm64.tar.xz` (replace XX with your GPU compute capability)',
	`- [Ubuntu x64 (OpenVINO 2026.0)](https://github.com/${owner}/${repo}/releases/download/${tag}/llama-${tag}-bin-ubuntu-openvino-2026.0-x64.tar.gz)`,
	'',
	'Windows:',
	`- [Windows x64 (ROCm 7.13)](https://github.com/${owner}/${repo}/releases/download/${tag}/llama-${tag}-bin-win-rocm-7.13-x64.zip)`,
	'- Windows x64 (CUDA): `llama-' + tag + '-windows-cuda-sm_XX-x64.7z` (replace XX with your GPU compute capability)',
	].join('\n');
	try {
	release = (await github.rest.repos.createRelease({ owner, repo, tag_name: tag, body })).data;
	core.info(`Created release ${tag} (id ${release.id})`);
	} catch (createError) {
	if (createError.status !== 422) throw createError;
	release = (await github.rest.repos.getReleaseByTag({ owner, repo, tag })).data;
	core.info(`Reusing concurrently created release ${tag} (id ${release.id})`);
	}
	}
	core.setOutput('id', release.id);

	- name: Upload release
	id: upload_release
	uses: actions/github-script@v8
	env:
	RELEASE_ID: ${{ steps.release.outputs.id }}
	with:
	github-token: ${{secrets.GITHUB_TOKEN}}
	script: \|
	const fs = require('fs');
	const { owner, repo } = context.repo;
	const release_id = Number(process.env.RELEASE_ID);

	const sleep = (ms) => new Promise((r) => setTimeout(r, ms));

	// Retry transient failures (network errors, 5xx, rate limits) with
	// exponential backoff so a momentary GitHub blip does not abort the
	// whole upload and leave the release partially populated.
	async function withRetry(label, fn, attempts = 5) {
	for (let i = 1; ; i++) {
	try {
	return await fn();
	} catch (e) {
	const transient = !e.status \|\| e.status >= 500 \|\| e.status === 429;
	if (i >= attempts \|\| !transient) throw e;
	const delay = Math.min(30000, 1000 * 2 ** (i - 1));
	core.warning(`${label} failed (attempt ${i}/${attempts}): ${e.message}. Retrying in ${delay}ms`);
	await sleep(delay);
	}
	}
	}

	// Assets already attached (from an earlier partial run). GitHub only
	// creates an asset once its upload completes, so anything listed here
	// is intact and can be skipped — re-runs only fill the gaps.
	const existing = new Set();
	for (const a of await github.paginate(github.rest.repos.listReleaseAssets, { owner, repo, release_id })) {
	if (a.state === 'uploaded') {
	existing.add(a.name);
	} else {
	core.warning(`deleting incomplete asset ${a.name} (${a.state})`);
	await github.rest.repos.deleteReleaseAsset({ owner, repo, asset_id: a.id });
	}
	}

	const files = fs.readdirSync('./release').filter((f) =>
	f.endsWith('.zip') \|\| f.endsWith('.tar.gz') \|\| f.endsWith('.tar.xz') \|\| f.endsWith('.7z'));

	let uploaded = 0;
	let skipped = 0;
	for (const file of files) {
	if (existing.has(file)) {
	core.info(`skipping ${file} (already uploaded)`);
	skipped++;
	continue;
	}
	const data = fs.readFileSync(`./release/${file}`);
	await withRetry(`upload ${file}`, () =>
	github.rest.repos.uploadReleaseAsset({ owner, repo, release_id, name: file, data })
	.catch(async (e) => {
	if (e.status !== 422) throw e;
	const assets = await github.paginate(github.rest.repos.listReleaseAssets, { owner, repo, release_id });
	const asset = assets.find((a) => a.name === file);
	if (asset?.state === 'uploaded') {
	core.info(`accepting ${file} (already uploaded)`);
	return;
	}
	if (asset) {
	core.warning(`deleting conflicting asset ${file} (${asset.state})`);
	await github.rest.repos.deleteReleaseAsset({ owner, repo, asset_id: asset.id });
	throw new Error(`retrying upload for ${file} after deleting conflicting asset`);
	}
	throw e;
	}));
	core.info(`uploaded ${file}`);
	uploaded++;
	}

	core.info(`Done: ${uploaded} uploaded, ${skipped} already present, ${files.length} total.`);

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Release #240

Workflow file

Release #240

Uh oh!

Workflow file for this run