Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
141 commits
Select commit Hold shift + click to select a range
cbd41af
HSA backend initialization (#3)
ypapadop-amd Dec 4, 2024
e39d0d2
Adding agent information to internal datastructures (#4)
ypapadop-amd Dec 4, 2024
d6e92d7
Memory pool information and buffer allocation support (#5)
ypapadop-amd Dec 5, 2024
d3fb964
HSA tensor data functions (#6)
ypapadop-amd Dec 5, 2024
f988b22
Support in HSA backend for small examples support (#7)
ypapadop-amd Dec 6, 2024
0020fa1
Fix for correctly choosing memories and reporting size (#8)
ypapadop-amd Dec 9, 2024
78ed6e9
HSA backend function implementations (#9)
ypapadop-amd Dec 12, 2024
388f71f
More HSA backend function implementations (#10)
ypapadop-amd Dec 16, 2024
69afc42
HSA operations fallback to CPU (#11)
ypapadop-amd Dec 18, 2024
7fb5f13
HSA queue support (#12)
ypapadop-amd Jan 2, 2025
9eec22f
Use GGML_ABORT instead of abort() (#13)
ypapadop-amd Jan 3, 2025
660a832
Support for CPY kernel (#14)
ypapadop-amd Jan 21, 2025
7045076
CPU backend fallback (#15)
ypapadop-amd Jan 24, 2025
a17ed35
Replacing mutex and bool with call_once (#17)
ypapadop-amd Feb 21, 2025
091ed13
Support for loading pre-compiled NPU kernels (#18)
ypapadop-amd Mar 4, 2025
d6759cf
Exception and error handling (#25)
ypapadop-amd Mar 4, 2025
e7c41d0
Clang format support (#26)
ypapadop-amd Mar 4, 2025
0e3ba5b
Free packet memory after synchronization (#28)
ypapadop-amd Mar 5, 2025
54fcc2d
ggml_backend_buffer_init_tensor support (#29)
ypapadop-amd Mar 5, 2025
4137c2e
Using alias for std::filesystem (#30)
ypapadop-amd Mar 6, 2025
776a787
Matrix multiplication kernel (#31)
ypapadop-amd Mar 18, 2025
5103ca8
Binary instructions format loading (#32)
ypapadop-amd Mar 24, 2025
d1dbf00
Release memory allocated for packets (#33)
ypapadop-amd Mar 24, 2025
80261cf
Directly create PDI instead through xclbin (#34)
ypapadop-amd Mar 26, 2025
5d44e05
HSA tensor extra metadata (#37)
ypapadop-amd Apr 1, 2025
6be1343
Various fixes (#38)
ypapadop-amd Apr 3, 2025
0ec6070
CMake Refactor (#39)
ypapadop-amd Apr 7, 2025
fd4670b
Stand-alone kernel discovery header (#40)
ypapadop-amd Apr 8, 2025
e7c9269
Renaming kernel to the IRON equivalent (#41)
ypapadop-amd Apr 9, 2025
5cd2849
Fix CPU emulation ops (#42)
ypapadop-amd Apr 10, 2025
3fe7192
Miscelaneous cleanups (#43)
ypapadop-amd Apr 17, 2025
69f6b62
Support user kernel directory (#44)
ypapadop-amd Apr 18, 2025
025f550
Using GGML_STATUS_ALLOC_FAILED when allocation has failed (#45)
ypapadop-amd Apr 21, 2025
7aad576
Using correct size for inputs and outputs (#46)
ypapadop-amd Apr 23, 2025
9f1449e
Support non uniform sizes (#47)
ypapadop-amd Apr 23, 2025
2f177e9
JIT compilation (alpha) (#49)
ypapadop-amd Apr 25, 2025
1b96699
Invoking python script via pybind11 (#50)
ypapadop-amd Apr 28, 2025
11256ab
Adding copyright (#51)
ypapadop-amd Apr 28, 2025
546115d
Fixing MLIR-AIE discovery (#52)
ypapadop-amd Apr 29, 2025
a9a42c9
Using find_package(AIE) (#53)
ypapadop-amd Apr 29, 2025
6dcfe59
Cleanups (#54)
ypapadop-amd May 1, 2025
68a0dd9
Using existing ggml_n_dims (#55)
ypapadop-amd May 2, 2025
abb1ddb
JIT for core functions via Peano (#56)
ypapadop-amd May 8, 2025
2bd91ce
JIT cleanups (#57)
ypapadop-amd May 8, 2025
1ddc43f
Fix for CMake arg (#58)
ypapadop-amd May 8, 2025
d34ad2d
Use compiler wrapper in CMake (#59)
ypapadop-amd May 9, 2025
b6ae52c
Flatten IRON kernel support directories (#60)
ypapadop-amd May 9, 2025
98854a7
Vector-vector ops (#61)
ypapadop-amd May 13, 2025
102ca60
Rename files to follow naming scheme (#62)
ypapadop-amd May 13, 2025
6e2386a
Support for clearing kernel cache (#63)
ypapadop-amd May 13, 2025
f82c0e6
Unary op support (#64)
ypapadop-amd May 14, 2025
97f8465
Jit cleanup (#65)
ypapadop-amd May 15, 2025
458127e
Associating core functions with kernels (#66)
ypapadop-amd May 16, 2025
84c9864
Remove redundant tensor (#67)
ypapadop-amd May 20, 2025
dad62f3
Core function refactor (#68)
ypapadop-amd May 22, 2025
ef7a078
Unary ops implementations (#69)
ypapadop-amd May 22, 2025
0d45c60
Flatten tensors (#71)
ypapadop-amd May 28, 2025
ebc07ce
JIT verbose output (#72)
ypapadop-amd May 30, 2025
47f2350
Mulmat fixes (#73)
ypapadop-amd May 30, 2025
00a01a4
Host implementations for CPY, DUP, CONT (#74)
ypapadop-amd Jun 6, 2025
6bd298b
Remove prebuilt kernels (#75)
ypapadop-amd Jun 6, 2025
15fd0ee
Use correct environment variable name (#77)
ypapadop-amd Jun 6, 2025
6e5b71a
Fixes for MUL_MAT (#78)
ypapadop-amd Jun 13, 2025
a2f756d
Moving kernels to per-architecture directory (#79)
ypapadop-amd Jun 13, 2025
14c63e9
JIT refactor (#80)
ypapadop-amd Jun 18, 2025
79ea13b
Various cleanups (#81)
ypapadop-amd Jun 24, 2025
af36b32
Replacing NPU1Col4 with NPU1 (#82)
ypapadop-amd Jun 25, 2025
83b9e67
Remove AIEDevice.npu1_4col (#83)
ypapadop-amd Jun 26, 2025
af39db2
Remove NPU1Col4 (#84)
ypapadop-amd Jun 26, 2025
6f1cd8c
Flatten view support (#85)
ypapadop-amd Jul 1, 2025
1fee6cd
Using HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_REC_GRANULE to detect AI…
ypapadop-amd Jul 1, 2025
9f7e9ac
update iron mm.cc file and includes, modify tile sizes for core funct…
efurst Jul 17, 2025
f7b03da
Mul mat fixes (#89)
ypapadop-amd Jul 24, 2025
c1605a9
Per-device alignment requirements (#90)
ypapadop-amd Jul 24, 2025
a23634a
Fix check if a tensor can be flattened (#91)
ypapadop-amd Jul 24, 2025
3a00562
Tensor view copy (#92)
ypapadop-amd Jul 25, 2025
2b13513
Better errors (#93)
ypapadop-amd Jul 25, 2025
a04fbdb
Updating for latest IRON (#94)
ypapadop-amd Aug 6, 2025
acb4235
IRON environment (#95)
ypapadop-amd Aug 7, 2025
79a8841
Fixing import to IRON dtype_to_str
ypapadop-amd Aug 8, 2025
4633964
Replace CoreFunction with ExternalFunction (#97)
ypapadop-amd Aug 14, 2025
99d281b
Kernel compilation at tensor initialization (#98)
ypapadop-amd Aug 14, 2025
2b8b654
Update mul_mat from IRON (#99)
ypapadop-amd Aug 26, 2025
f438de3
Abstract kernels (#100)
ypapadop-amd Aug 26, 2025
2e43742
Using matmul kernel column major support to avoid temporary tensors (…
ypapadop-amd Aug 27, 2025
738feba
Kernel refactor (#102)
ypapadop-amd Aug 28, 2025
dbfefdc
Relocate core files (#103)
ypapadop-amd Aug 28, 2025
ad653f6
Use datatype and number of elements in tile calculation (#104)
ypapadop-amd Aug 28, 2025
7175613
Remove emulated tensors (#105)
ypapadop-amd Sep 2, 2025
eb6dc29
Updated environment instructions (#106)
ypapadop-amd Sep 3, 2025
3197e47
Convert FP16 to BF16 (#107)
ypapadop-amd Sep 5, 2025
086fcac
Updating IRON environment set-up
ypapadop-amd Sep 6, 2025
cf653c0
Update IRON environment set-up (#108)
ypapadop-amd Sep 6, 2025
03b042a
Adding pytest to Python requirements (#109)
ypapadop-amd Sep 8, 2025
f786ce0
Operations refactor (#110)
ypapadop-amd Sep 17, 2025
1ad2ebd
Aligning tensor sizes for bf16 / int8 / int16 (#111)
ypapadop-amd Sep 18, 2025
6984dee
Return false is_host for HSA memory and refactor error messages (#112)
ypapadop-amd Sep 19, 2025
ae4d230
Adding new ggml_backend_i member (#115)
ypapadop-amd Sep 26, 2025
80f08d5
Run time logging (#116)
ypapadop-amd Sep 29, 2025
6b8761c
Move tests to test/ggml-hsa (#122)
ypapadop-amd Oct 6, 2025
08034a2
Generic iron kernels (#123)
ypapadop-amd Oct 7, 2025
bff14d8
Add GitHub Copilot instructions for ggml-hsa (#133)
Copilot Oct 8, 2025
f7b13d7
Update latest iron (#136)
ypapadop-amd Oct 9, 2025
5ee2e11
Remove core function (#137)
ypapadop-amd Oct 10, 2025
e5fcdd8
Doc and python check updates (#139)
ypapadop-amd Oct 14, 2025
aa87fa6
Adding new unary ops (#141)
ypapadop-amd Oct 23, 2025
61aba4d
Removing define
ypapadop-amd Oct 31, 2025
5234713
Update README on supported NPUs and prerequisites (#144)
ypapadop-amd Oct 31, 2025
9fa9f69
Remove python file registration from C++ (#145)
ypapadop-amd Nov 3, 2025
55c765e
Generic backend kernel support (#146)
ypapadop-amd Nov 5, 2025
0f0e61c
TensorDesc as dataclass (#147)
ypapadop-amd Nov 5, 2025
e41cdd7
Update README on how to compile (#148)
ypapadop-amd Nov 5, 2025
875696a
Pass op_params argument to kernels in compilation
artulab Nov 7, 2025
fc63188
aie2p MAT_MUL (#150)
ypapadop-amd Nov 17, 2025
f9a9e79
Implement SCALE kernel
artulab Nov 17, 2025
785b9d7
Formatting
ypapadop-amd Dec 15, 2025
2faddbd
Bump MLIR-AIE and ROCm versions (#156)
ypapadop-amd Jan 7, 2026
98bb217
Bump mlir-aie to 1.2.0 (#158)
ypapadop-amd Jan 28, 2026
4d9b464
Use correct import for NPU1 / NPU2 (#159)
ypapadop-amd Jan 29, 2026
4f664a4
Implement softmax kernel
artulab Jan 2, 2026
1289236
Vector ops refactoring (#160)
ypapadop-amd Feb 10, 2026
0430723
feat(ggml-hsa): bump mlir-aie dependency from v1.2.0 to v1.2.1 (#163)
ypapadop-amd Feb 12, 2026
f8e2ade
GGML_OP_CLAMP support (#164)
ypapadop-amd Feb 12, 2026
e0a4fa0
Simplify core function compilation (#161)
ypapadop-amd Feb 16, 2026
68e0094
Support kernel names without ggml_op entry (#165)
ypapadop-amd Feb 24, 2026
81d5e73
Fix circular buffer overrun (#166)
ypapadop-amd Feb 27, 2026
823c8fc
Deferred compilation (#167)
ypapadop-amd Mar 6, 2026
8805b6c
ci: add auto-format workflow for ggml-hsa directory (#168)
ypapadop-amd Mar 6, 2026
97678cc
Fallback to data memory if kernarg is not available (#169)
ypapadop-amd Mar 7, 2026
10b9e33
Add scalar ARGMAX kernel for HSA backend (#171)
ypapadop-amd Mar 7, 2026
3f6a774
feat(ggml-hsa): add COUNT_EQUAL operation kernel (#173)
ypapadop-amd Mar 9, 2026
71a20bd
Implement CROSS_ENTROPY_LOSS, and improve vec_exp in SOFT_MAX
artulab Mar 6, 2026
56a41cb
Auto-format code in src/ggml-hsa
github-actions[bot] Mar 9, 2026
aa3df92
Vector op broadcast (#174)
ypapadop-amd Mar 9, 2026
946dc10
Code documentation (#176)
ypapadop-amd Mar 9, 2026
9737fac
feat(hsa): add null pointer assertions in AIE kernel dispatch (#178)
ypapadop-amd Mar 10, 2026
ece5579
refactor(hsa): move scalar math functions to shared header
ypapadop-amd Mar 9, 2026
b448427
feat(hsa): move pow2, floor_log2 and alibi_slope math functions
ypapadop-amd Mar 9, 2026
69f9da3
Auto-format code in src/ggml-hsa
github-actions[bot] Mar 9, 2026
ed6e5d3
feat(ggml-hsa): add scalar f32 matmul support for AIE2/AIE2p
ypapadop-amd Mar 9, 2026
8dc17cb
f32 support in MUL_MAT
ypapadop-amd Mar 10, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
206 changes: 206 additions & 0 deletions .github/copilot-instructions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
# GitHub Copilot Instructions for GGML

## Project Overview

GGML is a tensor library for machine learning with a focus on:
- Low-level cross-platform implementation
- Integer quantization support for efficient model inference
- Broad hardware support (CPU, CUDA, Metal, HIP/HSA, SYCL, Vulkan, WebGPU, OpenCL)
- Automatic differentiation
- Zero memory allocations during runtime
- No third-party dependencies for core functionality

**Note:** This project is under active development. Core library development primarily happens in the [llama.cpp](https://github.com/ggml-org/llama.cpp) and [whisper.cpp](https://github.com/ggml-org/whisper.cpp) repositories.

## Build System

### CMake Configuration

- **Minimum CMake version:** 3.14
- **Languages:** C (C11), C++ (C++17), Assembly
- **Default build type:** Release (if not specified)
- **Shared libraries:** Default ON (except MINGW/Emscripten/WASM)

### Building the Project

```bash
mkdir build && cd build
cmake ..
cmake --build . --config Release -j 8
```

### Key CMake Options

- `BUILD_SHARED_LIBS` - Build shared libraries (default: ON except MINGW/Emscripten/WASM)
- `GGML_BUILD_TESTS` - Build test suite (default: ON when standalone)
- `GGML_BUILD_EXAMPLES` - Build example programs (default: ON when standalone)
- `GGML_CUDA` - Enable CUDA backend
- `GGML_METAL` - Enable Metal backend (default: ON for Apple platforms)
- `GGML_HIP` - Enable HIP backend
- `GGML_HSA` - Enable HSA backend
- `GGML_SYCL` - Enable SYCL backend
- `GGML_VULKAN` - Enable Vulkan backend
- `GGML_BLAS` - Enable BLAS support

## Coding Standards

### Code Style

- **Indentation:** 4 spaces (see `.editorconfig`)
- **Line endings:** LF (Unix-style)
- **Charset:** UTF-8
- **Final newline:** Required
- **Trailing whitespace:** Remove

### Formatting Tools

- A `.clang-format` file exists in `src/ggml-hsa/` based on LLVM style
- **Column limit:** 100 characters
- **Pointer alignment:** Middle (e.g., `int * ptr`)
- **Brace style:** Attach

### Naming Conventions

- Public API functions: `ggml_*` prefix
- Backend-specific functions: `ggml_<backend>_*` (e.g., `ggml_cuda_*`, `ggml_metal_*`)
- Types: `struct ggml_*`
- Enums: `GGML_*` (uppercase with underscores)

## Architecture

### Directory Structure

```
├── include/ # Public headers (ggml.h, ggml-*.h, gguf.h)
├── src/ # Core implementation and backend implementations
│ ├── ggml.c # Core tensor library
│ ├── ggml-cpu/ # CPU-specific optimizations
│ ├── ggml-cuda/ # CUDA backend
│ ├── ggml-metal/ # Metal backend
│ ├── ggml-hip/ # HIP backend
│ ├── ggml-hsa/ # HSA backend
│ └── ... # Other backends
├── examples/ # Example applications (GPT-2, GPT-J, MNIST, SAM, etc.)
├── tests/ # Test suite
├── cmake/ # CMake modules
├── scripts/ # Utility scripts
└── docs/ # Documentation (GGUF format spec)
```

### Key Components

- **ggml.h/ggml.c** - Core tensor operations and compute graph
- **ggml-backend.h** - Backend abstraction layer
- **ggml-alloc.h** - Memory allocation utilities
- **gguf.h** - GGUF file format for model serialization
- **Backend implementations** - Hardware-specific optimizations

## Testing

### Running Tests

```bash
cd build
ctest --output-on-failure
```

### Test Organization

- Unit tests in `tests/` directory
- Backend-specific tests in `tests/ggml-<backend>/`
- Test naming: `test-*.c` or `test-*.cpp`
- Use CTest for test execution

### Writing Tests

- Follow existing test patterns in `tests/` directory
- Test both correctness and performance where applicable
- Include edge cases and boundary conditions
- Backend tests should verify backend-specific functionality

## Contributing Guidelines

⚠️ **Important:** For changes to the core `ggml` library (including CMake build system):
- Open a PR in https://github.com/ggml-org/llama.cpp first
- This ensures better visibility, testing, and review
- See [CONTRIBUTING.md](../CONTRIBUTING.md) for details

### Pull Request Process

1. Ensure code follows the established style
2. Add or update tests as needed
3. Verify all tests pass locally
4. Update documentation if changing public APIs
5. Keep changes focused and minimal

## Common Tasks

### Adding a New Backend

1. Create `src/ggml-<backend>/` directory
2. Implement backend interface defined in `ggml-backend.h`
3. Add CMakeLists.txt with appropriate options
4. Create public header `include/ggml-<backend>.h`
5. Add tests in `tests/ggml-<backend>/`
6. Update main CMakeLists.txt with new options

### Adding New Tensor Operations

1. Add operation to `enum ggml_op` in `include/ggml.h`
2. Implement forward pass in `src/ggml.c`
3. Implement backward pass (gradient) if needed
4. Add operation to backend implementations
5. Add comprehensive tests
6. Update documentation

### Optimizing Existing Operations

1. Profile to identify bottlenecks
2. Consider SIMD/vectorization opportunities (see `src/ggml-cpu/`)
3. Implement backend-specific optimizations
4. Add performance tests
5. Verify correctness with existing tests

## Backend-Specific Notes

### CUDA Backend
- Use `ggml-cuda.h` for CUDA-specific APIs
- CUDA kernels in `src/ggml-cuda/`

### Metal Backend
- macOS/iOS GPU acceleration
- Shaders in Metal Shading Language
- Default ON for Apple platforms

### HIP/HSA Backends
- AMD GPU support
- Use appropriate compiler flags for ROCm

### CPU Backend
- SIMD optimizations in `src/ggml-cpu/`
- Multiple implementations for different architectures
- llamafile integration for optimized matrix multiplication

## Python Bindings

Python bindings are available in `examples/python/`:
- Auto-generated using CFFI
- Support for quantized tensors with automatic conversion
- See `examples/python/README.md` for usage

## Resources

- [Introduction to ggml](https://huggingface.co/blog/introduction-to-ggml)
- [GGUF file format specification](../docs/gguf.md)
- [llama.cpp project](https://github.com/ggml-org/llama.cpp) - Primary development hub
- [whisper.cpp project](https://github.com/ggml-org/whisper.cpp) - Speech recognition with ggml

## Important Reminders

1. **Minimal changes**: Make surgical, focused changes
2. **Test early and often**: Run tests after each significant change
3. **Follow existing patterns**: Match the style and structure of existing code
4. **Consider performance**: GGML is performance-critical; profile changes
5. **Cross-platform**: Ensure changes work on Linux, macOS, and Windows
6. **Documentation**: Update comments and docs for public API changes
7. **Upstream first**: Core changes should go to llama.cpp repository first
74 changes: 74 additions & 0 deletions .github/workflows/format.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
name: Format Code

on:
push:
branches: [ hsa-backend ]
paths:
- 'src/ggml-hsa/**'
pull_request:
branches: [ hsa-backend ]
paths:
- 'src/ggml-hsa/**'
workflow_dispatch:

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true

jobs:
format:
runs-on: ubuntu-latest

steps:
- name: Clone
uses: actions/checkout@v6
with:
ref: ${{ github.head_ref || github.ref_name }}
fetch-depth: 0
token: ${{ secrets.GITHUB_TOKEN }}

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.x'

- name: Install formatters
run: |
sudo mkdir -p /etc/apt/keyrings
curl -fsSL https://apt.llvm.org/llvm-snapshot.gpg.key | sudo gpg --dearmor -o /etc/apt/keyrings/llvm-snapshot.gpg
echo "deb [signed-by=/etc/apt/keyrings/llvm-snapshot.gpg] http://apt.llvm.org/$(lsb_release -cs)/ llvm-toolchain-$(lsb_release -cs)-22 main" | sudo tee /etc/apt/sources.list.d/llvm-toolchain-22.list
sudo apt-get update
sudo apt-get install -y clang-format-22
pip install black

- name: Format C++ code with clang-format
run: |
find src/ggml-hsa -type f \( -name '*.cpp' -o -name '*.cc' -o -name '*.hpp' -o -name '*.h' \) \
-exec clang-format-22 -i --style=file:src/ggml-hsa/.clang-format {} +

- name: Format Python code with black
run: |
black src/ggml-hsa

- name: Check for changes
id: verify
run: |
if ! git diff --exit-code; then
echo "changes=true" >> $GITHUB_OUTPUT
else
echo "changes=false" >> $GITHUB_OUTPUT
fi

- name: Commit and push formatting changes
if: steps.verify.outputs.changes == 'true'
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
git add src/ggml-hsa
git commit -m "Auto-format code in src/ggml-hsa

- Format C++ code with clang-format
- Format Python code with black

Co-Authored-By: github-actions[bot] <github-actions[bot]@users.noreply.github.com>"
git push
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM"
option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
option(GGML_HIP_MMQ_MFMA "ggml: enable MFMA MMA for CDNA in MMQ" ON)
option(GGML_HIP_EXPORT_METRICS "ggml: enable kernel perf metrics output" OFF)
option(GGML_HSA "ggml: use HSA" OFF)
option(GGML_MUSA_GRAPHS "ggml: use MUSA graph, experimental, unstable" OFF)
option(GGML_MUSA_MUDNN_COPY "ggml: enable muDNN for accelerated copy" OFF)
option(GGML_VULKAN "ggml: use Vulkan" OFF)
Expand Down Expand Up @@ -319,6 +320,7 @@ set(GGML_PUBLIC_HEADERS
include/ggml-cann.h
include/ggml-cpp.h
include/ggml-cuda.h
include/ggml-hsa.h
include/ggml-opt.h
include/ggml-metal.h
include/ggml-rpc.h
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ cmake -DGGML_CUDA=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda-12.1/bin/nvcc ..
cmake -DCMAKE_C_COMPILER="$(hipconfig -l)/clang" -DCMAKE_CXX_COMPILER="$(hipconfig -l)/clang++" -DGGML_HIP=ON
```

## Using HSA

```bash
cmake -DCMAKE_C_COMPILER="$(hipconfig -l)/clang" -DCMAKE_CXX_COMPILER="$(hipconfig -l)/clang++" -DGGML_HSA=ON
```

## Using SYCL

```bash
Expand Down
5 changes: 5 additions & 0 deletions cmake/ggml-config.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ if (NOT GGML_SHARED_LIB)
set(GGML_HIP_INTERFACE_LINK_LIBRARIES hip::host roc::rocblas roc::hipblas)
endif()

if (GGML_HSA)
find_package(hsa-runtime64 1.0 REQUIRED)
set(GGML_HSA_INTERFACE_LINK_LIBRARIES hsa-runtime64::hsa-runtime64)
endif()

if (GGML_SYCL)
set(GGML_SYCL_INTERFACE_LINK_LIBRARIES "")
find_package(DNNL)
Expand Down
20 changes: 20 additions & 0 deletions examples/gpt-2/main-backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
#include "ggml-metal.h"
#endif

#ifdef GGML_USE_HSA
#include "ggml-hsa.h"
#endif

#include "common.h"
#include "common-ggml.h"

Expand Down Expand Up @@ -220,6 +224,16 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
}
#endif

#ifdef GGML_USE_HSA
if (n_gpu_layers > 0) {
fprintf(stderr, "%s: using HSA backend\n", __func__);
model.backend = ggml_backend_hsa_init(0);
if (!model.backend) {
fprintf(stderr, "%s: ggml_backend_hsa_init() failed\n", __func__);
}
}
#endif

if (!model.backend) {
// fallback to CPU backend
fprintf(stderr, "%s: using CPU backend\n", __func__);
Expand All @@ -231,6 +245,12 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
return false;
}

ggml_backend_dev_t device = ggml_backend_get_device(model.backend);
size_t total_memory = 0;
size_t free_memory = 0;
ggml_backend_dev_memory(device, &free_memory, &total_memory);
fprintf(stderr, "%s: free memory %zu, total memory %zu\n", __func__, free_memory, total_memory);

// create the tensors for the model
{
const auto & hparams = model.hparams;
Expand Down
17 changes: 17 additions & 0 deletions examples/gpt-2/main-sched.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
#include "ggml-blas.h"
#endif

#ifdef GGML_USE_HSA
#include "ggml-hsa.h"
#endif


#include "common.h"
#include "common-ggml.h"

Expand Down Expand Up @@ -145,6 +150,18 @@ void init_backends(gpt2_model & model, const gpt_params & params) {
}
#endif

#ifdef GGML_USE_HSA
if (params.n_gpu_layers > 0) {
fprintf(stderr, "%s: using HSA backend\n", __func__);
ggml_backend_t hsa_backend = ggml_backend_hsa_init(0);
if (!hsa_backend) {
fprintf(stderr, "%s: ggml_backend_hsa_init() failed\n", __func__);
} else {
model.backends.push_back(hsa_backend);
}
}
#endif

// always add the CPU backend as a fallback
ggml_backend_t cpu_backend = ggml_backend_cpu_init();
ggml_backend_cpu_set_n_threads(cpu_backend, params.n_threads);
Expand Down
Loading