Skip to content

Commit 8869662

Browse files
committed
support sm_100 and llvm v19
1 parent caaef11 commit 8869662

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+4190
-2708
lines changed

.devcontainer.json

Lines changed: 0 additions & 20 deletions
This file was deleted.

.devcontainer/Dockerfile

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
FROM nvidia/cuda:12.9.0-cudnn-devel-ubuntu24.04
2+
3+
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qq -y install \
4+
build-essential \
5+
clang \
6+
curl \
7+
libssl-dev \
8+
libtinfo-dev \
9+
pkg-config \
10+
xz-utils \
11+
zlib1g-dev && \
12+
rm -rf /var/lib/apt/lists/*
13+
14+
# Needed to build the `path_tracer` and `optix/ex03_window` examples
15+
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qq -y install \
16+
cmake \
17+
libfontconfig-dev \
18+
libx11-xcb-dev \
19+
libxcursor-dev \
20+
libxi-dev \
21+
libxinerama-dev \
22+
libxrandr-dev && \
23+
rm -rf /var/lib/apt/lists/*
24+
25+
# Get Rust
26+
RUN curl -sSf -L https://sh.rustup.rs | bash -s -- -y
27+
ENV PATH="/root/.cargo/bin:${PATH}"
28+
29+
# Setup the workspace
30+
WORKDIR /data/Rust-CUDA
31+
RUN --mount=type=bind,source=rust-toolchain.toml,target=/data/Rust-CUDA/rust-toolchain.toml \
32+
rustup show
33+
34+
# Add nvvm to LD_LIBRARY_PATH.
35+
ENV LD_LIBRARY_PATH="/usr/local/cuda/nvvm/lib64:${LD_LIBRARY_PATH}"
36+
ENV LLVM_LINK_STATIC=1
37+
ENV RUST_LOG=info
38+
39+
# install git + miscellaneous
40+
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qq -y install \
41+
git \
42+
gdb \
43+
lldb \
44+
psmisc \
45+
libzstd-dev && \
46+
rm -rf /var/lib/apt/lists/*
47+
48+
# ln -s /opt/llvm-19-debug/bin/llvm-config /usr/bin/llvm-config
49+
# export PATH="/opt/llvm-19-debug/bin:$PATH"

.devcontainer/devcontainer.json

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
{
2+
"name": "Rust CUDA Development",
3+
"build": {
4+
"dockerfile": "Dockerfile",
5+
"context": "../"
6+
},
7+
"mounts": [
8+
"source=${localEnv:HOME}/.ssh,target=/root/.ssh,type=bind,consistency=cached",
9+
"source=${localEnv:HOME}/llvm-build/llvm-19-debug,target=/opt/llvm-19-debug,type=bind,consistency=cached"
10+
],
11+
"hostRequirements": {
12+
"cpus": 8,
13+
"memory": "32gb",
14+
"storage": "64gb"
15+
},
16+
"features": {
17+
"ghcr.io/devcontainers/features/sshd:1": {
18+
"version": "latest"
19+
}
20+
},
21+
"capAdd": [
22+
"SYS_PTRACE"
23+
],
24+
"runArgs": [
25+
"--security-opt",
26+
"seccomp=unconfined"
27+
],
28+
"customizations": {
29+
"vscode": {
30+
"extensions": [
31+
"rust-lang.rust-analyzer",
32+
"ms-vscode.cpptools",
33+
"ms-vscode.cmake-tools",
34+
"nvidia.nsight-vscode-edition",
35+
"vadimcn.vscode-lldb"
36+
],
37+
"settings": {
38+
"rust-analyzer.cargo.features": "all",
39+
"rust-analyzer.checkOnSave.command": "clippy"
40+
}
41+
}
42+
},
43+
"forwardPorts": [],
44+
"postCreateCommand": "rustc --version && nvcc --version",
45+
"remoteUser": "root"
46+
}

.dockerignore

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# Rust build artifacts
2+
target/
3+
Cargo.lock
4+
5+
# IDE and editor files
6+
.vscode/
7+
.idea/
8+
*.swp
9+
*.swo
10+
*~
11+
12+
# OS generated files
13+
.DS_Store
14+
.DS_Store?
15+
._*
16+
.Spotlight-V100
17+
.Trashes
18+
ehthumbs.db
19+
Thumbs.db
20+
21+
# Git
22+
.git/
23+
.gitignore
24+
25+
# Documentation
26+
*.md
27+
docs/
28+
29+
# Test files (optional - uncomment if you don't want tests in container)
30+
# tests/
31+
# benches/
32+
33+
# Examples (optional - uncomment if you don't want examples in container)
34+
# examples/
35+
36+
# CI/CD files
37+
.github/
38+
.gitlab-ci.yml
39+
.travis.yml
40+
Jenkinsfile
41+
42+
# Docker files (avoid recursive inclusion)
43+
.devcontainer/
44+
Dockerfile*
45+
docker-compose*
46+
.dockerignore
47+
48+
# Logs
49+
*.log
50+
logs/
51+
52+
# Temporary files
53+
tmp/
54+
temp/
55+
.tmp/
56+
57+
# Node modules (if you have any JS tooling)
58+
node_modules/
59+
npm-debug.log*
60+
61+
# Python cache (if you have any Python tooling)
62+
__pycache__/
63+
*.py[cod]
64+
*$py.class
65+
.pytest_cache/
66+
67+
# Environment files
68+
.env
69+
.env.local
70+
.env.*.local
71+
72+
# CUDA cache and temporary files
73+
*.fatbin
74+
*.cubin
75+
*.ptx

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ book
22
/target
33
Cargo.lock
44
**/.vscode
5-
.devcontainer
5+
core
6+
rustc-ice*

container/ubuntu22-cuda12/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04
1+
FROM nvidia/cuda:12.9.0-cudnn-devel-ubuntu22.04
22

33
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qq -y install \
44
build-essential \

container/ubuntu24-cuda12/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM nvidia/cuda:12.8.1-cudnn-devel-ubuntu24.04
1+
FROM nvidia/cuda:12.9.0-cudnn-devel-ubuntu24.04
22

33
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qq -y install \
44
build-essential \

crates/cuda_builder/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ impl CudaBuilder {
162162
ptx_file_copy_path: None,
163163
generate_line_info: true,
164164
nvvm_opts: true,
165-
arch: NvvmArch::Compute61,
165+
arch: NvvmArch::Compute100,
166166
ftz: false,
167167
fast_sqrt: false,
168168
fast_div: false,

crates/cuda_std/src/cfg.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ pub enum ComputeCapability {
1616
Compute72,
1717
Compute75,
1818
Compute80,
19+
Compute86,
20+
Compute87,
21+
Compute89,
22+
Compute90,
23+
Compute100
1924
}
2025

2126
impl ComputeCapability {
@@ -42,6 +47,11 @@ impl ComputeCapability {
4247
"720" => ComputeCapability::Compute72,
4348
"750" => ComputeCapability::Compute75,
4449
"800" => ComputeCapability::Compute80,
50+
"860" => ComputeCapability::Compute86, // Ampere (RTX 30 series, A40)
51+
"870" => ComputeCapability::Compute87, // Ampere (Jetson AGX Orin)
52+
"890" => ComputeCapability::Compute89, // Ada Lovelace (RTX 40 series)
53+
"900" => ComputeCapability::Compute90, // Hopper (H100)
54+
"1000" => ComputeCapability::Compute100, // Blackwell (B100, B200)
4555
_ => panic!("CUDA_ARCH had an invalid value"),
4656
}
4757
}

crates/cust/src/module.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ pub enum JitTarget {
5656
Compute75 = 75,
5757
Compute80 = 80,
5858
Compute86 = 86,
59+
Compute87 = 87,
60+
Compute89 = 89,
61+
Compute90 = 90,
62+
Compute100 = 100,
5963
}
6064

6165
/// How to handle cases where a loaded module's data does not contain an exact match for the

crates/nvvm/src/lib.rs

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ use std::{
44
ffi::{CStr, CString},
55
fmt::Display,
66
mem::MaybeUninit,
7-
ptr::null_mut,
87
str::FromStr,
98
};
109

@@ -255,6 +254,11 @@ impl FromStr for NvvmOption {
255254
"72" => NvvmArch::Compute72,
256255
"75" => NvvmArch::Compute75,
257256
"80" => NvvmArch::Compute80,
257+
"86" => NvvmArch::Compute86,
258+
"87" => NvvmArch::Compute87,
259+
"89" => NvvmArch::Compute89,
260+
"90" => NvvmArch::Compute90,
261+
"100" => NvvmArch::Compute100,
258262
_ => return Err("unknown arch"),
259263
};
260264
Self::Arch(arch)
@@ -279,6 +283,11 @@ pub enum NvvmArch {
279283
Compute72,
280284
Compute75,
281285
Compute80,
286+
Compute86,
287+
Compute87,
288+
Compute89,
289+
Compute90,
290+
Compute100,
282291
}
283292

284293
impl Display for NvvmArch {
@@ -291,7 +300,7 @@ impl Display for NvvmArch {
291300

292301
impl Default for NvvmArch {
293302
fn default() -> Self {
294-
Self::Compute52
303+
Self::Compute100
295304
}
296305
}
297306

@@ -403,8 +412,21 @@ impl NvvmProgram {
403412

404413
/// Verify the program without actually compiling it. In the case of invalid IR, you can find
405414
/// more detailed error info by calling [`compiler_log`](Self::compiler_log).
406-
pub fn verify(&self) -> Result<(), NvvmError> {
407-
unsafe { nvvm_sys::nvvmVerifyProgram(self.raw, 0, null_mut()).to_result() }
415+
pub fn verify(&self, options: &[NvvmOption]) -> Result<(), NvvmError> {
416+
let option_strings: Vec<_> = options.iter().map(|opt| opt.to_string()).collect();
417+
let option_cstrings: Vec<_> = option_strings.iter()
418+
.map(|s| std::ffi::CString::new(s.as_str()).unwrap())
419+
.collect();
420+
let mut option_ptrs: Vec<_> = option_cstrings.iter()
421+
.map(|cs| cs.as_ptr())
422+
.collect();
423+
unsafe {
424+
nvvm_sys::nvvmVerifyProgram(
425+
self.raw,
426+
option_ptrs.len() as i32,
427+
option_ptrs.as_mut_ptr()
428+
).to_result()
429+
}
408430
}
409431
}
410432

@@ -433,6 +455,11 @@ mod tests {
433455
"-arch=compute_72",
434456
"-arch=compute_75",
435457
"-arch=compute_80",
458+
"-arch=compute_86",
459+
"-arch=compute_87",
460+
"-arch=compute_89",
461+
"-arch=compute_90",
462+
"-arch=compute_100",
436463
"-ftz=1",
437464
"-prec-sqrt=0",
438465
"-prec-div=0",
@@ -454,6 +481,11 @@ mod tests {
454481
Arch(Compute72),
455482
Arch(Compute75),
456483
Arch(Compute80),
484+
Arch(Compute86),
485+
Arch(Compute87),
486+
Arch(Compute89),
487+
Arch(Compute90),
488+
Arch(Compute100),
457489
Ftz,
458490
FastSqrt,
459491
FastDiv,

0 commit comments

Comments
 (0)