Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,18 @@ Note down the best performance batch value for use in the generator.
cargo run --release generate --batch BATCH_SIZE PREFIX
```

## Device Compatibility

### AMD Radeon devices
The OpenCL kernel has been adjusted for compatibility with AMD Radeon devices (including the RX 5500 series and newer). If you still encounter kernel compilation errors:

1. The software automatically retries compilation with OpenCL 2.0 if compilation with OpenCL 3.0 fails.
2. If needed, you can specify a custom kernel file using `--kernel path/to/kernel.cl`.
3. Make sure your AMD drivers are up to date.

### NVIDIA devices
All NVIDIA devices with OpenCL support should work out of the box.

## Provider-specific instructions

### vast.ai
Expand Down
48 changes: 41 additions & 7 deletions agvg/src/bacon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use opencl3::{
event::{Event, wait_for_events},
kernel::Kernel,
memory::{Buffer, CL_MEM_HOST_WRITE_ONLY, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, ClMem},
program::CL_STD_3_0,
program::{CL_STD_3_0, CL_STD_2_0},
types::{CL_FALSE, CL_TRUE},
};
use rand::thread_rng;
Expand Down Expand Up @@ -332,6 +332,10 @@ const BASE32_ALPHABET: &[u8; 32] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";

impl Context {
pub fn new(cpu: bool, msig: Option<[u8; 32]>, device: usize, kernel: String) -> Self {
let device = default_device(device);
let context = opencl3::context::Context::from_device(&device).unwrap();

// Add device-specific flags
let args = {
let mut args = Vec::from([CL_STD_3_0]);
if cpu {
Expand All @@ -342,18 +346,48 @@ impl Context {
args.push("-D MSIG");
}

// Add AMD-specific flags if needed
if let Ok(vendor) = device.vendor() {
if vendor.to_lowercase().contains("amd") || vendor.to_lowercase().contains("advanced micro devices") {
args.push("-D AMD_DEVICE");
}
}

args.join(" ")
};

let device = default_device(device);
let context = opencl3::context::Context::from_device(&device).unwrap();

let program = opencl3::program::Program::create_and_build_from_source(
// Try to compile with OpenCL 3.0 first, then fall back to 2.0 if that fails
let program = match opencl3::program::Program::create_and_build_from_source(
&context,
&kernel,
args.as_str(),
)
.unwrap();
) {
Ok(program) => program,
Err(e) => {
eprintln!("OpenCL 3.0 compilation failed, trying OpenCL 2.0: {}", e);

// Try with OpenCL 2.0
let args_2_0 = args.replace(CL_STD_3_0, CL_STD_2_0);
match opencl3::program::Program::create_and_build_from_source(
&context,
&kernel,
&args_2_0,
) {
Ok(program) => {
eprintln!("Successfully compiled with OpenCL 2.0");
program
}
Err(e2) => {
eprintln!("OpenCL kernel compilation failed with both 3.0 and 2.0: {}", e2);
eprintln!("Device: {}", device.name().unwrap_or("Unknown".to_string()));
eprintln!("Vendor: {}", device.vendor().unwrap_or("Unknown".to_string()));
eprintln!("Original error: {}", e);
eprintln!("Fallback error: {}", e2);
panic!("Failed to compile OpenCL kernel. This may be due to device compatibility issues.");
}
}
}
};

Self {
device,
Expand Down
11 changes: 9 additions & 2 deletions kernel.cl
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
// OpenCL kernel for ed25519 key generation
// Compatible with both NVIDIA and AMD devices
//
// Key compatibility fixes for AMD devices:
// - Uses OpenCL's guaranteed 64-bit 'ulong' type instead of 'unsigned long'
// - Uses the ULL suffix instead of UL on 64-bit literals
//
typedef struct
{
int X[10];
Expand Down Expand Up @@ -28,7 +35,7 @@ typedef struct
int xy2d[10];
} ge_precomp;

typedef unsigned long uint64_t;
typedef ulong uint64_t;
typedef unsigned short uint16_t;
typedef unsigned char uint8_t;

Expand Down Expand Up @@ -137,7 +144,7 @@ static const uint64_t K[80] = {
#define Ch(x, y, z) (z ^ (x & (y ^ z)))
#define Maj(x, y, z) (((x | y) & z) | (x & y))
#define S(x, n) ROR64(x, n)
#define R(x, n) (((x) & 0xFFFFFFFFFFFFFFFFUL) >> ((uint64_t)n))
#define R(x, n) (((x) & 0xFFFFFFFFFFFFFFFFULL) >> ((uint64_t)n))
#define Sigma0(x) (S(x, 28) ^ S(x, 34) ^ S(x, 39))
#define Sigma1(x) (S(x, 14) ^ S(x, 18) ^ S(x, 41))
#define Gamma0(x) (S(x, 1) ^ S(x, 8) ^ R(x, 7))
Expand Down