diff --git a/README.md b/README.md index 0e7fd0e..cc5c68b 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,18 @@ Note down the best performance batch value for use in the generator. cargo run --release generate --batch BATCH_SIZE PREFIX ``` +## Device Compatibility + +### AMD Radeon devices +The OpenCL kernel has been optimized for compatibility with AMD Radeon devices (including RX 5500 series and newer). If you encounter compilation errors: + +1. The software will automatically try OpenCL 2.0 if OpenCL 3.0 compilation fails +2. You can specify a custom kernel file using `--kernel path/to/kernel.cl` if needed +3. Make sure your AMD drivers are up to date + +### NVIDIA devices +All NVIDIA devices with OpenCL support should work out of the box. + ## Provider-specific instructions ### vast.ai diff --git a/agvg/src/bacon.rs b/agvg/src/bacon.rs index aed4d9e..3a7f0e0 100644 --- a/agvg/src/bacon.rs +++ b/agvg/src/bacon.rs @@ -7,7 +7,7 @@ use opencl3::{ event::{Event, wait_for_events}, kernel::Kernel, memory::{Buffer, CL_MEM_HOST_WRITE_ONLY, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, ClMem}, - program::CL_STD_3_0, + program::{CL_STD_3_0, CL_STD_2_0}, types::{CL_FALSE, CL_TRUE}, }; use rand::thread_rng; @@ -332,6 +332,10 @@ const BASE32_ALPHABET: &[u8; 32] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"; impl Context { pub fn new(cpu: bool, msig: Option<[u8; 32]>, device: usize, kernel: String) -> Self { + let device = default_device(device); + let context = opencl3::context::Context::from_device(&device).unwrap(); + + // Add device-specific flags let args = { let mut args = Vec::from([CL_STD_3_0]); if cpu { @@ -342,18 +346,48 @@ impl Context { args.push("-D MSIG"); } + // Add AMD-specific flags if needed + if let Ok(vendor) = device.vendor() { + if vendor.to_lowercase().contains("amd") || vendor.to_lowercase().contains("advanced micro devices") { + args.push("-D AMD_DEVICE"); + } + } + args.join(" ") }; - let device = default_device(device); - let context = opencl3::context::Context::from_device(&device).unwrap(); - - let program = opencl3::program::Program::create_and_build_from_source( + // Try to compile with OpenCL 3.0 first, then fallback to 2.0 if it fails + let program = match opencl3::program::Program::create_and_build_from_source( &context, &kernel, args.as_str(), - ) - .unwrap(); + ) { + Ok(program) => program, + Err(e) => { + eprintln!("OpenCL 3.0 compilation failed, trying OpenCL 2.0: {}", e); + + // Try with OpenCL 2.0 + let args_2_0 = args.replace(CL_STD_3_0, CL_STD_2_0); + match opencl3::program::Program::create_and_build_from_source( + &context, + &kernel, + &args_2_0, + ) { + Ok(program) => { + eprintln!("Successfully compiled with OpenCL 2.0"); + program + } + Err(e2) => { + eprintln!("OpenCL kernel compilation failed with both 3.0 and 2.0: {}", e2); + eprintln!("Device: {}", device.name().unwrap_or("Unknown".to_string())); + eprintln!("Vendor: {}", device.vendor().unwrap_or("Unknown".to_string())); + eprintln!("Original error: {}", e); + eprintln!("Fallback error: {}", e2); + panic!("Failed to compile OpenCL kernel. This may be due to device compatibility issues."); + } + } + } + }; Self { device, diff --git a/kernel.cl b/kernel.cl index 07909cc..3bb3af8 100644 --- a/kernel.cl +++ b/kernel.cl @@ -1,3 +1,10 @@ +// OpenCL kernel for ed25519 key generation +// Compatible with both NVIDIA and AMD devices +// +// Key compatibility fixes for AMD devices: +// - Uses OpenCL's guaranteed 64-bit 'ulong' type instead of 'unsigned long' +// - Proper 64-bit literal suffixes (ULL instead of UL) +// typedef struct { int X[10]; @@ -28,7 +35,7 @@ typedef struct int xy2d[10]; } ge_precomp; -typedef unsigned long uint64_t; +typedef ulong uint64_t; typedef unsigned short uint16_t; typedef unsigned char uint8_t; @@ -137,7 +144,7 @@ static const uint64_t K[80] = { #define Ch(x, y, z) (z ^ (x & (y ^ z))) #define Maj(x, y, z) (((x | y) & z) | (x & y)) #define S(x, n) ROR64(x, n) -#define R(x, n) (((x) & 0xFFFFFFFFFFFFFFFFUL) >> ((uint64_t)n)) +#define R(x, n) (((x) & 0xFFFFFFFFFFFFFFFFULL) >> ((uint64_t)n)) #define Sigma0(x) (S(x, 28) ^ S(x, 34) ^ S(x, 39)) #define Sigma1(x) (S(x, 14) ^ S(x, 18) ^ S(x, 41)) #define Gamma0(x) (S(x, 1) ^ S(x, 8) ^ R(x, 7))