diff --git a/.github/workflows/aes.yml b/.github/workflows/aes.yml
new file mode 100644
index 000000000..e4635625c
--- /dev/null
+++ b/.github/workflows/aes.yml
@@ -0,0 +1,169 @@
+name: AES-GCM
+
+on:
+  merge_group:
+  pull_request:
+    branches: ["main", "dev", "*"]
+    paths:
+      - "aesgcm/**"
+  workflow_dispatch:
+
+env:
+  CARGO_TERM_COLOR: always
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    strategy:
+      fail-fast: false
+      matrix:
+        bits: [32, 64]
+        os:
+          - macos-latest # macos-15 on apple silicon
+          - ubuntu-latest
+          - windows-latest
+        exclude:
+          - bits: 32
+            os: "macos-latest"
+
+    runs-on: ${{ matrix.os }}
+    defaults:
+      run:
+        shell: bash
+        working-directory: aesgcm
+
+    steps:
+      - uses: actions/checkout@v5
+      - uses: taiki-e/install-action@cargo-hack
+
+      - name: Update dependencies
+        run: cargo update
+
+      - run: echo "RUST_TARGET_FLAG=" >> $GITHUB_ENV
+        if: ${{ matrix.bits == 64 }}
+
+      - run: echo 'EXCLUDE_FEATURES=--exclude-features simd256' >> $GITHUB_ENV
+        if: ${{ matrix.os == 'macos-latest' }}
+
+      - run: echo 'EXCLUDE_FEATURES=--exclude-features simd128' >> $GITHUB_ENV
+        if: ${{ matrix.os != 'macos-latest' }}
+
+      - name: 🛠️ Setup Rust Nightly
+        run: rustup toolchain install nightly
+
+      - name: 🛠️ Setup Ubuntu x86
+        if: ${{ matrix.bits == 32 && matrix.os == 'ubuntu-latest' }}
+        run: |
+          rustup target add i686-unknown-linux-gnu
+          sudo apt-get update
+          sudo apt-get install -y gcc-multilib g++-multilib
+
+      # Set up 32 bit systems
+
+      - name: 🛠️ Config Windows x86
+        run: echo "RUST_TARGET_FLAG=--target=i686-pc-windows-msvc" >> $GITHUB_ENV
+        if: ${{ matrix.bits == 32 && matrix.os == 'windows-latest' }}
+
+      - name: 🛠️ Config Linux x86
+        run: |
+          echo "RUST_TARGET_FLAG=--target=i686-unknown-linux-gnu" >> $GITHUB_ENV
+        if: ${{ matrix.bits == 32 && matrix.os == 'ubuntu-latest' }}
+
+      # Build ...
+
+      - name: 🔨 Build
+        run: |
+          rustc --print=cfg
+          cargo build --verbose $RUST_TARGET_FLAG
+
+      - name: 🔨 Build Release
+        run: cargo build --verbose --release $RUST_TARGET_FLAG
+
+      - name: 🏃🏻 Asan MacOS
+        if: ${{ matrix.os == 'macos-latest' }}
+        run: RUSTDOCFLAGS=-Zsanitizer=address RUSTFLAGS=-Zsanitizer=address cargo +nightly test --release --target aarch64-apple-darwin
+
+      # Test ...
+
+      - name: 🏃🏻‍♀️ Test
+        run: |
+          cargo clean
+          cargo test --verbose $RUST_TARGET_FLAG
+
+      - name: 🏃🏻‍♀️ Test Release
+        run: |
+          cargo clean
+          cargo test --verbose --release $RUST_TARGET_FLAG
+
+      - name: 🏃🏻‍♀️ Test Portable
+        run: |
+          cargo clean
+          LIBCRUX_DISABLE_SIMD128=1 LIBCRUX_DISABLE_SIMD256=1 cargo test --verbose $RUST_TARGET_FLAG
+
+      - name: 🏃🏻‍♀️ Test Portable Release
+        run: |
+          cargo clean
+          LIBCRUX_DISABLE_SIMD128=1 LIBCRUX_DISABLE_SIMD256=1 cargo test --verbose --release $RUST_TARGET_FLAG
+
+      - name: 🏃🏻‍♀️ Test Kyber
+        run: |
+          cargo clean
+          cargo test --features kyber --verbose $RUST_TARGET_FLAG
+
+      - name: 🏃🏻‍♀️ Cargo Test Features
+        if: ${{ matrix.bits == 64 }}
+        run: |
+          cargo clean
+          cargo hack test --each-feature $EXCLUDE_FEATURES --verbose $RUST_TARGET_FLAG
+
+  build-intel-macos:
+    runs-on: macos-13
+    defaults:
+      run:
+        shell: bash
+        working-directory: aesgcm
+
+    steps:
+      - uses: actions/checkout@v5
+
+      - name: Update dependencies
+        run: cargo update
+
+      - name: 🔨 Build
+        run: |
+          rustc --print=cfg
+          cargo build --verbose
+
+  fuzz:
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - macos-latest # macos-15
+          - ubuntu-latest
+
+    runs-on: ${{ matrix.os }}
+    defaults:
+      run:
+        shell: bash
+        working-directory: aesgcm
+
+    steps:
+      - uses: actions/checkout@v5
+
+      - name: 🛠️ Setup Rust Nightly
+        run: |
+          rustup toolchain install nightly
+          cargo install cargo-fuzz
+
+      - name: 🛠️ Update dependencies
+        run: cargo update
+
+      - name: 🏃🏻‍♀️ Encrypt128
+        run: CARGO_PROFILE_RELEASE_LTO=false cargo +nightly fuzz run encrypt128 -- -runs=100000
+
+      - name: 🏃🏻‍♀️ Encrypt256
+        run: CARGO_PROFILE_RELEASE_LTO=false cargo +nightly fuzz run encrypt256 -- -runs=100000
diff --git a/Cargo.toml b/Cargo.toml
index 528ebd552..c507579fa 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,5 +1,7 @@
 [workspace]
 members = [
+    "aesgcm",
+    "aesgcm/fuzz",
     "sys/hacl",
     "sys/libjade",
     "sys/platform",
@@ -49,6 +51,20 @@ allow-branch = ["main"]
 
 [workspace.dependencies]
 hax-lib = { version = "0.3.4" }
+libcrux-intrinsics = { version = "=0.0.3", path = "libcrux-intrinsics" }
+libcrux-aesgcm = { version = "=0.0.2", path = "aesgcm" }
+libcrux-chacha20poly1305 = { version = "=0.0.3", path = "chacha20poly1305" }
+libcrux-traits = { version = "=0.0.3", path = "traits" }
+libcrux-hacl-rs = { version = "=0.0.3", path = "hacl-rs" }
+libcrux-hacl = { version = "=0.0.2", path = "sys/hacl" }
+libcrux-platform = { version = "=0.0.2", path = "sys/platform" }
+libcrux-hkdf = { version = "=0.0.3", path = "libcrux-hkdf" }
+libcrux-hmac = { version = "=0.0.3", path = "libcrux-hmac" }
+libcrux-sha2 = { version = "=0.0.3", path = "sha2" }
+libcrux-ed25519 = { version = "=0.0.3", path = "ed25519" }
+libcrux-ecdh = { version = "=0.0.3", path = "libcrux-ecdh" }
+libcrux-ml-kem = { version = "=0.0.3", path = "libcrux-ml-kem" }
+libcrux-kem = { version = "=0.0.3", path = "libcrux-kem" }
 
 [package]
 name = "libcrux"
@@ -81,23 +97,27 @@ bench = false # so libtest doesn't eat the argumen
 libcrux-platform = { version = "=0.0.2", path = "sys/platform" }
 
 [dependencies]
-libcrux-traits = { version = "=0.0.3", path = "traits" }
-libcrux-chacha20poly1305 = { version = "=0.0.3", path = "chacha20poly1305" }
-libcrux-hacl-rs = { version = "=0.0.3", path = "hacl-rs" }
-libcrux-hacl = { version = "=0.0.2", path = "sys/hacl" }
-libcrux-platform = { version = "=0.0.2", path = "sys/platform" }
-libcrux-hkdf = { version = "=0.0.3", path = "libcrux-hkdf" }
-libcrux-hmac = { version = "=0.0.3", path = "libcrux-hmac" }
-libcrux-sha2 = { version = "=0.0.3", path = "sha2" }
-libcrux-ed25519 = { version = "=0.0.3", path = "ed25519" }
-libcrux-ecdh = { version = "=0.0.3", path = "libcrux-ecdh" }
-libcrux-ml-kem = { version = "=0.0.3", path = "libcrux-ml-kem" }
-libcrux-kem = { version = "=0.0.3", path = "libcrux-kem" }
+libcrux-hacl-rs.workspace = true
+libcrux-chacha20poly1305.workspace = true
+libcrux-ml-kem.workspace = true
+libcrux-traits.workspace = true
+libcrux-hacl.workspace = true
+libcrux-platform.workspace = true
+libcrux-hkdf.workspace = true
+libcrux-hmac.workspace = true
+libcrux-sha2.workspace = true
+libcrux-ed25519.workspace = true
+libcrux-ecdh.workspace = true
+libcrux-kem.workspace = true
+
 rand = { version = "0.9" }
 log = { version = "0.4", optional = true }
+
 # WASM API
 wasm-bindgen = { version = "0.2.87", optional = true }
 getrandom = { version = "0.3", optional = true }
+
+# Proofs
 hax-lib.workspace = true
 
 [dev-dependencies]
diff --git a/aesgcm/.gitignore b/aesgcm/.gitignore
new file mode 100644
index 000000000..407088d6f
--- /dev/null
+++ b/aesgcm/.gitignore
@@ -0,0 +1 @@
+profile.json.gz
diff --git a/aesgcm/Cargo.toml b/aesgcm/Cargo.toml
new file mode 100644
index 000000000..876708a07
--- /dev/null
+++ b/aesgcm/Cargo.toml
@@ -0,0 +1,45 @@
+[package]
+name = "libcrux-aesgcm"
+version.workspace = true
+authors.workspace = true
+license.workspace = true
+homepage.workspace = true
+edition.workspace = true
+repository.workspace = true
+readme = "README.md"
+description = "Libcrux AES-GCM implementation"
+exclude = []
+
+[lib]
+bench = false # so libtest doesn't eat the arguments to criterion
+
+[dependencies]
+libcrux-platform.workspace = true
+libcrux-intrinsics.workspace = true
+libcrux-traits.workspace = true
+
+rand = { version = "0.9", optional = true }
+
+[features]
+default = ["rand"] # XXX: remove rand here when cleaning up
+simd128 = []
+simd256 = []
+rand = ["dep:rand"]
+std = []
+
+[[bench]]
+name = "aesgcm"
+harness = false
+
+[dev-dependencies]
+libcrux-aesgcm = { version = "*", features = ["std"], path = "." }
+cavp = { version = "0.0.2", path = "../cavp" }
+criterion = "0.5.1"
+hex = "0.4.3"
+pretty_env_logger = "0.5.0"
+rand_core = { version = "0.6" }
+aes-gcm = "0.10.3"
+wycheproof = "0.6.0"
+
+[lints.rust]
+unexpected_cfgs = { level = "warn", check-cfg = ['cfg(hax)', 'cfg(eurydice)'] }
diff --git a/aesgcm/README.md b/aesgcm/README.md
new file mode 100644
index 000000000..0d708942f
--- /dev/null
+++ b/aesgcm/README.md
@@ -0,0 +1,12 @@
+# AES-GCM
+
+![pre-verification]
+
+This crate implements AES-GCM 128 and 256.
+
+It provides
+- a portable, bit-sliced implementation
+- an x64 optimised implementation using AES-NI
+- an AArch64 optimised implementation using the AES instructions
+
+[pre-verification]: https://img.shields.io/badge/pre_verification-orange.svg?style=for-the-badge&logo=data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz48IS0tIFVwbG9hZGVkIHRvOiBTVkcgUmVwbywgd3d3LnN2Z3JlcG8uY29tLCBHZW5lcmF0b3I6IFNWRyBSZXBvIE1peGVyIFRvb2xzIC0tPg0KPHN2ZyB3aWR0aD0iODAwcHgiIGhlaWdodD0iODAwcHgiIHZpZXdCb3g9IjAgMCAyNCAyNCIgZmlsbD0ibm9uZSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4NCjxwYXRoIGQ9Ik05IDEySDE1TTIwIDEyQzIwIDE2LjQ2MTEgMTQuNTQgMTkuNjkzNyAxMi42NDE0IDIwLjY4M0MxMi40MzYxIDIwLjc5IDEyLjMzMzQgMjAuODQzNSAxMi4xOTEgMjAuODcxMkMxMi4wOCAyMC44OTI4IDExLjkyIDIwLjg5MjggMTEuODA5IDIwLjg3MTJDMTEuNjY2NiAyMC44NDM1IDExLjU2MzkgMjAuNzkgMTEuMzU4NiAyMC42ODNDOS40NTk5NiAxOS42OTM3IDQgMTYuNDYxMSA0IDEyVjguMjE3NTlDNCA3LjQxODA4IDQgNy4wMTgzMyA0LjEzMDc2IDYuNjc0N0M0LjI0NjI3IDYuMzcxMTMgNC40MzM5OCA2LjEwMDI3IDQuNjc3NjYgNS44ODU1MkM0Ljk1MzUgNS42NDI0MyA1LjMyNzggNS41MDIwNyA2LjA3NjQgNS4yMjEzNEwxMS40MzgyIDMuMjEwNjdDMTEuNjQ2MSAzLjEzMjcxIDExLjc1IDMuMDkzNzMgMTEuODU3IDMuMDc4MjdDMTEuOTUxOCAzLjA2NDU3IDEyLjA0ODIgMy4wNjQ1NyAxMi4xNDMgMy4wNzgyN0MxMi4yNSAzLjA5MzczIDEyLjM1MzkgMy4xMzI3MSAxMi41NjE4IDMuMjEwNjdMMTcuOTIzNiA1LjIyMTM0QzE4LjY3MjIgNS41MDIwNyAxOS4wNDY1IDUuNjQyNDMgMTkuMzIyMyA1Ljg4NTUyQzE5LjU2NiA2LjEwMDI3IDE5Ljc1MzcgNi4zNzExMyAxOS44NjkyIDYuNjc0N0MyMCA3LjAxODMzIDIwIDcuNDE4MDggMjAgOC4yMTc1OVYxMloiIHN0cm9rZT0iIzAwMDAwMCIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiLz4NCjwvc3ZnPg==
diff --git a/aesgcm/benches/aesgcm.rs b/aesgcm/benches/aesgcm.rs
new file mode 100644
index 000000000..1fe2a78d7
--- /dev/null
+++ b/aesgcm/benches/aesgcm.rs
@@ -0,0 +1,197 @@
+#![allow(non_snake_case)]
+use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput};
+
+pub fn randombytes(n: usize) -> Vec<u8> {
+    let mut bytes = vec![0u8; n];
+    rand::rng().fill_bytes(&mut bytes);
+    bytes
+}
+
+pub fn fmt(x: usize) -> String {
+    let base = (x as f64).log(1024f64).floor() as usize;
+    let suffix = ["", "KB", "MB", "GB"];
+    format!("{} {}", x >> (10 * base), suffix[base])
+}
+
+macro_rules! impl_comp {
+    ($fun:ident, $keylen:literal, $portable_fun:expr, $neon_fun:expr, $intel_fun:expr, $rustcrypto_fun:expr) => {
+        // Comparing libcrux performance for different payload sizes and other implementations.
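+        // Each `$*_fun` argument is expected to have the same shape, roughly
+        // `fn(key: &[u8], nonce: &[u8], aad: &[u8], msg: &[u8], ciphertext: &mut [u8], tag: &mut [u8])`,
+        // as e.g. `libcrux_aesgcm::portable::aes_gcm_128::encrypt` and the
+        // `rustcrypto_*` wrappers at the bottom of this file.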
+        fn $fun(c: &mut Criterion) {
+            const PAYLOAD_SIZES: [usize; 3] = [128, 1024, 1024 * 1024 * 10];
+
+            let mut group = c.benchmark_group(stringify!($fun).replace("_", " "));
+
+            for payload_size in PAYLOAD_SIZES.iter() {
+                group.throughput(Throughput::Bytes(*payload_size as u64));
+
+                group.bench_with_input(
+                    BenchmarkId::new("libcrux", fmt(*payload_size)),
+                    payload_size,
+                    |b, payload_size| {
+                        b.iter_batched(
+                            || {
+                                (
+                                    randombytes($keylen),
+                                    randombytes(12),
+                                    randombytes(32),
+                                    randombytes(*payload_size),
+                                )
+                            },
+                            |(key, nonce, aad, payload)| {
+                                let mut ciphertext = vec![0; *payload_size];
+                                let mut tag = [0u8; 16];
+                                $portable_fun(
+                                    &key,
+                                    &nonce,
+                                    &aad,
+                                    &payload,
+                                    &mut ciphertext,
+                                    &mut tag,
+                                );
+                            },
+                            BatchSize::SmallInput,
+                        )
+                    },
+                );
+
+                #[cfg(all(target_arch = "aarch64", target_feature = "aes"))]
+                group.bench_with_input(
+                    BenchmarkId::new("neon-aes-clmul", fmt(*payload_size)),
+                    payload_size,
+                    |b, payload_size| {
+                        b.iter_batched(
+                            || {
+                                (
+                                    randombytes($keylen),
+                                    randombytes(12),
+                                    randombytes(32),
+                                    randombytes(*payload_size),
+                                )
+                            },
+                            |(key, nonce, aad, payload)| {
+                                let mut ciphertext = vec![0; *payload_size];
+                                let mut tag = [0u8; 16];
+                                $neon_fun(&key, &nonce, &aad, &payload, &mut ciphertext, &mut tag);
+                            },
+                            BatchSize::SmallInput,
+                        )
+                    },
+                );
+
+                #[cfg(all(target_arch = "x86_64"))] // ENABLE: target_feature="aes"
+                group.bench_with_input(
+                    BenchmarkId::new("intel-aes-clmul", fmt(*payload_size)),
+                    payload_size,
+                    |b, payload_size| {
+                        b.iter_batched(
+                            || {
+                                (
+                                    randombytes($keylen),
+                                    randombytes(12),
+                                    randombytes(32),
+                                    randombytes(*payload_size),
+                                )
+                            },
+                            |(key, nonce, aad, payload)| {
+                                let mut ciphertext = vec![0; *payload_size];
+                                let mut tag = [0u8; 16];
+                                $intel_fun(&key, &nonce, &aad, &payload, &mut ciphertext, &mut tag);
+                            },
+                            BatchSize::SmallInput,
+                        )
+                    },
+                );
+
+                group.bench_with_input(
+                    BenchmarkId::new("rust-crypto", fmt(*payload_size)),
+                    payload_size,
+                    |b, payload_size| {
+                        b.iter_batched(
+                            || {
+                                (
+                                    randombytes($keylen),
+                                    randombytes(12),
+                                    randombytes(32),
+                                    randombytes(*payload_size),
+                                )
+                            },
+                            |(key, nonce, aad, payload)| {
+                                let mut ciphertext = vec![0; *payload_size];
+                                let mut tag = [0u8; 16];
+                                $rustcrypto_fun(
+                                    &key,
+                                    &nonce,
+                                    &aad,
+                                    &payload,
+                                    &mut ciphertext,
+                                    &mut tag,
+                                );
+                            },
+                            BatchSize::SmallInput,
+                        )
+                    },
+                );
+            }
+        }
+    };
+}
+
+use aes_gcm::{
+    aead::{Aead, KeyInit, Payload},
+    Aes128Gcm, Aes256Gcm,
+};
+use rand::RngCore;
+
+fn rustcrypto_aes128_gcm_encrypt(
+    key: &[u8],
+    nonce: &[u8],
+    aad: &[u8],
+    msg: &[u8],
+    ciphertext: &mut [u8],
+    tag: &mut [u8],
+) {
+    let cipher = Aes128Gcm::new(key.into());
+    let ctxt = cipher.encrypt(nonce.into(), Payload { msg, aad }).unwrap();
+    ciphertext.copy_from_slice(&ctxt[0..msg.len()]);
+    tag.copy_from_slice(&ctxt[msg.len()..]);
+}
+
+// XXX: We could work with the traits here, but this is quicker for now.
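+// A trait-based version could collapse the two wrappers into one generic
+// helper; a rough sketch (hypothetical, not used in this benchmark):
+//
+// fn rustcrypto_encrypt<C: Aead + KeyInit>(
+//     key: &[u8], nonce: &[u8], aad: &[u8], msg: &[u8],
+//     ciphertext: &mut [u8], tag: &mut [u8],
+// ) {
+//     let cipher = C::new(key.into());
+//     let ctxt = cipher.encrypt(nonce.into(), Payload { msg, aad }).unwrap();
+//     ciphertext.copy_from_slice(&ctxt[0..msg.len()]);
+//     tag.copy_from_slice(&ctxt[msg.len()..]);
+// }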
+fn rustcrypto_aes256_gcm_encrypt(
+    key: &[u8],
+    nonce: &[u8],
+    aad: &[u8],
+    msg: &[u8],
+    ciphertext: &mut [u8],
+    tag: &mut [u8],
+) {
+    let cipher = Aes256Gcm::new(key.into());
+    let ctxt = cipher.encrypt(nonce.into(), Payload { msg, aad }).unwrap();
+    ciphertext.copy_from_slice(&ctxt[0..msg.len()]);
+    tag.copy_from_slice(&ctxt[msg.len()..]);
+}
+
+impl_comp!(
+    AES128_GCM,
+    16,
+    libcrux_aesgcm::portable::aes_gcm_128::encrypt,
+    libcrux_aesgcm::neon::aes_gcm_128::encrypt,
+    libcrux_aesgcm::x64::aes_gcm_128::encrypt,
+    rustcrypto_aes128_gcm_encrypt
+);
+impl_comp!(
+    AES256_GCM,
+    32,
+    libcrux_aesgcm::portable::aes_gcm_256::encrypt,
+    libcrux_aesgcm::neon::aes_gcm_256::encrypt,
+    libcrux_aesgcm::x64::aes_gcm_256::encrypt,
+    rustcrypto_aes256_gcm_encrypt
+);
+
+fn benchmarks(c: &mut Criterion) {
+    AES128_GCM(c);
+    AES256_GCM(c);
+}
+
+criterion_group!(benches, benchmarks);
+criterion_main!(benches);
diff --git a/aesgcm/build.rs b/aesgcm/build.rs
new file mode 100644
index 000000000..abc7a8b78
--- /dev/null
+++ b/aesgcm/build.rs
@@ -0,0 +1,34 @@
+use std::env;
+
+fn main() {
+    let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap();
+    let disable_simd128 = read_env("LIBCRUX_DISABLE_SIMD128");
+    let disable_simd256 = read_env("LIBCRUX_DISABLE_SIMD256");
+
+    // Force a simd build. Make sure you know what you're doing.
+    let enable_simd128 = read_env("LIBCRUX_ENABLE_SIMD128");
+    let enable_simd256 = read_env("LIBCRUX_ENABLE_SIMD256");
+
+    let simd128_possible = target_arch == "aarch64";
+    if (simd128_possible || enable_simd128) && !disable_simd128 {
+        // We enable simd128 on all aarch64 builds.
+        println!("cargo:rustc-cfg=feature=\"simd128\"");
+    }
+    let simd256_possible = target_arch == "x86_64";
+    if (simd256_possible || enable_simd256) && !disable_simd256 {
+        // We enable simd256 on all x86_64 builds.
+        // Note that this doesn't mean the required CPU features are available.
+        // But the compiler will support them and the runtime checks ensure that
+        // it's only used when available.
+        //
+        // We don't enable this on x86 because it seems to generate invalid code.
+        println!("cargo:rustc-cfg=feature=\"simd256\"");
+    }
+}
+
+fn read_env(key: &str) -> bool {
+    match env::var(key) {
+        Ok(s) => s == "1" || s == "y" || s == "Y",
+        Err(_) => false,
+    }
+}
diff --git a/aesgcm/examples/bench.rs b/aesgcm/examples/bench.rs
new file mode 100644
index 000000000..0f823cbfd
--- /dev/null
+++ b/aesgcm/examples/bench.rs
@@ -0,0 +1,26 @@
+use libcrux_aesgcm::Aead;
+
+fn main() {
+    const PAYLOAD_SIZES: usize = 3045;
+
+    let key = [0x16; 16];
+    let nonce = [0x12; 12];
+
+    let aad = [0xff; 32];
+    let plaintext = [0xab; PAYLOAD_SIZES];
+
+    let mut ciphertext = vec![0; PAYLOAD_SIZES];
+    let mut tag = [0u8; 16];
+
+    for _ in 0..10000 {
+        libcrux_aesgcm::AesGcm128::encrypt(
+            &mut ciphertext,
+            &mut tag,
+            &key,
+            &nonce,
+            &aad,
+            &plaintext,
+        )
+        .unwrap();
+    }
+}
diff --git a/aesgcm/fuzz/.gitignore b/aesgcm/fuzz/.gitignore
new file mode 100644
index 000000000..1a45eee77
--- /dev/null
+++ b/aesgcm/fuzz/.gitignore
@@ -0,0 +1,4 @@
+target
+corpus
+artifacts
+coverage
diff --git a/aesgcm/fuzz/Cargo.toml b/aesgcm/fuzz/Cargo.toml
new file mode 100644
index 000000000..ab0ba8f8f
--- /dev/null
+++ b/aesgcm/fuzz/Cargo.toml
@@ -0,0 +1,28 @@
+[package]
+name = "libcrux-aesgcm-fuzz"
+version = "0.0.0"
+publish = false
+edition = "2021"
+
+[package.metadata]
+cargo-fuzz = true
+
+[dependencies]
+libfuzzer-sys = "0.4"
+
+[dependencies.libcrux-aesgcm]
+path = ".."
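+
+# The fuzz targets below are run with cargo-fuzz, e.g.
+#   cargo +nightly fuzz run encrypt128 -- -runs=100000
+# (this is what the fuzz job in .github/workflows/aes.yml does).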
+
+[[bin]]
+name = "encrypt128"
+path = "fuzz_targets/encrypt128.rs"
+test = false
+doc = false
+bench = false
+
+[[bin]]
+name = "encrypt256"
+path = "fuzz_targets/encrypt256.rs"
+test = false
+doc = false
+bench = false
diff --git a/aesgcm/fuzz/fuzz_targets/encrypt128.rs b/aesgcm/fuzz/fuzz_targets/encrypt128.rs
new file mode 100644
index 000000000..0daa622a8
--- /dev/null
+++ b/aesgcm/fuzz/fuzz_targets/encrypt128.rs
@@ -0,0 +1,28 @@
+#![no_main]
+
+use libcrux_aesgcm::Aead;
+
+use libfuzzer_sys::fuzz_target;
+
+fuzz_target!(|data: &[u8]| {
+    if data.len() < 16 + 12 + 7 {
+        // We want at least a key, nonce, and a few input bytes.
+        return;
+    }
+
+    let key = &data[0..16];
+    let nonce = &data[16..16 + 12];
+    let aad = &data[16 + 12..16 + 12 + 5];
+
+    let mut ctxt = vec![0u8; data.len()];
+    let mut tag = [0u8; 16];
+    libcrux_aesgcm::PortableAesGcm128::encrypt(
+        &mut ctxt,
+        &mut tag,
+        key.try_into().unwrap(),
+        nonce.try_into().unwrap(),
+        aad,
+        &data,
+    )
+    .unwrap();
+});
diff --git a/aesgcm/fuzz/fuzz_targets/encrypt256.rs b/aesgcm/fuzz/fuzz_targets/encrypt256.rs
new file mode 100644
index 000000000..528634f9c
--- /dev/null
+++ b/aesgcm/fuzz/fuzz_targets/encrypt256.rs
@@ -0,0 +1,28 @@
+#![no_main]
+
+use libcrux_aesgcm::Aead;
+
+use libfuzzer_sys::fuzz_target;
+
+fuzz_target!(|data: &[u8]| {
+    if data.len() < 32 + 12 + 7 {
+        // We want at least a key, nonce, and a few input bytes.
+        return;
+    }
+
+    let key = &data[0..32];
+    let nonce = &data[32..32 + 12];
+    let aad = &data[32 + 12..32 + 12 + 5];
+
+    let mut ctxt = vec![0u8; data.len()];
+    let mut tag = [0u8; 16];
+    libcrux_aesgcm::PortableAesGcm256::encrypt(
+        &mut ctxt,
+        &mut tag,
+        key.try_into().unwrap(),
+        nonce.try_into().unwrap(),
+        aad,
+        &data,
+    )
+    .unwrap();
+});
diff --git a/aesgcm/src/aes.rs b/aesgcm/src/aes.rs
new file mode 100644
index 000000000..121a63200
--- /dev/null
+++ b/aesgcm/src/aes.rs
@@ -0,0 +1,24 @@
+//! The AES block cipher function.
+
+use crate::platform::*;
+
+pub(crate) type ExtendedKey<T, const NUM_KEYS: usize> = [T; NUM_KEYS];
+
+/// AES block size
+pub(crate) const AES_BLOCK_LEN: usize = 16;
+
+/// The AES block cipher function.
+#[inline]
+pub(crate) fn block_cipher<T: AESState, const NUM_KEYS: usize>(
+    st: &mut T,
+    keyex: &ExtendedKey<T, NUM_KEYS>,
+) {
+    st.xor_key(&keyex[0]);
+
+    #[allow(clippy::needless_range_loop)]
+    for i in 1..NUM_KEYS - 1 {
+        st.aes_enc(&keyex[i]);
+    }
+
+    st.aes_enc_last(&keyex[NUM_KEYS - 1]);
+}
diff --git a/aesgcm/src/aes_gcm.rs b/aesgcm/src/aes_gcm.rs
new file mode 100644
index 000000000..25d5b2591
--- /dev/null
+++ b/aesgcm/src/aes_gcm.rs
@@ -0,0 +1,105 @@
+//! Implementation of AES-GCM
+
+/// Macro to instantiate the AES state.
+/// This should really be replaced by using traits everywhere.
+macro_rules! aesgcm {
aesgcm { + ($state:ty, $context:ident) => { + impl super::State for $state { + /// Initialize the state + fn init(key: &[u8]) -> Self { + debug_assert!(key.len() == KEY_LEN); + + let nonce = [0u8; NONCE_LEN]; + let mut gcm_key = [0u8; GCM_KEY_LEN]; + let tag_mix = [0u8; TAG_LEN]; + + let aes_state = $context::::init(key, &nonce); + aes_state.key_block(0, &mut gcm_key); + let gcm_state = GF128State::init(&gcm_key); + + Self { + aes_state, + gcm_state, + tag_mix, + } + } + + fn set_nonce(&mut self, nonce: &[u8]) { + debug_assert!(nonce.len() == NONCE_LEN); + + self.aes_state.set_nonce(nonce); + self.aes_state.key_block(1, &mut self.tag_mix); + } + + fn encrypt( + &mut self, + aad: &[u8], + plaintext: &[u8], + ciphertext: &mut [u8], + tag: &mut [u8], + ) { + debug_assert!(ciphertext.len() == plaintext.len()); + debug_assert!(plaintext.len() / AES_BLOCK_LEN <= u32::MAX as usize); + debug_assert!(tag.len() == TAG_LEN); + + self.aes_state.update(2, plaintext, ciphertext); + + self.gcm_state.update_padded(aad); + self.gcm_state.update_padded(ciphertext); + + let mut last_block = [0u8; AES_BLOCK_LEN]; + last_block[0..8].copy_from_slice(&((aad.len() as u64) * 8).to_be_bytes()); + last_block[8..16].copy_from_slice(&((plaintext.len() as u64) * 8).to_be_bytes()); + + self.gcm_state.update(&last_block); + self.gcm_state.emit(tag); + + for i in 0..16 { + tag[i] ^= self.tag_mix[i]; + } + } + + fn decrypt( + &mut self, + aad: &[u8], + ciphertext: &[u8], + tag: &[u8], + plaintext: &mut [u8], + ) -> Result<(), DecryptError> { + debug_assert!(plaintext.len() == ciphertext.len()); + debug_assert!(ciphertext.len() / AES_BLOCK_LEN <= u32::MAX as usize); + debug_assert!(tag.len() == TAG_LEN); + + self.gcm_state.update_padded(aad); + self.gcm_state.update_padded(ciphertext); + + let mut last_block = [0u8; AES_BLOCK_LEN]; + last_block[0..8].copy_from_slice(&((aad.len() as u64) * 8).to_be_bytes()); + last_block[8..16].copy_from_slice(&((plaintext.len() as u64) * 8).to_be_bytes()); + + self.gcm_state.update(&last_block); + + let mut computed_tag = [0u8; TAG_LEN]; + self.gcm_state.emit(&mut computed_tag); + + for i in 0..16 { + computed_tag[i] ^= self.tag_mix[i]; + } + + let mut eq_mask = 0u8; + for i in 0..16 { + eq_mask |= computed_tag[i] ^ tag[i]; + } + + if eq_mask == 0 { + self.aes_state.update(2, ciphertext, plaintext); + Ok(()) + } else { + Err(DecryptError()) + } + } + } + }; +} + +pub(crate) use aesgcm; diff --git a/aesgcm/src/aes_gcm_128.rs b/aesgcm/src/aes_gcm_128.rs new file mode 100644 index 000000000..64cd6d3c3 --- /dev/null +++ b/aesgcm/src/aes_gcm_128.rs @@ -0,0 +1,23 @@ +//! AES-GCM 128 + +use crate::{ + aes::AES_BLOCK_LEN, + aes_gcm::aesgcm, + ctr::Aes128CtrContext, + gf128::GF128State, + platform::{AESState, GF128FieldElement}, + DecryptError, NONCE_LEN, TAG_LEN, +}; + +/// Key length. +pub(crate) const KEY_LEN: usize = 16; +pub(crate) const GCM_KEY_LEN: usize = 16; + +/// The AES-GCM 128 state +pub(crate) struct State { + pub(crate) aes_state: Aes128CtrContext, + pub(crate) gcm_state: GF128State, + pub(crate) tag_mix: [u8; TAG_LEN], +} + +aesgcm!(State, Aes128CtrContext); diff --git a/aesgcm/src/aes_gcm_256.rs b/aesgcm/src/aes_gcm_256.rs new file mode 100644 index 000000000..070a31031 --- /dev/null +++ b/aesgcm/src/aes_gcm_256.rs @@ -0,0 +1,23 @@ +//! AES-GCM 256 + +use crate::{ + aes::AES_BLOCK_LEN, + aes_gcm::aesgcm, + ctr::Aes256CtrContext, + gf128::GF128State, + platform::{AESState, GF128FieldElement}, + DecryptError, NONCE_LEN, TAG_LEN, +}; + +/// Key length. 
+pub(crate) const KEY_LEN: usize = 32;
+pub(crate) const GCM_KEY_LEN: usize = 16;
+
+/// The AES-GCM 256 state
+pub(crate) struct State<T: AESState, FE: GF128FieldElement> {
+    pub(crate) aes_state: Aes256CtrContext<T>,
+    pub(crate) gcm_state: GF128State<FE>,
+    pub(crate) tag_mix: [u8; TAG_LEN],
+}
+
+aesgcm!(State, Aes256CtrContext);
diff --git a/aesgcm/src/ctr.rs b/aesgcm/src/ctr.rs
new file mode 100644
index 000000000..ea8fb4bb4
--- /dev/null
+++ b/aesgcm/src/ctr.rs
@@ -0,0 +1,106 @@
+//! AES ctr mode implementation.
+//!
+//! This implementation is generic over the [`AESState`], which has different,
+//! platform dependent implementations.
+//!
+//! This gets instantiated in [`aes128_ctr`] and [`aes256_ctr`].
+
+use crate::{aes::*, platform::AESState};
+
+#[cfg(test)]
+mod test128;
+
+mod aes128_ctr;
+mod aes256_ctr;
+
+pub(crate) use aes128_ctr::*;
+pub(crate) use aes256_ctr::*;
+
+/// The ctr nonce length. This is different from the AES nonce length
+/// [`crate::NONCE_LEN`].
+const NONCE_LEN: usize = 16;
+
+/// Generic AES CTR context.
+pub(crate) struct AesCtrContext<T: AESState, const NUM_KEYS: usize> {
+    pub(crate) extended_key: ExtendedKey<T, NUM_KEYS>,
+    pub(crate) ctr_nonce: [u8; NONCE_LEN],
+}
+
+impl<T: AESState, const NUM_KEYS: usize> AesCtrContext<T, NUM_KEYS> {
+    #[inline]
+    fn aes_ctr_set_nonce(&mut self, nonce: &[u8]) {
+        debug_assert!(nonce.len() == crate::NONCE_LEN);
+
+        self.ctr_nonce[0..crate::NONCE_LEN].copy_from_slice(nonce);
+    }
+
+    #[inline]
+    fn aes_ctr_key_block(&self, ctr: u32, out: &mut [u8]) {
+        debug_assert!(out.len() == AES_BLOCK_LEN);
+
+        let mut st_init = self.ctr_nonce;
+        st_init[12..16].copy_from_slice(&ctr.to_be_bytes());
+        let mut st = T::new();
+
+        st.load_block(&st_init);
+
+        block_cipher(&mut st, &self.extended_key);
+
+        st.store_block(out);
+    }
+
+    #[inline]
+    fn aes_ctr_xor_block(&self, ctr: u32, input: &[u8], out: &mut [u8]) {
+        debug_assert!(input.len() == out.len() && input.len() <= AES_BLOCK_LEN);
+
+        let mut st_init = self.ctr_nonce;
+        st_init[12..16].copy_from_slice(&ctr.to_be_bytes());
+        let mut st = T::new();
+        st.load_block(&st_init);
+
+        block_cipher(&mut st, &self.extended_key);
+
+        st.xor_block(input, out);
+    }
+
+    #[inline]
+    fn aes_ctr_xor_blocks(&self, ctr: u32, input: &[u8], out: &mut [u8]) {
+        debug_assert!(input.len() == out.len() && input.len().is_multiple_of(AES_BLOCK_LEN));
+        // If input.len() / AES_BLOCK_LEN == u32::MAX - 1 and we start with
+        // ctr == 2 then we'll wrap to 0 below and we'll repeat the initial key
+        // block
+        debug_assert!(input.len() / AES_BLOCK_LEN < (u32::MAX - 1) as usize);
+
+        let blocks = input.len() / AES_BLOCK_LEN;
+        for i in 0..blocks {
+            let offset = i * AES_BLOCK_LEN;
+            self.aes_ctr_xor_block(
+                ctr.wrapping_add(i as u32),
+                &input[offset..offset + AES_BLOCK_LEN],
+                &mut out[offset..offset + AES_BLOCK_LEN],
+            );
+        }
+    }
+
+    #[inline]
+    fn aes_ctr_update(&self, ctr: u32, input: &[u8], out: &mut [u8]) {
+        debug_assert!(input.len() == out.len());
+        debug_assert!(input.len() / AES_BLOCK_LEN < u32::MAX as usize);
+
+        let blocks = input.len() / AES_BLOCK_LEN;
+        self.aes_ctr_xor_blocks(
+            ctr,
+            &input[0..blocks * AES_BLOCK_LEN],
+            &mut out[0..blocks * AES_BLOCK_LEN],
+        );
+
+        let last = input.len() - input.len() % AES_BLOCK_LEN;
+        if last < input.len() {
+            self.aes_ctr_xor_block(
+                ctr.wrapping_add(blocks as u32),
+                &input[last..],
+                &mut out[last..],
+            );
+        }
+    }
+}
diff --git a/aesgcm/src/ctr/aes128_ctr.rs b/aesgcm/src/ctr/aes128_ctr.rs
new file mode 100644
index 000000000..87a7e9134
--- /dev/null
+++ b/aesgcm/src/ctr/aes128_ctr.rs
@@ -0,0 +1,80 @@
+//! AES128 ctr mode, generic over the platform [`AESState`].
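+//!
+//! Rough sketch of the mode as implemented by [`AesCtrContext`]: for the
+//! 12-byte nonce `n` and a 32-bit big-endian block counter `c`, keystream
+//! block `i` is `AES_k(n || be32(c + i))`, and ciphertext block `i` is
+//! `plaintext_i ^ keystream_i`.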
+
+use core::array::from_fn;
+
+use super::AesCtrContext;
+use crate::{aes::*, aes_gcm_128::GCM_KEY_LEN, platform::AESState, NONCE_LEN};
+
+pub(super) const NUM_KEYS: usize = 11;
+
+/// Type alias for the AES 128 ctr context.
+pub(crate) type Aes128CtrContext<T> = AesCtrContext<T, NUM_KEYS>;
+
+impl<T: AESState> Aes128CtrContext<T> {
+    #[inline]
+    pub(crate) fn init(key: &[u8], nonce: &[u8]) -> Self {
+        debug_assert!(nonce.len() == NONCE_LEN);
+        debug_assert!(key.len() == GCM_KEY_LEN);
+
+        let mut ctr_nonce = [0u8; 16];
+        ctr_nonce[0..12].copy_from_slice(nonce);
+
+        Self {
+            extended_key: key_expansion(key),
+            ctr_nonce,
+        }
+    }
+
+    #[inline]
+    pub(crate) fn set_nonce(&mut self, nonce: &[u8]) {
+        debug_assert!(nonce.len() == NONCE_LEN);
+
+        self.aes_ctr_set_nonce(nonce);
+    }
+
+    #[inline]
+    pub(crate) fn key_block(&self, ctr: u32, out: &mut [u8]) {
+        debug_assert!(out.len() == GCM_KEY_LEN);
+
+        self.aes_ctr_key_block(ctr, out);
+    }
+
+    #[inline]
+    pub(crate) fn update(&self, ctr: u32, inp: &[u8], out: &mut [u8]) {
+        debug_assert!(inp.len() == out.len());
+
+        self.aes_ctr_update(ctr, inp, out);
+    }
+}
+
+/// 128 - Key expansion
+#[inline]
+fn key_expansion<T: AESState>(key: &[u8]) -> ExtendedKey<T, NUM_KEYS> {
+    debug_assert!(key.len() == GCM_KEY_LEN);
+
+    let mut keyex = from_fn(|_| T::new());
+    keyex[0].load_block(key);
+
+    macro_rules! expansion_step128 {
+        ($i:expr,$rcon:expr) => {
+            // For hax we could clone here instead.
+            // let prev = keyex[$i - 1].clone();
+            let (prev, current) = keyex.split_at_mut($i);
+            current[0].aes_keygen_assist0::<$rcon>(&prev[$i - 1]);
+            current[0].key_expansion_step(&prev[$i - 1]);
+        };
+    }
+
+    expansion_step128!(1, 0x01);
+    expansion_step128!(2, 0x02);
+    expansion_step128!(3, 0x04);
+    expansion_step128!(4, 0x08);
+    expansion_step128!(5, 0x10);
+    expansion_step128!(6, 0x20);
+    expansion_step128!(7, 0x40);
+    expansion_step128!(8, 0x80);
+    expansion_step128!(9, 0x1b);
+    expansion_step128!(10, 0x36);
+
+    keyex
+}
diff --git a/aesgcm/src/ctr/aes256_ctr.rs b/aesgcm/src/ctr/aes256_ctr.rs
new file mode 100644
index 000000000..7009b138f
--- /dev/null
+++ b/aesgcm/src/ctr/aes256_ctr.rs
@@ -0,0 +1,107 @@
+//! AES256 ctr mode, generic over the platform [`AESState`].
+
+use core::array::from_fn;
+
+use super::AesCtrContext;
+use crate::{aes::*, aes_gcm_256::KEY_LEN, platform::AESState, NONCE_LEN};
+
+pub(crate) const NUM_KEYS: usize = 15;
+
+/// Type alias for the AES 256 ctr context.
+pub(crate) type Aes256CtrContext<T> = AesCtrContext<T, NUM_KEYS>;
+
+impl<T: AESState> Aes256CtrContext<T> {
+    #[inline]
+    pub(crate) fn init(key: &[u8], nonce: &[u8]) -> Self {
+        debug_assert!(nonce.len() == NONCE_LEN);
+        debug_assert!(key.len() == KEY_LEN);
+
+        let mut ctr_nonce = [0u8; 16];
+        ctr_nonce[0..NONCE_LEN].copy_from_slice(nonce);
+
+        Self {
+            extended_key: key_expansion(key),
+            ctr_nonce,
+        }
+    }
+
+    #[inline]
+    pub(crate) fn set_nonce(&mut self, nonce: &[u8]) {
+        debug_assert!(nonce.len() == NONCE_LEN);
+        self.aes_ctr_set_nonce(nonce);
+    }
+
+    #[inline]
+    pub(crate) fn key_block(&self, ctr: u32, out: &mut [u8]) {
+        debug_assert!(out.len() == AES_BLOCK_LEN, "out.len() = {}", out.len());
+        self.aes_ctr_key_block(ctr, out);
+    }
+
+    #[inline]
+    pub(crate) fn update(&self, ctr: u32, input: &[u8], out: &mut [u8]) {
+        debug_assert!(input.len() == out.len());
+        self.aes_ctr_update(ctr, input, out);
+    }
+}
+
+/// 256 - Key expansion
+#[inline]
+fn key_expansion<T: AESState>(key: &[u8]) -> ExtendedKey<T, NUM_KEYS> {
+    debug_assert!(key.len() == KEY_LEN);
+
+    let mut keyex = from_fn(|_| T::new());
+    keyex[0].load_block(&key[0..16]);
+    keyex[1].load_block(&key[16..32]);
+
+    macro_rules! expansion_step256 {
+        ($i:expr,$rcon:expr) => {
+            // Split at $i to get the one we currently look at and the previous
+            // blocks.
+            let (prev, current) = keyex.split_at_mut($i);
+
+            // Split again to get the $i and $i + 1 states to operate on.
+            let (c0, c1) = current.split_at_mut(1);
+            let key_i = &mut c0[0];
+            let key_i_plus_1 = &mut c1[0];
+
+            key_i.aes_keygen_assist0::<$rcon>(&prev[$i - 1]);
+            key_i.key_expansion_step(&prev[$i - 2]);
+
+            key_i_plus_1.aes_keygen_assist1(&key_i);
+            key_i_plus_1.key_expansion_step(&prev[$i - 1]);
+
+            // The following is what will go through hax right now. But it
+            // requires copies that are really not necessary.
+            // let prev0 = keyex[$i - 2].clone();
+            // let prev1 = keyex[$i - 1].clone();
+
+            // keyex[$i].aes_keygen_assist0::<$rcon>(&prev1);
+            // keyex[$i].key_expansion_step(&prev0);
+
+            // let next0 = keyex[$i].clone();
+            // keyex[$i + 1].aes_keygen_assist1(&next0);
+            // keyex[$i + 1].key_expansion_step(&prev1);
+        };
+    }
+
+    expansion_step256!(2, 0x01);
+    expansion_step256!(4, 0x02);
+    expansion_step256!(6, 0x04);
+    expansion_step256!(8, 0x08);
+    expansion_step256!(10, 0x10);
+    expansion_step256!(12, 0x20);
+
+    let (prev0, tmp) = keyex.split_at_mut(13);
+    let (prev1, last) = tmp.split_at_mut(1);
+    // let prev0 = &mut prev0[12];
+    // let prev1 = &mut prev1[0];
+    // let last = &mut last[0];
+    // To get through hax right now we'd have to clone instead.
+    // let prev0 = keyex[12].clone();
+    // let prev1 = keyex[13].clone();
+    // let last = &mut keyex[NUM_KEYS - 1];
+    last[0].aes_keygen_assist0::<0x40>(&prev1[0]);
+    last[0].key_expansion_step(&prev0[12]);
+
+    keyex
+}
diff --git a/aesgcm/src/ctr/test128.rs b/aesgcm/src/ctr/test128.rs
new file mode 100644
index 000000000..7a2c8ba2e
--- /dev/null
+++ b/aesgcm/src/ctr/test128.rs
@@ -0,0 +1,141 @@
+use crate::{
+    aes_gcm_128::GCM_KEY_LEN,
+    ctr::Aes128CtrContext,
+    platform::{self, AESState},
+    NONCE_LEN,
+};
+
+pub(crate) fn aes128_ctr_xor_block<T: AESState>(
+    ctx: &Aes128CtrContext<T>,
+    ctr: u32,
+    inp: &[u8],
+    out: &mut [u8],
+) {
+    debug_assert!(inp.len() == out.len() && inp.len() <= 16);
+    ctx.aes_ctr_xor_block(ctr, inp, out);
+}
+
+pub(crate) fn aes128_ctr_encrypt<T: AESState>(
+    key: &[u8],
+    nonce: &[u8],
+    ctr: u32,
+    inp: &[u8],
+    out: &mut [u8],
+) {
+    debug_assert!(nonce.len() == NONCE_LEN);
+    debug_assert!(key.len() == GCM_KEY_LEN);
+    debug_assert!(inp.len() == out.len());
+    let ctx = Aes128CtrContext::<T>::init(key, nonce);
+    ctx.update(ctr, inp, out);
+}
+
+const INPUT: [u8; 32] = [
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
+];
+const KEY: [u8; 16] = [
+    0x7E, 0x24, 0x06, 0x78, 0x17, 0xFA, 0xE0, 0xD7, 0x43, 0xD6, 0xCE, 0x1F, 0x32, 0x53, 0x91, 0x63,
+];
+const NONCE: [u8; 12] = [
+    0x00, 0x6C, 0xB6, 0xDB, 0xC0, 0x54, 0x3B, 0x59, 0xDA, 0x48, 0xD9, 0x0B,
+];
+const EXPECTED: [u8; 32] = [
+    0x51, 0x04, 0xA1, 0x06, 0x16, 0x8A, 0x72, 0xD9, 0x79, 0x0D, 0x41, 0xEE, 0x8E, 0xDA, 0xD3, 0x88,
+    0xEB, 0x2E, 0x1E, 0xFC, 0x46, 0xDA, 0x57, 0xC8, 0xFC, 0xE6, 0x30, 0xDF, 0x91, 0x41, 0xBE, 0x28,
+];
+
+#[test]
+fn test_ctr_block() {
+    let mut computed: [u8; 32] = [0u8; 32];
+    let ctx = Aes128CtrContext::<platform::portable::State>::init(&KEY, &NONCE);
+    aes128_ctr_xor_block(&ctx, 1, &INPUT[0..16], &mut computed[0..16]);
+    aes128_ctr_xor_block(&ctx, 2, &INPUT[16..32], &mut computed[16..32]);
+    for i in 0..32 {
+        if computed[i] != EXPECTED[i] {
+            #[cfg(feature = "std")]
+            std::eprintln!(
+                "mismatch at {}: expected is {}, computed is {}",
+                i,
+                EXPECTED[i],
+                computed[i]
+            );
+            assert!(false);
+        }
+    }
+}
+
+#[cfg(feature = "simd128")]
+#[test]
+fn test_ctr_block_neon() {
+    let mut computed: [u8; 32] = [0u8; 32];
+    let ctx = Aes128CtrContext::<platform::neon::State>::init(&KEY, &NONCE);
+    aes128_ctr_xor_block(&ctx, 1, &INPUT[0..16], &mut computed[0..16]);
+    aes128_ctr_xor_block(&ctx, 2, &INPUT[16..32], &mut computed[16..32]);
+    for i in 0..32 {
+        if computed[i] != EXPECTED[i] {
+            #[cfg(feature = "std")]
+            std::eprintln!(
+                "mismatch at {}: expected is {}, computed is {}",
+                i,
+                EXPECTED[i],
+                computed[i]
+            );
+            assert!(false);
+        }
+    }
+}
+
+#[test]
+fn test_ctr_encrypt() {
+    let mut computed: [u8; 32] = [0u8; 32];
+    aes128_ctr_encrypt::<platform::portable::State>(&KEY, &NONCE, 1, &INPUT, &mut computed);
+    for i in 0..32 {
+        if computed[i] != EXPECTED[i] {
+            #[cfg(feature = "std")]
+            std::eprintln!(
+                "mismatch at {}: expected is {}, computed is {}",
+                i,
+                EXPECTED[i],
+                computed[i]
+            );
+            assert!(false);
+        }
+    }
+}
+
+#[cfg(feature = "simd128")]
+#[test]
+fn test_ctr_encrypt_neon() {
+    let mut computed: [u8; 32] = [0u8; 32];
+    aes128_ctr_encrypt::<platform::neon::State>(&KEY, &NONCE, 1, &INPUT, &mut computed);
+    for i in 0..32 {
+        if computed[i] != EXPECTED[i] {
+            #[cfg(feature = "std")]
+            std::eprintln!(
+                "mismatch at {}: expected is {}, computed is {}",
+                i,
+                EXPECTED[i],
+                computed[i]
+            );
+            assert!(false);
+        }
+    }
+}
+
+#[cfg(all(feature = "simd256", feature = "std"))]
+#[test]
+fn test_ctr_encrypt_intel() {
+    let mut computed: [u8; 32] = [0u8; 32];
+    aes128_ctr_encrypt::<platform::x64::State>(&KEY, &NONCE, 1, &INPUT, &mut computed);
+    for i in 0..32 {
+        if computed[i] != EXPECTED[i] {
+            std::eprintln!(
+                "mismatch at {}: expected is {}, computed is {}",
+                i,
+                EXPECTED[i],
+                computed[i]
+            );
+            assert!(false);
+        }
+    }
+}
diff --git a/aesgcm/src/gf128.rs b/aesgcm/src/gf128.rs
new file mode 100644
index 000000000..fe2c4d0cc
--- /dev/null
+++ b/aesgcm/src/gf128.rs
@@ -0,0 +1,75 @@
+//! Generic Gf128 implementation.
+//!
+//! Generic over platform dependent [`GF128FieldElement`].
+
+use crate::{aes::AES_BLOCK_LEN, platform::*};
+
+#[cfg(test)]
+mod test;
+
+/// Generic Gf128 state.
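+///
+/// Holds the GHASH key `r` and a running accumulator: each 16-byte block `b`
+/// updates the state as `acc = (acc ^ b) * r`, with multiplication in
+/// GF(2^128) modulo the GHASH polynomial `x^128 + x^7 + x^2 + x + 1`.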
+pub(crate) struct GF128State<T: GF128FieldElement> {
+    accumulator: T,
+    r: T,
+}
+
+const KEY_LEN: usize = AES_BLOCK_LEN;
+
+impl<T: GF128FieldElement> GF128State<T> {
+    #[inline]
+    pub(crate) fn init(key: &[u8]) -> Self {
+        debug_assert!(key.len() == KEY_LEN);
+
+        Self {
+            accumulator: T::zero(),
+            r: T::load_element(key),
+        }
+    }
+
+    #[inline]
+    pub(crate) fn update(&mut self, block: &[u8]) {
+        debug_assert!(block.len() == KEY_LEN);
+
+        let block_elem = T::load_element(block);
+        self.accumulator.add(&block_elem);
+        self.accumulator.mul(&self.r);
+    }
+
+    #[inline]
+    pub(crate) fn update_blocks(&mut self, input: &[u8]) {
+        debug_assert!(input.len().is_multiple_of(AES_BLOCK_LEN));
+
+        let blocks = input.len() / AES_BLOCK_LEN;
+        for i in 0..blocks {
+            let offset = i * AES_BLOCK_LEN;
+            self.update(&input[offset..offset + AES_BLOCK_LEN]);
+        }
+    }
+
+    #[inline]
+    pub(crate) fn update_last(&mut self, partial_block: &[u8]) {
+        debug_assert!(partial_block.len() < 16);
+
+        let mut block = [0u8; 16];
+        block[0..partial_block.len()].copy_from_slice(partial_block);
+        self.update(&block);
+    }
+
+    #[inline]
+    pub(crate) fn update_padded(&mut self, input: &[u8]) {
+        let blocks = input.len() / AES_BLOCK_LEN;
+        self.update_blocks(&input[0..blocks * AES_BLOCK_LEN]);
+
+        let last = input.len() - input.len() % AES_BLOCK_LEN;
+        if last < input.len() {
+            self.update_last(&input[last..]);
+        }
+    }
+
+    #[inline]
+    pub(crate) fn emit(&self, out: &mut [u8]) {
+        debug_assert!(out.len() == 16);
+
+        self.accumulator.store_element(out);
+    }
+}
diff --git a/aesgcm/src/gf128/test.rs b/aesgcm/src/gf128/test.rs
new file mode 100644
index 000000000..a44b8b425
--- /dev/null
+++ b/aesgcm/src/gf128/test.rs
@@ -0,0 +1,85 @@
+use super::*;
+
+fn gf128<T: GF128FieldElement>(key: &[u8], input: &[u8], out: &mut [u8]) {
+    debug_assert!(key.len() == 16);
+    debug_assert!(out.len() == 16);
+
+    let mut st = GF128State::<T>::init(key);
+    st.update_padded(input);
+    st.emit(out);
+}
+
+const INPUT: [u8; 132] = [
+    0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef, 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
+    0xab, 0xad, 0xda, 0xd2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x5a, 0x8d, 0xef, 0x2f, 0x0c, 0x9e, 0x53, 0xf1, 0xf7, 0x5d, 0x78, 0x53, 0x65, 0x9e, 0x2a, 0x20,
+    0xee, 0xb2, 0xb2, 0x2a, 0xaf, 0xde, 0x64, 0x19, 0xa0, 0x58, 0xab, 0x4f, 0x6f, 0x74, 0x6b, 0xf4,
+    0x0f, 0xc0, 0xc3, 0xb7, 0x80, 0xf2, 0x44, 0x45, 0x2d, 0xa3, 0xeb, 0xf1, 0xc5, 0xd8, 0x2c, 0xde,
+    0xa2, 0x41, 0x89, 0x97, 0x20, 0x0e, 0xf8, 0x2e, 0x5a, 0x8d, 0xef, 0x2f, 0x0c, 0x9e, 0x53, 0xf1,
+    0xf7, 0x5d, 0x78, 0x53, 0x65, 0x9e, 0x2a, 0x20, 0xee, 0xb2, 0xb2, 0x2a, 0xaf, 0xde, 0x64, 0x19,
+    0xa0, 0x58, 0xab, 0x4f, 0x6f, 0x74, 0x6b, 0xf4, 0x0f, 0xc0, 0xc3, 0xb7, 0x80, 0xf2, 0x44, 0x45,
+    0x44, 0xae, 0x7e, 0x3f,
+];
+
+const KEY: [u8; 16] = [
+    0xac, 0xbe, 0xf2, 0x05, 0x79, 0xb4, 0xb8, 0xeb, 0xce, 0x88, 0x9b, 0xac, 0x87, 0x32, 0xda, 0xd7,
+];
+
+const EXPECTED: [u8; 16] = [
+    0xfb, 0xba, 0xaa, 0x70, 0xa0, 0x73, 0x6f, 0xf9, 0xed, 0x2f, 0xc4, 0x62, 0xde, 0x72, 0x61, 0xe0,
+];
+
+#[test]
+fn test_gf128() {
+    let mut computed: [u8; 16] = [0u8; 16];
+    gf128::<portable::FieldElement>(&KEY, &INPUT, &mut computed);
+    for i in 0..16 {
+        if computed[i] != EXPECTED[i] {
+            #[cfg(feature = "std")]
+            std::eprintln!(
+                "mismatch at {}: expected is {}, computed is {}",
+                i,
+                EXPECTED[i],
+                computed[i]
+            );
+            assert!(false);
+        }
+    }
+}
+
+#[cfg(feature = "simd128")]
+#[test]
+fn test_gf128_neon() {
+    let mut computed: [u8; 16] = [0u8; 16];
+    gf128::<neon::FieldElement>(&KEY, &INPUT, &mut computed);
+    for i in 0..16 {
+        if computed[i] != EXPECTED[i] {
+            #[cfg(feature = "std")]
"std")] + std::eprintln!( + "mismatch at {}: expected is {}, computed is {}", + i, + EXPECTED[i], + computed[i] + ); + assert!(false); + } + } +} + +#[cfg(all(feature = "simd256", feature = "std"))] +#[test] +fn test_gf128_intel() { + let mut computed: [u8; 16] = [0u8; 16]; + gf128::(&KEY, &INPUT, &mut computed); + for i in 0..16 { + if computed[i] != EXPECTED[i] { + std::eprintln!( + "mismatch at {}: expected is {}, computed is {}", + i, + EXPECTED[i], + computed[i] + ); + assert!(false); + } + } +} diff --git a/aesgcm/src/lib.rs b/aesgcm/src/lib.rs new file mode 100644 index 000000000..f289c67d1 --- /dev/null +++ b/aesgcm/src/lib.rs @@ -0,0 +1,476 @@ +#![no_std] +#![deny(unsafe_code)] + +#[cfg(feature = "std")] +extern crate std; + +mod aes; +mod ctr; +mod gf128; +mod platform; + +mod aes_gcm; +mod aes_gcm_128; +mod aes_gcm_256; + +use libcrux_traits::aead::{arrayref, consts, slice, typed_owned}; + +// TODO: should this trait be re-exported here? +pub use libcrux_traits::aead::arrayref::Aead; + +/// Trait for an AES State. +/// Implemented for 128 and 256. +pub(crate) trait State { + fn init(key: &[u8]) -> Self; + fn set_nonce(&mut self, nonce: &[u8]); + fn encrypt(&mut self, aad: &[u8], plaintext: &[u8], ciphertext: &mut [u8], tag: &mut [u8]); + fn decrypt( + &mut self, + aad: &[u8], + ciphertext: &[u8], + tag: &[u8], + plaintext: &mut [u8], + ) -> Result<(), DecryptError>; +} + +/// AES-GCM decryption error. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct DecryptError(); + +/// AES-GCM 128. +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct AesGcm128; + +/// Portable AES-GCM 128. +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct PortableAesGcm128; + +/// Neon AES-GCM 128. +#[cfg(feature = "simd128")] +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct NeonAesGcm128; +#[cfg(not(feature = "simd128"))] +pub type NeonAesGcm128 = PortableAesGcm128; + +/// AES-NI AES-GCM 128. +#[cfg(feature = "simd256")] +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct X64AesGcm128; +#[cfg(not(feature = "simd256"))] +pub type X64AesGcm128 = PortableAesGcm128; + +/// AES-GCM 256. +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct AesGcm256; + +/// Portable AES-GCM 256. +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct PortableAesGcm256; + +/// Neon AES-GCM 256. +#[cfg(feature = "simd128")] +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct NeonAesGcm256; + +/// Neon AES-GCM 256. +#[cfg(not(feature = "simd128"))] +pub type NeonAesGcm256 = PortableAesGcm256; + +/// AES-NI AES-GCM 256. +#[derive(Clone, Copy, PartialEq, Eq)] +#[cfg(feature = "simd256")] +pub struct X64AesGcm256; + +/// AES-NI AES-GCM 256. +#[cfg(not(feature = "simd256"))] +pub type X64AesGcm256 = PortableAesGcm256; + +/// Tag length. +pub(crate) const TAG_LEN: usize = 16; + +/// Nonce length. +pub(crate) const NONCE_LEN: usize = 12; + +/// Generic AES-GCM encrypt. +pub(crate) fn encrypt( + key: &[u8], + nonce: &[u8], + aad: &[u8], + plaintext: &[u8], + ciphertext: &mut [u8], + tag: &mut [u8], +) { + debug_assert!(nonce.len() == NONCE_LEN); + debug_assert!(tag.len() == TAG_LEN); + + let mut st = S::init(key); + st.set_nonce(nonce); + st.encrypt(aad, plaintext, ciphertext, tag); +} + +/// Generic AES-GCM decrypt. 
+pub(crate) fn decrypt<S: State>(
+    key: &[u8],
+    nonce: &[u8],
+    aad: &[u8],
+    ciphertext: &[u8],
+    tag: &[u8],
+    plaintext: &mut [u8],
+) -> Result<(), DecryptError> {
+    debug_assert!(nonce.len() == NONCE_LEN);
+    debug_assert!(tag.len() == TAG_LEN);
+
+    let mut st = S::init(key);
+    st.set_nonce(nonce);
+    st.decrypt(aad, ciphertext, tag, plaintext)
+}
+
+/// Macro to instantiate the different variants, both 128/256 and platforms.
+macro_rules! pub_mod {
+    ($variant_comment:literal, $mod_name:ident, $state:ty) => {
+        #[doc = $variant_comment]
+        pub mod $mod_name {
+            use crate::$mod_name::KEY_LEN;
+            use crate::{platform, DecryptError};
+
+            type State = $state;
+
+            #[doc = $variant_comment]
+            /// encrypt.
+            pub fn encrypt(
+                key: &[u8],
+                nonce: &[u8],
+                aad: &[u8],
+                plaintext: &[u8],
+                ciphertext: &mut [u8],
+                tag: &mut [u8],
+            ) {
+                debug_assert!(key.len() == KEY_LEN);
+                crate::encrypt::<State>(key, nonce, aad, plaintext, ciphertext, tag);
+            }
+
+            #[doc = $variant_comment]
+            /// decrypt.
+            pub fn decrypt(
+                key: &[u8],
+                nonce: &[u8],
+                aad: &[u8],
+                ciphertext: &[u8],
+                tag: &[u8],
+                plaintext: &mut [u8],
+            ) -> Result<(), DecryptError> {
+                debug_assert!(key.len() == KEY_LEN);
+                crate::decrypt::<State>(key, nonce, aad, ciphertext, tag, plaintext)
+            }
+        }
+    };
+}
+
+pub mod portable {
+    pub_mod!(
+        r"AES-GCM 128 ",
+        aes_gcm_128,
+        crate::aes_gcm_128::State<platform::portable::State, platform::portable::FieldElement>
+    );
+    pub_mod!(
+        r"AES-GCM 256 ",
+        aes_gcm_256,
+        crate::aes_gcm_256::State<platform::portable::State, platform::portable::FieldElement>
+    );
+}
+
+#[cfg(feature = "simd128")]
+pub mod neon {
+    pub_mod!(
+        r"AES-GCM 128 ",
+        aes_gcm_128,
+        crate::aes_gcm_128::State<platform::neon::State, platform::neon::FieldElement>
+    );
+    pub_mod!(
+        r"AES-GCM 256 ",
+        aes_gcm_256,
+        crate::aes_gcm_256::State<platform::neon::State, platform::neon::FieldElement>
+    );
+}
+
+#[cfg(feature = "simd256")]
+pub mod x64 {
+    // Here we don't use the `pub_mod` macro because we need to add target features
+    // onto the functions.
+    macro_rules! x64_pub_mod {
+        ($variant_comment:literal, $mod_name:ident, $state:ty) => {
+            #[doc = $variant_comment]
+            pub mod $mod_name {
+                use crate::$mod_name::KEY_LEN;
+                use crate::{platform, DecryptError};
+
+                type State = $state;
+
+                #[doc = $variant_comment]
+                /// encrypt.
+                pub fn encrypt(
+                    key: &[u8],
+                    nonce: &[u8],
+                    aad: &[u8],
+                    plaintext: &[u8],
+                    ciphertext: &mut [u8],
+                    tag: &mut [u8],
+                ) {
+                    debug_assert!(key.len() == KEY_LEN);
+
+                    #[inline]
+                    #[target_feature(enable = "avx2", enable = "aes")]
+                    #[allow(unsafe_code)]
+                    unsafe fn inner(
+                        key: &[u8],
+                        nonce: &[u8],
+                        aad: &[u8],
+                        plaintext: &[u8],
+                        ciphertext: &mut [u8],
+                        tag: &mut [u8],
+                    ) {
+                        crate::encrypt::<State>(key, nonce, aad, plaintext, ciphertext, tag);
+                    }
+
+                    #[allow(unsafe_code)]
+                    unsafe {
+                        inner(key, nonce, aad, plaintext, ciphertext, tag)
+                    };
+                }
+
+                #[doc = $variant_comment]
+                /// decrypt.
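+                /// Dispatches to an inner function compiled with the `avx2` and
+                /// `aes` target features; callers must make sure these CPU
+                /// features are available (the multiplexing API in `lib.rs`
+                /// checks this at runtime).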
+                pub fn decrypt(
+                    key: &[u8],
+                    nonce: &[u8],
+                    aad: &[u8],
+                    ciphertext: &[u8],
+                    tag: &[u8],
+                    plaintext: &mut [u8],
+                ) -> Result<(), DecryptError> {
+                    debug_assert!(key.len() == KEY_LEN);
+
+                    #[inline]
+                    #[target_feature(enable = "avx2", enable = "aes")]
+                    #[allow(unsafe_code)]
+                    unsafe fn inner(
+                        key: &[u8],
+                        nonce: &[u8],
+                        aad: &[u8],
+                        ciphertext: &[u8],
+                        tag: &[u8],
+                        plaintext: &mut [u8],
+                    ) -> Result<(), DecryptError> {
+                        crate::decrypt::<State>(key, nonce, aad, ciphertext, tag, plaintext)
+                    }
+
+                    #[allow(unsafe_code)]
+                    unsafe {
+                        inner(key, nonce, aad, ciphertext, tag, plaintext)
+                    }
+                }
+            }
+        };
+    }
+
+    x64_pub_mod!(
+        r"AES-GCM 128 ",
+        aes_gcm_128,
+        crate::aes_gcm_128::State<platform::x64::State, platform::x64::FieldElement>
+    );
+    x64_pub_mod!(
+        r"AES-GCM 256 ",
+        aes_gcm_256,
+        crate::aes_gcm_256::State<platform::x64::State, platform::x64::FieldElement>
+    );
+}
+
+/// Macro to implement the libcrux_traits public API traits
+///
+/// For the blanket impl of `typed_refs::Aead` to take place,
+/// the `$type` must implement `Copy` and `PartialEq`.
+macro_rules! impl_traits_public_api {
+    ($type:ty, $keylen:expr, $taglen:expr, $noncelen:expr ) => {
+        // prerequisite for typed_owned::Aead
+        impl consts::AeadConsts for $type {
+            const KEY_LEN: usize = KEY_LEN;
+            const TAG_LEN: usize = TAG_LEN;
+            const NONCE_LEN: usize = NONCE_LEN;
+        }
+        // implement typed_owned::Aead
+        typed_owned::impl_aead_typed_owned!($type, KEY_LEN, TAG_LEN, NONCE_LEN);
+    };
+}
+
+/// Macro to implement the different structs and multiplexing.
+macro_rules! api {
+    ($mod_name:ident, $variant:ident, $multiplexing:ty, $portable:ident, $neon:ident, $x64:ident) => {
+        mod $mod_name {
+            use super::*;
+            use libcrux_traits::aead::arrayref::{DecryptError, EncryptError};
+            use $variant::KEY_LEN;
+
+            pub type Key = [u8; KEY_LEN];
+            pub type Tag = [u8; TAG_LEN];
+            pub type Nonce = [u8; NONCE_LEN];
+
+            mod _libcrux_traits_apis_multiplex {
+                use super::*;
+
+                // implement `libcrux_traits` slice trait
+                slice::impl_aead_slice_trait!($multiplexing => KEY_LEN, TAG_LEN, NONCE_LEN);
+
+                // implement `libcrux_traits` public API traits
+                impl_traits_public_api!($multiplexing, KEY_LEN, TAG_LEN, NONCE_LEN);
+
+                impl arrayref::Aead for $multiplexing {
+                    fn encrypt(
+                        ciphertext: &mut [u8],
+                        tag: &mut Tag,
+                        key: &Key,
+                        nonce: &Nonce,
+                        aad: &[u8],
+                        plaintext: &[u8],
+                    ) -> Result<(), EncryptError> {
+                        // SIMD256 needs to come first because SIMD128 is true for
+                        // x64 as well, but we don't actually implement it.
+                        if libcrux_platform::simd256_support() && libcrux_platform::aes_ni_support() {
+                            $x64::encrypt(ciphertext, tag, key, nonce, aad, plaintext)
+                        } else if libcrux_platform::simd128_support()
+                            && libcrux_platform::aes_ni_support()
+                        {
+                            $neon::encrypt(ciphertext, tag, key, nonce, aad, plaintext)
+                        } else {
+                            $portable::encrypt(ciphertext, tag, key, nonce, aad, plaintext)
+                        }
+                    }
+
+                    fn decrypt(
+                        plaintext: &mut [u8],
+                        key: &Key,
+                        nonce: &Nonce,
+                        aad: &[u8],
+                        ciphertext: &[u8],
+                        tag: &Tag,
+                    ) -> Result<(), DecryptError> {
+                        // SIMD256 needs to come first because SIMD128 is true for
+                        // x64 as well, but we don't actually implement it.
+                        if libcrux_platform::simd256_support() && libcrux_platform::aes_ni_support() {
+                            $x64::decrypt(plaintext, key, nonce, aad, ciphertext, tag)
+                        } else if libcrux_platform::simd128_support()
+                            && libcrux_platform::aes_ni_support()
+                        {
+                            $neon::decrypt(plaintext, key, nonce, aad, ciphertext, tag)
+                        } else {
+                            $portable::decrypt(plaintext, key, nonce, aad, ciphertext, tag)
+                        }
+                    }
+                }
+            }
+
+            mod _libcrux_traits_apis_portable {
+                use super::*;
+
+                // implement `libcrux_traits` slice trait
+                slice::impl_aead_slice_trait!($portable => KEY_LEN, TAG_LEN, NONCE_LEN);
+
+                // implement `libcrux_traits` public API traits
+                impl_traits_public_api!($portable, KEY_LEN, TAG_LEN, NONCE_LEN);
+
+                impl arrayref::Aead for $portable {
+                    fn encrypt(
+                        ciphertext: &mut [u8],
+                        tag: &mut Tag,
+                        key: &Key,
+                        nonce: &Nonce,
+                        aad: &[u8],
+                        plaintext: &[u8],
+                    ) -> Result<(), EncryptError> {
+                        portable::$variant::encrypt(key, nonce, aad, plaintext, ciphertext, tag);
+                        Ok(())
+                    }
+
+                    fn decrypt(
+                        plaintext: &mut [u8],
+                        key: &Key,
+                        nonce: &Nonce,
+                        aad: &[u8],
+                        ciphertext: &[u8],
+                        tag: &Tag,
+                    ) -> Result<(), DecryptError> {
+                        portable::$variant::decrypt(key, nonce, aad, ciphertext, tag, plaintext)
+                            .map_err(|_| DecryptError::InvalidTag)
+                    }
+                }
+            }
+
+            #[cfg(feature = "simd128")]
+            mod _libcrux_traits_apis_neon {
+                use super::*;
+
+                // implement `libcrux_traits` slice trait
+                slice::impl_aead_slice_trait!($neon => KEY_LEN, TAG_LEN, NONCE_LEN);
+
+                // implement `libcrux_traits` public API traits
+                impl_traits_public_api!($neon, KEY_LEN, TAG_LEN, NONCE_LEN);
+
+                impl arrayref::Aead for $neon {
+                    fn encrypt(
+                        ciphertext: &mut [u8],
+                        tag: &mut Tag,
+                        key: &Key,
+                        nonce: &Nonce,
+                        aad: &[u8],
+                        plaintext: &[u8],
+                    ) -> Result<(), EncryptError> {
+                        neon::$variant::encrypt(key, nonce, aad, plaintext, ciphertext, tag);
+                        Ok(())
+                    }
+
+                    fn decrypt(
+                        plaintext: &mut [u8],
+                        key: &Key,
+                        nonce: &Nonce,
+                        aad: &[u8],
+                        ciphertext: &[u8],
+                        tag: &Tag,
+                    ) -> Result<(), DecryptError> {
+                        neon::$variant::decrypt(key, nonce, aad, ciphertext, tag, plaintext)
+                            .map_err(|_| DecryptError::InvalidTag)
+                    }
+                }
+            }
+
+            #[cfg(feature = "simd256")]
+            mod _libcrux_traits_api_x64 {
+                use super::*;
+
+                // implement `libcrux_traits` slice trait
+                slice::impl_aead_slice_trait!($x64 => KEY_LEN, TAG_LEN, NONCE_LEN);
+
+                // implement `libcrux_traits` public API traits
+                impl_traits_public_api!($x64, KEY_LEN, TAG_LEN, NONCE_LEN);
+
+                impl arrayref::Aead for $x64 {
+                    fn encrypt(
+                        ciphertext: &mut [u8],
+                        tag: &mut Tag,
+                        key: &Key,
+                        nonce: &Nonce,
+                        aad: &[u8],
+                        plaintext: &[u8],
+                    ) -> Result<(), EncryptError> {
+                        x64::$variant::encrypt(key, nonce, aad, plaintext, ciphertext, tag);
+                        Ok(())
+                    }
+
+                    fn decrypt(
+                        plaintext: &mut [u8],
+                        key: &Key,
+                        nonce: &Nonce,
+                        aad: &[u8],
+                        ciphertext: &[u8],
+                        tag: &Tag,
+                    ) -> Result<(), DecryptError> {
+                        x64::$variant::decrypt(key, nonce, aad, ciphertext, tag, plaintext)
+                            .map_err(|_| DecryptError::InvalidTag)
+                    }
+                }
+            }
+        }
+    };
+}
+
+api!(
+    aes128,
+    aes_gcm_128,
+    AesGcm128,
+    PortableAesGcm128,
+    NeonAesGcm128,
+    X64AesGcm128
+);
+
+api!(
+    aes256,
+    aes_gcm_256,
+    AesGcm256,
+    PortableAesGcm256,
+    NeonAesGcm256,
+    X64AesGcm256
+);
diff --git a/aesgcm/src/platform.rs b/aesgcm/src/platform.rs
new file mode 100644
index 000000000..9c622c81a
--- /dev/null
+++ b/aesgcm/src/platform.rs
@@ -0,0 +1,33 @@
+//! Traits for platform dependent implementations
+
+pub(crate) mod portable;
+
+#[cfg(feature = "simd128")]
+pub(crate) mod neon;
+
+#[cfg(feature = "simd256")]
+pub(crate) mod x64;
+
+/// The AES state.
+pub(crate) trait AESState: Clone + core::fmt::Debug {
+    fn new() -> Self;
+    fn load_block(&mut self, b: &[u8]);
+    fn store_block(&self, out: &mut [u8]);
+    fn xor_block(&self, inp: &[u8], out: &mut [u8]);
+
+    fn xor_key(&mut self, key: &Self);
+    fn aes_enc(&mut self, key: &Self);
+    fn aes_enc_last(&mut self, key: &Self);
+    fn aes_keygen_assist0<const RCON: i32>(&mut self, prev: &Self);
+    fn aes_keygen_assist1(&mut self, prev: &Self);
+    fn key_expansion_step(&mut self, prev: &Self);
+}
+
+/// A gf128 field element.
+pub(crate) trait GF128FieldElement {
+    fn zero() -> Self;
+    fn load_element(bytes: &[u8]) -> Self;
+    fn store_element(&self, bytes: &mut [u8]);
+    fn add(&mut self, other: &Self);
+    fn mul(&mut self, other: &Self);
+}
diff --git a/aesgcm/src/platform/neon.rs b/aesgcm/src/platform/neon.rs
new file mode 100644
index 000000000..37a24363c
--- /dev/null
+++ b/aesgcm/src/platform/neon.rs
@@ -0,0 +1,5 @@
+pub(crate) use aes_core::State;
+pub(crate) use gf128_core::FieldElement;
+
+mod aes_core;
+mod gf128_core;
diff --git a/aesgcm/src/platform/neon/aes_core.rs b/aesgcm/src/platform/neon/aes_core.rs
new file mode 100644
index 000000000..674b620c9
--- /dev/null
+++ b/aesgcm/src/platform/neon/aes_core.rs
@@ -0,0 +1,130 @@
+use libcrux_intrinsics::arm64::{
+    _uint8x16_t, _vaeseq_u8, _vaesmcq_u8, _vdupq_laneq_u32, _vdupq_n_u32, _vdupq_n_u8, _veorq_u32,
+    _veorq_u8, _vextq_u32, _vld1q_u32, _vld1q_u8, _vreinterpretq_u32_u8, _vreinterpretq_u8_u32,
+    _vst1q_u8,
+};
+
+/// The Neon state
+pub(crate) type State = _uint8x16_t;
+
+#[inline]
+fn new_state() -> State {
+    _vdupq_n_u8(0)
+}
+
+#[inline]
+fn xor_key1_state(st: &mut State, k: &State) {
+    *st = _veorq_u8(*st, *k);
+}
+
+#[inline]
+fn aes_enc(st: &mut State, key: &State) {
+    *st = _veorq_u8(_vaesmcq_u8(_vaeseq_u8(*st, _vdupq_n_u8(0))), *key);
+}
+
+#[inline]
+fn aes_enc_last(st: &mut State, key: &State) {
+    *st = _veorq_u8(_vaeseq_u8(*st, _vdupq_n_u8(0)), *key)
+}
+
+#[inline]
+fn aes_keygen_assist(next: &mut State, prev: &State, rcon: u8) {
+    let st = _vaeseq_u8(*prev, _vdupq_n_u8(0));
+    let mut tmp = [0u8; 16];
+    _vst1q_u8(&mut tmp, st);
+    let tmp_new = [
+        tmp[4], tmp[1], tmp[14], tmp[11], tmp[1], tmp[14], tmp[11], tmp[4], tmp[12], tmp[9],
+        tmp[6], tmp[3], tmp[9], tmp[6], tmp[3], tmp[12],
+    ];
+    let st_new = _vld1q_u8(&tmp_new);
+    let rcon_array = [0, rcon as u32, 0, rcon as u32];
+    let rcon_vec = _vreinterpretq_u8_u32(_vld1q_u32(&rcon_array));
+    *next = _veorq_u8(st_new, rcon_vec);
+}
+
+#[inline]
+fn aes_keygen_assist0(next: &mut State, prev: &State, rcon: u8) {
+    aes_keygen_assist(next, prev, rcon);
+    *next = _vreinterpretq_u8_u32(_vdupq_laneq_u32::<3>(_vreinterpretq_u32_u8(*next)))
+}
+
+#[inline]
+fn aes_keygen_assist1(next: &mut State, prev: &State) {
+    aes_keygen_assist(next, prev, 0);
+    *next = _vreinterpretq_u8_u32(_vdupq_laneq_u32::<2>(_vreinterpretq_u32_u8(*next)));
+}
+
+#[inline]
+fn key_expansion_step(next: &mut State, prev: &State) {
+    let zero = _vdupq_n_u32(0);
+    let prev0 = _vreinterpretq_u32_u8(*prev);
+    let prev1 = _veorq_u32(prev0, _vextq_u32::<3>(zero, prev0));
+    let prev2 = _veorq_u32(prev1, _vextq_u32::<3>(zero, prev1));
+    let prev3 = _veorq_u32(prev2, _vextq_u32::<3>(zero, prev2));
+    *next = _veorq_u8(*next, _vreinterpretq_u8_u32(prev3));
+}
+
+impl crate::platform::AESState for State {
+    #[inline]
+    fn new() -> Self {
+        new_state()
+    }
+
+    #[inline]
+    fn load_block(&mut self, b: &[u8]) {
+        debug_assert!(b.len() == 16);
+        *self = _vld1q_u8(b);
+    }
+
+    #[inline]
+    fn store_block(&self, out: &mut [u8]) {
+        debug_assert!(out.len() == 16);
+        _vst1q_u8(out, *self);
+    }
+
+    #[inline]
+    fn xor_block(&self, input: &[u8], out: &mut [u8]) {
+        debug_assert!(input.len() == out.len() && input.len() <= 16);
+        // XXX: hot-fix to have enough input and output here.
+        // For some reason this doesn't fail even if we don't do this.
+        let mut block_in = [0u8; 16];
+        let mut block_out = [0u8; 16];
+        block_in[0..input.len()].copy_from_slice(input);
+
+        let inp_vec = _vld1q_u8(&block_in);
+        let out_vec = _veorq_u8(inp_vec, *self);
+        _vst1q_u8(&mut block_out, out_vec);
+
+        out.copy_from_slice(&block_out[0..out.len()]);
+    }
+
+    #[inline]
+    fn xor_key(&mut self, key: &Self) {
+        xor_key1_state(self, key);
+    }
+
+    #[inline]
+    fn aes_enc(&mut self, key: &Self) {
+        aes_enc(self, key);
+    }
+
+    #[inline]
+    fn aes_enc_last(&mut self, key: &Self) {
+        aes_enc_last(self, key);
+    }
+
+    #[inline]
+    fn aes_keygen_assist0<const RCON: i32>(&mut self, prev: &Self) {
+        aes_keygen_assist0(self, prev, RCON as u8);
+    }
+
+    #[inline]
+    fn aes_keygen_assist1(&mut self, prev: &Self) {
+        aes_keygen_assist1(self, prev);
+    }
+
+    #[inline]
+    fn key_expansion_step(&mut self, prev: &Self) {
+        key_expansion_step(self, prev);
+    }
+}
diff --git a/aesgcm/src/platform/neon/gf128_core.rs b/aesgcm/src/platform/neon/gf128_core.rs
new file mode 100644
index 000000000..3c9594dc9
--- /dev/null
+++ b/aesgcm/src/platform/neon/gf128_core.rs
@@ -0,0 +1,93 @@
+use libcrux_intrinsics::arm64::*;
+
+/// A Neon gf128 field element
+#[derive(Clone, Copy)]
+pub(crate) struct FieldElement(pub(crate) u128);
+
+#[inline]
+fn zero() -> FieldElement {
+    FieldElement(0)
+}
+
+#[inline]
+fn load_element(b: &[u8]) -> FieldElement {
+    debug_assert!(b.len() == 16);
+
+    FieldElement(u128::from_be_bytes(b.try_into().unwrap()))
+}
+
+#[inline]
+fn store_element(element: &FieldElement, bytes: &mut [u8]) {
+    debug_assert!(bytes.len() == 16);
+
+    bytes.copy_from_slice(&element.0.to_be_bytes());
+}
+
+#[inline]
+fn add(element: &mut FieldElement, other: &FieldElement) {
+    element.0 ^= other.0;
+}
+
+#[inline]
+fn mul_wide(element: &FieldElement, other: &FieldElement) -> (FieldElement, FieldElement) {
+    let l0 = element.0 as u64;
+    let h0 = (element.0 >> 64) as u64;
+    let l1 = other.0 as u64;
+    let h1 = (other.0 >> 64) as u64;
+
+    let low: u128 = _vmull_p64(l0, l1);
+    let m1: u128 = _vmull_p64(l0, h1);
+    let m2: u128 = _vmull_p64(l1, h0);
+    let high: u128 = _vmull_p64(h0, h1);
+
+    let mid = m1 ^ m2;
+    let m0 = mid << 64;
+    let m1 = mid >> 64;
+    let low = low ^ m0;
+    let high = high ^ m1;
+
+    (FieldElement(high), FieldElement(low))
+}
+
+#[inline]
+fn reduce(high: &FieldElement, low: &FieldElement) -> FieldElement {
+    let high = (high.0 << 1) ^ (low.0 >> 127);
+    let low = low.0 << 1;
+    let x0_0 = low << 64;
+    let x1_x0 = low ^ (x0_0 << 63) ^ (x0_0 << 62) ^ (x0_0 << 57);
+    let x1_x0 = x1_x0 ^ (x1_x0 >> 1) ^ (x1_x0 >> 2) ^ (x1_x0 >> 7);
+    FieldElement(x1_x0 ^ high)
+}
+
+#[inline]
+fn mul(x: &mut FieldElement, y: &FieldElement) {
+    let (high, low) = mul_wide(x, y);
+    *x = reduce(&high, &low);
+}
+
+impl crate::platform::GF128FieldElement for FieldElement {
+    #[inline]
+    fn zero() -> Self {
+        zero()
+    }
+
+    #[inline]
+    fn load_element(b: &[u8]) -> Self {
+        load_element(b)
+    }
+
+    #[inline]
+    fn store_element(&self, b: &mut [u8]) {
+        store_element(self, b);
+    }
+
+    #[inline]
+    fn add(&mut self, other: &Self) {
+        add(self, other);
other); + } + + #[inline] + fn mul(&mut self, other: &Self) { + mul(self, other) + } +} diff --git a/aesgcm/src/platform/portable.rs b/aesgcm/src/platform/portable.rs new file mode 100644 index 000000000..37a24363c --- /dev/null +++ b/aesgcm/src/platform/portable.rs @@ -0,0 +1,5 @@ +pub(crate) use aes_core::State; +pub(crate) use gf128_core::FieldElement; + +mod aes_core; +mod gf128_core; diff --git a/aesgcm/src/platform/portable/aes_core.rs b/aesgcm/src/platform/portable/aes_core.rs new file mode 100644 index 000000000..7cc75160b --- /dev/null +++ b/aesgcm/src/platform/portable/aes_core.rs @@ -0,0 +1,523 @@ +#![allow(clippy::needless_range_loop)] + +use crate::aes::AES_BLOCK_LEN; + +#[cfg(test)] +mod test; + +pub(crate) type State = [u16; 8]; + +#[inline] +fn new_state() -> State { + [0u16; 8] +} + +#[inline] +fn interleave_u8_1(i0: u8, i1: u8) -> u16 { + let mut x = i0 as u16; + + x = (x | (x << 4)) & 0x0F0F; + x = (x | (x << 2)) & 0x3333; + x = (x | (x << 1)) & 0x5555; + + let mut y = i1 as u16; + + y = (y | (y << 4)) & 0x0F0F; + y = (y | (y << 2)) & 0x3333; + y = (y | (y << 1)) & 0x5555; + + x | (y << 1) +} + +#[inline] +fn deinterleave_u8_1(i0: u16) -> (u8, u8) { + let mut x = i0 & 0x5555; + + x = (x | (x >> 1)) & 0x3333; + x = (x | (x >> 2)) & 0x0F0F; + x = (x | (x >> 4)) & 0x00FF; + + let mut y = (i0 >> 1) & 0x5555; + + y = (y | (y >> 1)) & 0x3333; + y = (y | (y >> 2)) & 0x0F0F; + y = (y | (y >> 4)) & 0x00FF; + + (x as u8, y as u8) +} + +#[inline] +fn interleave_u16_2(i0: u16, i1: u16) -> (u16, u16) { + let x = ((i1 & 0x3333) << 2) | (i0 & 0x3333); + let y = ((i0 & 0xcccc) >> 2) | (i1 & 0xcccc); + (x, y) +} + +#[inline] +fn interleave_u16_4(i0: u16, i1: u16) -> (u16, u16) { + let x = ((i1 & 0x0F0F) << 4) | (i0 & 0x0F0F); + let y = ((i0 & 0xF0F0) >> 4) | (i1 & 0xF0F0); + (x, y) +} + +#[inline] +fn interleave_u16_8(i0: u16, i1: u16) -> (u16, u16) { + let x = ((i1 & 0x00FF) << 8) | (i0 & 0x00FF); + let y = ((i0 & 0xFF00) >> 8) | (i1 & 0xFF00); + (x, y) +} + +#[inline] +fn transpose_u8x16(input: &[u8; 16], output: &mut [u16; 8]) { + let o0 = interleave_u8_1(input[0], input[1]); + let o1 = interleave_u8_1(input[2], input[3]); + let o2 = interleave_u8_1(input[4], input[5]); + let o3 = interleave_u8_1(input[6], input[7]); + let o4 = interleave_u8_1(input[8], input[9]); + let o5 = interleave_u8_1(input[10], input[11]); + let o6 = interleave_u8_1(input[12], input[13]); + let o7 = interleave_u8_1(input[14], input[15]); + + let (o0, o1) = interleave_u16_2(o0, o1); + let (o2, o3) = interleave_u16_2(o2, o3); + let (o4, o5) = interleave_u16_2(o4, o5); + let (o6, o7) = interleave_u16_2(o6, o7); + + let (o0, o2) = interleave_u16_4(o0, o2); + let (o1, o3) = interleave_u16_4(o1, o3); + let (o4, o6) = interleave_u16_4(o4, o6); + let (o5, o7) = interleave_u16_4(o5, o7); + + let (o0, o4) = interleave_u16_8(o0, o4); + let (o1, o5) = interleave_u16_8(o1, o5); + let (o2, o6) = interleave_u16_8(o2, o6); + let (o3, o7) = interleave_u16_8(o3, o7); + + output[0] = o0; + output[1] = o1; + output[2] = o2; + output[3] = o3; + output[4] = o4; + output[5] = o5; + output[6] = o6; + output[7] = o7; +} + +#[inline] +fn transpose_u16x8(input: &[u16; 8], output: &mut [u8]) { + let (i0, i4) = interleave_u16_8(input[0], input[4]); + let (i1, i5) = interleave_u16_8(input[1], input[5]); + let (i2, i6) = interleave_u16_8(input[2], input[6]); + let (i3, i7) = interleave_u16_8(input[3], input[7]); + + let (i0, i2) = interleave_u16_4(i0, i2); + let (i1, i3) = interleave_u16_4(i1, i3); + let (i4, i6) = interleave_u16_4(i4, i6); 
+ let (i5, i7) = interleave_u16_4(i5, i7); + + let (i0, i1) = interleave_u16_2(i0, i1); + let (i2, i3) = interleave_u16_2(i2, i3); + let (i4, i5) = interleave_u16_2(i4, i5); + let (i6, i7) = interleave_u16_2(i6, i7); + + let (o0, o1) = deinterleave_u8_1(i0); + let (o2, o3) = deinterleave_u8_1(i1); + let (o4, o5) = deinterleave_u8_1(i2); + let (o6, o7) = deinterleave_u8_1(i3); + let (o8, o9) = deinterleave_u8_1(i4); + + let (o10, o11) = deinterleave_u8_1(i5); + let (o12, o13) = deinterleave_u8_1(i6); + let (o14, o15) = deinterleave_u8_1(i7); + + output[0] = o0; + output[1] = o1; + output[2] = o2; + output[3] = o3; + output[4] = o4; + output[5] = o5; + output[6] = o6; + output[7] = o7; + output[8] = o8; + output[9] = o9; + output[10] = o10; + output[11] = o11; + output[12] = o12; + output[13] = o13; + output[14] = o14; + output[15] = o15; +} + +#[inline] +fn xnor(a: u16, b: u16) -> u16 { + !(a ^ b) +} + +#[inline] +fn sub_bytes_state(st: &mut State) { + let u0 = st[7]; + let u1 = st[6]; + let u2 = st[5]; + let u3 = st[4]; + let u4 = st[3]; + let u5 = st[2]; + let u6 = st[1]; + let u7 = st[0]; + + let t1 = u6 ^ u4; + let t2 = u3 ^ u0; + let t3 = u1 ^ u2; + let t4 = u7 ^ t3; + let t5 = t1 ^ t2; + let t6 = u1 ^ u5; + let t7 = u0 ^ u6; + let t8 = t1 ^ t6; + let t9 = u6 ^ t4; + let t10 = u3 ^ t4; + let t11 = u7 ^ t5; + let t12 = t5 ^ t6; + let t13 = u2 ^ u5; + let t14 = t3 ^ t5; + let t15 = u5 ^ t7; + let t16 = u0 ^ u5; + let t17 = u7 ^ t8; + let t18 = u6 ^ u5; + let t19 = t2 ^ t18; + let t20 = t4 ^ t15; + let t21 = t1 ^ t13; + let t22 = u0 ^ t4; + let t39 = t21 ^ t5; + let t40 = t21 ^ t7; + let t41 = t7 ^ t19; + let t42 = t16 ^ t14; + let t43 = t22 ^ t17; + let t44 = t19 & t5; + let t45 = t20 & t11; + let t46 = t12 ^ t44; + let t47 = t10 & u7; + let t48 = t47 ^ t44; + let t49 = t7 & t21; + let t50 = t9 & t4; + let t51 = t40 ^ t49; + let t52 = t22 & t17; + let t53 = t52 ^ t49; + let t54 = t2 & t8; + let t55 = t41 & t39; + let t56 = t55 ^ t54; + let t57 = t16 & t14; + let t58 = t57 ^ t54; + let t59 = t46 ^ t45; + let t60 = t48 ^ t42; + let t61 = t51 ^ t50; + let t62 = t53 ^ t58; + let t63 = t59 ^ t56; + let t64 = t60 ^ t58; + let t65 = t61 ^ t56; + let t66 = t62 ^ t43; + let t67 = t65 ^ t66; + let t68 = t65 & t63; + let t69 = t64 ^ t68; + let t70 = t63 ^ t64; + let t71 = t66 ^ t68; + let t72 = t71 & t70; + let t73 = t69 & t67; + let t74 = t63 & t66; + let t75 = t70 & t74; + let t76 = t70 ^ t68; + let t77 = t64 & t65; + let t78 = t67 & t77; + let t79 = t67 ^ t68; + let t80 = t64 ^ t72; + let t81 = t75 ^ t76; + let t82 = t66 ^ t73; + let t83 = t78 ^ t79; + let t84 = t81 ^ t83; + let t85 = t80 ^ t82; + let t86 = t80 ^ t81; + let t87 = t82 ^ t83; + let t88 = t85 ^ t84; + let t89 = t87 & t5; + let t90 = t83 & t11; + let t91 = t82 & u7; + let t92 = t86 & t21; + let t93 = t81 & t4; + let t94 = t80 & t17; + let t95 = t85 & t8; + let t96 = t88 & t39; + let t97 = t84 & t14; + let t98 = t87 & t19; + let t99 = t83 & t20; + let t100 = t82 & t10; + let t101 = t86 & t7; + let t102 = t81 & t9; + let t103 = t80 & t22; + let t104 = t85 & t2; + let t105 = t88 & t41; + let t106 = t84 & t16; + let t107 = t104 ^ t105; + let t108 = t93 ^ t99; + let t109 = t96 ^ t107; + let t110 = t98 ^ t108; + let t111 = t91 ^ t101; + let t112 = t89 ^ t92; + let t113 = t107 ^ t112; + let t114 = t90 ^ t110; + let t115 = t89 ^ t95; + let t116 = t94 ^ t102; + let t117 = t97 ^ t103; + let t118 = t91 ^ t114; + let t119 = t111 ^ t117; + let t120 = t100 ^ t108; + let t121 = t92 ^ t95; + let t122 = t110 ^ t121; + let t123 = t106 ^ t119; + let 
t124 = t104 ^ t115; + let t125 = t111 ^ t116; + + let t128 = t94 ^ t107; + + let t131 = t93 ^ t101; + let t132 = t112 ^ t120; + + let t134 = t97 ^ t116; + let t135 = t131 ^ t134; + let t136 = t93 ^ t115; + + let t138 = t119 ^ t132; + let t140 = t114 ^ t136; + + let s0 = t109 ^ t122; + let s2 = xnor(t123, t124); + let s3 = t113 ^ t114; + let s4 = t118 ^ t128; + let s7 = xnor(t113, t125); + let s6 = xnor(t109, t135); + let s5 = t109 ^ t138; + let s1 = xnor(t109, t140); + + st[0] = s7; + st[1] = s6; + st[2] = s5; + st[3] = s4; + st[4] = s3; + st[5] = s2; + st[6] = s1; + st[7] = s0; +} + +#[inline] +fn shift_row_u16(input: u16) -> u16 { + (input & 0x1111) + | ((input & 0x2220) >> 4) + | ((input & 0x0002) << 12) + | ((input & 0x4400) >> 8) + | ((input & 0x0044) << 8) + | ((input & 0x8000) >> 12) + | ((input & 0x0888) << 4) +} + +#[inline] +fn shift_rows_state(st: &mut State) { + st[0] = shift_row_u16(st[0]); + st[1] = shift_row_u16(st[1]); + st[2] = shift_row_u16(st[2]); + st[3] = shift_row_u16(st[3]); + st[4] = shift_row_u16(st[4]); + st[5] = shift_row_u16(st[5]); + st[6] = shift_row_u16(st[6]); + st[7] = shift_row_u16(st[7]); +} + +#[inline] +fn mix_columns_state(st: &mut State) { + let mut last_col: u16 = 0; + + for i in 0..8 { + let col = st[i] ^ (((st[i] & 0xeeee) >> 1) | ((st[i] & 0x1111) << 3)); + st[i] = st[i] ^ last_col ^ col ^ (((col & 0xcccc) >> 2) | ((col & 0x3333) << 2)); + last_col = col; + } + + st[0] ^= last_col; + st[1] ^= last_col; + st[3] ^= last_col; + st[4] ^= last_col; +} + +#[inline] +fn xor_key1_state(st: &mut State, k: &State) { + st[0] ^= k[0]; + st[1] ^= k[1]; + st[2] ^= k[2]; + st[3] ^= k[3]; + st[4] ^= k[4]; + st[5] ^= k[5]; + st[6] ^= k[6]; + st[7] ^= k[7]; +} + +#[inline] +fn aes_enc(st: &mut State, key: &State) { + sub_bytes_state(st); + shift_rows_state(st); + mix_columns_state(st); + xor_key1_state(st, key) +} + +#[inline] +fn aes_enc_last(st: &mut State, key: &State) { + sub_bytes_state(st); + shift_rows_state(st); + xor_key1_state(st, key) +} + +#[inline] +fn aes_keygen_assisti(rcon: u8, i: usize, u: u16) -> u16 { + let u3 = u & 0xf000; + let n = u3 >> 12; + let n = ((n >> 1) | (n << 3)) & 0x000f; + let ri = ((rcon >> i) & 1) as u16; + let n = n ^ ri; + let n = n << 12; + n ^ (u3 >> 4) +} + +#[inline] +fn aes_keygen_assist(next: &mut State, prev: &State, rcon: u8) { + next.copy_from_slice(prev); + sub_bytes_state(next); + + next[0] = aes_keygen_assisti(rcon, 0, next[0]); + next[1] = aes_keygen_assisti(rcon, 1, next[1]); + next[2] = aes_keygen_assisti(rcon, 2, next[2]); + next[3] = aes_keygen_assisti(rcon, 3, next[3]); + next[4] = aes_keygen_assisti(rcon, 4, next[4]); + next[5] = aes_keygen_assisti(rcon, 5, next[5]); + next[6] = aes_keygen_assisti(rcon, 6, next[6]); + next[7] = aes_keygen_assisti(rcon, 7, next[7]); +} + +#[inline] +fn aes_keygen_assist0(next: &mut State, prev: &State, rcon: u8) { + aes_keygen_assist(next, prev, rcon); + + #[inline] + fn aux(mut n: u16) -> u16 { + n &= 0xf000; + n ^= n >> 4; + n ^= n >> 8; + n + } + + next[0] = aux(next[0]); + next[1] = aux(next[1]); + next[2] = aux(next[2]); + next[3] = aux(next[3]); + next[4] = aux(next[4]); + next[5] = aux(next[5]); + next[6] = aux(next[6]); + next[7] = aux(next[7]); +} + +#[inline] +fn aes_keygen_assist1(next: &mut State, prev: &State) { + aes_keygen_assist(next, prev, 0); + + #[inline] + fn aux(mut n: u16) -> u16 { + n &= 0x0f00; + n ^= n << 4; + n ^= n >> 8; + n + } + + next[0] = aux(next[0]); + next[1] = aux(next[1]); + next[2] = aux(next[2]); + next[3] = aux(next[3]); + next[4] = 
aux(next[4]);
+    next[5] = aux(next[5]);
+    next[6] = aux(next[6]);
+    next[7] = aux(next[7]);
+}
+
+#[inline]
+fn key_expand1(p: u16, n: u16) -> u16 {
+    let p = p ^ ((p & 0x0fff) << 4) ^ ((p & 0x00ff) << 8) ^ ((p & 0x000f) << 12);
+    n ^ p
+}
+
+#[inline]
+fn key_expansion_step(next: &mut State, prev: &State) {
+    next[0] = key_expand1(prev[0], next[0]);
+    next[1] = key_expand1(prev[1], next[1]);
+    next[2] = key_expand1(prev[2], next[2]);
+    next[3] = key_expand1(prev[3], next[3]);
+    next[4] = key_expand1(prev[4], next[4]);
+    next[5] = key_expand1(prev[5], next[5]);
+    next[6] = key_expand1(prev[6], next[6]);
+    next[7] = key_expand1(prev[7], next[7]);
+}
+
+impl crate::platform::AESState for State {
+    #[inline]
+    fn new() -> Self {
+        new_state()
+    }
+
+    #[inline]
+    fn load_block(&mut self, b: &[u8]) {
+        debug_assert!(b.len() == 16);
+
+        transpose_u8x16(b.try_into().unwrap(), self);
+    }
+
+    #[inline]
+    fn store_block(&self, out: &mut [u8]) {
+        debug_assert!(out.len() == AES_BLOCK_LEN, "out.len() = {}", out.len());
+
+        transpose_u16x8(self, out);
+    }
+
+    #[inline]
+    fn xor_block(&self, input: &[u8], out: &mut [u8]) {
+        debug_assert!(input.len() == out.len() && input.len() <= AES_BLOCK_LEN);
+
+        let mut block = [0u8; AES_BLOCK_LEN];
+        self.store_block(&mut block);
+
+        for i in 0..input.len() {
+            out[i] = input[i] ^ block[i];
+        }
+    }
+
+    #[inline]
+    fn xor_key(&mut self, key: &Self) {
+        xor_key1_state(self, key);
+    }
+
+    #[inline]
+    fn aes_enc(&mut self, key: &Self) {
+        aes_enc(self, key);
+    }
+
+    #[inline]
+    fn aes_enc_last(&mut self, key: &Self) {
+        aes_enc_last(self, key);
+    }
+
+    #[inline]
+    fn aes_keygen_assist0<const RCON: i32>(&mut self, prev: &Self) {
+        aes_keygen_assist0(self, prev, RCON as u8);
+    }
+
+    #[inline]
+    fn aes_keygen_assist1(&mut self, prev: &Self) {
+        aes_keygen_assist1(self, prev);
+    }
+
+    #[inline]
+    fn key_expansion_step(&mut self, prev: &Self) {
+        key_expansion_step(self, prev)
+    }
+}
diff --git a/aesgcm/src/platform/portable/aes_core/test.rs b/aesgcm/src/platform/portable/aes_core/test.rs
new file mode 100644
index 000000000..c5524fc4f
--- /dev/null
+++ b/aesgcm/src/platform/portable/aes_core/test.rs
@@ -0,0 +1,773 @@
+use super::*;
+
+#[allow(non_snake_case)]
+fn sub_bytes_inv_state(st: &mut State) {
+    let U0 = st[7];
+    let U1 = st[6];
+    let U2 = st[5];
+    let U3 = st[4];
+    let U4 = st[3];
+    let U5 = st[2];
+    let U6 = st[1];
+    let U7 = st[0];
+
+    let T23 = U0 ^ U3;
+    let T22 = xnor(U1, U3);
+    let T2 = xnor(U0, U1);
+    let T1 = U3 ^ U4;
+    let T24 = xnor(U4, U7);
+    let R5 = U6 ^ U7;
+    let T8 = xnor(U1, T23);
+    let T19 = T22 ^ R5;
+    let T9 = xnor(U7, T1);
+    let T10 = T2 ^ T24;
+    let T13 = T2 ^ R5;
+    let T3 = T1 ^ R5;
+    let T25 = xnor(U2, T1);
+    let R13 = U1 ^ U6;
+    let T17 = xnor(U2, T19);
+    let T20 = T24 ^ R13;
+    let T4 = U4 ^ T8;
+    let R17 = xnor(U2, U5);
+    let R18 = xnor(U5, U6);
+    let R19 = xnor(U2, U4);
+    let Y5 = U0 ^ R17;
+    let T6 = T22 ^ R17;
+    let T16 = R13 ^ R19;
+    let T27 = T1 ^ R18;
+    let T15 = T10 ^ T27;
+    let T14 = T10 ^ R18;
+    let T26 = T3 ^ T16;
+    let M1 = T13 & T6;
+    let M2 = T23 & T8;
+    let M3 = T14 ^ M1;
+    let M4 = T19 & Y5;
+    let M5 = M4 ^ M1;
+    let M6 = T3 & T16;
+    let M7 = T22 & T9;
+    let M8 = T26 ^ M6;
+    let M9 = T20 & T17;
+    let M10 = M9 ^ M6;
+    let M11 = T1 & T15;
+    let M12 = T4 & T27;
+    let M13 = M12 ^ M11;
+    let M14 = T2 & T10;
+    let M15 = M14 ^ M11;
+    let M16 = M3 ^ M2;
+    let M17 = M5 ^ T24;
+    let M18 = M8 ^ M7;
+    let M19 = M10 ^ M15;
+    let M20 = M16 ^ M13;
+    let M21 = M17 ^ M15;
+    let M22 = M18 ^ M13;
+    let M23 = M19 ^ T25;
+    let M24 = M22 ^ M23;
+    let M25 = M22 &
M20; + let M26 = M21 ^ M25; + let M27 = M20 ^ M21; + let M28 = M23 ^ M25; + let M29 = M28 & M27; + let M30 = M26 & M24; + let M31 = M20 & M23; + let M32 = M27 & M31; + let M33 = M27 ^ M25; + let M34 = M21 & M22; + let M35 = M24 & M34; + let M36 = M24 ^ M25; + let M37 = M21 ^ M29; + let M38 = M32 ^ M33; + let M39 = M23 ^ M30; + let M40 = M35 ^ M36; + let M41 = M38 ^ M40; + let M42 = M37 ^ M39; + let M43 = M37 ^ M38; + let M44 = M39 ^ M40; + let M45 = M42 ^ M41; + let M46 = M44 & T6; + let M47 = M40 & T8; + let M48 = M39 & Y5; + let M49 = M43 & T16; + let M50 = M38 & T9; + let M51 = M37 & T17; + let M52 = M42 & T15; + let M53 = M45 & T27; + let M54 = M41 & T10; + let M55 = M44 & T13; + let M56 = M40 & T23; + let M57 = M39 & T19; + let M58 = M43 & T3; + let M59 = M38 & T22; + let M60 = M37 & T20; + let M61 = M42 & T1; + let M62 = M45 & T4; + let M63 = M41 & T2; + let P0 = M52 ^ M61; + let P1 = M58 ^ M59; + let P2 = M54 ^ M62; + let P3 = M47 ^ M50; + let P4 = M48 ^ M56; + let P5 = M46 ^ M51; + let P6 = M49 ^ M60; + let P7 = P0 ^ P1; + let P8 = M50 ^ M53; + let P9 = M55 ^ M63; + let P10 = M57 ^ P4; + let P11 = P0 ^ P3; + let P12 = M46 ^ M48; + let P13 = M49 ^ M51; + let P14 = M49 ^ M62; + let P15 = M54 ^ M59; + let P16 = M57 ^ M61; + let P17 = M58 ^ P2; + let P18 = M63 ^ P5; + let P19 = P2 ^ P3; + let P20 = P4 ^ P6; + let P22 = P2 ^ P7; + let P23 = P7 ^ P8; + let P24 = P5 ^ P7; + let P25 = P6 ^ P10; + let P26 = P9 ^ P11; + let P27 = P10 ^ P18; + let P28 = P11 ^ P25; + let P29 = P15 ^ P20; + let W0 = P13 ^ P22; + let W1 = P26 ^ P29; + let W2 = P17 ^ P28; + let W3 = P12 ^ P22; + let W4 = P23 ^ P27; + let W5 = P19 ^ P24; + let W6 = P14 ^ P23; + let W7 = P9 ^ P16; + + st[0] = W7; + st[1] = W6; + st[2] = W5; + st[3] = W4; + st[4] = W3; + st[5] = W2; + st[6] = W1; + st[7] = W0; +} + +fn sbox_fwd(s: u8) -> u8 { + match s { + 0 => 0x63, + 1 => 0x7c, + 2 => 0x77, + 3 => 0x7b, + 4 => 0xf2, + 5 => 0x6b, + 6 => 0x6f, + 7 => 0xc5, + 8 => 0x30, + 9 => 0x01, + 10 => 0x67, + 11 => 0x2b, + 12 => 0xfe, + 13 => 0xd7, + 14 => 0xab, + 15 => 0x76, + 16 => 0xca, + 17 => 0x82, + 18 => 0xc9, + 19 => 0x7d, + 20 => 0xfa, + 21 => 0x59, + 22 => 0x47, + 23 => 0xf0, + 24 => 0xad, + 25 => 0xd4, + 26 => 0xa2, + 27 => 0xaf, + 28 => 0x9c, + 29 => 0xa4, + 30 => 0x72, + 31 => 0xc0, + 32 => 0xb7, + 33 => 0xfd, + 34 => 0x93, + 35 => 0x26, + 36 => 0x36, + 37 => 0x3f, + 38 => 0xf7, + 39 => 0xcc, + 40 => 0x34, + 41 => 0xa5, + 42 => 0xe5, + 43 => 0xf1, + 44 => 0x71, + 45 => 0xd8, + 46 => 0x31, + 47 => 0x15, + 48 => 0x04, + 49 => 0xc7, + 50 => 0x23, + 51 => 0xc3, + 52 => 0x18, + 53 => 0x96, + 54 => 0x05, + 55 => 0x9a, + 56 => 0x07, + 57 => 0x12, + 58 => 0x80, + 59 => 0xe2, + 60 => 0xeb, + 61 => 0x27, + 62 => 0xb2, + 63 => 0x75, + 64 => 0x09, + 65 => 0x83, + 66 => 0x2c, + 67 => 0x1a, + 68 => 0x1b, + 69 => 0x6e, + 70 => 0x5a, + 71 => 0xa0, + 72 => 0x52, + 73 => 0x3b, + 74 => 0xd6, + 75 => 0xb3, + 76 => 0x29, + 77 => 0xe3, + 78 => 0x2f, + 79 => 0x84, + 80 => 0x53, + 81 => 0xd1, + 82 => 0x00, + 83 => 0xed, + 84 => 0x20, + 85 => 0xfc, + 86 => 0xb1, + 87 => 0x5b, + 88 => 0x6a, + 89 => 0xcb, + 90 => 0xbe, + 91 => 0x39, + 92 => 0x4a, + 93 => 0x4c, + 94 => 0x58, + 95 => 0xcf, + 96 => 0xd0, + 97 => 0xef, + 98 => 0xaa, + 99 => 0xfb, + 100 => 0x43, + 101 => 0x4d, + 102 => 0x33, + 103 => 0x85, + 104 => 0x45, + 105 => 0xf9, + 106 => 0x02, + 107 => 0x7f, + 108 => 0x50, + 109 => 0x3c, + 110 => 0x9f, + 111 => 0xa8, + 112 => 0x51, + 113 => 0xa3, + 114 => 0x40, + 115 => 0x8f, + 116 => 0x92, + 117 => 0x9d, + 118 => 0x38, + 119 => 0xf5, + 120 => 0xbc, + 121 
=> 0xb6, + 122 => 0xda, + 123 => 0x21, + 124 => 0x10, + 125 => 0xff, + 126 => 0xf3, + 127 => 0xd2, + 128 => 0xcd, + 129 => 0x0c, + 130 => 0x13, + 131 => 0xec, + 132 => 0x5f, + 133 => 0x97, + 134 => 0x44, + 135 => 0x17, + 136 => 0xc4, + 137 => 0xa7, + 138 => 0x7e, + 139 => 0x3d, + 140 => 0x64, + 141 => 0x5d, + 142 => 0x19, + 143 => 0x73, + 144 => 0x60, + 145 => 0x81, + 146 => 0x4f, + 147 => 0xdc, + 148 => 0x22, + 149 => 0x2a, + 150 => 0x90, + 151 => 0x88, + 152 => 0x46, + 153 => 0xee, + 154 => 0xb8, + 155 => 0x14, + 156 => 0xde, + 157 => 0x5e, + 158 => 0x0b, + 159 => 0xdb, + 160 => 0xe0, + 161 => 0x32, + 162 => 0x3a, + 163 => 0x0a, + 164 => 0x49, + 165 => 0x06, + 166 => 0x24, + 167 => 0x5c, + 168 => 0xc2, + 169 => 0xd3, + 170 => 0xac, + 171 => 0x62, + 172 => 0x91, + 173 => 0x95, + 174 => 0xe4, + 175 => 0x79, + 176 => 0xe7, + 177 => 0xc8, + 178 => 0x37, + 179 => 0x6d, + 180 => 0x8d, + 181 => 0xd5, + 182 => 0x4e, + 183 => 0xa9, + 184 => 0x6c, + 185 => 0x56, + 186 => 0xf4, + 187 => 0xea, + 188 => 0x65, + 189 => 0x7a, + 190 => 0xae, + 191 => 0x08, + 192 => 0xba, + 193 => 0x78, + 194 => 0x25, + 195 => 0x2e, + 196 => 0x1c, + 197 => 0xa6, + 198 => 0xb4, + 199 => 0xc6, + 200 => 0xe8, + 201 => 0xdd, + 202 => 0x74, + 203 => 0x1f, + 204 => 0x4b, + 205 => 0xbd, + 206 => 0x8b, + 207 => 0x8a, + 208 => 0x70, + 209 => 0x3e, + 210 => 0xb5, + 211 => 0x66, + 212 => 0x48, + 213 => 0x03, + 214 => 0xf6, + 215 => 0x0e, + 216 => 0x61, + 217 => 0x35, + 218 => 0x57, + 219 => 0xb9, + 220 => 0x86, + 221 => 0xc1, + 222 => 0x1d, + 223 => 0x9e, + 224 => 0xe1, + 225 => 0xf8, + 226 => 0x98, + 227 => 0x11, + 228 => 0x69, + 229 => 0xd9, + 230 => 0x8e, + 231 => 0x94, + 232 => 0x9b, + 233 => 0x1e, + 234 => 0x87, + 235 => 0xe9, + 236 => 0xce, + 237 => 0x55, + 238 => 0x28, + 239 => 0xdf, + 240 => 0x8c, + 241 => 0xa1, + 242 => 0x89, + 243 => 0x0d, + 244 => 0xbf, + 245 => 0xe6, + 246 => 0x42, + 247 => 0x68, + 248 => 0x41, + 249 => 0x99, + 250 => 0x2d, + 251 => 0x0f, + 252 => 0xb0, + 253 => 0x54, + 254 => 0xbb, + 255 => 0x16, + } +} + +fn sbox_inv(s: u8) -> u8 { + match s { + 0 => 0x52, + 1 => 0x09, + 2 => 0x6a, + 3 => 0xd5, + 4 => 0x30, + 5 => 0x36, + 6 => 0xa5, + 7 => 0x38, + 8 => 0xbf, + 9 => 0x40, + 10 => 0xa3, + 11 => 0x9e, + 12 => 0x81, + 13 => 0xf3, + 14 => 0xd7, + 15 => 0xfb, + 16 => 0x7c, + 17 => 0xe3, + 18 => 0x39, + 19 => 0x82, + 20 => 0x9b, + 21 => 0x2f, + 22 => 0xff, + 23 => 0x87, + 24 => 0x34, + 25 => 0x8e, + 26 => 0x43, + 27 => 0x44, + 28 => 0xc4, + 29 => 0xde, + 30 => 0xe9, + 31 => 0xcb, + 32 => 0x54, + 33 => 0x7b, + 34 => 0x94, + 35 => 0x32, + 36 => 0xa6, + 37 => 0xc2, + 38 => 0x23, + 39 => 0x3d, + 40 => 0xee, + 41 => 0x4c, + 42 => 0x95, + 43 => 0x0b, + 44 => 0x42, + 45 => 0xfa, + 46 => 0xc3, + 47 => 0x4e, + 48 => 0x08, + 49 => 0x2e, + 50 => 0xa1, + 51 => 0x66, + 52 => 0x28, + 53 => 0xd9, + 54 => 0x24, + 55 => 0xb2, + 56 => 0x76, + 57 => 0x5b, + 58 => 0xa2, + 59 => 0x49, + 60 => 0x6d, + 61 => 0x8b, + 62 => 0xd1, + 63 => 0x25, + 64 => 0x72, + 65 => 0xf8, + 66 => 0xf6, + 67 => 0x64, + 68 => 0x86, + 69 => 0x68, + 70 => 0x98, + 71 => 0x16, + 72 => 0xd4, + 73 => 0xa4, + 74 => 0x5c, + 75 => 0xcc, + 76 => 0x5d, + 77 => 0x65, + 78 => 0xb6, + 79 => 0x92, + 80 => 0x6c, + 81 => 0x70, + 82 => 0x48, + 83 => 0x50, + 84 => 0xfd, + 85 => 0xed, + 86 => 0xb9, + 87 => 0xda, + 88 => 0x5e, + 89 => 0x15, + 90 => 0x46, + 91 => 0x57, + 92 => 0xa7, + 93 => 0x8d, + 94 => 0x9d, + 95 => 0x84, + 96 => 0x90, + 97 => 0xd8, + 98 => 0xab, + 99 => 0x00, + 100 => 0x8c, + 101 => 0xbc, + 102 => 0xd3, + 103 => 0x0a, + 104 => 0xf7, + 105 => 0xe4, + 106 
=> 0x58, + 107 => 0x05, + 108 => 0xb8, + 109 => 0xb3, + 110 => 0x45, + 111 => 0x06, + 112 => 0xd0, + 113 => 0x2c, + 114 => 0x1e, + 115 => 0x8f, + 116 => 0xca, + 117 => 0x3f, + 118 => 0x0f, + 119 => 0x02, + 120 => 0xc1, + 121 => 0xaf, + 122 => 0xbd, + 123 => 0x03, + 124 => 0x01, + 125 => 0x13, + 126 => 0x8a, + 127 => 0x6b, + 128 => 0x3a, + 129 => 0x91, + 130 => 0x11, + 131 => 0x41, + 132 => 0x4f, + 133 => 0x67, + 134 => 0xdc, + 135 => 0xea, + 136 => 0x97, + 137 => 0xf2, + 138 => 0xcf, + 139 => 0xce, + 140 => 0xf0, + 141 => 0xb4, + 142 => 0xe6, + 143 => 0x73, + 144 => 0x96, + 145 => 0xac, + 146 => 0x74, + 147 => 0x22, + 148 => 0xe7, + 149 => 0xad, + 150 => 0x35, + 151 => 0x85, + 152 => 0xe2, + 153 => 0xf9, + 154 => 0x37, + 155 => 0xe8, + 156 => 0x1c, + 157 => 0x75, + 158 => 0xdf, + 159 => 0x6e, + 160 => 0x47, + 161 => 0xf1, + 162 => 0x1a, + 163 => 0x71, + 164 => 0x1d, + 165 => 0x29, + 166 => 0xc5, + 167 => 0x89, + 168 => 0x6f, + 169 => 0xb7, + 170 => 0x62, + 171 => 0x0e, + 172 => 0xaa, + 173 => 0x18, + 174 => 0xbe, + 175 => 0x1b, + 176 => 0xfc, + 177 => 0x56, + 178 => 0x3e, + 179 => 0x4b, + 180 => 0xc6, + 181 => 0xd2, + 182 => 0x79, + 183 => 0x20, + 184 => 0x9a, + 185 => 0xdb, + 186 => 0xc0, + 187 => 0xfe, + 188 => 0x78, + 189 => 0xcd, + 190 => 0x5a, + 191 => 0xf4, + 192 => 0x1f, + 193 => 0xdd, + 194 => 0xa8, + 195 => 0x33, + 196 => 0x88, + 197 => 0x07, + 198 => 0xc7, + 199 => 0x31, + 200 => 0xb1, + 201 => 0x12, + 202 => 0x10, + 203 => 0x59, + 204 => 0x27, + 205 => 0x80, + 206 => 0xec, + 207 => 0x5f, + 208 => 0x60, + 209 => 0x51, + 210 => 0x7f, + 211 => 0xa9, + 212 => 0x19, + 213 => 0xb5, + 214 => 0x4a, + 215 => 0x0d, + 216 => 0x2d, + 217 => 0xe5, + 218 => 0x7a, + 219 => 0x9f, + 220 => 0x93, + 221 => 0xc9, + 222 => 0x9c, + 223 => 0xef, + 224 => 0xa0, + 225 => 0xe0, + 226 => 0x3b, + 227 => 0x4d, + 228 => 0xae, + 229 => 0x2a, + 230 => 0xf5, + 231 => 0xb0, + 232 => 0xc8, + 233 => 0xeb, + 234 => 0xbb, + 235 => 0x3c, + 236 => 0x83, + 237 => 0x53, + 238 => 0x99, + 239 => 0x61, + 240 => 0x17, + 241 => 0x2b, + 242 => 0x04, + 243 => 0x7e, + 244 => 0xba, + 245 => 0x77, + 246 => 0xd6, + 247 => 0x26, + 248 => 0xe1, + 249 => 0x69, + 250 => 0x14, + 251 => 0x63, + 252 => 0x55, + 253 => 0x21, + 254 => 0x0c, + 255 => 0x7d, + } +} + +use rand_core::{OsRng, RngCore}; + +use crate::platform::portable::aes_core::transpose_u8x16; + +fn get_bit_u8(x: &[u8], i: usize, j: usize) -> u8 { + (x[i] >> j) & 0x1 +} + +fn get_bit_u16(x: &[u16], i: usize, j: usize) -> u8 { + ((x[j] >> i) & 0x1) as u8 +} + +#[test] +fn test_transpose() { + let mut x = [0u8; 16]; + OsRng.fill_bytes(&mut x); + let mut y = [0u16; 8]; + transpose_u8x16(&x, &mut y); + for i in 0..16 { + for j in 0..8 { + if get_bit_u8(&x, i, j) != get_bit_u16(&y, i, j) { + #[cfg(feature = "std")] + { + std::eprintln!("x[{},{}] = {}", i, j, get_bit_u8(&x, i, j)); + std::eprintln!("y[{},{}] = {}", i, j, get_bit_u16(&y, i, j)); + } + assert!(false); + } else { + #[cfg(feature = "std")] + std::eprintln!("transpose ok: {},{}", i, j); + } + } + } + let mut z = [0u8; 16]; + transpose_u16x8(&y, &mut z); + for i in 0..16 { + for j in 0..8 { + if get_bit_u8(&x, i, j) != get_bit_u8(&z, i, j) { + #[cfg(feature = "std")] + { + std::eprintln!("x[{},{}] = {}", i, j, get_bit_u8(&x, i, j)); + std::eprintln!("z[{},{}] = {}", i, j, get_bit_u8(&z, i, j)); + } + assert!(false); + } else { + #[cfg(feature = "std")] + std::eprintln!("inv-transpose ok: {},{}", i, j); + } + } + } +} + +#[test] +fn test_sbox() { + let mut x = [0u8; 16]; + let mut y = [0u16; 8]; + let mut w = [0u8; 16]; + 
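+    // Feed each byte value through the bitsliced S-box (at two different
+    // byte positions of the state) and compare position 0 against the
+    // reference lookup table.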
for i in 0..=255 { + x[0] = i; + x[9] = i; + transpose_u8x16(&x, &mut y); + sub_bytes_state(&mut y); + transpose_u16x8(&y, &mut w); + if w[0] != sbox_fwd(i as u8) { + #[cfg(feature = "std")] + std::eprintln!("sbox[{}] = {}, should be {}", i, w[0], sbox_fwd(i as u8)); + assert!(false); + } else { + #[cfg(feature = "std")] + std::eprintln!("sbox ok {}", i) + } + } +} + +#[test] +fn test_sbox_inv() { + let mut x = [0u8; 16]; + let mut y = [0u16; 8]; + let mut w = [0u8; 16]; + for i in 0..=255 { + x[0] = i; + x[9] = i; + transpose_u8x16(&x, &mut y); + sub_bytes_inv_state(&mut y); + transpose_u16x8(&y, &mut w); + if w[0] != sbox_inv(i as u8) { + #[cfg(feature = "std")] + std::eprintln!( + "sbox_inv[{}] = {}, should be {}", + i, + w[0], + sbox_inv(i as u8) + ); + assert!(false); + } else { + #[cfg(feature = "std")] + std::eprintln!("sbox inv ok {}", i) + } + } +} diff --git a/aesgcm/src/platform/portable/gf128_core.rs b/aesgcm/src/platform/portable/gf128_core.rs new file mode 100644 index 000000000..d2523efa9 --- /dev/null +++ b/aesgcm/src/platform/portable/gf128_core.rs @@ -0,0 +1,90 @@ +/// A portable gf128 field element. +pub(crate) type FieldElement = u128; + +#[inline] +fn zero() -> FieldElement { + 0 +} + +#[inline] +fn load_element(bytes: &[u8]) -> FieldElement { + debug_assert!(bytes.len() == 16); + + u128::from_be_bytes(bytes.try_into().unwrap()) +} + +#[inline] +fn store_element(element: &FieldElement, bytes: &mut [u8]) { + debug_assert!(bytes.len() == 16); + bytes.copy_from_slice(&u128::to_be_bytes(*element)); +} + +#[inline] +fn add(element: &FieldElement, other: &FieldElement) -> FieldElement { + element ^ other +} + +#[inline] +fn ith_bit_mask(elem: &FieldElement, i: usize) -> FieldElement { + debug_assert!(i < 128); + + let bit: u16 = ((elem >> (127 - i)) as u16) & 0x1; + let bit_mask16 = (!bit).wrapping_add(1); + let bit_mask32 = (bit_mask16 as u32) ^ ((bit_mask16 as u32) << 16); + let bit_mask64 = (bit_mask32 as u64) ^ ((bit_mask32 as u64) << 32); + + (bit_mask64 as u128) ^ ((bit_mask64 as u128) << 64) +} + +const IRRED: FieldElement = 0xE100_0000_0000_0000_0000_0000_0000_0000; + +#[inline] +fn mul_x(elem: &mut FieldElement) { + let mask = ith_bit_mask(elem, 127); + *elem = (*elem >> 1) ^ (IRRED & mask) +} + +#[inline] +fn mul_step(x: &FieldElement, y: &mut FieldElement, i: usize, result: &mut FieldElement) { + debug_assert!(i < 128); + let mask = ith_bit_mask(x, i); + *result ^= *y & mask; + mul_x(y); +} + +#[inline] +fn mul(x: &FieldElement, y: &FieldElement) -> FieldElement { + let mut result = 0; + let mut multiplicand = *y; + for i in 0..128 { + mul_step(x, &mut multiplicand, i, &mut result) + } + result +} + +impl crate::platform::GF128FieldElement for FieldElement { + #[inline] + fn zero() -> Self { + zero() + } + + #[inline] + fn load_element(bytes: &[u8]) -> Self { + load_element(bytes) + } + + #[inline] + fn store_element(&self, bytes: &mut [u8]) { + store_element(self, bytes); + } + + #[inline] + fn add(&mut self, other: &Self) { + *self = add(self, other); + } + + #[inline] + fn mul(&mut self, other: &Self) { + *self = mul(self, other) + } +} diff --git a/aesgcm/src/platform/x64.rs b/aesgcm/src/platform/x64.rs new file mode 100644 index 000000000..2cb276213 --- /dev/null +++ b/aesgcm/src/platform/x64.rs @@ -0,0 +1,5 @@ +mod aes_core; +mod gf128_core; + +pub(crate) use aes_core::State; +pub(crate) use gf128_core::FieldElement; diff --git a/aesgcm/src/platform/x64/aes_core.rs b/aesgcm/src/platform/x64/aes_core.rs new file mode 100644 index 000000000..3ab931900 --- 
/dev/null
+++ b/aesgcm/src/platform/x64/aes_core.rs
@@ -0,0 +1,141 @@
+use core::arch::x86_64::*;
+
+use libcrux_intrinsics::avx2::{
+    mm_aesenc_si128, mm_aesenclast_si128, mm_aeskeygenassist_si128, mm_loadu_si128,
+    mm_setzero_si128, mm_shuffle_epi32, mm_slli_si128, mm_storeu_si128_u8, mm_xor_si128,
+};
+
+/// The avx2 state.
+pub(crate) type State = __m128i;
+
+#[inline]
+fn new_state() -> State {
+    mm_setzero_si128()
+}
+
+#[inline]
+fn xor_key1_state(st: &mut State, k: &State) {
+    *st = mm_xor_si128(*st, *k);
+}
+
+#[inline]
+fn aes_enc(st: &mut State, key: &State) {
+    *st = mm_aesenc_si128(*st, *key);
+}
+
+#[inline]
+fn aes_enc_last(st: &mut State, key: &State) {
+    *st = mm_aesenclast_si128(*st, *key);
+}
+
+#[inline]
+fn aes_keygen_assist<const RCON: i32>(next: &mut State, prev: &State) {
+    *next = mm_aeskeygenassist_si128::<RCON>(*prev);
+}
+
+#[inline]
+fn aes_keygen_assist0<const RCON: i32>(next: &mut State, prev: &State) {
+    aes_keygen_assist::<RCON>(next, prev);
+    *next = mm_shuffle_epi32::<0xff>(*next);
+}
+
+#[inline]
+fn aes_keygen_assist1(next: &mut State, prev: &State) {
+    aes_keygen_assist::<0>(next, prev);
+    *next = mm_shuffle_epi32::<0xaa>(*next);
+}
+
+#[inline]
+fn key_expansion_step(next: &mut State, prev: &State) {
+    let p0 = mm_xor_si128(*prev, mm_slli_si128::<4>(*prev));
+    let p1 = mm_xor_si128(p0, mm_slli_si128::<4>(p0));
+    let p2 = mm_xor_si128(p1, mm_slli_si128::<4>(p1));
+    *next = mm_xor_si128(*next, p2);
+}
+
+impl crate::platform::AESState for State {
+    #[inline]
+    fn new() -> Self {
+        new_state()
+    }
+
+    #[inline]
+    fn load_block(&mut self, b: &[u8]) {
+        debug_assert!(b.len() == 16);
+
+        *self = mm_loadu_si128(b);
+    }
+
+    #[inline]
+    fn store_block(&self, out: &mut [u8]) {
+        debug_assert!(out.len() == 16);
+
+        mm_storeu_si128_u8(out, *self);
+    }
+
+    #[inline]
+    fn xor_block(&self, input: &[u8], out: &mut [u8]) {
+        debug_assert!(input.len() == out.len() && input.len() <= 16);
+        // XXX: hot-fix: route partial blocks through full 16-byte buffers so
+        // the unaligned vector load/store below always have enough input and
+        // output.
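+        // E.g. for a 5-byte final block, `block_in` holds the 5 message bytes
+        // followed by 11 zeros; the XOR below runs over all 16 bytes, and only
+        // the first 5 bytes of `block_out` are copied back into `out`.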
+        let mut block_in = [0u8; 16];
+        let mut block_out = [0u8; 16];
+        block_in[0..input.len()].copy_from_slice(input);
+
+        let inp_vec = mm_loadu_si128(&block_in);
+        let out_vec = mm_xor_si128(inp_vec, *self);
+        mm_storeu_si128_u8(&mut block_out, out_vec);
+
+        out.copy_from_slice(&block_out[0..out.len()]);
+    }
+
+    #[inline]
+    fn xor_key(&mut self, key: &Self) {
+        xor_key1_state(self, key);
+    }
+
+    #[inline]
+    fn aes_enc(&mut self, key: &Self) {
+        aes_enc(self, key);
+    }
+
+    #[inline]
+    fn aes_enc_last(&mut self, key: &Self) {
+        aes_enc_last(self, key);
+    }
+
+    #[inline]
+    fn aes_keygen_assist0<const RCON: i32>(&mut self, prev: &Self) {
+        aes_keygen_assist0::<RCON>(self, prev);
+    }
+
+    #[inline]
+    fn aes_keygen_assist1(&mut self, prev: &Self) {
+        aes_keygen_assist1(self, prev);
+    }
+
+    #[inline]
+    fn key_expansion_step(&mut self, prev: &Self) {
+        key_expansion_step(self, prev)
+    }
+}
+
+#[cfg(feature = "std")]
+#[allow(unsafe_code)]
+#[test]
+fn test() {
+    unsafe {
+        let x = _mm_set_epi32(3, 2, 1, 0);
+        let y = _mm_shuffle_epi32(x, 0xaa);
+        let w = _mm_slli_si128(x, 4);
+        let mut z: [i32; 4] = [0; 4];
+        _mm_storeu_si128(z.as_mut_ptr() as *mut __m128i, x);
+
+        std::eprintln!("{:?}", z);
+        _mm_storeu_si128(z.as_mut_ptr() as *mut __m128i, w);
+
+        std::eprintln!("slli_si128 by 4 bytes {:?}", z);
+        _mm_storeu_si128(z.as_mut_ptr() as *mut __m128i, y);
+
+        std::eprintln!("shuffle aa {:?}", z);
+    }
+}
diff --git a/aesgcm/src/platform/x64/gf128_core.rs b/aesgcm/src/platform/x64/gf128_core.rs
new file mode 100644
index 000000000..919d7c6d2
--- /dev/null
+++ b/aesgcm/src/platform/x64/gf128_core.rs
@@ -0,0 +1,184 @@
+use core::arch::x86_64::*;
+
+use libcrux_intrinsics::avx2::{
+    mm_clmulepi64_si128, mm_slli_si128, mm_srli_si128, mm_unpackhi_epi64, mm_unpacklo_epi64,
+    mm_xor_si128,
+};
+
+// XXX: A lot of the code below is shared with NEON. Refactor!
+
+/// An avx2 gf128 field element.
+#[derive(Clone, Copy)]
+#[repr(transparent)]
+pub(crate) struct FieldElement(pub(super) u128);
+
+impl FieldElement {
+    /// Transmute `u128` and `__m128i`.
+    #[inline]
+    #[allow(unsafe_code)]
+    fn transmute(&self) -> __m128i {
+        unsafe { core::mem::transmute(self.0) }
+    }
+
+    /// Convert a vec to self.
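+    /// (Like `transmute` above, this is a plain bit copy: `u128` and
+    /// `__m128i` are both 128 bits wide.)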
+    #[inline]
+    #[allow(unsafe_code)]
+    fn from_vec128(vec: __m128i) -> Self {
+        unsafe { core::mem::transmute(vec) }
+    }
+}
+
+#[inline]
+fn zero() -> FieldElement {
+    FieldElement(0)
+}
+
+#[inline]
+fn load_element(b: &[u8]) -> FieldElement {
+    debug_assert!(b.len() == 16);
+
+    FieldElement(u128::from_be_bytes(b.try_into().unwrap()))
+}
+
+#[inline]
+fn store_element(elem: &FieldElement, b: &mut [u8]) {
+    debug_assert!(b.len() == 16);
+
+    b.copy_from_slice(&elem.0.to_be_bytes());
+}
+
+#[inline]
+fn add(elem: &FieldElement, other: &FieldElement) -> FieldElement {
+    FieldElement((*elem).0 ^ (*other).0)
+}
+
+// #[inline]
+// fn mul_wide(elem: &FieldElement, other: &FieldElement) -> (FieldElement, FieldElement) {
+//     let lhs: __m128i = unsafe { core::mem::transmute((*elem).0) };
+//     let rhs: __m128i = unsafe { core::mem::transmute((*other).0) };
+
+//     let low = unsafe { _mm_clmulepi64_si128(lhs, rhs, 0x11) };
+//     let mid0 = unsafe { _mm_clmulepi64_si128(lhs, rhs, 0x10) };
+//     let mid1 = unsafe { _mm_clmulepi64_si128(lhs, rhs, 0x01) };
+//     let high = unsafe { _mm_clmulepi64_si128(lhs, rhs, 0x00) };
+//     let mid = unsafe { _mm_xor_si128(mid0, mid1) };
+//     let m0 = unsafe { _mm_srli_si128(mid, 8) };
+//     let m1 = unsafe { _mm_slli_si128(mid, 8) };
+//     let low = unsafe { _mm_xor_si128(low, m0) };
+//     let high = unsafe { _mm_xor_si128(high, m1) };
+
+//     let low128: u128 = unsafe { core::mem::transmute(low) };
+//     let high128: u128 = unsafe { core::mem::transmute(high) };
+
+//     (FieldElement(low128), FieldElement(high128))
+// }
+
+/// Performs a 128x128 to 256-bit carry-less multiplication.
+///
+/// This implementation uses the Karatsuba algorithm to reduce the number of expensive
+/// PCLMULQDQ instructions from 4 to 3. On most modern x64 CPUs (Intel Sandy
+/// Bridge and newer, AMD Zen and newer), this results in higher performance due to
+/// better utilization of execution ports and potentially lower overall latency.
+///
+/// Takes the field elements `elem` and `other` and returns a tuple
+/// `(high, low)` containing the 256-bit result.
+#[inline]
+fn mul_wide(elem: &FieldElement, other: &FieldElement) -> (FieldElement, FieldElement) {
+    // Let the inputs be a = (a_hi << 64) | a_lo and b = (b_hi << 64) | b_lo.
+    // The product is (a_hi*b_hi << 128) + ((a_lo*b_hi ^ a_hi*b_lo) << 64) + a_lo*b_lo.
+    // The Karatsuba trick computes the middle term using the other two products:
+    // (a_lo*b_hi ^ a_hi*b_lo) = (a_lo^a_hi)*(b_lo^b_hi) ^ a_lo*b_lo ^ a_hi*b_hi
+
+    let a: __m128i = elem.transmute();
+    let b: __m128i = other.transmute();
+
+    // 1. Calculate the low and high 128-bit parts of the product in parallel.
+    // p_lo = a_lo * b_lo
+    let p_lo = mm_clmulepi64_si128::<0x00>(a, b);
+    // p_hi = a_hi * b_hi
+    let p_hi = mm_clmulepi64_si128::<0x11>(a, b);
+
+    // 2. Calculate the middle term using the third multiplication.
+    // First, prepare the operands (a_lo^a_hi) and (b_lo^b_hi).
+    // Using unpack instructions is an alternative to shuffling.
+    let a_xor = mm_xor_si128(mm_unpackhi_epi64(a, a), mm_unpacklo_epi64(a, a));
+    let b_xor = mm_xor_si128(mm_unpackhi_epi64(b, b), mm_unpacklo_epi64(b, b));
+
+    // Multiply the low 64-bit parts of the XORed results.
+    // p_mid_prod = (a_lo^a_hi) * (b_lo^b_hi)
+    let p_mid_prod = mm_clmulepi64_si128::<0x00>(a_xor, b_xor);
+
+    // Finish computing the middle term by XORing with p_lo and p_hi.
+    let p_mid = mm_xor_si128(mm_xor_si128(p_mid_prod, p_lo), p_hi);
+
+    // 3. Combine the parts to get the final 256-bit result.
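+    // (Worked example, illustrative: for a = x^64, i.e. a_hi = 1, a_lo = 0,
+    // and b = 1, the three products are p_lo = 0, p_hi = 0 and
+    // p_mid = 1*1 ^ 0 ^ 0 = 1, so the recombination below gives
+    // res_low = 1 << 64 = x^64 and res_high = 0, as expected.)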
+    // The middle part is XORed at a 64-bit offset.
+    // res_low  = p_lo ^ (p_mid << 64)
+    // res_high = p_hi ^ (p_mid >> 64)
+    let res_low = mm_xor_si128(p_lo, mm_slli_si128::<8>(p_mid));
+    let res_high = mm_xor_si128(p_hi, mm_srli_si128::<8>(p_mid));
+
+    // The original function returned (high_part, low_part). We maintain that order.
+    (
+        FieldElement::from_vec128(res_high),
+        FieldElement::from_vec128(res_low),
+    )
+}
+
+#[inline]
+fn reduce(high: &FieldElement, low: &FieldElement) -> FieldElement {
+    let high = ((*high).0 << 1) ^ ((*low).0 >> 127);
+    let low = (*low).0 << 1;
+    let x0_0 = low << 64;
+    let x1_x0 = low ^ (x0_0 << 63) ^ (x0_0 << 62) ^ (x0_0 << 57);
+    let x1_x0 = x1_x0 ^ (x1_x0 >> 1) ^ (x1_x0 >> 2) ^ (x1_x0 >> 7);
+    FieldElement(x1_x0 ^ high)
+}
+
+#[inline]
+fn mul(x: &FieldElement, y: &FieldElement) -> FieldElement {
+    let (high, low) = mul_wide(x, y);
+    reduce(&high, &low)
+}
+
+impl crate::platform::GF128FieldElement for FieldElement {
+    #[inline]
+    fn zero() -> Self {
+        zero()
+    }
+
+    #[inline]
+    fn load_element(b: &[u8]) -> Self {
+        load_element(b)
+    }
+
+    #[inline]
+    fn store_element(&self, b: &mut [u8]) {
+        store_element(self, b);
+    }
+
+    #[inline]
+    fn add(&mut self, other: &Self) {
+        *self = add(self, other);
+    }
+
+    #[inline]
+    fn mul(&mut self, other: &Self) {
+        *self = mul(self, other)
+    }
+}
+
+#[allow(unsafe_code)]
+#[cfg(feature = "std")]
+#[test]
+fn test_transmute() {
+    let x = 1u128 << 64 ^ 2u128;
+    let xv: __m128i = unsafe { core::mem::transmute(x) };
+    let xv: __m128i = unsafe { _mm_slli_si128(xv, 8) };
+    let x: u128 = unsafe { core::mem::transmute(xv) };
+    std::eprintln!("trans {:x}", x);
+    let mut u64s = [0u64; 2];
+    unsafe { _mm_storeu_si128(u64s.as_mut_ptr() as *mut __m128i, xv) };
+    std::eprintln!("store {:?}", u64s)
+}
diff --git a/aesgcm/tests/key_centric.rs b/aesgcm/tests/key_centric.rs
new file mode 100644
index 000000000..3e3551d99
--- /dev/null
+++ b/aesgcm/tests/key_centric.rs
@@ -0,0 +1,48 @@
+use libcrux_traits::aead::consts;
+use libcrux_traits::aead::typed_owned;
+use libcrux_traits::aead::typed_refs;
+
+use libcrux_aesgcm::AesGcm128;
+
+type Key = typed_owned::Key<AesGcm128>;
+type Nonce = typed_owned::Nonce<AesGcm128>;
+type Tag = typed_owned::Tag<AesGcm128>;
+
+#[test]
+fn test_key_centric_owned() {
+    use consts::AeadConsts as _;
+
+    let k: Key = [0; AesGcm128::KEY_LEN].into();
+    let nonce: Nonce = [0; AesGcm128::NONCE_LEN].into();
+    let mut tag: Tag = [0; AesGcm128::TAG_LEN].into();
+
+    let pt = b"the quick brown fox jumps over the lazy dog";
+    let mut ct = [0; 43];
+    let mut pt_out = [0; 43];
+
+    k.encrypt(&mut ct, &mut tag, &nonce, b"", pt).unwrap();
+    k.decrypt(&mut pt_out, &nonce, b"", &ct, &tag).unwrap();
+    assert_eq!(pt, &pt_out);
+}
+
+#[test]
+fn test_key_centric_refs() {
+    use consts::AeadConsts as _;
+    use typed_refs::Aead as _;
+
+    let algo = AesGcm128;
+
+    let mut tag_bytes = [0; AesGcm128::TAG_LEN];
+    let key = algo.new_key(&[0; AesGcm128::KEY_LEN]).unwrap();
+    let tag = algo.new_tag_mut(&mut tag_bytes).unwrap();
+    let nonce = algo.new_nonce(&[0; AesGcm128::NONCE_LEN]).unwrap();
+
+    let pt = b"the quick brown fox jumps over the lazy dog";
+    let mut ct = [0; 43];
+    let mut pt_out = [0; 43];
+
+    key.encrypt(&mut ct, tag, nonce, b"", pt).unwrap();
+    let tag = algo.new_tag(&tag_bytes).unwrap();
+    key.decrypt(&mut pt_out, nonce, b"", &ct, tag).unwrap();
+    assert_eq!(pt, &pt_out);
+}
diff --git a/aesgcm/tests/wycheproof.rs b/aesgcm/tests/wycheproof.rs
new file mode 100644
index 000000000..66fb40c65
--- /dev/null
+++ b/aesgcm/tests/wycheproof.rs
@@ -0,0 +1,93 @@
+use libcrux_aesgcm::Aead;
+use wycheproof::{aead::Test, TestResult};
+
+#[test]
+fn test() {
+    let test_set = wycheproof::aead::TestSet::load(wycheproof::aead::TestName::AesGcm).unwrap();
+
+    fn run<const KEY_LEN: usize, Cipher: Aead<KEY_LEN, 16, 12>>(test: &Test) {
+        let mut ciphertext = vec![0u8; test.pt.len()];
+        let mut plaintext = vec![0u8; test.pt.len()];
+        let mut tag = [0u8; 16];
+
+        Cipher::encrypt(
+            &mut ciphertext,
+            &mut tag,
+            test.key.as_ref().try_into().unwrap(),
+            test.nonce.as_ref().try_into().unwrap(),
+            &test.aad,
+            &test.pt,
+        )
+        .unwrap();
+        Cipher::decrypt(
+            &mut plaintext,
+            test.key.as_ref().try_into().unwrap(),
+            test.nonce.as_ref().try_into().unwrap(),
+            &test.aad,
+            &ciphertext,
+            tag.as_ref().try_into().unwrap(),
+        )
+        .unwrap();
+
+        assert_eq!(plaintext.as_slice(), test.pt.as_slice());
+
+        if test.result == TestResult::Valid {
+            assert_eq!(test.ct.as_slice(), &ciphertext);
+            assert_eq!(test.tag.as_slice(), &tag);
+        } else {
+            let ct_ok = test.ct.as_slice() == &ciphertext;
+            let tag_ok = test.tag.as_slice() == &tag;
+            assert!(!ct_ok || !tag_ok);
+        }
+    }
+
+    for test_group in test_set.test_groups {
+        println!(
+            "* Group key size:{} tag size:{} nonce size:{}",
+            test_group.key_size, test_group.tag_size, test_group.nonce_size,
+        );
+
+        if test_group.nonce_size != 96 {
+            println!("  Skipping unsupported nonce size");
+            continue;
+        }
+
+        if test_group.key_size == 128 {
+            for test in test_group.tests {
+                println!("  Test AES-GCM 128 {}", test.tc_id);
+
+                // Multiplexing
+                run::<16, libcrux_aesgcm::AesGcm128>(&test);
+
+                // Portable
+                run::<16, libcrux_aesgcm::PortableAesGcm128>(&test);
+
+                // Neon
+                #[cfg(all(target_arch = "aarch64", target_feature = "aes"))]
+                run::<16, libcrux_aesgcm::NeonAesGcm128>(&test);
+
+                // x64
+                #[cfg(all(target_arch = "x86_64"))]
+                run::<16, libcrux_aesgcm::X64AesGcm128>(&test);
+            }
+        } else if test_group.key_size == 256 {
+            for test in test_group.tests {
+                println!("  Test AES-GCM 256 {}", test.tc_id);
+
+                // Multiplexing
+                run::<32, libcrux_aesgcm::AesGcm256>(&test);
+
+                // Portable
+                run::<32, libcrux_aesgcm::PortableAesGcm256>(&test);
+
+                // Neon
+                #[cfg(all(target_arch = "aarch64", target_feature = "aes"))]
+                run::<32, libcrux_aesgcm::NeonAesGcm256>(&test);
+
+                // x64
+                #[cfg(all(target_arch = "x86_64"))]
+                run::<32, libcrux_aesgcm::X64AesGcm256>(&test);
+            }
+        }
+    }
+}
diff --git a/fstar-helpers/core-models/src/core_arch/x86.rs b/fstar-helpers/core-models/src/core_arch/x86.rs
index aa4740856..f2f729e0d 100644
--- a/fstar-helpers/core-models/src/core_arch/x86.rs
+++ b/fstar-helpers/core-models/src/core_arch/x86.rs
@@ -260,6 +260,48 @@ pub mod sse2 {
     pub fn _mm_movemask_epi8(_: __m128i) -> i32 {
         unimplemented!()
     }
+
+    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi64)
+    #[hax_lib::opaque]
+    pub fn _mm_unpacklo_epi64(_: __m128i, _: __m128i) -> __m128i {
+        unimplemented!()
+    }
+
+    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi64)
+    #[hax_lib::opaque]
+    pub fn _mm_unpackhi_epi64(_: __m128i, _: __m128i) -> __m128i {
+        unimplemented!()
+    }
+
+    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi32)
+    #[hax_lib::opaque]
+    pub fn _mm_shuffle_epi32<const IMM8: i32>(_: __m128i) -> __m128i {
+        unimplemented!()
+    }
+
+    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_si128)
+    #[hax_lib::opaque]
+    pub fn _mm_srli_si128<const IMM8: i32>(_: __m128i) -> __m128i
{
+        unimplemented!()
+    }
+
+    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_si128)
+    #[hax_lib::opaque]
+    pub fn _mm_slli_si128<const IMM8: i32>(_: __m128i) -> __m128i {
+        unimplemented!()
+    }
+
+    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_si128)
+    #[hax_lib::opaque]
+    pub fn _mm_xor_si128(_: __m128i, _: __m128i) -> __m128i {
+        unimplemented!()
+    }
+
+    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_si128)
+    #[hax_lib::opaque]
+    pub fn _mm_setzero_si128() -> __m128i {
+        unimplemented!()
+    }
 }
 
 pub use avx::*;
@@ -696,6 +738,34 @@ pub mod avx2 {
     }
 }
 
+pub use other::*;
+pub mod other {
+    use super::*;
+
+    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aeskeygenassist_si128)
+    #[hax_lib::opaque]
+    pub fn _mm_aeskeygenassist_si128(_: __m128i, _: i32) -> __m128i {
+        unimplemented!()
+    }
+
+    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenclast_si128)
+    #[hax_lib::opaque]
+    pub fn _mm_aesenclast_si128(_: __m128i, _: __m128i) -> __m128i {
+        unimplemented!()
+    }
+
+    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc_si128)
+    #[hax_lib::opaque]
+    pub fn _mm_aesenc_si128(_: __m128i, _: __m128i) -> __m128i {
+        unimplemented!()
+    }
+
+    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128)
+    #[hax_lib::opaque]
+    pub fn _mm_clmulepi64_si128(_: __m128i, _: __m128i, _: i32) -> __m128i {
+        unimplemented!()
+    }
+}
 /// Rewrite lemmas
 const _: () = {
     #[hax_lib::fstar::before("[@@ $REWRITE_RULE ]")]
diff --git a/libcrux-intrinsics/src/arm64.rs b/libcrux-intrinsics/src/arm64.rs
index f6bbefb04..94d8abccd 100644
--- a/libcrux-intrinsics/src/arm64.rs
+++ b/libcrux-intrinsics/src/arm64.rs
@@ -4,6 +4,7 @@ use core::arch::aarch64::*;
 pub type _int16x8_t = int16x8_t;
 pub type _uint32x4_t = uint32x4_t;
 pub type _uint64x2_t = uint64x2_t;
+pub type _uint8x16_t = uint8x16_t;
 
 #[inline(always)]
 pub fn _vdupq_n_s16(i: i16) -> int16x8_t {
@@ -171,6 +172,16 @@ pub fn _vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
     unsafe { vreinterpretq_u32_s32(a) }
 }
 
+#[inline(always)]
+pub fn _vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
+    unsafe { vreinterpretq_u32_u8(a) }
+}
+
+#[inline(always)]
+pub fn _vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
+    unsafe { vreinterpretq_u8_u32(a) }
+}
+
 #[inline(always)]
 pub fn _vshrq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
     unsafe { vshrq_n_u32::<N>(a) }
@@ -270,6 +281,12 @@ pub fn _vmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
 pub fn _vld1q_u8(ptr: &[u8]) -> uint8x16_t {
     unsafe { vld1q_u8(ptr.as_ptr()) }
 }
+
+#[inline(always)]
+pub fn _vld1q_u32(ptr: &[u32]) -> uint32x4_t {
+    unsafe { vld1q_u32(ptr.as_ptr()) }
+}
+
 #[inline(always)]
 pub fn _vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
     unsafe { vreinterpretq_u8_s16(a) }
@@ -351,6 +368,7 @@ pub fn _vld1q_u16(ptr: &[u16]) -> uint16x8_t {
 pub fn _vcleq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t {
     unsafe { vcleq_s16(a, b) }
 }
+
 #[inline(always)]
 pub fn _vaddvq_u16(a: uint16x8_t) -> u16 {
     unsafe { vaddvq_u16(a) }
@@ -374,6 +392,16 @@ pub fn _vrax1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     _veorq_u64(a, _veorq_u64(_vshlq_n_u64::<1>(b), _vshrq_n_u64::<63>(b)))
 }
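+// NEON helpers for the AES-GCM backend: vector XOR/extract/duplicate, the AES
+// round primitives (`vaeseq_u8`, `vaesmcq_u8`), and the 64x64-bit carry-less
+// multiply (`vmull_p64`).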
+#[inline]
+pub fn _veorq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe { veorq_u32(a, b) }
+}
+
+#[inline]
+pub fn _vextq_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe { vextq_u32(a, b, N) }
+}
+
 #[inline(always)]
 pub fn _veor3q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
     #[cfg(all(
@@ -431,3 +459,33 @@ pub fn _vbcaxq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
     )))]
     _veorq_u64(a, _vbicq_u64(b, c))
 }
+
+#[inline(always)]
+pub fn _vmull_p64(a: u64, b: u64) -> u128 {
+    unsafe { vmull_p64(a, b) }
+}
+
+#[inline]
+pub fn _veorq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe { veorq_u8(a, b) }
+}
+
+#[inline]
+pub fn _vaesmcq_u8(data: uint8x16_t) -> uint8x16_t {
+    unsafe { vaesmcq_u8(data) }
+}
+
+#[inline]
+pub fn _vaeseq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t {
+    unsafe { vaeseq_u8(data, key) }
+}
+
+#[inline]
+pub fn _vdupq_n_u8(value: u8) -> uint8x16_t {
+    unsafe { vdupq_n_u8(value) }
+}
+
+#[inline]
+pub fn _vdupq_laneq_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
+    unsafe { vdupq_laneq_u32(a, N) }
+}
diff --git a/libcrux-intrinsics/src/avx2.rs b/libcrux-intrinsics/src/avx2.rs
index 9acb73ddb..b04a71d87 100644
--- a/libcrux-intrinsics/src/avx2.rs
+++ b/libcrux-intrinsics/src/avx2.rs
@@ -57,6 +57,16 @@ pub fn mm_storeu_si128(output: &mut [i16], vector: Vec128) {
     }
 }
 
+#[hax_lib::opaque]
+#[inline(always)]
+pub fn mm_storeu_si128_u8(output: &mut [u8], vector: Vec128) {
+    #[cfg(not(hax))]
+    debug_assert!(output.len() >= 16);
+    unsafe {
+        _mm_storeu_si128(output.as_mut_ptr() as *mut Vec128, vector);
+    }
+}
+
 #[hax_lib::opaque]
 #[inline(always)]
 pub fn mm_storeu_si128_i32(output: &mut [i32], vector: Vec128) {
@@ -115,6 +125,11 @@ pub fn mm256_setzero_si256() -> Vec256 {
     unsafe { _mm256_setzero_si256() }
 }
 
+#[inline(always)]
+pub fn mm_setzero_si128() -> Vec128 {
+    unsafe { _mm_setzero_si128() }
+}
+
 #[inline(always)]
 pub fn mm256_set_m128i(hi: Vec128, lo: Vec128) -> Vec256 {
     unsafe { _mm256_set_m128i(hi, lo) }
@@ -439,6 +454,12 @@ pub fn mm256_xor_si256(lhs: Vec256, rhs: Vec256) -> Vec256 {
     unsafe { _mm256_xor_si256(lhs, rhs) }
 }
 
+#[inline(always)]
+#[hax_lib::fstar::before(r#"[@@ "opaque_to_smt"]"#)]
+pub fn mm_xor_si128(lhs: Vec128, rhs: Vec128) -> Vec128 {
+    unsafe { _mm_xor_si128(lhs, rhs) }
+}
+
 #[inline(always)]
 #[hax_lib::fstar::before(r#"[@@ "opaque_to_smt"]"#)]
 pub fn mm256_srai_epi16<const SHIFT_BY: i32>(vector: Vec256) -> Vec256 {
@@ -504,6 +525,22 @@ pub fn mm256_slli_epi32<const SHIFT_BY: i32>(vector: Vec256) -> Vec256 {
     unsafe { _mm256_slli_epi32::<SHIFT_BY>(vector) }
 }
 
+#[inline(always)]
+#[hax_lib::fstar::before(r#"[@@ "opaque_to_smt"]"#)]
+pub fn mm_slli_si128<const SHIFT_BY: i32>(vector: Vec128) -> Vec128 {
+    #[cfg(not(hax))]
+    debug_assert!(SHIFT_BY >= 0 && SHIFT_BY < 16);
+    unsafe { _mm_slli_si128::<SHIFT_BY>(vector) }
+}
+
+#[inline(always)]
+#[hax_lib::fstar::before(r#"[@@ "opaque_to_smt"]"#)]
+pub fn mm_srli_si128<const SHIFT_BY: i32>(vector: Vec128) -> Vec128 {
+    #[cfg(not(hax))]
+    debug_assert!(SHIFT_BY >= 0 && SHIFT_BY < 16);
+    unsafe { _mm_srli_si128::<SHIFT_BY>(vector) }
+}
+
 #[inline(always)]
 #[hax_lib::fstar::before(r#"[@@ "opaque_to_smt"]"#)]
 pub fn mm_shuffle_epi8(vector: Vec128, control: Vec128) -> Vec128 {
@@ -524,6 +561,14 @@ pub fn mm256_shuffle_epi32<const CONTROL: i32>(vector: Vec256) -> Vec256 {
     unsafe { _mm256_shuffle_epi32::<CONTROL>(vector) }
 }
 
+#[inline(always)]
+#[hax_lib::fstar::before(r#"[@@ "opaque_to_smt"]"#)]
+pub fn mm_shuffle_epi32<const CONTROL: i32>(vector: Vec128) -> Vec128 {
+    #[cfg(not(hax))]
+    debug_assert!(CONTROL >= 0 && CONTROL < 256);
+    unsafe { _mm_shuffle_epi32::<CONTROL>(vector) }
+}
+
 #[inline(always)]
 #[hax_lib::fstar::before(r#"[@@ "opaque_to_smt"]"#)]
 pub fn mm256_permute4x64_epi64<const CONTROL: i32>(vector: Vec256) -> Vec256 {
@@ -538,6 +583,12 @@ pub fn mm256_unpackhi_epi64(lhs: Vec256, rhs: Vec256) -> Vec256 {
     unsafe { _mm256_unpackhi_epi64(lhs, rhs) }
 }
 
+#[inline(always)]
+#[hax_lib::fstar::before(r#"[@@ "opaque_to_smt"]"#)]
+pub fn mm_unpackhi_epi64(lhs: Vec128, rhs: Vec128) -> Vec128 {
+    unsafe { _mm_unpackhi_epi64(lhs, rhs) }
+}
+
 #[inline(always)]
 #[hax_lib::fstar::before(r#"[@@ "opaque_to_smt"]"#)]
 pub fn mm256_unpacklo_epi32(lhs: Vec256, rhs: Vec256) -> Vec256 {
@@ -701,8 +752,38 @@ pub fn mm256_unpacklo_epi64(lhs: Vec256, rhs: Vec256) -> Vec256 {
     unsafe { _mm256_unpacklo_epi64(lhs, rhs) }
 }
 
+#[inline(always)]
+#[hax_lib::fstar::before(r#"[@@ "opaque_to_smt"]"#)]
+pub fn mm_unpacklo_epi64(lhs: Vec128, rhs: Vec128) -> Vec128 {
+    unsafe { _mm_unpacklo_epi64(lhs, rhs) }
+}
+
 #[inline(always)]
 #[hax_lib::fstar::before(r#"[@@ "opaque_to_smt"]"#)]
 pub fn mm256_permute2x128_si256<const IMM8: i32>(a: Vec256, b: Vec256) -> Vec256 {
     unsafe { _mm256_permute2x128_si256::<IMM8>(a, b) }
 }
+
+#[inline(always)]
+#[hax_lib::fstar::before(r#"[@@ "opaque_to_smt"]"#)]
+pub fn mm_clmulepi64_si128<const IMM8: i32>(a: Vec128, b: Vec128) -> Vec128 {
+    unsafe { _mm_clmulepi64_si128(a, b, IMM8) }
+}
+
+#[inline(always)]
+#[hax_lib::fstar::before(r#"[@@ "opaque_to_smt"]"#)]
+pub fn mm_aesenc_si128(a: Vec128, b: Vec128) -> Vec128 {
+    unsafe { _mm_aesenc_si128(a, b) }
+}
+
+#[inline(always)]
+#[hax_lib::fstar::before(r#"[@@ "opaque_to_smt"]"#)]
+pub fn mm_aesenclast_si128(a: Vec128, b: Vec128) -> Vec128 {
+    unsafe { _mm_aesenclast_si128(a, b) }
+}
+
+#[inline(always)]
+#[hax_lib::fstar::before(r#"[@@ "opaque_to_smt"]"#)]
+pub fn mm_aeskeygenassist_si128<const RCON: i32>(a: Vec128) -> Vec128 {
+    unsafe { _mm_aeskeygenassist_si128(a, RCON) }
+}
diff --git a/sys/pqclean/src/bindings.rs b/sys/pqclean/src/bindings.rs
index 581711da3..84b3eef4f 100644
--- a/sys/pqclean/src/bindings.rs
+++ b/sys/pqclean/src/bindings.rs
@@ -1,4 +1,4 @@
-/* automatically generated by rust-bindgen 0.72.0 */
+/* automatically generated by rust-bindgen 0.72.1 */
 
 pub const SHAKE128_RATE: u32 = 168;
 pub const SHAKE256_RATE: u32 = 136;