Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core/crypto: More improvements #4124

Draft
wants to merge 14 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions core/crypto/_aes/aes.odin
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,5 @@ GHASH_BLOCK_SIZE :: 16
GHASH_TAG_SIZE :: 16

// RCON holds the AES key schedule round constants (ten entries, one
// byte each).
@(rodata)
RCON := [10]byte{0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36}
9 changes: 2 additions & 7 deletions core/crypto/_aes/ct64/ct64.odin
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@

package aes_ct64

import "base:intrinsics"

// Bitsliced AES for 64-bit general purpose (integer) registers. Each
// invocation will process up to 4 blocks at a time. This implementation
// is derived from the BearSSL ct64 code, and distributed under a 1-clause
Expand Down Expand Up @@ -212,11 +210,8 @@ orthogonalize :: proc "contextless" (q: ^[8]u64) {
}

@(require_results)
interleave_in :: proc "contextless" (w: []u32) -> (q0, q1: u64) #no_bounds_check {
if len(w) < 4 {
intrinsics.trap()
}
x0, x1, x2, x3 := u64(w[0]), u64(w[1]), u64(w[2]), u64(w[3])
interleave_in :: proc "contextless" (w0, w1, w2, w3: u32) -> (q0, q1: u64) #no_bounds_check {
x0, x1, x2, x3 := u64(w0), u64(w1), u64(w2), u64(w3)
x0 |= (x0 << 16)
x1 |= (x1 << 16)
x2 |= (x2 << 16)
Expand Down
6 changes: 1 addition & 5 deletions core/crypto/_aes/ct64/ct64_enc.odin
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,8 @@

package aes_ct64

import "base:intrinsics"

add_round_key :: proc "contextless" (q: ^[8]u64, sk: []u64) #no_bounds_check {
if len(sk) < 8 {
intrinsics.trap()
}
ensure_contextless(len(sk) >=8, "aes/ct64: invalid round key size")

q[0] ~= sk[0]
q[1] ~= sk[1]
Expand Down
61 changes: 3 additions & 58 deletions core/crypto/_aes/ct64/ct64_keysched.odin
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@

package aes_ct64

import "base:intrinsics"
import "core:crypto/_aes"
import "core:encoding/endian"
import "core:mem"
Expand All @@ -42,7 +41,7 @@ sub_word :: proc "contextless" (x: u32) -> u32 {
}

@(private, require_results)
keysched :: proc(comp_skey: []u64, key: []byte) -> int {
keysched :: proc "contextless" (comp_skey: []u64, key: []byte) -> int {
num_rounds, key_len := 0, len(key)
switch key_len {
case _aes.KEY_SIZE_128:
Expand All @@ -52,7 +51,7 @@ keysched :: proc(comp_skey: []u64, key: []byte) -> int {
case _aes.KEY_SIZE_256:
num_rounds = _aes.ROUNDS_256
case:
panic("crypto/aes: invalid AES key size")
panic_contextless("crypto/aes: invalid AES key size")
}

skey: [60]u32 = ---
Expand All @@ -78,7 +77,7 @@ keysched :: proc(comp_skey: []u64, key: []byte) -> int {

q: [8]u64 = ---
for i, j := 0, 0; i < nkf; i, j = i + 4, j + 2 {
q[0], q[4] = interleave_in(skey[i:])
q[0], q[4] = interleave_in(skey[i], skey[i+1], skey[i+2], skey[i+3])
q[1] = q[0]
q[2] = q[0]
q[3] = q[0]
Expand Down Expand Up @@ -123,57 +122,3 @@ skey_expand :: proc "contextless" (skey, comp_skey: []u64, num_rounds: int) {
skey[v + 3] = (x3 << 4) - x3
}
}

// orthogonalize_roundkey converts one 16-byte round key (`key`) into the
// 8-word u64 representation written to `qq[0..7]`.
//
// Inputs:
// - qq:  destination, must hold at least 8 elements.
// - key: source round key, must be exactly 16 bytes.
//
// Traps (intrinsics.trap) when either size requirement is violated.
orthogonalize_roundkey :: proc "contextless" (qq: []u64, key: []byte) {
if len(qq) < 8 || len(key) != 16 {
intrinsics.trap()
}

// Read the key as four little-endian 32-bit words.
skey: [4]u32 = ---
skey[0] = endian.unchecked_get_u32le(key[0:])
skey[1] = endian.unchecked_get_u32le(key[4:])
skey[2] = endian.unchecked_get_u32le(key[8:])
skey[3] = endian.unchecked_get_u32le(key[12:])

// Interleave the words into q[0]/q[4], copy those two values into the
// remaining slots (q[1..3] and q[5..7]), then orthogonalize.
q: [8]u64 = ---
q[0], q[4] = interleave_in(skey[:])
q[1] = q[0]
q[2] = q[0]
q[3] = q[0]
q[5] = q[4]
q[6] = q[4]
q[7] = q[4]
orthogonalize(&q)

// Compress: each output word takes bit 0 of every nibble from the first
// source word, bit 1 from the second, bit 2 from the third and bit 3
// from the fourth.
comp_skey: [2]u64 = ---
comp_skey[0] =
(q[0] & 0x1111111111111111) |
(q[1] & 0x2222222222222222) |
(q[2] & 0x4444444444444444) |
(q[3] & 0x8888888888888888)
comp_skey[1] =
(q[4] & 0x1111111111111111) |
(q[5] & 0x2222222222222222) |
(q[6] & 0x4444444444444444) |
(q[7] & 0x8888888888888888)

// Expand: split each compressed word back into four words. Each bit
// position of every nibble is isolated, shifted down to bit 0 of its
// nibble, and then multiplied by 15 (`(x << 4) - x`) so that the bit is
// replicated across the whole nibble.
for x, u in comp_skey {
x0 := x
x1, x2, x3 := x0, x0, x0
x0 &= 0x1111111111111111
x1 &= 0x2222222222222222
x2 &= 0x4444444444444444
x3 &= 0x8888888888888888
x1 >>= 1
x2 >>= 2
x3 >>= 3
qq[u * 4 + 0] = (x0 << 4) - x0
qq[u * 4 + 1] = (x1 << 4) - x1
qq[u * 4 + 2] = (x2 << 4) - x2
qq[u * 4 + 3] = (x3 << 4) - x3
}

// Explicitly zero the local temporaries before returning.
mem.zero_explicit(&skey, size_of(skey))
mem.zero_explicit(&q, size_of(q))
mem.zero_explicit(&comp_skey, size_of(comp_skey))
}
6 changes: 2 additions & 4 deletions core/crypto/_aes/ct64/ghash.odin
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@

package aes_ct64

import "base:intrinsics"
import "core:crypto/_aes"
import "core:encoding/endian"

Expand Down Expand Up @@ -64,9 +63,8 @@ rev64 :: proc "contextless" (x: u64) -> u64 {
// Note: `dst` is both an input and an output, to support easy implementation
// of GCM.
ghash :: proc "contextless" (dst, key, data: []byte) {
if len(dst) != _aes.GHASH_BLOCK_SIZE || len(key) != _aes.GHASH_BLOCK_SIZE {
intrinsics.trap()
}
ensure_contextless(len(dst) == _aes.GHASH_BLOCK_SIZE)
ensure_contextless(len(key) == _aes.GHASH_BLOCK_SIZE)

buf := data
l := len(buf)
Expand Down
80 changes: 37 additions & 43 deletions core/crypto/_aes/ct64/helpers.odin
Original file line number Diff line number Diff line change
@@ -1,75 +1,69 @@
package aes_ct64

import "base:intrinsics"
import "core:crypto/_aes"
import "core:encoding/endian"

// load_interleaved reads four little-endian u32 words from `src` at byte
// offsets 0, 4, 8 and 12 and returns the pair produced by `interleave_in`.
//
// The word reads use the unchecked endian helpers, so `src` is expected
// to hold at least 16 bytes; callers are responsible for that check.
@(require_results)
load_interleaved :: proc "contextless" (src: []byte) -> (u64, u64) {
	return interleave_in(
		endian.unchecked_get_u32le(src[0:]),
		endian.unchecked_get_u32le(src[4:]),
		endian.unchecked_get_u32le(src[8:]),
		endian.unchecked_get_u32le(src[12:]),
	)
}

// store_interleaved de-interleaves the pair (a0, a1) with `interleave_out`
// and writes the resulting four u32 words to `dst` as little-endian values
// at byte offsets 0, 4, 8 and 12.
//
// The word writes use the unchecked endian helpers, so `dst` is expected
// to hold at least 16 bytes; callers are responsible for that check.
store_interleaved :: proc "contextless" (dst: []byte, a0, a1: u64) {
	words: [4]u32 = ---
	words[0], words[1], words[2], words[3] = interleave_out(a0, a1)
	for word, idx in words {
		endian.unchecked_put_u32le(dst[idx * 4:], word)
	}
}

// xor_interleaved returns the element-wise XOR of the pairs (a0, a1) and
// (b0, b1), i.e. (a0 ~ b0, a1 ~ b1).
@(require_results)
xor_interleaved :: #force_inline proc "contextless" (a0, a1, b0, b1: u64) -> (u64, u64) {
	first := a0 ~ b0
	second := a1 ~ b1
	return first, second
}

// and_interleaved returns the element-wise AND of the pairs (a0, a1) and
// (b0, b1), i.e. (a0 & b0, a1 & b1).
@(require_results)
and_interleaved :: #force_inline proc "contextless" (a0, a1, b0, b1: u64) -> (u64, u64) {
	first := a0 & b0
	second := a1 & b1
	return first, second
}

// load_blockx1 loads a single block from `src` into the state `q`.
//
// The block is read with `load_interleaved` into q[0]/q[4], after which
// the whole state is passed through `orthogonalize`.
//
// Inputs:
// - q:   state to populate.
// - src: source block, must be exactly `_aes.BLOCK_SIZE` bytes.
//
// Panics (ensure_contextless) if `src` has the wrong length.
load_blockx1 :: proc "contextless" (q: ^[8]u64, src: []byte) {
	ensure_contextless(len(src) == _aes.BLOCK_SIZE, "aes/ct64: invalid block size")

	q[0], q[4] = #force_inline load_interleaved(src)
	orthogonalize(q)
}

// store_blockx1 writes a single block from the state `q` to `dst`.
//
// The state is passed through `orthogonalize` in place (so `q` is
// modified), then q[0]/q[4] are written out with `store_interleaved`.
//
// Inputs:
// - dst: destination block, must be exactly `_aes.BLOCK_SIZE` bytes.
// - q:   state to serialize.
//
// Panics (ensure_contextless) if `dst` has the wrong length.
store_blockx1 :: proc "contextless" (dst: []byte, q: ^[8]u64) {
	ensure_contextless(len(dst) == _aes.BLOCK_SIZE, "aes/ct64: invalid block size")

	orthogonalize(q)
	#force_inline store_interleaved(dst, q[0], q[4])
}

// load_blocks loads between 1 and `STRIDE` blocks from `src` into the
// state `q`.
//
// Block `i` is read with `load_interleaved` into q[i]/q[i + 4]; once all
// blocks are loaded the whole state is passed through `orthogonalize`.
//
// Inputs:
// - q:   state to populate.
// - src: source blocks; each must be exactly `_aes.BLOCK_SIZE` bytes.
//
// Panics (ensure_contextless) if `src` is empty, holds more than `STRIDE`
// entries, or contains an entry of the wrong length.
load_blocks :: proc "contextless" (q: ^[8]u64, src: [][]byte) {
	// An empty `src` is rejected, matching the guard this check replaces
	// (`n > STRIDE || n == 0`).
	ensure_contextless(len(src) > 0 && len(src) <= STRIDE, "aes/ct64: invalid block(s) size")

	for s, i in src {
		ensure_contextless(len(s) == _aes.BLOCK_SIZE, "aes/ct64: invalid block size")
		q[i], q[i + 4] = #force_inline load_interleaved(s)
	}
	orthogonalize(q)
}

// store_blocks writes blocks from the state `q` to the destinations in
// `dst`.
//
// The state is passed through `orthogonalize` in place (so `q` is
// modified), then block `i` (q[i]/q[i + 4]) is written to dst[i] with
// `store_interleaved`.
//
// Inputs:
// - dst: between 1 and `STRIDE` destination slices. A nil entry ends the
//        store early, so fewer than len(dst) blocks may be written. Every
//        non-nil entry reached must be exactly `_aes.BLOCK_SIZE` bytes.
// - q:   state to serialize.
//
// Panics (ensure_contextless) if `dst` is empty, holds more than `STRIDE`
// entries, or a visited non-nil entry has the wrong length.
store_blocks :: proc "contextless" (dst: [][]byte, q: ^[8]u64) {
	// An empty `dst` is rejected, matching the guard this check replaces
	// (`n > STRIDE || n == 0`).
	ensure_contextless(len(dst) > 0 && len(dst) <= STRIDE, "aes/ct64: invalid block(s) size")

	orthogonalize(q)
	for d, i in dst {
		// Allow storing fewer blocks than slots: stop at the first nil
		// destination.
		if d == nil {
			break
		}
		ensure_contextless(len(d) == _aes.BLOCK_SIZE, "aes/ct64: invalid block size")
		#force_inline store_interleaved(d, q[i], q[i + 4])
	}
}
4 changes: 2 additions & 2 deletions core/crypto/_aes/hw_intel/ghash.odin
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ GHASH_STRIDE_BYTES_HW :: GHASH_STRIDE_HW * _aes.GHASH_BLOCK_SIZE
// that it is right-shifted by 1 bit. The left-shift is relatively
// inexpensive, and it can be mutualised.
//
// Since SSE2 opcodes do not have facilities for shifting full 128-bit
// values with bit precision, we have to break down values into 64-bit
// chunks. We number chunks from 0 to 3 in left to right order.

Expand Down Expand Up @@ -155,7 +155,7 @@ square_f128 :: #force_inline proc "contextless" (kw: x86.__m128i) -> (x86.__m128
@(enable_target_feature = "sse2,ssse3,pclmul")
ghash :: proc "contextless" (dst, key, data: []byte) #no_bounds_check {
if len(dst) != _aes.GHASH_BLOCK_SIZE || len(key) != _aes.GHASH_BLOCK_SIZE {
intrinsics.trap()
panic_contextless("aes/ghash: invalid dst or key size")
}

// Note: BearSSL opts to copy the remainder into a zero-filled
Expand Down
32 changes: 13 additions & 19 deletions core/crypto/_blake2/blake2.odin
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ BLAKE2S_SIZE :: 32
BLAKE2B_BLOCK_SIZE :: 128
BLAKE2B_SIZE :: 64

MAX_SIZE :: 255

Blake2s_Context :: struct {
h: [8]u32,
t: [2]u32,
Expand Down Expand Up @@ -68,30 +70,27 @@ Blake2_Tree :: struct {
is_last_node: bool,
}

// BLAKE2S_IV is the 8-word (u32) IV table for the BLAKE2s context.
// Package-private and stored as read-only data.
@(private, rodata)
BLAKE2S_IV := [8]u32 {
	0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
	0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
}

// BLAKE2B_IV is the 8-word (u64) IV table for the BLAKE2b context.
// Package-private and stored as read-only data.
@(private, rodata)
BLAKE2B_IV := [8]u64 {
	0x6a09e667f3bcc908, 0xbb67ae8584caa73b,
	0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
	0x510e527fade682d1, 0x9b05688c2b3e6c1f,
	0x1f83d9abfb41bd6b, 0x5be0cd19137e2179,
}

init :: proc(ctx: ^$T, cfg: ^Blake2_Config) {
init :: proc "contextless" (ctx: ^$T, cfg: ^Blake2_Config) {
when T == Blake2s_Context {
max_size :: BLAKE2S_SIZE
} else when T == Blake2b_Context {
max_size :: BLAKE2B_SIZE
}

if cfg.size > max_size {
panic("blake2: requested output size exceeeds algorithm max")
}
ensure_contextless(cfg.size <= max_size, "blake2: requested output size exceeeds algorithm max")

// To save having to allocate a scratch buffer, use the internal
// data buffer (`ctx.x`), as it is exactly the correct size.
Expand Down Expand Up @@ -167,8 +166,8 @@ init :: proc(ctx: ^$T, cfg: ^Blake2_Config) {
ctx.is_initialized = true
}

update :: proc(ctx: ^$T, p: []byte) {
assert(ctx.is_initialized)
update :: proc "contextless" (ctx: ^$T, p: []byte) {
ensure_contextless(ctx.is_initialized)

p := p
when T == Blake2s_Context {
Expand All @@ -195,8 +194,8 @@ update :: proc(ctx: ^$T, p: []byte) {
ctx.nx += copy(ctx.x[ctx.nx:], p)
}

final :: proc(ctx: ^$T, hash: []byte, finalize_clone: bool = false) {
assert(ctx.is_initialized)
final :: proc "contextless" (ctx: ^$T, hash: []byte, finalize_clone: bool = false) {
ensure_contextless(ctx.is_initialized)

ctx := ctx
if finalize_clone {
Expand All @@ -206,24 +205,19 @@ final :: proc(ctx: ^$T, hash: []byte, finalize_clone: bool = false) {
}
defer(reset(ctx))

ensure_contextless(len(hash) >= int(ctx.size), "crypto/blake2: invalid destination digest size")
when T == Blake2s_Context {
if len(hash) < int(ctx.size) {
panic("crypto/blake2s: invalid destination digest size")
}
blake2s_final(ctx, hash)
} else when T == Blake2b_Context {
if len(hash) < int(ctx.size) {
panic("crypto/blake2b: invalid destination digest size")
}
blake2b_final(ctx, hash)
}
}

// clone overwrites `ctx` with a by-value copy of `other`.
//
// Inputs:
// - ctx:   destination context.
// - other: source context; not modified.
clone :: proc "contextless" (ctx, other: ^$T) {
	ctx^ = other^
}

reset :: proc(ctx: ^$T) {
reset :: proc "contextless" (ctx: ^$T) {
if !ctx.is_initialized {
return
}
Expand Down
Loading