Skip to content

Commit

Permalink
Merge bitcoin#26153: Reduce wasted pseudorandom bytes in ChaCha20 + v…
Browse files Browse the repository at this point in the history
…arious improvements

511aa4f Add unit test for ChaCha20's new caching (Pieter Wuille)
fb243d2 Improve test vectors for ChaCha20 (Pieter Wuille)
93aee8b Inline ChaCha20 32-byte specific constants (Pieter Wuille)
62ec713 Only support 32-byte keys in ChaCha20{,Aligned} (Pieter Wuille)
f21994a Use ChaCha20Aligned in MuHash3072 code (Pieter Wuille)
5d16f75 Use ChaCha20 caching in FastRandomContext (Pieter Wuille)
38eaece Add fuzz test for testing that ChaCha20 works as a stream (Pieter Wuille)
5f05b27 Add xoroshiro128++ PRNG (Martin Leitner-Ankerl)
12ff724 Make unrestricted ChaCha20 cipher not waste keystream bytes (Pieter Wuille)
6babf40 Rename ChaCha20::Seek -> Seek64 to clarify multiple of 64 (Pieter Wuille)
e37bcaa Split ChaCha20 into aligned/unaligned variants (Pieter Wuille)

Pull request description:

  This is an alternative to bitcoin#25354 (by my benchmarking, somewhat faster), subsumes bitcoin#25712, and adds additional test vectors.

  It separates the multiple-of-64-bytes-only "core" logic (which becomes simpler) from a layer around it that performs caching/slicing to support arbitrary byte amounts. Both have their uses (in particular, the MuHash3072 code can benefit from multiple-of-64-bytes assumptions), plus the separation results in more readable code. Also, since FastRandomContext effectively had its own (more naive) caching on top of ChaCha20, that can be dropped in favor of ChaCha20's new built-in caching.

  I thought about rebasing bitcoin#25712 on top of this, but the changes before it are fairly extensive, so I redid it instead.

ACKs for top commit:
  ajtowns:
    ut reACK 511aa4f
  dhruv:
    tACK crACK 511aa4f

Tree-SHA512: 3aa80971322a93e780c75a8d35bd39da3a9ea570fbae4491eaf0c45242f5f670a24a592c50ad870d5fd09b9f88ec06e274e8aa3cefd9561d623c63f7198cf2c7
  • Loading branch information
fanquake authored and PastaPastaPasta committed Nov 19, 2023
1 parent 1915914 commit 5a4406e
Show file tree
Hide file tree
Showing 15 changed files with 582 additions and 225 deletions.
3 changes: 2 additions & 1 deletion src/Makefile.test.include
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,8 @@ BITCOIN_TESTS =\
test/validation_chainstatemanager_tests.cpp \
test/validation_flush_tests.cpp \
test/validationinterface_tests.cpp \
test/versionbits_tests.cpp
test/versionbits_tests.cpp \
test/xoroshiro128plusplus_tests.cpp

if ENABLE_WALLET
BITCOIN_TESTS += \
Expand Down
3 changes: 2 additions & 1 deletion src/Makefile.test_util.include
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ TEST_UTIL_H = \
test/util/setup_common.h \
test/util/str.h \
test/util/transaction_utils.h \
test/util/wallet.h
test/util/wallet.h \
test/util/xoroshiro128plusplus.h

libtest_util_a_CPPFLAGS = $(AM_CPPFLAGS) $(BITCOIN_INCLUDES) $(MINIUPNPC_CPPFLAGS) $(EVENT_CFLAGS) $(EVENT_PTHREADS_CFLAGS)
libtest_util_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
Expand Down
4 changes: 2 additions & 2 deletions src/bench/chacha20.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ static const uint64_t BUFFER_SIZE_LARGE = 1024*1024;
static void CHACHA20(benchmark::Bench& bench, size_t buffersize)
{
std::vector<uint8_t> key(32,0);
ChaCha20 ctx(key.data(), key.size());
ChaCha20 ctx(key.data());
ctx.SetIV(0);
ctx.Seek(0);
ctx.Seek64(0);
std::vector<uint8_t> in(buffersize,0);
std::vector<uint8_t> out(buffersize,0);
bench.batch(in.size()).unit("byte").run([&] {
Expand Down
260 changes: 133 additions & 127 deletions src/crypto/chacha20.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <crypto/common.h>
#include <crypto/chacha20.h>

#include <algorithm>
#include <string.h>

/**
 * Rotate the 32-bit value v left by c bit positions.
 *
 * Both shift counts are reduced modulo 32. Without the masking, c == 0 would
 * evaluate v >> 32, i.e. a shift by the full width of the type, which is
 * undefined behaviour (and ill-formed in a constant expression). For c in
 * 1..31 the result is identical to the unmasked form.
 */
constexpr static inline uint32_t rotl32(uint32_t v, int c) { return (v << (c & 31)) | (v >> ((32 - c) & 31)); }
Expand All @@ -20,95 +21,69 @@ constexpr static inline uint32_t rotl32(uint32_t v, int c) { return (v << c) | (

#define REPEAT10(a) do { {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; } while(0)

static const unsigned char sigma[] = "expand 32-byte k";
static const unsigned char tau[] = "expand 16-byte k";

void ChaCha20::SetKey(const unsigned char* k, size_t keylen)
void ChaCha20Aligned::SetKey32(const unsigned char* k)
{
const unsigned char *constants;

input[4] = ReadLE32(k + 0);
input[5] = ReadLE32(k + 4);
input[6] = ReadLE32(k + 8);
input[7] = ReadLE32(k + 12);
if (keylen == 32) { /* recommended */
k += 16;
constants = sigma;
} else { /* keylen == 16 */
constants = tau;
}
input[8] = ReadLE32(k + 0);
input[9] = ReadLE32(k + 4);
input[10] = ReadLE32(k + 8);
input[11] = ReadLE32(k + 12);
input[0] = ReadLE32(constants + 0);
input[1] = ReadLE32(constants + 4);
input[2] = ReadLE32(constants + 8);
input[3] = ReadLE32(constants + 12);
input[12] = 0;
input[13] = 0;
input[14] = 0;
input[15] = 0;
input[0] = ReadLE32(k + 0);
input[1] = ReadLE32(k + 4);
input[2] = ReadLE32(k + 8);
input[3] = ReadLE32(k + 12);
input[4] = ReadLE32(k + 16);
input[5] = ReadLE32(k + 20);
input[6] = ReadLE32(k + 24);
input[7] = ReadLE32(k + 28);
input[8] = 0;
input[9] = 0;
input[10] = 0;
input[11] = 0;
}

ChaCha20::ChaCha20()
ChaCha20Aligned::ChaCha20Aligned()
{
memset(input, 0, sizeof(input));
}

ChaCha20::ChaCha20(const unsigned char* k, size_t keylen)
ChaCha20Aligned::ChaCha20Aligned(const unsigned char* key32)
{
SetKey(k, keylen);
SetKey32(key32);
}

void ChaCha20::SetIV(uint64_t iv)
void ChaCha20Aligned::SetIV(uint64_t iv)
{
input[14] = iv;
input[15] = iv >> 32;
input[10] = iv;
input[11] = iv >> 32;
}

void ChaCha20::Seek(uint64_t pos)
void ChaCha20Aligned::Seek64(uint64_t pos)
{
input[12] = pos;
input[13] = pos >> 32;
input[8] = pos;
input[9] = pos >> 32;
}

void ChaCha20::Keystream(unsigned char* c, size_t bytes)
inline void ChaCha20Aligned::Keystream64(unsigned char* c, size_t blocks)
{
uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
unsigned char *ctarget = nullptr;
unsigned char tmp[64];
unsigned int i;

if (!bytes) return;

j0 = input[0];
j1 = input[1];
j2 = input[2];
j3 = input[3];
j4 = input[4];
j5 = input[5];
j6 = input[6];
j7 = input[7];
j8 = input[8];
j9 = input[9];
j10 = input[10];
j11 = input[11];
j12 = input[12];
j13 = input[13];
j14 = input[14];
j15 = input[15];
uint32_t j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;

if (!blocks) return;

j4 = input[0];
j5 = input[1];
j6 = input[2];
j7 = input[3];
j8 = input[4];
j9 = input[5];
j10 = input[6];
j11 = input[7];
j12 = input[8];
j13 = input[9];
j14 = input[10];
j15 = input[11];

for (;;) {
if (bytes < 64) {
ctarget = c;
c = tmp;
}
x0 = j0;
x1 = j1;
x2 = j2;
x3 = j3;
x0 = 0x61707865;
x1 = 0x3320646e;
x2 = 0x79622d32;
x3 = 0x6b206574;
x4 = j4;
x5 = j5;
x6 = j6;
Expand All @@ -134,10 +109,10 @@ void ChaCha20::Keystream(unsigned char* c, size_t bytes)
QUARTERROUND( x3, x4, x9,x14);
);

x0 += j0;
x1 += j1;
x2 += j2;
x3 += j3;
x0 += 0x61707865;
x1 += 0x3320646e;
x2 += 0x79622d32;
x3 += 0x6b206574;
x4 += j4;
x5 += j5;
x6 += j6;
Expand Down Expand Up @@ -171,59 +146,41 @@ void ChaCha20::Keystream(unsigned char* c, size_t bytes)
WriteLE32(c + 56, x14);
WriteLE32(c + 60, x15);

if (bytes <= 64) {
if (bytes < 64) {
for (i = 0;i < bytes;++i) ctarget[i] = c[i];
}
input[12] = j12;
input[13] = j13;
if (blocks == 1) {
input[8] = j12;
input[9] = j13;
return;
}
bytes -= 64;
blocks -= 1;
c += 64;
}
}

void ChaCha20::Crypt(const unsigned char* m, unsigned char* c, size_t bytes)
inline void ChaCha20Aligned::Crypt64(const unsigned char* m, unsigned char* c, size_t blocks)
{
uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
unsigned char *ctarget = nullptr;
unsigned char tmp[64];
unsigned int i;

if (!bytes) return;

j0 = input[0];
j1 = input[1];
j2 = input[2];
j3 = input[3];
j4 = input[4];
j5 = input[5];
j6 = input[6];
j7 = input[7];
j8 = input[8];
j9 = input[9];
j10 = input[10];
j11 = input[11];
j12 = input[12];
j13 = input[13];
j14 = input[14];
j15 = input[15];
uint32_t j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;

if (!blocks) return;

j4 = input[0];
j5 = input[1];
j6 = input[2];
j7 = input[3];
j8 = input[4];
j9 = input[5];
j10 = input[6];
j11 = input[7];
j12 = input[8];
j13 = input[9];
j14 = input[10];
j15 = input[11];

for (;;) {
if (bytes < 64) {
// if m has fewer than 64 bytes available, copy m to tmp and
// read from tmp instead
for (i = 0;i < bytes;++i) tmp[i] = m[i];
m = tmp;
ctarget = c;
c = tmp;
}
x0 = j0;
x1 = j1;
x2 = j2;
x3 = j3;
x0 = 0x61707865;
x1 = 0x3320646e;
x2 = 0x79622d32;
x3 = 0x6b206574;
x4 = j4;
x5 = j5;
x6 = j6;
Expand All @@ -249,10 +206,10 @@ void ChaCha20::Crypt(const unsigned char* m, unsigned char* c, size_t bytes)
QUARTERROUND( x3, x4, x9,x14);
);

x0 += j0;
x1 += j1;
x2 += j2;
x3 += j3;
x0 += 0x61707865;
x1 += 0x3320646e;
x2 += 0x79622d32;
x3 += 0x6b206574;
x4 += j4;
x5 += j5;
x6 += j6;
Expand Down Expand Up @@ -303,16 +260,65 @@ void ChaCha20::Crypt(const unsigned char* m, unsigned char* c, size_t bytes)
WriteLE32(c + 56, x14);
WriteLE32(c + 60, x15);

if (bytes <= 64) {
if (bytes < 64) {
for (i = 0;i < bytes;++i) ctarget[i] = c[i];
}
input[12] = j12;
input[13] = j13;
if (blocks == 1) {
input[8] = j12;
input[9] = j13;
return;
}
bytes -= 64;
blocks -= 1;
c += 64;
m += 64;
}
}

/**
 * Write `bytes` bytes of keystream to `c`.
 *
 * Output is assembled in three stages: bytes still held in m_buffer from a
 * previous call, then whole 64-byte blocks written directly to `c`, and
 * finally a partial block that is generated into m_buffer so its unused
 * remainder can be served by a later call.
 */
void ChaCha20::Keystream(unsigned char* c, size_t bytes)
{
    if (bytes == 0) return;

    // Stage 1: drain what is left in m_buffer (its last m_bufleft bytes).
    if (m_bufleft != 0) {
        const unsigned cached = std::min<size_t>(m_bufleft, bytes);
        const unsigned char* src = m_buffer + 64 - m_bufleft;
        memcpy(c, src, cached);
        c += cached;
        bytes -= cached;
        m_bufleft -= cached;
    }

    // Stage 2: emit every complete 64-byte block straight into the output.
    const size_t whole = bytes / 64;
    if (whole != 0) {
        m_aligned.Keystream64(c, whole);
        c += whole * 64;
        bytes -= whole * 64;
    }

    // Stage 3: fewer than 64 bytes remain; generate one block into m_buffer,
    // hand out its prefix and record how many bytes stay available.
    if (bytes != 0) {
        m_aligned.Keystream64(m_buffer, 1);
        memcpy(c, m_buffer, bytes);
        m_bufleft = 64 - bytes;
    }
}

/**
 * XOR `bytes` bytes of input `m` with keystream and store the result in `c`.
 *
 * Works in three stages: keystream bytes still held in m_buffer from a
 * previous call, then whole 64-byte blocks processed by the aligned cipher,
 * and finally a partial block whose keystream is generated into m_buffer so
 * the unused remainder can be consumed by a later call.
 */
void ChaCha20::Crypt(const unsigned char* m, unsigned char* c, size_t bytes)
{
    if (bytes == 0) return;

    // Stage 1: use up the keystream left in m_buffer (its last m_bufleft bytes).
    if (m_bufleft != 0) {
        const unsigned cached = std::min<size_t>(m_bufleft, bytes);
        const unsigned char* pad = m_buffer + 64 - m_bufleft;
        for (unsigned pos = 0; pos < cached; ++pos) {
            c[pos] = m[pos] ^ pad[pos];
        }
        m += cached;
        c += cached;
        bytes -= cached;
        m_bufleft -= cached;
    }

    // Stage 2: process every complete 64-byte block directly.
    const size_t whole = bytes / 64;
    if (whole != 0) {
        m_aligned.Crypt64(m, c, whole);
        m += whole * 64;
        c += whole * 64;
        bytes -= whole * 64;
    }

    // Stage 3: fewer than 64 bytes remain; generate one keystream block into
    // m_buffer, apply its prefix and record how many bytes stay available.
    if (bytes != 0) {
        m_aligned.Keystream64(m_buffer, 1);
        for (unsigned pos = 0; pos < bytes; ++pos) {
            c[pos] = m[pos] ^ m_buffer[pos];
        }
        m_bufleft = 64 - bytes;
    }
}
Loading

0 comments on commit 5a4406e

Please sign in to comment.