-
Notifications
You must be signed in to change notification settings - Fork 12
/
AvalancheTest.cpp
282 lines (261 loc) · 9.95 KB
/
AvalancheTest.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
#include "AvalancheTest.h"
// This code implements the avalanche test. For this test we do the following:
//
// For a given length of key (including the empty key)
// For T iterations:
// choose a random seed and random key
// computer hash as h1
// for each bit K in seed and key
// flip bit
// compute hash as h2
// flip bit back
// for each bit H in h2
// if the bit is different from the corresponding bit in h1
// increment changed[K][H]
//
// We then can compute various goodness of fit statistics from the ratio of
// the changed count and the number of iterations, with an ideal function having
// each bit combination changing %50 of the time.
//
// gtest-cell: goodness of fit estimate using the g-test for a single cell
// gtest-all: goodness of fit estimate using the g-test for all cells in the matrix.
// error: sum of the square of the error
// error-ratio: percentage of the expected error for this number of iterations
// column/row: goodness of fit estimate using the gtest for each input bit (row),
// and for each output bit (column).
// worst-bit: the input/output bit that is the furthest from the expected %50
//
// NB: we actually do a 3 dimensional count matrix, with four cells per input/output
// bit combination, and count the pairs of input bit/output bit by value. We currently
// do not use this potential fully in testing and notationally reduce the 4 cell
// representation (00,01,10,11) to a 2 cell representation (unchanged,changed)
// during post-processing. Once I sort out the math I plan to use this data for
// additional tests.
//-----------------------------------------------------------------------------
// Print out a diagram in a hacked up base-101 representation.
// We are printing out counts of bit changes, where we expect to see the bits
// change half the time and we want to see percentage difference, so
// having a 100 unit scale - actually 101, as we have to deal with the closed
// interval [0,1] and not the half-open interval [0,1).
// The 'scale' parameter controls how magnified the data should be.
// A good hash function should show all "." for scale 1 at least.
const uint8_t digits1[101]= { 46, 49, 50, 51, 52, 53, 54, 55, 56, 57, 48, 97, 98,
99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
114, 115, 116, 117, 118, 119, 120, 121, 122, 195, 195, 195, 195, 195,
195, 195, 195, 195, 195, 195, 195, 195, 195, 195, 65, 66, 67, 68, 69, 70,
71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
89, 90, 195, 195, 195, 195, 195, 195, 195, 195, 195, 195, 195, 195, 195,
195, 194, 42, 194, 194, 194, 38, 37, 64, 35 };
const uint8_t digits2[101]= { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 163, 164,
165, 170, 171, 174, 175, 176, 177, 180, 181, 182, 187, 188, 191, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 130,
131, 132, 133, 138, 139, 145, 148, 149, 150, 155, 156, 157, 184, 164, 0,
169, 174, 182, 0, 0, 0, 0 };
void PrintAvalancheDiagram ( av_statst *stats, int mode, int scale, double confidence )
{
int rows = (stats->seedbits + stats->keybits);
/* 1 2 3 4 5 */
/* 0 90 90 90 90 90 */
if(!scale) scale = 1;
double expect = double(stats->reps) / 4;
int failed = 0;
double *cursor= mode ? stats->gtests : stats->pcts;
printf("# %12s +---------------------------------------------------+\n","");
printf("# %12s |012345678901234567890123456789012345678901234567890|\n","");
printf("# %12s +---------------------------------------------------+\n","");
printf("# %-12s |","Scale:");
for (int i= 0; i <=50; i++) {
printf("%c",digits1[i]);
if(digits2[i])
printf("%c",digits2[i]);
}
printf("|\n");
printf("# %-12s |","");
for (int i= 50; i <=100; i++) {
printf("%c",digits1[i]);
if(digits2[i])
printf("%c",digits2[i]);
}
printf("|\n");
printf("# %12s +---------------------------------------------------+\n","");
if (mode) {
printf("# %12s |%-51s|\n","",
"scaled p-value above confidence level (zero is ok)");
} else {
printf("# %12s |%-48s%3d|\n","",
"pct diff from 50%: abs((0.5-(changed/reps))*2) *", scale * 100);
}
int words;
int width;
if (stats->hashbits == 32) {
printf("# %12s +--------------------------------+\n","");
printf("# %12s |0 1 2 3 |\n","");
printf("# %12s |01234567890123456789012345678901|\n","");
printf("# %12s +--------------------------------+\n","");
width= 32;
} else {
printf("# %12s +----------------------------------------------------------------+\n","");
printf("# %12s |0 1 2 3 4 5 6 |\n","");
printf("# %12s |0123456789012345678901234567890123456789012345678901234567890123|\n","");
printf("# %12s +----------------------------------------------------------------+\n","");
width = 64;
}
words = stats->hashbits / width;
unsigned char buf[words*(width+1)*rows];
unsigned char *outcursor=buf;
unsigned char *outend= buf + (words*(width+1)*rows);
int dupe= 0;
int i;
int j;
int w;
for( i = 0; i < rows; i++)
{
for( w = 0; w < words; w++)
{
for( j = 0; j < width; j++, cursor++ )
{
int digit;
double val = *cursor;
if (mode == 0) {
digit = (int)floor( 0.5 + val * (100.0 * double(scale))); /*interval: [0, 50] */
} else {
if (val < confidence) {
digit= 0;
} else {
double dscale = 100.0 / (1.0 - confidence);
digit= uint8_t((val - confidence) * dscale);
}
}
if (digit < 0) { digit = 0; }
else if (digit > 100) { digit = 100; }
if (digit)
failed ++;
*outcursor = uint8_t(1 + digit);
outcursor++;
}
*outcursor = 0;
outcursor++;
}
}
unsigned char *lastrow[words];
outcursor= buf;
for (int i= 0; i < rows; i++) {
for (int w= 0; w < words; w++, outcursor += (width+1)) {
if (i && outcursor < lastrow[w])
continue;
unsigned char *scancursor= outcursor + ((width+1) * words);
int count = 1;
int nextrow= i+1;
while (
scancursor < outend
&& nextrow != stats->seedbits
&& strcmp((char *)outcursor,(char *)scancursor) == 0)
{
count++;
scancursor += ((width+1) * words);
nextrow++;
}
lastrow[w]= scancursor;
printf("# %-4s %4d.%d |", i < stats->seedbits ? "seed" : "key",
i < stats->seedbits ? i : i - stats->seedbits, w);
for (int i= 0; i < width; i++) {
int digit = outcursor[i] - 1;
printf("%c", digits1[digit]);
if (digits2[digit])
printf("%c", digits2[digit]);
}
if (count>1) {
printf("| x %d\n", count);
} else{
printf("|\n");
}
}
}
printf("# %12s +%.*s+\n","",
stats->hashbits,
"-------------------------------------------------------------------"
);
if (mode) {
printf( "# %d of %d bits failed (%.2f%%) failed at %.6f confidence\n",
failed,
stats->num_bits,
100 * ( failed / double(stats->num_bits) ),
100 * confidence
);
}
}
//----------------------------------------------------------------------------
void
calcBiasStats ( std::vector<int> & counts, int reps, av_statst *stats, double confidence )
{
double dreps= double(reps);
double expected= dreps / 2.0;
stats->err= 0.0;
stats->gtest_prob= 0.0;
stats->worst_pct= 0.0;
for(int i = 0; i < (int)counts.size(); i += 4 )
{
double diff = double(counts[i + 1] + counts[i + 2]);
double same = double(counts[i + 0] + counts[i + 3]);
double ratio = diff / double(dreps);
double delta = 0.5 - ratio;
stats->err += pow(delta, 2.0);
stats->pcts[i/4] = fabs(delta * 2);
if ( stats->worst_pct < stats->pcts[i/4] )
stats->worst_pct = stats->pcts[i/4];
int row= (i / 4) / stats->hashbits;
int col= (i / 4) % stats->hashbits;
stats->col_bins[(col * 2) + 0] += diff;
stats->col_bins[(col * 2) + 1] += same;
stats->row_bins[(row * 2) + 0] += diff;
stats->row_bins[(row * 2) + 1] += same;
double this_gtest = 0.0;
if ( diff ) {
double adj= 0.0;
GTEST_ADD(adj, diff, expected );
this_gtest += adj;
stats->gtest_prob += adj;
}
if ( same ) {
double adj= 0.0;
GTEST_ADD(adj, same, expected );
this_gtest += adj;
stats->gtest_prob += adj;
}
stats->gtests[i/4] = GTEST_PROB( 2.0, this_gtest );
}
for (int i= 0; i < stats->hashbits; i++) {
double gtest= 0.0;
double diff= stats->col_bins[(i * 2) + 0];
double same= stats->col_bins[(i * 2) + 1];
double avg= (same + diff) / 2;
GTEST_ADD(gtest,same,avg);
GTEST_ADD(gtest,diff,avg);
gtest = GTEST_PROB( 2.0, gtest );
stats->col_gtests[i] = gtest;
if (gtest >= confidence) {
stats->col_errors++;
}
}
for (int i= 0; i < stats->num_rows; i++) {
double gtest= 0.0;
double diff= stats->row_bins[(i * 2) + 0];
double same= stats->row_bins[(i * 2) + 1];
double avg= (same + diff) / 2;
GTEST_ADD(gtest,same,avg);
GTEST_ADD(gtest,diff,avg);
gtest = GTEST_PROB( 2.0, gtest );
stats->row_gtests[i] = gtest;
if (gtest >= confidence) {
//printf("row-error %d: %.0f/%.0f => %.20f\n", i/2, l, r, gtest);
stats->row_errors++;
}
}
stats->err_scaled = stats->err / ( double(stats->num_bits) / 1024.0 );
stats->expected_err_scaled = (0.00256 / (stats->reps / 100000.0));
stats->err_scaled_ratio= stats->err_scaled / stats->expected_err_scaled;
stats->gtest_prob = GTEST_PROB( stats->num_bins / 2, stats->gtest_prob );
}
//-----------------------------------------------------------------------------
/* vim: set sts=2 sw=2 et: */