Skip to content

Commit

Permalink
Merge pull request ZcashFoundation#1 from bsdevlin/master
Browse files Browse the repository at this point in the history
pull for test
  • Loading branch information
LeonMac authored Sep 29, 2019
2 parents a8e323c + 3902713 commit b9f30d4
Showing 9 changed files with 106 additions and 34 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
The work in this repo is the result of a Zcash foundation grant to develop open-source FPGA code that can be used to accelerate various aspects of the network.
**An Architecture document is [here](zcash_fpga_design_doc_v1.4.1.pdf)**.
**An Architecture document is [here](zcash_fpga_design_doc_v1.4.2.pdf)**.

While mainly developed for Equihash verification and elliptic curve operations on the secp256k1 and bls12-381 curves, the code (ip_cores) used in this repo can also be applied to other curves by
changing parameters or making minimal modifications to the equations.
2 changes: 1 addition & 1 deletion aws/cl_zcash/design/cl_zcash_aws_wrapper.sv
Original file line number Diff line number Diff line change
@@ -43,7 +43,7 @@ logic [USE_AXI4 == "YES" ? 63 : 3:0] rx_aws_if_keep, tx_aws_if_keep;


always_comb begin
rx_zcash_if_keep = rx_zcash_if.get_keep_from_mod();
rx_zcash_if_keep = 8'hFF;
tx_zcash_if.set_mod_from_keep( tx_zcash_if_keep );
end

8 changes: 4 additions & 4 deletions aws/cl_zcash/verif/tests/test_zcash.sv
Original file line number Diff line number Diff line change
@@ -61,8 +61,8 @@ initial begin


// Run our test cases
// test_status_message();
// test_block_secp256k1();
test_status_message();
test_block_secp256k1();
test_bls12_381();

$display("INFO: All tests passed");
@@ -297,7 +297,7 @@ task test_bls12_381();
data = '{dat:G2_p.y[0], pt:FP2_AF};
for(int i = 0; i < 48; i = i + 4) write_ocl_reg(.addr(`ZCASH_OFFSET + bls12_381_pkg::DATA_AXIL_START + 64*4 + i), .data(data[i*8 +: 32]));
data = '{dat:G2_p.y[1], pt:FP2_AF};
for(int i = 0; i < 48; i = i + 4) write_ocl_reg(.addr(`ZCASH_OFFSET + bls12_381_pkg::DATA_AXIL_START + 64*5 + i), .data(data[i*8 +: 32]));
for(int i = 0; i < 48; i = i + 4) write_ocl_reg(.addr(`ZCASH_OFFSET + bls12_381_pkg::DATA_AXIL_START + 64*5 + i), .data(data[i*8 +: 32]));


inst = '{code:SEND_INTERRUPT, a:16'd6, b:16'hbeef, c:16'd0};
@@ -340,7 +340,7 @@ task test_bls12_381();

end
begin
repeat(100000) @(posedge tb.card.fpga.clk_main_a0);
repeat(10000000) @(posedge tb.card.fpga.clk_main_a0);
$fatal(1, "ERROR: No reply received from test_bls12_381");
end
join_any
19 changes: 13 additions & 6 deletions ip_cores/accum_mult_mod/scripts/generate_files.py
Original file line number Diff line number Diff line change
@@ -31,7 +31,9 @@
GRID_BIT = 64
RAM_A_W = 10

URAM_PERCENT = 50
RAM_AXI_D = 32

URAM_PERCENT = 0
USE_INIT = 1

RES_W = A_DSP_W+B_DSP_W
@@ -81,8 +83,6 @@ def get_accum_gen():
end_padding = max(start+max_bits-end-start_padding, 0)
coef_l.append('{{{{{}{{1\'d0}}}},mul_grid[{}][{}][{}+:{}],{{{}{{1\'d0}}}}}}'.format(end_padding, j[0], j[1], start-offset, bitwidth, start_padding))



coef.append(coef_l)

# Create compressor trees and output
@@ -142,6 +142,8 @@ def get_accum_gen():

# Generate the init file lines - need to take into account earlier address bits
max_bits_value = max_bits + ram_bit_low
if (max_bits_value < RAM_A_W):
max_bits_value = RAM_A_W
#print("max_bits {} ram_bit_low {}".format( max_bits, ram_bit_low))
for i in range(1 << max_bits_value):
# The value of a bit here will depend on the GRID and position of the bit
@@ -276,18 +278,23 @@ def get_accum_gen():
ram_we <= {ram_we, i_ram_we};
ram_d <= {ram_d, i_ram_d};
ram_se <= {ram_se, i_ram_se};
for (int i = 1; i <= RAM_PIPE; i++)
addr[i] <= addr[i-1];
if (ram_we[RAM_PIPE]) begin
addr <= addr + 1;'''
addr[0] <= addr[0] + 1;'''
for idx, i in enumerate(ram_addr_bits):
ram_write_s+= '''
mod_ram_{}_ram[addr] <= mod_ram_{}_d;'''.format(idx, idx)
mod_ram_{}_ram[addr[RAM_PIPE]] <= mod_ram_{}_d;'''.format(idx, idx)
ram_write_s += '''
end
'''
ram_write_s += '''
if (ram_se[RAM_PIPE]) begin'''
for idx, i in enumerate(ram_addr_bits):
previous_ram = "ram_d[RAM_PIPE]" if idx == 0 else "mod_ram_{}_d[{}:({}%RAM_D_W)]".format(idx-1, MODULUS.bit_length()-1, MODULUS.bit_length())
if idx == 0:
previous_ram = "ram_d[RAM_PIPE]"
else:
previous_ram = "mod_ram_{}_d[{}:{}]".format(idx-1, MODULUS.bit_length()-1, MODULUS.bit_length()-RAM_AXI_D)
ram_write_s += '''
mod_ram_{}_d <= {{mod_ram_{}_d, {}}};'''.format(idx, idx, previous_ram)

2 changes: 1 addition & 1 deletion ip_cores/accum_mult_mod/src/rtl/.gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
accum_mult_mod_generated.sv
accum_mult_mod_generated.inc
!.gitignore
2 changes: 1 addition & 1 deletion zcash_fpga/src/rtl/bls12_381/bls12_381_top.sv
Original file line number Diff line number Diff line change
@@ -367,7 +367,7 @@ generate
.A_DSP_W ( 26 ),
.B_DSP_W ( 17 ),
.GRID_BIT ( 64 ),
.RAM_A_W ( 8 ),
.RAM_A_W ( 10 ),
.RAM_D_W ( 32 )
)
accum_mult_mod (
6 changes: 3 additions & 3 deletions zcash_fpga/src/rtl/top/zcash_fpga_pkg.sv
Original file line number Diff line number Diff line change
@@ -27,11 +27,11 @@ package zcash_fpga_pkg;

import bls12_381_pkg::point_type_t;

parameter FPGA_VERSION = 32'h01_04_01; //v1.4.1
parameter FPGA_VERSION = 32'h01_04_02; //v1.4.2

// What features are enabled in this build
parameter bit ENB_VERIFY_SECP256K1_SIG = 1;
parameter bit ENB_VERIFY_EQUIHASH = 1;
parameter bit ENB_VERIFY_SECP256K1_SIG = 0;
parameter bit ENB_VERIFY_EQUIHASH = 0;
parameter bit ENB_BLS12_381 = 1;

localparam [63:0] FPGA_CMD_CAP = {{60'd0},
99 changes: 82 additions & 17 deletions zcash_fpga/src/tb/bls12_381_top_tb.sv
Original file line number Diff line number Diff line change
@@ -22,6 +22,9 @@ import common_pkg::*;
import bls12_381_pkg::*;
import zcash_fpga_pkg::bls12_381_interrupt_rpl_t;
import zcash_fpga_pkg::BLS12_381_INTERRUPT_RPL;
import zcash_fpga_pkg::BLS12_381_USE_KARATSUBA;

localparam LOAD_RAM = "NO"; // This loads the accum_mult_mod RAM, not needed as we use init files

localparam CLK_PERIOD = 100;

@@ -42,7 +45,10 @@ if_axi_stream #(.DAT_BYTS(8)) out_if(clk);
if_axi_lite #(.A_BITS(32)) axi_lite_if(clk);


bls12_381_top bls12_381_top (
bls12_381_top # (
.USE_KARATSUBA(BLS12_381_USE_KARATSUBA)
)
bls12_381_top (
.i_clk ( clk ),
.i_rst ( rst ),
// Only tx interface is used to send messages to SW on a SEND-INTERRUPT instruction
@@ -693,16 +699,14 @@ begin
miller_loop(G1_p, G2_p, f_exp1);
f_exp0 = fe12_mul(f_exp0, f_exp1);
final_exponent(f_exp0);

$display("Running test_multi_pairing...");

// See what current instruction pointer is
axi_lite_if.peek(.addr(32'h10), .data(rdata));

// First load generator points into memory
// G1 = ((1 << DATA_RAM_DEPTH) -1 -6)
// G2 = ((1 << DATA_RAM_DEPTH) -1 -4)


// G1
data = '{dat:G1_p.x, pt:FP_AF};
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + ((1 << DATA_RAM_DEPTH) -1 -6)*64), .len(48));
@@ -719,28 +723,28 @@ begin
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + ((1 << DATA_RAM_DEPTH) -1 -2)*64), .len(48));
data = '{dat:G2_p.y[1], pt:FP2_AF};
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + ((1 << DATA_RAM_DEPTH) -1 -1)*64), .len(48));
// Program instruction memory

// Program instruction memory

// Do two miller loops
inst = '{code:MILLER_LOOP, a:((1 << DATA_RAM_DEPTH) -1 -6), b:((1 << DATA_RAM_DEPTH) -1 -4), c:16'd0};
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+1)*8), .len(8));
inst = '{code:MILLER_LOOP, a:((1 << DATA_RAM_DEPTH) -1 -6), b:((1 << DATA_RAM_DEPTH) -1 -4), c:16'd12};
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+2)*8), .len(8));
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+2)*8), .len(8));

// Multiply result
inst = '{code:MUL_ELEMENT , a:16'd0, b:16'd12, c:16'd0};
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+3)*8), .len(8));
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+3)*8), .len(8));

// Do final exp.
inst = '{code:FINAL_EXP , a:16'd0, b:16'd0, c:16'd0};
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+4)*8), .len(8));
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+4)*8), .len(8));

inst = '{code:SEND_INTERRUPT, a:16'd0, b:16'h4321, c:16'd0};
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+5)*8), .len(8));

axi_lite_if.poke(.addr(32'h10), .data(rdata+1));

fork
begin
out_if.get_stream(get_dat, get_len, 0);
@@ -798,11 +802,72 @@ begin
end
endtask;

// Loads RAM contents over AXI-lite from the files mod_ram_0.mem,
// mod_ram_1.mem, ... in the simulator's working directory.
//
// The task first probes how many consecutively numbered files exist, then for
// every line index reads that line from each file, packs the values side by
// side into one flat vector, shifts the vector out 32 bits at a time through
// register 0x18, and finally issues a single write to register 0x1c.
//
// Loading stops at the first line index for which any file is exhausted or
// fails to parse, so all files are expected to have the same number of lines.
// If no file is found the task returns without touching the bus.
task init_ram();
  localparam int MAX_RAMS = 100; // Upper bound on the number of .mem files probed
  localparam int RAM_BITS = 381; // Bits taken from each line of a .mem file

  int fd;
  int max_rams;
  int eod;
  int scan_ok;
  int nxt_line, curr_line;
  logic [RAM_BITS-1:0] dat;
  // One RAM_BITS-wide slice per RAM. The extra 64 bits keep the first (highest)
  // word read by the shift loop below inside the vector even when MAX_RAMS
  // files are present, so it reads as zero instead of X.
  logic [RAM_BITS*MAX_RAMS+63:0] dat_flat;

  // First find how many rams we have - assume less than MAX_RAMS.
  // Start from the cap so that "every probe succeeded" is reported as MAX_RAMS
  // rather than leaving max_rams at whatever value it previously held.
  max_rams = MAX_RAMS;
  for (int i = 0; i < MAX_RAMS; i++) begin
    fd = $fopen($sformatf("mod_ram_%0d.mem", i), "r");
    if (fd == 0) begin
      $display("INFO: Finished reading file at cnt %0d", i);
      max_rams = i;
      break;
    end
    $fclose(fd);
  end

  if (max_rams >= MAX_RAMS - 1)
    $display("WARNING: Reached max limit of RAMs, possibly will not simulate correctly");

  // Without this guard the loop below would never observe end-of-data and
  // would keep writing all-zero rows forever.
  if (max_rams == 0) begin
    $display("WARNING: No mod_ram_<n>.mem files found, RAMs were not loaded");
    return;
  end

  eod = 0;
  nxt_line = 0;

  while (eod == 0) begin
    dat_flat = 0;
    for (int i = 0; i < max_rams && eod == 0; i++) begin
      fd = $fopen($sformatf("mod_ram_%0d.mem", i), "r");
      if (fd == 0) begin
        // File was present during the probe but cannot be opened any more.
        $display("WARNING: Could not re-open mod_ram_%0d.mem, stopping RAM load", i);
        eod = 1;
        break;
      end

      // Each file is re-read from the top; skip forward to line nxt_line.
      curr_line = 0;
      while (curr_line <= nxt_line) begin
        if ($feof(fd)) begin
          eod = 1;
          break;
        end
        scan_ok = $fscanf(fd, "%h\n", dat);
        // A line that does not parse never advances the file position, so
        // treat it as end of data instead of spinning on it.
        if (scan_ok != 1) begin
          eod = 1;
          break;
        end
        curr_line++;
      end

      dat_flat[i*RAM_BITS +: RAM_BITS] = dat;
      $fclose(fd);
    end

    if (eod == 0) begin
      // Now shift in data, most significant word first.
      // NOTE(review): the loop starts one word above the last one that holds
      // data, so an extra all-zero word is shifted first - kept as in the
      // original; confirm against the RTL shift chain length.
      // NOTE(review): 0x02 / 0x01 written to 0x1c are presumably the
      // shift-enable and write-enable strobes - confirm against the register map.
      for (int j = ((max_rams*RAM_BITS+31)/32); j >= 0; j--) begin
        axi_lite_if.poke(.addr(32'h18), .data(dat_flat[j*32 +: 32]));
        axi_lite_if.poke(.addr(32'h1c), .data(32'h02));
      end
      axi_lite_if.poke(.addr(32'h1c), .data(32'h01));
      nxt_line++;
    end

  end

  $display("INFO: Finished writing all RAMS");

endtask

initial begin
axi_lite_if.reset_source();
out_if.rdy = 0;
#100ns;

if (BLS12_381_USE_KARATSUBA== "NO" && LOAD_RAM == "YES")
init_ram();

test_inv_element();
test_mul_add_sub_element();
test_point_mult();
Binary file not shown.

0 comments on commit b9f30d4

Please sign in to comment.