Skip to content

Commit

Permalink
Update testbench to test writing to RAM used in multiplier, fixed bug in
Browse files Browse the repository at this point in the history
multiplier generation code for loading RAM
  • Loading branch information
bsdevlin authored and bsdevlin committed Sep 27, 2019
1 parent 9b5fb2f commit deb8298
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 26 deletions.
2 changes: 1 addition & 1 deletion aws/cl_zcash/design/cl_zcash_aws_wrapper.sv
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ logic [USE_AXI4 == "YES" ? 63 : 3:0] rx_aws_if_keep, tx_aws_if_keep;


always_comb begin
rx_zcash_if_keep = rx_zcash_if.get_keep_from_mod();
rx_zcash_if_keep = 8'hFF;
tx_zcash_if.set_mod_from_keep( tx_zcash_if_keep );
end

Expand Down
4 changes: 2 additions & 2 deletions aws/cl_zcash/verif/tests/test_zcash.sv
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ task test_bls12_381();
data = '{dat:G2_p.y[0], pt:FP2_AF};
for(int i = 0; i < 48; i = i + 4) write_ocl_reg(.addr(`ZCASH_OFFSET + bls12_381_pkg::DATA_AXIL_START + 64*4 + i), .data(data[i*8 +: 32]));
data = '{dat:G2_p.y[1], pt:FP2_AF};
for(int i = 0; i < 48; i = i + 4) write_ocl_reg(.addr(`ZCASH_OFFSET + bls12_381_pkg::DATA_AXIL_START + 64*5 + i), .data(data[i*8 +: 32]));
for(int i = 0; i < 48; i = i + 4) write_ocl_reg(.addr(`ZCASH_OFFSET + bls12_381_pkg::DATA_AXIL_START + 64*5 + i), .data(data[i*8 +: 32]));


inst = '{code:SEND_INTERRUPT, a:16'd6, b:16'hbeef, c:16'd0};
Expand Down Expand Up @@ -340,7 +340,7 @@ task test_bls12_381();

end
begin
repeat(100000) @(posedge tb.card.fpga.clk_main_a0);
repeat(10000000) @(posedge tb.card.fpga.clk_main_a0);
$fatal(1, "ERROR: No reply received from test_bls12_381");
end
join_any
Expand Down
15 changes: 11 additions & 4 deletions ip_cores/accum_mult_mod/scripts/generate_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@
GRID_BIT = 64
RAM_A_W = 10

URAM_PERCENT = 50
RAM_AXI_D = 32

URAM_PERCENT = 0
USE_INIT = 1

RES_W = A_DSP_W+B_DSP_W
Expand Down Expand Up @@ -276,18 +278,23 @@ def get_accum_gen():
ram_we <= {ram_we, i_ram_we};
ram_d <= {ram_d, i_ram_d};
ram_se <= {ram_se, i_ram_se};
for (int i = 1; i <= RAM_PIPE; i++)
addr[i] <= addr[i-1];
if (ram_we[RAM_PIPE]) begin
addr <= addr + 1;'''
addr[0] <= addr[0] + 1;'''
for idx, i in enumerate(ram_addr_bits):
ram_write_s+= '''
mod_ram_{}_ram[addr] <= mod_ram_{}_d;'''.format(idx, idx)
mod_ram_{}_ram[addr[RAM_PIPE]] <= mod_ram_{}_d;'''.format(idx, idx)
ram_write_s += '''
end
'''
ram_write_s += '''
if (ram_se[RAM_PIPE]) begin'''
for idx, i in enumerate(ram_addr_bits):
previous_ram = "ram_d[RAM_PIPE]" if idx == 0 else "mod_ram_{}_d[{}:({}%RAM_D_W)]".format(idx-1, MODULUS.bit_length()-1, MODULUS.bit_length())
if idx == 0:
previous_ram = "ram_d[RAM_PIPE]"
else:
previous_ram = "mod_ram_{}_d[{}:{}]".format(idx-1, MODULUS.bit_length()-1, MODULUS.bit_length()-RAM_AXI_D)
ram_write_s += '''
mod_ram_{}_d <= {{mod_ram_{}_d, {}}};'''.format(idx, idx, previous_ram)

Expand Down
2 changes: 1 addition & 1 deletion ip_cores/accum_mult_mod/src/rtl/.gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
accum_mult_mod_generated.sv
accum_mult_mod_generated.inc
!.gitignore
2 changes: 1 addition & 1 deletion zcash_fpga/src/rtl/bls12_381/bls12_381_top.sv
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ generate
.A_DSP_W ( 26 ),
.B_DSP_W ( 17 ),
.GRID_BIT ( 64 ),
.RAM_A_W ( 8 ),
.RAM_A_W ( 10 ),
.RAM_D_W ( 32 )
)
accum_mult_mod (
Expand Down
96 changes: 79 additions & 17 deletions zcash_fpga/src/tb/bls12_381_top_tb.sv
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ import common_pkg::*;
import bls12_381_pkg::*;
import zcash_fpga_pkg::bls12_381_interrupt_rpl_t;
import zcash_fpga_pkg::BLS12_381_INTERRUPT_RPL;
import zcash_fpga_pkg::BLS12_381_USE_KARATSUBA;

localparam LOAD_RAM = "NO"; // This loads the accum_mult_mod RAM, not needed as we use init files

localparam CLK_PERIOD = 100;

Expand All @@ -42,7 +45,10 @@ if_axi_stream #(.DAT_BYTS(8)) out_if(clk);
if_axi_lite #(.A_BITS(32)) axi_lite_if(clk);


bls12_381_top bls12_381_top (
bls12_381_top # (
.USE_KARATSUBA(BLS12_381_USE_KARATSUBA)
)
bls12_381_top (
.i_clk ( clk ),
.i_rst ( rst ),
// Only tx interface is used to send messages to SW on a SEND-INTERRUPT instruction
Expand Down Expand Up @@ -693,16 +699,14 @@ begin
miller_loop(G1_p, G2_p, f_exp1);
f_exp0 = fe12_mul(f_exp0, f_exp1);
final_exponent(f_exp0);

$display("Running test_multi_pairing...");

// See what current instruction pointer is
axi_lite_if.peek(.addr(32'h10), .data(rdata));

// First load generator points into memory
// G1 = ((1 << DATA_RAM_DEPTH) -1 -6)
// G2 = ((1 << DATA_RAM_DEPTH) -1 -4)


// G1
data = '{dat:G1_p.x, pt:FP_AF};
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + ((1 << DATA_RAM_DEPTH) -1 -6)*64), .len(48));
Expand All @@ -719,28 +723,28 @@ begin
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + ((1 << DATA_RAM_DEPTH) -1 -2)*64), .len(48));
data = '{dat:G2_p.y[1], pt:FP2_AF};
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + ((1 << DATA_RAM_DEPTH) -1 -1)*64), .len(48));
// Program instruction memory

// Program instruction memory

// Do two miller loops
inst = '{code:MILLER_LOOP, a:((1 << DATA_RAM_DEPTH) -1 -6), b:((1 << DATA_RAM_DEPTH) -1 -4), c:16'd0};
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+1)*8), .len(8));
inst = '{code:MILLER_LOOP, a:((1 << DATA_RAM_DEPTH) -1 -6), b:((1 << DATA_RAM_DEPTH) -1 -4), c:16'd12};
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+2)*8), .len(8));
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+2)*8), .len(8));

// Multiply result
inst = '{code:MUL_ELEMENT , a:16'd0, b:16'd12, c:16'd0};
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+3)*8), .len(8));
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+3)*8), .len(8));

// Do final exp.
inst = '{code:FINAL_EXP , a:16'd0, b:16'd0, c:16'd0};
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+4)*8), .len(8));
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+4)*8), .len(8));

inst = '{code:SEND_INTERRUPT, a:16'd0, b:16'h4321, c:16'd0};
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+5)*8), .len(8));

axi_lite_if.poke(.addr(32'h10), .data(rdata+1));

fork
begin
out_if.get_stream(get_dat, get_len, 0);
Expand Down Expand Up @@ -798,11 +802,69 @@ begin
end
endtask;

// Loads the accum_mult_mod RAMs over AXI-lite from the files
// mod_ram_0.mem, mod_ram_1.mem, ... found in the simulation directory.
//
// For every line index, one 381-bit hex word is read from each file, the
// words are packed into a flat vector (RAM i occupies bits [i*381 +: 381]),
// the vector is shifted in 32 bits at a time (data to address 0x18, then
// 0x02 to address 0x1c) and finally committed by writing 0x01 to address
// 0x1c. Loading stops as soon as any file has no further line to supply.
//
// NOTE(review): each line is parsed with "%h", so files containing
// $readmemh-style comments or @address markers would end the load early -
// confirm the generated .mem files are plain hex words.
task init_ram();
  int fd;
  int max_rams;
  int eod;
  int nxt_line, curr_line;
  logic [380:0] dat;
  // Padded by 64 bits so the 32-bit part-selects of the shift loop below never
  // read past the end of the vector (and return X) when all 100 RAMs are used.
  logic [381*100+63:0] dat_flat;

  // First find how many rams we have - at most 100 are supported.
  // max_rams is assigned explicitly so the count is also valid when every
  // probed file exists (previously it was left unassigned in that case).
  max_rams = 0;
  for (int i = 0; i < 100; i++) begin
    fd = $fopen ($sformatf("mod_ram_%0d.mem", i), "r");
    if (fd == 0)
      break;
    $fclose(fd);
    max_rams = i + 1;
  end
  $display("INFO: Finished reading file at cnt %0d", max_rams);

  if (max_rams == 100)
    $display("WARNING: Reached max limit of RAMs, possibly will not simulate correctly");

  // Nothing to load - return instead of spinning in the loop below.
  if (max_rams == 0) begin
    $display("WARNING: No mod_ram_*.mem files found, RAMs were not written");
    return;
  end

  eod = 0;
  nxt_line = 0;

  while (eod == 0) begin
    dat_flat = 0;
    for (int i = 0; i < max_rams; i++) begin
      fd = $fopen ($sformatf("mod_ram_%0d.mem", i), "r");
      // Re-read from the top of the file up to and including line nxt_line.
      // End of data is detected from the $fscanf return value: $feof only
      // reports EOF after a read has hit it, so sampling it right after
      // $fopen (as was done before) never terminated the outer loop.
      for (curr_line = 0; curr_line <= nxt_line; curr_line++) begin
        if ($fscanf(fd, "%h\n", dat) != 1) begin
          eod = 1;
          break;
        end
      end
      $fclose(fd);
      if (eod != 0)
        break;
      dat_flat[i*381 +: 381] = dat;
    end

    // Only write a RAM line when every file supplied a word for it.
    if (eod == 0) begin
      // Now shift in data, most significant word first
      for (int j = ((max_rams*381+31)/32); j >= 0; j--) begin
        axi_lite_if.poke(.addr(32'h18), .data(dat_flat[j*32 +: 32]));
        axi_lite_if.poke(.addr(32'h1c), .data(32'h02));
      end

      // Commit the shifted-in line to the RAMs
      axi_lite_if.poke(.addr(32'h1c), .data(32'h01));
      nxt_line++;
    end
  end

  $display("INFO: Finished writing all RAMS");

endtask

initial begin
axi_lite_if.reset_source();
out_if.rdy = 0;
#100ns;

if (BLS12_381_USE_KARATSUBA== "NO" && LOAD_RAM == "YES")
init_ram();

test_inv_element();
test_mul_add_sub_element();
test_point_mult();
Expand Down

0 comments on commit deb8298

Please sign in to comment.