diff --git a/ip_cores/util/src/rtl/barret_mod.sv b/ip_cores/util/src/rtl/barret_mod.sv
index bd745db..e90a029 100644
--- a/ip_cores/util/src/rtl/barret_mod.sv
+++ b/ip_cores/util/src/rtl/barret_mod.sv
@@ -1,206 +1,224 @@
-/*
-  Calculates a mod n, using barret reduction.
-  Can use either karatsuba multiplier or accumlate - multiply
-  for the multiplications.
-
-  Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
-
-  This program is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  This program is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program. If not, see .
- */
-
-module barret_mod #(
-  parameter OUT_BITS = 256,
-  parameter CTL_BITS = 8,
-  parameter IN_BITS = 512,
-  parameter [OUT_BITS-1:0] P = 256'hFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFE_BAAEDCE6_AF48A03B_BFD25E8C_D0364141,
-  parameter K = $clog2(P),
-  parameter MULTIPLIER = "EXTERNAL" // [ACCUM_MULT || KARATSUBA || EXTERNAL]
-)(
-  input i_clk,
-  input i_rst,
-  input [IN_BITS-1:0] i_dat,
-  input i_val,
-  input [CTL_BITS-1:0] i_ctl,
-  output logic [CTL_BITS-1:0] o_ctl,
-  output logic o_rdy,
-  output logic [OUT_BITS-1:0] o_dat,
-  output logic o_val,
-  input i_rdy,
-  if_axi_stream.source o_mult_if,
-  if_axi_stream.sink i_mult_if
-);
-
-localparam MAX_IN_BITS = 2*K;
-localparam [MAX_IN_BITS:0] U = (1 << (2*K)) / P;
-localparam [MAX_IN_BITS-1:0] P_ = P;
-logic [2:0][CTL_BITS-1:0] ctl_r;
-
-if_axi_stream #(.DAT_BITS(2*(OUT_BITS+2))) mult_in_if(i_clk);
-if_axi_stream #(.DAT_BITS(2*(OUT_BITS+2))) mult_out_if(i_clk);
-
-logic [MAX_IN_BITS:0] c1, c2, c3, c4, c2_;
-
-
-typedef enum {IDLE, S0, S1, S2, FINISHED, WAIT_MULT} state_t;
-state_t state, prev_state;
-
-always_ff @ (posedge i_clk) begin
-  if (i_rst) begin
-    o_rdy <= 0;
-    o_dat <= 0;
-    o_val <= 0;
-    state <= IDLE;
-    prev_state <= IDLE;
-    c1 <= 0;
-    c2 <= 0;
-    c3 <= 0;
-    c4 <= 0;
-    mult_in_if.reset_source();
-    mult_out_if.rdy <= 1;
-    o_ctl <= 0;
-    ctl_r <= 0;
-  end else begin
-    mult_out_if.rdy <= 1;
-    case (state)
-      {IDLE}: begin
-        o_rdy <= 1;
-        o_val <= 0;
-        c4 <= i_dat;
-        if (i_val && o_rdy) begin
-          o_rdy <= 0;
-          state <= WAIT_MULT;
-          mult_in_if.val <= 1;
-          ctl_r[0] <= i_ctl;
-          mult_in_if.dat[0 +: OUT_BITS+1] <= i_dat >> (K-1);
-          mult_in_if.dat[OUT_BITS+1 +: OUT_BITS+1] <= U;
-          prev_state <= S0;
-          c2_ <= (i_dat >> (K - 1))*U; // Using multiplier interface
-        end
-      end
-      {S0}: begin
-        c3 <= c2 >> (K + 1);
-        state <= S1;
-        ctl_r[1] <= ctl_r[0];
-      end
-      {S1}: begin
-        mult_in_if.val <= 1;
-        mult_in_if.dat[0 +: OUT_BITS+1] <= c3;
-        mult_in_if.dat[OUT_BITS+1 +: OUT_BITS+1] <= P;
-        state <= WAIT_MULT;
-        prev_state <= S2;
-        ctl_r[2] <= ctl_r[1];
-      end
-      {S2}: begin
-        if (c4 >= P_) begin
-          c4 <= c4 - P_;
-        end else begin
-          state <= FINISHED;
-          o_dat <= c4;
-          o_val <= 1;
-          o_ctl <= ctl_r[2];
-        end
-      end
-      {FINISHED}: begin
-        if (o_val && i_rdy) begin
-          o_val <= 0;
-          state <= IDLE;
-        end
-      end
-      // In this state we are waiting for a multiply to be finished
-      {WAIT_MULT}: begin
-        if (mult_in_if.val && mult_in_if.rdy) mult_in_if.val <= 0;
-        if (mult_out_if.rdy && mult_out_if.val) begin
-          state <= prev_state;
-          case(prev_state)
-            S0: c2 <= mult_out_if.dat;
-            S2: c4 <= c4 - mult_out_if.dat;
-          endcase
-        end
-      end
-    endcase
-  end
-end
-
-// Do the multiplications
-generate
-  if (MULTIPLIER == "ACCUM_MULT") begin: MULTIPLIER_GEN
-    accum_mult # (
-      .BITS_A ( OUT_BITS +8 ),
-      .LEVEL_A ( 6 ),
-      .LEVEL_B ( 4 )
-    )
-    accum_mult (
-      .i_clk ( i_clk ),
-      .i_rst ( i_rst ),
-      .i_dat_a ({7'd0, mult_in_if.dat[0 +: OUT_BITS+1]}) ,
-      .i_dat_b ({7'd0, mult_in_if.dat[OUT_BITS+1 +: OUT_BITS+1]}),
-      .i_val ( mult_in_if.val ),
-      .o_rdy ( mult_in_if.rdy ),
-      .o_dat ( mult_out_if.dat ),
-      .o_val ( mult_out_if.val ),
-      .i_rdy ( mult_out_if.rdy )
-    );
-  end else if (MULTIPLIER == "KARATSUBA") begin
-    localparam LEVEL = 2;
-    logic [LEVEL-1:0] val;
-
-    karatsuba_ofman_mult # (
-      .BITS ( OUT_BITS + 8 ),
-      .LEVEL ( LEVEL )
-    )
-    karatsuba_ofman_mult (
-      .i_clk ( i_clk ),
-      .i_dat_a( {7'd0, mult_in_if.dat[0 +: OUT_BITS+1]}),
-      .i_dat_b( {7'd0, mult_in_if.dat[OUT_BITS+1 +: OUT_BITS+1]} ),
-      .o_dat ( mult_out_if.dat )
-    );
-
-    always_comb begin
-      mult_in_if.rdy = mult_out_if.rdy;
-      mult_out_if.val = val[LEVEL-1];
-    end
-
-    always_ff @ (posedge i_clk) begin
-      if (i_rst) begin
-        val <= 0;
-      end else begin
-        val <= {val, mult_in_if.val};
-      end
-    end
-  end else if (MULTIPLIER == "EXTERNAL") begin
-    always_comb begin
-      o_mult_if.val = mult_in_if.val;
-      o_mult_if.dat = mult_in_if.dat;
-      o_mult_if.sop = mult_in_if.sop;
-      o_mult_if.eop = mult_in_if.eop;
-      o_mult_if.err = mult_in_if.err;
-      o_mult_if.mod = mult_in_if.mod;
-      o_mult_if.ctl = mult_in_if.ctl;
-      mult_in_if.rdy = o_mult_if.rdy;
-
-      mult_out_if.val = i_mult_if.val;
-      mult_out_if.dat = i_mult_if.dat;
-      mult_out_if.sop = i_mult_if.sop;
-      mult_out_if.eop = i_mult_if.eop;
-      mult_out_if.err = i_mult_if.err;
-      mult_out_if.mod = i_mult_if.mod;
-      mult_out_if.ctl = i_mult_if.ctl;
-      i_mult_if.rdy = mult_out_if.rdy;
-    end
-  end else
-    $fatal(1, "%m ERROR: Unknown multiplier type [%s] in barret_mod.sv", MULTIPLIER);
-endgenerate
-initial assert (IN_BITS <= MAX_IN_BITS) else $fatal(1, "%m ERROR: IN_BITS[%d] > MAX_IN_BITS[%d] in barret_mod", IN_BITS, MAX_IN_BITS);
-
+/*
+  Calculates a mod n, using Barrett reduction.
+  Can use either a Karatsuba multiplier, an accumulate-multiply core, or an
+  external multiplier (selected by MULTIPLIER) for the multiplications.
+
+  Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program. If not, see https://www.gnu.org/licenses/.
+ */
+
+// Sequence of operations (one reduction is in flight at a time):
+//   IDLE      - latch i_dat into c4 and request (i_dat >> (K-1)) * U
+//   S0        - c3 = c2 >> (K+1), where c2 is the first product
+//   S1        - request c3 * P
+//   S2        - subtract P from c4 until c4 < P, then present the result
+//   FINISHED  - hold o_dat/o_val/o_ctl until i_rdy
+//   WAIT_MULT - wait for the multiplier result, then continue in prev_state
+module barret_mod #(
+  parameter                OUT_BITS = 256,
+  parameter                CTL_BITS = 8,
+  parameter                IN_BITS = 512,
+  parameter [OUT_BITS-1:0] P = 256'hFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFE_BAAEDCE6_AF48A03B_BFD25E8C_D0364141,
+  parameter                K = $clog2(P),
+  parameter                MULTIPLIER = "EXTERNAL" // [ACCUM_MULT || KARATSUBA || EXTERNAL]
+)(
+  input                       i_clk,
+  input                       i_rst,
+  input [IN_BITS-1:0]         i_dat,
+  input                       i_val,
+  input [CTL_BITS-1:0]        i_ctl,
+  output logic [CTL_BITS-1:0] o_ctl,
+  output logic                o_rdy,
+  output logic [OUT_BITS-1:0] o_dat,
+  output logic                o_val,
+  input                       i_rdy,
+  if_axi_stream.source        o_mult_if, // Operands out, used when MULTIPLIER == "EXTERNAL"
+  if_axi_stream.sink          i_mult_if  // Product in, used when MULTIPLIER == "EXTERNAL"
+);
+
+localparam MAX_IN_BITS = 2*K;
+// Barrett constant: floor(2^(2K) / P)
+localparam [MAX_IN_BITS:0] U = (1 << (2*K)) / P;
+logic [2:0][CTL_BITS-1:0] ctl_r;
+
+if_axi_stream #(.DAT_BITS(2*(OUT_BITS+2))) mult_in_if(i_clk);
+if_axi_stream #(.DAT_BITS(2*(OUT_BITS+2))) mult_out_if(i_clk);
+
+logic [MAX_IN_BITS:0] c2, c3, c4, P_;
+
+// P zero-extended to the width of the working registers
+always_comb begin
+  P_ = 0;
+  P_[OUT_BITS-1:0] = P;
+end
+
+typedef enum {IDLE, S0, S1, S2, FINISHED, WAIT_MULT} state_t;
+// prev_state holds the state to continue in once WAIT_MULT sees a product
+state_t state, prev_state;
+
+always_ff @ (posedge i_clk) begin
+  if (i_rst) begin
+    o_rdy <= 0;
+    o_dat <= 0;
+    o_val <= 0;
+    state <= IDLE;
+    prev_state <= IDLE;
+    c2 <= 0;
+    c3 <= 0;
+    c4 <= 0;
+    mult_in_if.reset_source();
+    mult_out_if.rdy <= 1;
+    o_ctl <= 0;
+    ctl_r <= 0;
+  end else begin
+    mult_out_if.rdy <= 1;
+    case (state)
+      {IDLE}: begin
+        o_rdy <= 1;
+        o_val <= 0;
+        c4 <= i_dat;
+        if (i_val && o_rdy) begin
+          o_rdy <= 0;
+          state <= WAIT_MULT;
+          mult_in_if.val <= 1;
+          ctl_r[0] <= i_ctl;
+          mult_in_if.dat[0 +: OUT_BITS+1] <= i_dat >> (K-1);
+          mult_in_if.dat[OUT_BITS+1 +: OUT_BITS+1] <= U;
+          prev_state <= S0;
+        end
+      end
+      {S0}: begin
+        c3 <= c2 >> (K + 1);
+        state <= S1;
+        ctl_r[1] <= ctl_r[0];
+      end
+      {S1}: begin
+        mult_in_if.val <= 1;
+        mult_in_if.dat[0 +: OUT_BITS+1] <= c3;
+        mult_in_if.dat[OUT_BITS+1 +: OUT_BITS+1] <= P;
+        state <= WAIT_MULT;
+        prev_state <= S2;
+        ctl_r[2] <= ctl_r[1];
+      end
+      {S2}: begin
+        // One subtraction of P per cycle until the remainder is below P
+        if (c4 >= P_) begin
+          c4 <= c4 - P_;
+        end else begin
+          state <= FINISHED;
+          o_dat <= c4;
+          o_val <= 1;
+          o_ctl <= ctl_r[2];
+        end
+      end
+      {FINISHED}: begin
+        if (o_val && i_rdy) begin
+          o_val <= 0;
+          state <= IDLE;
+        end
+      end
+      // In this state we are waiting for a multiply to be finished
+      {WAIT_MULT}: begin
+        if (mult_in_if.val && mult_in_if.rdy) mult_in_if.val <= 0;
+        if (mult_out_if.rdy && mult_out_if.val) begin
+          state <= prev_state;
+          case(prev_state)
+            S0: c2 <= mult_out_if.dat;
+            S2: c4 <= c4 - mult_out_if.dat;
+            default: ; // no other state issues a multiply
+          endcase
+        end
+      end
+    endcase
+  end
+end
+
+// Do the multiplications
+generate
+  if (MULTIPLIER == "ACCUM_MULT") begin: MULTIPLIER_GEN
+    accum_mult # (
+      .BITS_A  ( OUT_BITS +8 ),
+      .LEVEL_A ( 6 ),
+      .LEVEL_B ( 4 )
+    )
+    accum_mult (
+      .i_clk   ( i_clk ),
+      .i_rst   ( i_rst ),
+      .i_dat_a ( {7'd0, mult_in_if.dat[0 +: OUT_BITS+1]} ),
+      .i_dat_b ( {7'd0, mult_in_if.dat[OUT_BITS+1 +: OUT_BITS+1]} ),
+      .i_val   ( mult_in_if.val ),
+      .o_rdy   ( mult_in_if.rdy ),
+      .o_dat   ( mult_out_if.dat ),
+      .o_val   ( mult_out_if.val ),
+      .i_rdy   ( mult_out_if.rdy )
+    );
+
+    // External multiplier interface is unused here; tie it off as the KARATSUBA branch does
+    always_comb begin
+      o_mult_if.val = 0;
+      i_mult_if.rdy = 0;
+    end
+  end else if (MULTIPLIER == "KARATSUBA") begin
+    localparam LEVEL = 2;
+
+    karatsuba_ofman_mult # (
+      .BITS  ( OUT_BITS + 8 ),
+      .LEVEL ( LEVEL )
+    )
+    karatsuba_ofman_mult (
+      .i_clk   ( i_clk ),
+      .i_rst   ( i_rst ),
+      .i_val   ( mult_in_if.val ),
+      .i_ctl   ( '0 ), // NOTE(review): assumed to be an input (i_ prefix); tied off instead of left floating
+      .i_rdy   ( mult_out_if.rdy ),
+      .o_rdy   ( mult_in_if.rdy ),
+      .o_val   ( mult_out_if.val ),
+      .i_dat_a ( {7'd0, mult_in_if.dat[0 +: OUT_BITS+1]} ),
+      .i_dat_b ( {7'd0, mult_in_if.dat[OUT_BITS+1 +: OUT_BITS+1]} ),
+      .o_dat   ( mult_out_if.dat )
+    );
+
+    // External multiplier interface is unused here
+    always_comb begin
+      o_mult_if.val = 0;
+      i_mult_if.rdy = 0;
+    end
+  end else if (MULTIPLIER == "EXTERNAL") begin
+    // Forward the request to o_mult_if and take the product from i_mult_if
+    always_comb begin
+      o_mult_if.val = mult_in_if.val;
+      o_mult_if.dat = mult_in_if.dat;
+      o_mult_if.sop = mult_in_if.sop;
+      o_mult_if.eop = mult_in_if.eop;
+      o_mult_if.err = mult_in_if.err;
+      o_mult_if.mod = mult_in_if.mod;
+      o_mult_if.ctl = mult_in_if.ctl;
+      mult_in_if.rdy = o_mult_if.rdy;
+
+      mult_out_if.val = i_mult_if.val;
+      mult_out_if.dat = i_mult_if.dat;
+      mult_out_if.sop = i_mult_if.sop;
+      mult_out_if.eop = i_mult_if.eop;
+      mult_out_if.err = i_mult_if.err;
+      mult_out_if.mod = i_mult_if.mod;
+      mult_out_if.ctl = i_mult_if.ctl;
+      i_mult_if.rdy = mult_out_if.rdy;
+    end
+  end else
+    $fatal(1, "%m ERROR: Unknown multiplier type [%s] in barret_mod.sv", MULTIPLIER);
+endgenerate
+initial assert (IN_BITS <= MAX_IN_BITS) else $fatal(1, "%m ERROR: IN_BITS[%d] > MAX_IN_BITS[%d] in barret_mod", IN_BITS, MAX_IN_BITS);
+
 endmodule
\ No newline at end of file