From b1bdc38423a216f16c8ec2c8f3b8f70ed0238d90 Mon Sep 17 00:00:00 2001 From: bsdevlin Date: Wed, 9 Sep 2020 22:02:23 -0400 Subject: [PATCH] add Xilinx FIFO and fix flow control --- aws/cl_zcash/build/scripts/synth_cl_zcash.tcl | 2 + aws/cl_zcash/ip/cl_sde_ip_setup.tcl | 4 +- aws/cl_zcash/ip/fifo_generator_0.xci | 583 ++++++++++++ aws/cl_zcash/ip/mk_clean | 3 +- aws/cl_zcash/ip/run_cl_sde_ip_flow | 2 + zcash_fpga/src/rtl/control/control_top.sv | 839 +++++++++--------- zcash_fpga/src/rtl/top/zcash_fpga_top.sv | 553 ++++++------ 7 files changed, 1322 insertions(+), 664 deletions(-) create mode 100644 aws/cl_zcash/ip/fifo_generator_0.xci diff --git a/aws/cl_zcash/build/scripts/synth_cl_zcash.tcl b/aws/cl_zcash/build/scripts/synth_cl_zcash.tcl index 8ff8c3f..b6e0c2d 100644 --- a/aws/cl_zcash/build/scripts/synth_cl_zcash.tcl +++ b/aws/cl_zcash/build/scripts/synth_cl_zcash.tcl @@ -75,6 +75,8 @@ read_ip [ list \ puts "AWS FPGA: Generating IP blocks"; +upgrade_ip [get_ips *] + set_property generate_synth_checkpoint false [get_files axis_dwidth_converter_64_to_8.xci] set_property generate_synth_checkpoint false [get_files axis_dwidth_converter_8_to_64.xci] set_property generate_synth_checkpoint false [get_files axis_dwidth_converter_4_to_8.xci] diff --git a/aws/cl_zcash/ip/cl_sde_ip_setup.tcl b/aws/cl_zcash/ip/cl_sde_ip_setup.tcl index 83227d9..fd8e75c 100644 --- a/aws/cl_zcash/ip/cl_sde_ip_setup.tcl +++ b/aws/cl_zcash/ip/cl_sde_ip_setup.tcl @@ -10,6 +10,7 @@ axis_dwidth_converter_48_to_8/axis_dwidth_converter_48_to_8.xci axis_dwidth_converter_4_to_8/axis_dwidth_converter_4_to_8.xci axis_dwidth_converter_8_to_4/axis_dwidth_converter_8_to_4.xci ila_2/ila_2.xci +fifo_generator_0/fifo_generator_0.xci } upgrade_ip [get_ips *] @@ -21,4 +22,5 @@ generate_target all [get_files axis_dwidth_converter_8_to_64/axis_dwidth_conver generate_target all [get_files axis_dwidth_converter_48_to_8/axis_dwidth_converter_48_to_8.xci] generate_target all [get_files axis_dwidth_converter_4_to_8/axis_dwidth_converter_4_to_8.xci] generate_target all [get_files axis_dwidth_converter_8_to_4/axis_dwidth_converter_8_to_4.xci] -generate_target all [get_files ila_2/ila_2.xci] \ No newline at end of file +generate_target all [get_files ila_2/ila_2.xci] +generate_target all [get_files fifo_generator_0/fifo_generator_0.xci] \ No newline at end of file diff --git a/aws/cl_zcash/ip/fifo_generator_0.xci b/aws/cl_zcash/ip/fifo_generator_0.xci new file mode 100644 index 0000000..f7b83cd --- /dev/null +++ b/aws/cl_zcash/ip/fifo_generator_0.xci @@ -0,0 +1,583 @@ + + + xilinx.com + xci + unknown + 1.0 + + + fifo_generator_0 + + + + + + 100000000 + 0 + 0.000 + + + 100000000 + 0 + 0.000 + 1 + 0 + 0 + 0 + + 1 + 100000000 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 1 + 1 + 1 + 1 + 1 + 0.000 + AXI4LITE + READ_WRITE + 0 + 0 + 0 + 0 + 0 + + 100000000 + 0 + 0 + 0 + 0 + 0 + undef + 0.000 + 0 + 0 + 0 + 0 + + + + 1000000 + 0 + 0.000 + + 100000000 + 0 + 0.000 + 0 + 1 + 0 + 0 + 0 + + 1 + 100000000 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 1 + 1 + 1 + 1 + 1 + 0.000 + AXI4LITE + READ_WRITE + 0 + 0 + 0 + 0 + 0 + + 100000000 + 0 + 0 + 0 + 0 + 0 + undef + 0.000 + 0 + 0 + 0 + 0 + + + + 1000000 + 0 + 0.000 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 8 + 1 + 1 + 1 + 1 + 4 + 0 + 32 + 1 + 1 + 1 + 64 + 1 + 8 + 1 + 1 + 1 + 1 + 0 + 0 + 9 + BlankString + 66 + 1 + 32 + 64 + 1 + 64 + 2 + 0 + 66 + 0 + 1 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + virtexuplus + 0 + 0 + 0 + 1 + 0 + 0 + 0 + 0 + 1 + 0 + 1 + 0 + 0 + 0 + 0 + 1 + 0 + 1 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 1 + 0 + 0 + 0 + 0 + 0 + 6 + 1 + 2 + 1 + 2 + 1 + 2 + 0 + 0 + 4 + BlankString + 1 + 0 + 0 + 0 + 0 + 1 + 512x72 + 1kx18 + 512x36 + 512x72 + 512x36 + 512x72 + 512x36 + 6 + 1022 + 1022 + 1022 + 1022 + 1022 + 1022 + 7 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 511 + 1023 + 1023 + 1023 + 1023 + 1023 + 1023 + 510 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 9 + 512 + 1 + 9 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 2 + 0 + 0 + 0 + 0 + 1 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 1 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 9 + 512 + 1024 + 16 + 1024 + 16 + 1024 + 16 + 1 + 9 + 10 + 4 + 10 + 4 + 10 + 4 + 1 + 32 + 0 + 0 + false + false + false + 0 + 0 + Slave_Interface_Clock_Enable + Common_Clock + fifo_generator_0 + 64 + false + 9 + false + false + 0 + 6 + 1022 + 1022 + 1022 + 1022 + 1022 + 1022 + 7 + false + false + false + false + false + false + false + false + false + Hard_ECC + false + false + false + false + false + false + true + false + false + true + Data_FIFO + Data_FIFO + Data_FIFO + Data_FIFO + Data_FIFO + Data_FIFO + Common_Clock_Block_RAM + Common_Clock_Distributed_RAM + Common_Clock_Block_RAM + Common_Clock_Distributed_RAM + Common_Clock_Block_RAM + Common_Clock_Distributed_RAM + Independent_Clocks_Builtin_FIFO + 0 + 511 + 1023 + 1023 + 1023 + 1023 + 1023 + 1023 + 510 + false + false + false + 0 + Native + false + false + false + false + false + false + false + false + false + false + false + false + false + false + 66 + 512 + 1024 + 16 + 1024 + 16 + 1024 + 16 + false + 66 + 512 + Embedded_Reg + false + false + Active_High + Active_High + AXI4 + First_Word_Fall_Through + No_Programmable_Empty_Threshold + No_Programmable_Empty_Threshold + No_Programmable_Empty_Threshold + No_Programmable_Empty_Threshold + No_Programmable_Empty_Threshold + No_Programmable_Empty_Threshold + No_Programmable_Empty_Threshold + No_Programmable_Full_Threshold + No_Programmable_Full_Threshold + No_Programmable_Full_Threshold + No_Programmable_Full_Threshold + No_Programmable_Full_Threshold + No_Programmable_Full_Threshold + No_Programmable_Full_Threshold + READ_WRITE + 0 + 1 + false + 9 + Fully_Registered + Fully_Registered + Fully_Registered + Fully_Registered + Fully_Registered + Fully_Registered + true + Synchronous_Reset + false + 1 + 0 + 0 + 1 + 1 + 4 + false + false + Active_High + Active_High + true + true + false + false + false + Active_High + 0 + false + Active_High + 1 + false + 9 + false + FIFO + false + false + false + false + FIFO + FIFO + 2 + 2 + false + FIFO + FIFO + FIFO + virtexuplus + + + xcvu9p + flga2104 + VERILOG + + VERILOG + -2 + + E + TRUE + TRUE + IP_Flow + 4 + TRUE + . + + . + 2019.1 + OUT_OF_CONTEXT + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/aws/cl_zcash/ip/mk_clean b/aws/cl_zcash/ip/mk_clean index a0c128d..e357930 100755 --- a/aws/cl_zcash/ip/mk_clean +++ b/aws/cl_zcash/ip/mk_clean @@ -6,4 +6,5 @@ rm -rf axis_dwidth_converter_48_to_8/ rm -rf axi_fifo_mm_s_lite/ rm -rf axis_dwidth_converter_4_to_8/ rm -rf axis_dwidth_converter_8_to_4/ -rm -rf ila_2/ \ No newline at end of file +rm -rf ila_2/ +rm -rf fifo_generator_0.xci/ \ No newline at end of file diff --git a/aws/cl_zcash/ip/run_cl_sde_ip_flow b/aws/cl_zcash/ip/run_cl_sde_ip_flow index d529bb2..b5e5324 100755 --- a/aws/cl_zcash/ip/run_cl_sde_ip_flow +++ b/aws/cl_zcash/ip/run_cl_sde_ip_flow @@ -6,6 +6,7 @@ mkdir axis_dwidth_converter_48_to_8 mkdir axis_dwidth_converter_4_to_8 mkdir axis_dwidth_converter_8_to_4 mkdir ila_2 +mkdir fifo_generator_0 cp axi_fifo_mm_s_0.xci axi_fifo_mm_s_0/axi_fifo_mm_s_0.xci cp axi_fifo_mm_s_lite.xci axi_fifo_mm_s_lite/axi_fifo_mm_s_lite.xci @@ -15,6 +16,7 @@ cp axis_dwidth_converter_48_to_8.xci axis_dwidth_converter_48_to_8/axis_dwidth_c cp axis_dwidth_converter_4_to_8.xci axis_dwidth_converter_4_to_8/axis_dwidth_converter_4_to_8.xci cp axis_dwidth_converter_8_to_4.xci axis_dwidth_converter_8_to_4/axis_dwidth_converter_8_to_4.xci cp ila_2.xci ila_2/ila_2.xci +cp fifo_generator_0.xci fifo_generator_0/fifo_generator_0.xci vivado -mode batch -source cl_sde_ip_setup.tcl diff --git a/zcash_fpga/src/rtl/control/control_top.sv b/zcash_fpga/src/rtl/control/control_top.sv index 65c0f38..d3d4078 100644 --- a/zcash_fpga/src/rtl/control/control_top.sv +++ b/zcash_fpga/src/rtl/control/control_top.sv @@ -1,415 +1,426 @@ -/* - This module is the top level for the FPGA interface to SW. It takes in commands - from SW, running the commands, and then building the replies back to SW. - - Copyright (C) 2019 Benjamin Devlin and Zcash Foundation - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -module control_top - import zcash_fpga_pkg::*, equihash_pkg::*; -#( - parameter DAT_BYTS = 8, // Only tested at 8 byte data width - parameter [63:0] BUILD_HOST = "test", - parameter [63:0] BUILD_DATE = "20180311" -)( - input i_clk, i_rst, - // User is able to reset custom logic on FPGA - output logic o_usr_rst, - // Interface inputs and outputs - if_axi_stream.sink rx_if, - if_axi_stream.source tx_if, - - // Used when verifying equihash soltion - if_axi_stream.source o_equihash_if, - input equihash_bm_t i_equihash_mask, - input i_equihash_mask_val, - - // Driving secp256k1 core - if_axi_stream.source o_secp256k1_if, - if_axi_stream.sink i_secp256k1_if -); - -localparam DAT_BITS = DAT_BYTS*8; -localparam MAX_BYT_MSG = 256; // Max bytes in a reply message - -logic rst_int; -always_comb rst_int = i_rst || o_usr_rst; - -// When a command comes in it is put through a clock crossing, and then stored in a command -// FIFO to be processed. There are two FIFOS - one for processing status / reset commands (msg_type == 0), -// and one for everything else. This is so we can process these messages even if we are -// running something else. - -if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BYTS(1)) rx_int0_if (i_clk); -if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BYTS(1)) rx_int1_if (i_clk); - -if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BYTS(1)) rx_typ0_if (i_clk); -if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BYTS(1)) rx_typ1_if (i_clk); - -if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BYTS(1)) tx_arb_in_if [2] (i_clk); -if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BYTS(1)) tx_int_if (i_clk); - - -typedef enum {TYP0_IDLE = 0, - TYP0_SEND_STATUS = 1, - TYP0_RESET_FPGA = 2, - TYP0_SEND_IGNORE = 3, - TYP0_IGNORE = 4} typ0_msg_state_t; - -typ0_msg_state_t typ0_msg_state; - -typedef enum {TYP1_IDLE = 0, - TYP1_VERIFY_EQUIHASH = 1, - TYP1_VERIFY_SECP256K1 = 2, - TYP1_SEND_IGNORE = 3, - TYP1_IGNORE = 4} typ1_msg_state_t; - -typ1_msg_state_t typ1_msg_state; - -header_t header, header0, header1, header0_l, header1_l; -logic verify_equihash_rpl_val; - -logic [7:0] reset_cnt; -logic [$clog2(MAX_BYT_MSG) -1:0] typ0_wrd_cnt, typ1_wrd_cnt; -logic [MAX_BYT_MSG*8 -1:0] typ0_msg, typ1_msg; -logic [63:0] equihash_index; -logic equihash_index_val, rx_typ1_if_rdy; -logic sop_l, eop_l; -logic eop_typ0_l, eop_typ1_l; - -fpga_state_t fpga_state; -always_comb begin - fpga_state = 0; - fpga_state.error = 0; - fpga_state.typ1_state = typ1_msg_state; - header = rx_if.dat; - header0 = rx_typ0_if.dat; - header1 = rx_typ1_if.dat; -end - -// Logic for processing msg_type == 0 messages -always_ff @ (posedge i_clk) begin - if (i_rst) begin - rx_typ0_if.rdy <= 0; - typ0_msg_state <= TYP0_IDLE; - header0_l <= 0; - tx_arb_in_if[0].reset_source(); - typ0_wrd_cnt <= 0; - o_usr_rst <= 0; - reset_cnt <= 0; - eop_typ0_l <= 0; - typ0_msg <= 0; - end else begin - rx_typ0_if.rdy <= 1; - case (typ0_msg_state) - - TYP0_IDLE: begin - if (rx_typ0_if.val && rx_typ0_if.rdy) begin - header0_l <= header0; - rx_typ0_if.rdy <= 0; - case(header0.cmd) - RESET_FPGA: begin - typ0_msg <= get_fpga_reset_rpl(); - typ0_wrd_cnt <= $bits(fpga_reset_rpl_t)/8; - typ0_msg_state <= TYP0_RESET_FPGA; - o_usr_rst <= 1; - reset_cnt <= -1; - end - FPGA_STATUS: begin - typ0_msg <= get_fpga_status_rpl(BUILD_HOST, BUILD_DATE, fpga_state); - typ0_wrd_cnt <= $bits(fpga_status_rpl_t)/8; - typ0_msg_state <= TYP0_SEND_STATUS; - end - default: begin - typ0_msg <= get_fpga_ignore_rpl(header0); - typ0_wrd_cnt <= $bits(fpga_ignore_rpl_t)/8; - eop_typ0_l <= rx_typ0_if.eop; - typ0_msg_state <= TYP0_SEND_IGNORE; - end - endcase - end - end - TYP0_SEND_STATUS: begin - send_typ0_message($bits(fpga_status_rpl_t)/8); - end - TYP0_RESET_FPGA: begin - rx_typ0_if.rdy <= 0; - if (reset_cnt != 0) - reset_cnt <= reset_cnt - 1; - if (reset_cnt >> 4 == 0) - o_usr_rst <= 0; - if (reset_cnt == 0) begin - send_typ0_message($bits(fpga_reset_rpl_t)/8); - end - end - TYP0_SEND_IGNORE: begin - send_typ0_message($bits(fpga_ignore_rpl_t)/8, eop_typ0_l ? TYP0_IDLE : TYP0_IGNORE); - end - TYP0_IGNORE: begin - rx_typ0_if.rdy <= 1; - if (rx_typ0_if.rdy && rx_typ0_if.eop && rx_typ0_if.val) - typ0_msg_state <= TYP0_IDLE; - end - endcase - end -end - -// Task to help build reply messages. Assume no message will be more than MAX_BYT_MSG bytes -task send_typ0_message(input logic [$clog2(MAX_BYT_MSG)-1:0] msg_size, - input typ0_msg_state_t nxt_state = TYP0_IDLE); - rx_typ0_if.rdy <= 0; - if (~tx_arb_in_if[0].val || (tx_arb_in_if[0].rdy && tx_arb_in_if[0].val)) begin - tx_arb_in_if[0].dat <= typ0_msg; - tx_arb_in_if[0].val <= 1; - tx_arb_in_if[0].sop <= typ0_wrd_cnt == msg_size; - tx_arb_in_if[0].eop <= typ0_wrd_cnt <= DAT_BYTS; - tx_arb_in_if[0].mod <= typ0_wrd_cnt < DAT_BYTS ? typ0_wrd_cnt : 0; - typ0_wrd_cnt <= (typ0_wrd_cnt > DAT_BYTS) ? (typ0_wrd_cnt - DAT_BYTS) : 0; - typ0_msg <= typ0_msg >> DAT_BITS; - if (typ0_wrd_cnt == 0) begin - tx_arb_in_if[0].val <= 0; - typ0_msg_state <= nxt_state; - end - end -endtask - -always_comb begin - case(typ1_msg_state) - TYP1_IDLE: rx_typ1_if.rdy = rx_typ1_if_rdy; - VERIFY_EQUIHASH: rx_typ1_if.rdy = rx_typ1_if_rdy && o_equihash_if.rdy; - default: rx_typ1_if.rdy = rx_typ1_if_rdy; - endcase -end - -always_comb begin - i_secp256k1_if.rdy = (typ1_msg_state == TYP1_VERIFY_SECP256K1) && tx_arb_in_if[1].rdy; -end -// Logic for processing msg_type == 1 messages -always_ff @ (posedge i_clk) begin - if (rst_int) begin - rx_typ1_if_rdy <= 0; - typ1_msg_state <= TYP1_IDLE; - header1_l <= 0; - tx_arb_in_if[1].reset_source(); - o_equihash_if.reset_source(); - typ1_wrd_cnt <= 0; - equihash_index <= 0; - verify_equihash_rpl_val <= 0; - equihash_index_val <= 0; - sop_l <= 0; - eop_typ1_l <= 0; - typ1_msg <= 0; - o_secp256k1_if.reset_source(); - eop_l <= 0; - end else begin - case (typ1_msg_state) - TYP1_IDLE: begin - rx_typ1_if_rdy <= 1; - verify_equihash_rpl_val <= 0; - equihash_index_val <= 0; - sop_l <= 0; - if (rx_typ1_if.val && rx_typ1_if.rdy) begin - header1_l <= header1; - rx_typ1_if_rdy <= 0; - case(header1.cmd) - VERIFY_EQUIHASH: begin - rx_typ1_if_rdy <= 1; - typ1_wrd_cnt <= $bits(verify_equihash_rpl_t)/8; - typ1_msg_state <= TYP1_VERIFY_EQUIHASH; - if (~ENB_VERIFY_EQUIHASH) begin - typ1_msg <= get_fpga_ignore_rpl(header1); - typ1_wrd_cnt <= $bits(fpga_ignore_rpl_t)/8; - eop_typ1_l <= rx_typ1_if.eop; - typ1_msg_state <= TYP1_SEND_IGNORE; - end - end - VERIFY_SECP256K1_SIG: begin - rx_typ1_if_rdy <= o_secp256k1_if.rdy; - o_secp256k1_if.copy_if(rx_typ1_if.dat, rx_typ1_if.val, rx_typ1_if.sop, rx_typ1_if.eop); - typ1_msg_state <= TYP1_VERIFY_SECP256K1; - if (~ENB_VERIFY_SECP256K1_SIG) begin - typ1_msg <= get_fpga_ignore_rpl(header1); - typ1_wrd_cnt <= $bits(fpga_ignore_rpl_t)/8; - eop_typ1_l <= rx_typ1_if.eop; - typ1_msg_state <= TYP1_SEND_IGNORE; - end - end - default: begin - typ1_msg <= get_fpga_ignore_rpl(header1); - typ1_wrd_cnt <= $bits(fpga_ignore_rpl_t)/8; - eop_typ1_l <= rx_typ1_if.eop; - typ1_msg_state <= TYP1_SEND_IGNORE; - end - endcase - end - end - TYP1_VERIFY_EQUIHASH: begin - if (rx_typ1_if.eop && rx_typ1_if.val && rx_typ1_if.rdy) - rx_typ1_if_rdy <= 0; - - if (~equihash_index_val) begin - if (rx_typ1_if.rdy && rx_typ1_if.val) begin - equihash_index <= rx_typ1_if.dat; - equihash_index_val <= 1; - end - end else begin - // First load block data (this might be bypassed if loading from memory) - if (~o_equihash_if.val || (o_equihash_if.rdy && o_equihash_if.val)) begin - o_equihash_if.copy_if(rx_typ1_if.dat, rx_typ1_if.val, ~sop_l, rx_typ1_if.eop, rx_typ1_if.err, rx_typ1_if.mod); - // First cycle has .sop set - if (rx_typ1_if.val) sop_l <= 1; - end - end - - // Wait for reply with result - if (i_equihash_mask_val && ~verify_equihash_rpl_val) begin - typ1_msg <= get_verify_equihash_rpl(i_equihash_mask, equihash_index); - verify_equihash_rpl_val <= 1; - end - - // Send result - if (verify_equihash_rpl_val) begin - send_typ1_message($bits(verify_equihash_rpl_t)/8); - end - end - - // The command header is sent through to output - TYP1_VERIFY_SECP256K1: begin - rx_typ1_if_rdy <= o_secp256k1_if.rdy; - if (~eop_l && ~o_secp256k1_if.val || (o_secp256k1_if.rdy && o_secp256k1_if.val)) begin - o_secp256k1_if.copy_if(rx_typ1_if.dat, rx_typ1_if.val, rx_typ1_if.sop, rx_typ1_if.eop, rx_typ1_if.err, rx_typ1_if.mod); - eop_l <= rx_typ1_if.eop && rx_typ1_if.val; - if (rx_typ1_if.eop && rx_typ1_if.val) - rx_typ1_if_rdy <= 0; - end - - if (~tx_arb_in_if[1].val || (tx_arb_in_if[1].rdy && tx_arb_in_if[1].val)) begin - tx_arb_in_if[1].copy_if(i_secp256k1_if.dat, i_secp256k1_if.val, i_secp256k1_if.sop, i_secp256k1_if.eop, 0, i_secp256k1_if.mod); - end - - if (tx_arb_in_if[1].val && tx_arb_in_if[1].rdy && tx_arb_in_if[1].eop) begin - typ1_msg_state <= TYP1_IDLE; - end - - end - - TYP1_SEND_IGNORE: begin - send_typ1_message($bits(fpga_ignore_rpl_t)/8, eop_typ1_l ? TYP1_IDLE : TYP1_IGNORE); - end - TYP1_IGNORE: begin - rx_typ1_if_rdy <= 1; - if (rx_typ1_if.rdy && rx_typ1_if.eop && rx_typ1_if.val) - typ1_msg_state <= TYP1_IDLE; - end - endcase - end -end - -// Task to help build reply messages. Assume no message will be more than MAX_BYT_MSG bytes -task send_typ1_message(input logic [$clog2(MAX_BYT_MSG)-1:0] msg_size, - input typ1_msg_state_t nxt_state = TYP1_IDLE); - rx_typ1_if_rdy <= 0; - if (~tx_arb_in_if[1].val || (tx_arb_in_if[1].rdy && tx_arb_in_if[1].val)) begin - tx_arb_in_if[1].dat <= typ1_msg; - tx_arb_in_if[1].val <= 1; - tx_arb_in_if[1].sop <= typ1_wrd_cnt == msg_size; - tx_arb_in_if[1].eop <= typ1_wrd_cnt <= DAT_BYTS; - tx_arb_in_if[1].mod <= typ1_wrd_cnt < DAT_BYTS ? typ1_wrd_cnt : 0; - typ1_wrd_cnt <= (typ1_wrd_cnt > DAT_BYTS) ? (typ1_wrd_cnt - DAT_BYTS) : 0; - typ1_msg <= typ1_msg >> DAT_BITS; - if (typ1_wrd_cnt == 0) begin - tx_arb_in_if[1].val <= 0; - typ1_msg_state <= nxt_state; - end - end -endtask - -// Logic to mux the packet depending on its command type -logic msg_type, msg_type_l; -always_comb begin - rx_int0_if.copy_if_comb(rx_if.dat, 0, rx_if.sop, rx_if.eop, 0, rx_if.mod, 0); - rx_int1_if.copy_if_comb(rx_if.dat, 0, rx_if.sop, rx_if.eop, 0, rx_if.mod, 0); - - rx_if.rdy = 0; - - if (rx_if.sop && rx_if.val) begin - if(header.cmd[8 +: 8] == 8'd0) begin - msg_type = 0; - rx_int0_if.val = rx_if.val; - rx_if.rdy = rx_int0_if.rdy; - end else begin - msg_type = 1; - rx_int1_if.val = rx_if.val; - rx_if.rdy = rx_int1_if.rdy; - end - end else begin - rx_int0_if.val = rx_if.val && (msg_type_l == 0); - rx_int1_if.val = rx_if.val && (msg_type_l == 1); - rx_if.rdy = (msg_type_l == 0) ? rx_int0_if.rdy : rx_int1_if.rdy; - msg_type = msg_type_l; - end -end - -always_ff @ (posedge i_clk) begin - if (i_rst) begin - msg_type_l <= 0; - end else begin - if (rx_if.val && rx_if.rdy) begin - if (rx_if.sop) - msg_type_l <= msg_type; - end - end -end - -// FIFO control queues for different message types - -axi_stream_fifo #( - .SIZE ( 64 ), - .DAT_BITS ( DAT_BITS ) -) -cmd_fifo0 ( - .i_clk ( i_clk ), - .i_rst ( rst_int ), - .i_axi ( rx_int0_if ), - .o_axi ( rx_typ0_if ) -); - -axi_stream_fifo #( - .SIZE ( 64 ), - .DAT_BITS ( DAT_BITS ) -) -cmd_fifo1 ( - .i_clk ( i_clk ), - .i_rst ( rst_int ), - .i_axi ( rx_int1_if ), - .o_axi ( rx_typ1_if ) -); - -// Arbitrator for sending messages back -packet_arb # ( - .NUM_IN ( 2 ), - .DAT_BYTS ( DAT_BYTS ), - .CTL_BITS ( 8 ) -) -packet_arb_tx ( - .i_clk ( i_clk ), - .i_rst ( rst_int ), - - .i_axi ( tx_arb_in_if ), - .o_axi ( tx_if ) -); - +/* + This module is the top level for the FPGA interface to SW. It takes in commands + from SW, running the commands, and then building the replies back to SW. + + Copyright (C) 2019 Benjamin Devlin and Zcash Foundation + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +module control_top + import zcash_fpga_pkg::*, equihash_pkg::*; +#( + parameter DAT_BYTS = 8, // Only tested at 8 byte data width + parameter [63:0] BUILD_HOST = "test", + parameter [63:0] BUILD_DATE = "20180311" +)( + input i_clk, i_rst, + // User is able to reset custom logic on FPGA + output logic o_usr_rst, + // Interface inputs and outputs + if_axi_stream.sink rx_if, + if_axi_stream.source tx_if, + + // Used when verifying equihash soltion + if_axi_stream.source o_equihash_if, + input equihash_bm_t i_equihash_mask, + input i_equihash_mask_val, + + // Driving secp256k1 core + if_axi_stream.source o_secp256k1_if, + if_axi_stream.sink i_secp256k1_if +); + +localparam DAT_BITS = DAT_BYTS*8; +localparam MAX_BYT_MSG = 256; // Max bytes in a reply message + +logic rst_int; +always_comb rst_int = i_rst || o_usr_rst; + +// When a command comes in it is put through a clock crossing, and then stored in a command +// FIFO to be processed. There are two FIFOS - one for processing status / reset commands (msg_type == 0), +// and one for everything else. This is so we can process these messages even if we are +// running something else. + +if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BYTS(1)) rx_int0_if (i_clk); +if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BYTS(1)) rx_int1_if (i_clk); + +if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BYTS(1)) rx_typ0_if (i_clk); +if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BYTS(1)) rx_typ1_if (i_clk); + +if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BYTS(1)) tx_arb_in_if [2] (i_clk); +if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BYTS(1)) tx_int_if (i_clk); + + +typedef enum {TYP0_IDLE = 0, + TYP0_SEND_STATUS = 1, + TYP0_RESET_FPGA = 2, + TYP0_SEND_IGNORE = 3, + TYP0_IGNORE = 4} typ0_msg_state_t; + +typ0_msg_state_t typ0_msg_state; + +typedef enum {TYP1_IDLE = 0, + TYP1_VERIFY_EQUIHASH = 1, + TYP1_VERIFY_SECP256K1 = 2, + TYP1_SEND_IGNORE = 3, + TYP1_IGNORE = 4} typ1_msg_state_t; + +typ1_msg_state_t typ1_msg_state; + +header_t header, header0, header1, header0_l, header1_l; +logic verify_equihash_rpl_val; + +logic [7:0] reset_cnt; +logic [$clog2(MAX_BYT_MSG) -1:0] typ0_wrd_cnt, typ1_wrd_cnt; +logic [MAX_BYT_MSG*8 -1:0] typ0_msg, typ1_msg; +logic [63:0] equihash_index; +logic equihash_index_val, rx_typ1_if_rdy; +logic sop_l, eop_l; +logic eop_typ0_l, eop_typ1_l; + +fpga_state_t fpga_state; +always_comb begin + fpga_state = 0; + fpga_state.error = 0; + fpga_state.typ1_state = typ1_msg_state; + header = rx_if.dat; + header0 = rx_typ0_if.dat; + header1 = rx_typ1_if.dat; +end + +// Logic for processing msg_type == 0 messages +always_ff @ (posedge i_clk) begin + if (i_rst) begin + rx_typ0_if.rdy <= 0; + typ0_msg_state <= TYP0_IDLE; + header0_l <= 0; + tx_arb_in_if[0].reset_source(); + typ0_wrd_cnt <= 0; + o_usr_rst <= 0; + reset_cnt <= 0; + eop_typ0_l <= 0; + typ0_msg <= 0; + end else begin + rx_typ0_if.rdy <= 1; + + if (tx_arb_in_if[0].rdy) tx_arb_in_if[0].val <= 0; + + case (typ0_msg_state) + + TYP0_IDLE: begin + if (rx_typ0_if.val && rx_typ0_if.rdy) begin + header0_l <= header0; + rx_typ0_if.rdy <= 0; + case(header0.cmd) + RESET_FPGA: begin + typ0_msg <= get_fpga_reset_rpl(); + typ0_wrd_cnt <= $bits(fpga_reset_rpl_t)/8; + typ0_msg_state <= TYP0_RESET_FPGA; + o_usr_rst <= 1; + reset_cnt <= -1; + end + FPGA_STATUS: begin + typ0_msg <= get_fpga_status_rpl(BUILD_HOST, BUILD_DATE, fpga_state); + typ0_wrd_cnt <= $bits(fpga_status_rpl_t)/8; + typ0_msg_state <= TYP0_SEND_STATUS; + end + default: begin + typ0_msg <= get_fpga_ignore_rpl(header0); + typ0_wrd_cnt <= $bits(fpga_ignore_rpl_t)/8; + eop_typ0_l <= rx_typ0_if.eop; + typ0_msg_state <= TYP0_SEND_IGNORE; + end + endcase + end + end + TYP0_SEND_STATUS: begin + send_typ0_message($bits(fpga_status_rpl_t)/8); + end + TYP0_RESET_FPGA: begin + rx_typ0_if.rdy <= 0; + if (reset_cnt != 0) + reset_cnt <= reset_cnt - 1; + if (reset_cnt >> 4 == 0) + o_usr_rst <= 0; + if (reset_cnt == 0) begin + send_typ0_message($bits(fpga_reset_rpl_t)/8); + end + end + TYP0_SEND_IGNORE: begin + send_typ0_message($bits(fpga_ignore_rpl_t)/8, eop_typ0_l ? TYP0_IDLE : TYP0_IGNORE); + end + TYP0_IGNORE: begin + rx_typ0_if.rdy <= 1; + if (rx_typ0_if.rdy && rx_typ0_if.eop && rx_typ0_if.val) + typ0_msg_state <= TYP0_IDLE; + end + endcase + end +end + +// Task to help build reply messages. Assume no message will be more than MAX_BYT_MSG bytes +task send_typ0_message(input logic [$clog2(MAX_BYT_MSG)-1:0] msg_size, + input typ0_msg_state_t nxt_state = TYP0_IDLE); + rx_typ0_if.rdy <= 0; + if (~tx_arb_in_if[0].val || (tx_arb_in_if[0].rdy && tx_arb_in_if[0].val)) begin + tx_arb_in_if[0].dat <= typ0_msg; + tx_arb_in_if[0].val <= 1; + tx_arb_in_if[0].sop <= typ0_wrd_cnt == msg_size; + tx_arb_in_if[0].eop <= typ0_wrd_cnt <= DAT_BYTS; + tx_arb_in_if[0].mod <= typ0_wrd_cnt < DAT_BYTS ? typ0_wrd_cnt : 0; + typ0_wrd_cnt <= (typ0_wrd_cnt > DAT_BYTS) ? (typ0_wrd_cnt - DAT_BYTS) : 0; + typ0_msg <= typ0_msg >> DAT_BITS; + if (typ0_wrd_cnt == 0) begin + tx_arb_in_if[0].val <= 0; + typ0_msg_state <= nxt_state; + end + end +endtask + +always_comb begin + case(typ1_msg_state) + TYP1_IDLE: rx_typ1_if.rdy = rx_typ1_if_rdy; + VERIFY_EQUIHASH: rx_typ1_if.rdy = rx_typ1_if_rdy && o_equihash_if.rdy; + default: rx_typ1_if.rdy = rx_typ1_if_rdy; + endcase +end + +always_comb begin + i_secp256k1_if.rdy = (typ1_msg_state == TYP1_VERIFY_SECP256K1) && (~tx_arb_in_if[1].val || (tx_arb_in_if[1].rdy && tx_arb_in_if[1].val)); +end +// Logic for processing msg_type == 1 messages +always_ff @ (posedge i_clk) begin + if (rst_int) begin + rx_typ1_if_rdy <= 0; + typ1_msg_state <= TYP1_IDLE; + header1_l <= 0; + tx_arb_in_if[1].reset_source(); + o_equihash_if.reset_source(); + typ1_wrd_cnt <= 0; + equihash_index <= 0; + verify_equihash_rpl_val <= 0; + equihash_index_val <= 0; + sop_l <= 0; + eop_typ1_l <= 0; + typ1_msg <= 0; + o_secp256k1_if.reset_source(); + eop_l <= 0; + end else begin + + if (tx_arb_in_if[1].rdy) tx_arb_in_if[1].val <= 0; + + case (typ1_msg_state) + TYP1_IDLE: begin + rx_typ1_if_rdy <= 1; + verify_equihash_rpl_val <= 0; + equihash_index_val <= 0; + sop_l <= 0; + if (rx_typ1_if.val && rx_typ1_if.rdy) begin + header1_l <= header1; + rx_typ1_if_rdy <= 0; + case(header1.cmd) + VERIFY_EQUIHASH: begin + rx_typ1_if_rdy <= 1; + typ1_wrd_cnt <= $bits(verify_equihash_rpl_t)/8; + typ1_msg_state <= TYP1_VERIFY_EQUIHASH; + if (~ENB_VERIFY_EQUIHASH) begin + typ1_msg <= get_fpga_ignore_rpl(header1); + typ1_wrd_cnt <= $bits(fpga_ignore_rpl_t)/8; + eop_typ1_l <= rx_typ1_if.eop; + typ1_msg_state <= TYP1_SEND_IGNORE; + end + end + VERIFY_SECP256K1_SIG: begin + rx_typ1_if_rdy <= o_secp256k1_if.rdy; + o_secp256k1_if.copy_if(rx_typ1_if.dat, rx_typ1_if.val, rx_typ1_if.sop, rx_typ1_if.eop); + typ1_msg_state <= TYP1_VERIFY_SECP256K1; + if (~ENB_VERIFY_SECP256K1_SIG) begin + typ1_msg <= get_fpga_ignore_rpl(header1); + typ1_wrd_cnt <= $bits(fpga_ignore_rpl_t)/8; + eop_typ1_l <= rx_typ1_if.eop; + typ1_msg_state <= TYP1_SEND_IGNORE; + end + end + default: begin + typ1_msg <= get_fpga_ignore_rpl(header1); + typ1_wrd_cnt <= $bits(fpga_ignore_rpl_t)/8; + eop_typ1_l <= rx_typ1_if.eop; + typ1_msg_state <= TYP1_SEND_IGNORE; + end + endcase + end + end + TYP1_VERIFY_EQUIHASH: begin + if (rx_typ1_if.eop && rx_typ1_if.val && rx_typ1_if.rdy) + rx_typ1_if_rdy <= 0; + + if (~equihash_index_val) begin + if (rx_typ1_if.rdy && rx_typ1_if.val) begin + equihash_index <= rx_typ1_if.dat; + equihash_index_val <= 1; + end + end else begin + // First load block data (this might be bypassed if loading from memory) + if (~o_equihash_if.val || (o_equihash_if.rdy && o_equihash_if.val)) begin + o_equihash_if.copy_if(rx_typ1_if.dat, rx_typ1_if.val, ~sop_l, rx_typ1_if.eop, rx_typ1_if.err, rx_typ1_if.mod); + // First cycle has .sop set + if (rx_typ1_if.val) sop_l <= 1; + end + end + + // Wait for reply with result + if (i_equihash_mask_val && ~verify_equihash_rpl_val) begin + typ1_msg <= get_verify_equihash_rpl(i_equihash_mask, equihash_index); + verify_equihash_rpl_val <= 1; + end + + // Send result + if (verify_equihash_rpl_val) begin + send_typ1_message($bits(verify_equihash_rpl_t)/8); + end + end + + // The command header is sent through to output + TYP1_VERIFY_SECP256K1: begin + rx_typ1_if_rdy <= o_secp256k1_if.rdy; + if (~eop_l && ~o_secp256k1_if.val || (o_secp256k1_if.rdy && o_secp256k1_if.val)) begin + o_secp256k1_if.copy_if(rx_typ1_if.dat, rx_typ1_if.val, rx_typ1_if.sop, rx_typ1_if.eop, rx_typ1_if.err, rx_typ1_if.mod); + eop_l <= rx_typ1_if.eop && rx_typ1_if.val; + if (rx_typ1_if.eop && rx_typ1_if.val) + rx_typ1_if_rdy <= 0; + end + + if (~tx_arb_in_if[1].val || (tx_arb_in_if[1].rdy && tx_arb_in_if[1].val)) begin + tx_arb_in_if[1].val <= i_secp256k1_if.val; + tx_arb_in_if[1].dat <= i_secp256k1_if.dat; + tx_arb_in_if[1].mod <= i_secp256k1_if.mod; + tx_arb_in_if[1].sop <= i_secp256k1_if.sop; + tx_arb_in_if[1].eop <= i_secp256k1_if.eop; + tx_arb_in_if[1].err <= i_secp256k1_if.err; + end + + if (tx_arb_in_if[1].val && tx_arb_in_if[1].rdy && tx_arb_in_if[1].eop) begin + typ1_msg_state <= TYP1_IDLE; + end + + end + + TYP1_SEND_IGNORE: begin + send_typ1_message($bits(fpga_ignore_rpl_t)/8, eop_typ1_l ? TYP1_IDLE : TYP1_IGNORE); + end + TYP1_IGNORE: begin + rx_typ1_if_rdy <= 1; + if (rx_typ1_if.rdy && rx_typ1_if.eop && rx_typ1_if.val) + typ1_msg_state <= TYP1_IDLE; + end + endcase + end +end + +// Task to help build reply messages. Assume no message will be more than MAX_BYT_MSG bytes +task send_typ1_message(input logic [$clog2(MAX_BYT_MSG)-1:0] msg_size, + input typ1_msg_state_t nxt_state = TYP1_IDLE); + rx_typ1_if_rdy <= 0; + if (~tx_arb_in_if[1].val || (tx_arb_in_if[1].rdy && tx_arb_in_if[1].val)) begin + tx_arb_in_if[1].dat <= typ1_msg; + tx_arb_in_if[1].val <= 1; + tx_arb_in_if[1].sop <= typ1_wrd_cnt == msg_size; + tx_arb_in_if[1].eop <= typ1_wrd_cnt <= DAT_BYTS; + tx_arb_in_if[1].mod <= typ1_wrd_cnt < DAT_BYTS ? typ1_wrd_cnt : 0; + typ1_wrd_cnt <= (typ1_wrd_cnt > DAT_BYTS) ? (typ1_wrd_cnt - DAT_BYTS) : 0; + typ1_msg <= typ1_msg >> DAT_BITS; + if (typ1_wrd_cnt == 0) begin + tx_arb_in_if[1].val <= 0; + typ1_msg_state <= nxt_state; + end + end +endtask + +// Logic to mux the packet depending on its command type +logic msg_type, msg_type_l; +always_comb begin + rx_int0_if.copy_if_comb(rx_if.dat, 0, rx_if.sop, rx_if.eop, 0, rx_if.mod, 0); + rx_int1_if.copy_if_comb(rx_if.dat, 0, rx_if.sop, rx_if.eop, 0, rx_if.mod, 0); + + rx_if.rdy = 0; + + if (rx_if.sop && rx_if.val) begin + if(header.cmd[8 +: 8] == 8'd0) begin + msg_type = 0; + rx_int0_if.val = rx_if.val; + rx_if.rdy = rx_int0_if.rdy; + end else begin + msg_type = 1; + rx_int1_if.val = rx_if.val; + rx_if.rdy = rx_int1_if.rdy; + end + end else begin + rx_int0_if.val = rx_if.val && (msg_type_l == 0); + rx_int1_if.val = rx_if.val && (msg_type_l == 1); + rx_if.rdy = (msg_type_l == 0) ? rx_int0_if.rdy : rx_int1_if.rdy; + msg_type = msg_type_l; + end +end + +always_ff @ (posedge i_clk) begin + if (i_rst) begin + msg_type_l <= 0; + end else begin + if (rx_if.val && rx_if.rdy) begin + if (rx_if.sop) + msg_type_l <= msg_type; + end + end +end + +// FIFO control queues for different message types + +axi_stream_fifo #( + .SIZE ( 64 ), + .DAT_BITS ( DAT_BITS ) +) +cmd_fifo0 ( + .i_clk ( i_clk ), + .i_rst ( rst_int ), + .i_axi ( rx_int0_if ), + .o_axi ( rx_typ0_if ) +); + +axi_stream_fifo #( + .SIZE ( 64 ), + .DAT_BITS ( DAT_BITS ) +) +cmd_fifo1 ( + .i_clk ( i_clk ), + .i_rst ( rst_int ), + .i_axi ( rx_int1_if ), + .o_axi ( rx_typ1_if ) +); + +// Arbitrator for sending messages back +packet_arb # ( + .NUM_IN ( 2 ), + .DAT_BYTS ( DAT_BYTS ), + .CTL_BITS ( 8 ) +) +packet_arb_tx ( + .i_clk ( i_clk ), + .i_rst ( rst_int ), + + .i_axi ( tx_arb_in_if ), + .o_axi ( tx_if ) +); + endmodule \ No newline at end of file diff --git a/zcash_fpga/src/rtl/top/zcash_fpga_top.sv b/zcash_fpga/src/rtl/top/zcash_fpga_top.sv index 37f719f..51e46cf 100644 --- a/zcash_fpga/src/rtl/top/zcash_fpga_top.sv +++ b/zcash_fpga/src/rtl/top/zcash_fpga_top.sv @@ -1,249 +1,306 @@ -/* - This is the top level of the Zcash FPGA acceleration engine. - - We have different interfaces that are all muxed together to provide FPGA - with commands and data. - - Copyright (C) 2019 Benjamin Devlin and Zcash Foundation - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . - */ - -module zcash_fpga_top - import zcash_fpga_pkg::*, equihash_pkg::*; -#( - parameter DAT_BYTS = 8 // Only tested at 8 byte data width -)( - // Clocks and resets - input i_clk_100, i_rst_100, // 100 MHz clock - input i_clk_200, i_rst_200, // 200 MHz clock - input i_clk_300, i_rst_300, // 300 MHz clock - input i_clk_if, i_rst_if, // Command interface clock (e.g. UART / PCIe) - // AXI lite interface - if_axi_lite.sink axi_lite_if, - // Command interface input and output - if_axi_stream.sink rx_if, - if_axi_stream.source tx_if - -); - -localparam CTL_BITS = 8; - -// These are the resets combined with the user reset -logic usr_rst_100, rst_100; -logic usr_rst_200, rst_200; -logic usr_rst_300, rst_300; -logic usr_rst; - -if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BITS(CTL_BITS)) equihash_axi(i_clk_if); -if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BITS(CTL_BITS)) equihash_axi_s(i_clk_100); - -if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BITS(CTL_BITS)) secp256k1_out_if(i_clk_if); -if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BITS(CTL_BITS)) secp256k1_in_if(i_clk_if); -if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BITS(CTL_BITS)) secp256k1_out_if_s(i_clk_200); -if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BITS(CTL_BITS)) secp256k1_in_if_s(i_clk_200); - -equihash_bm_t equihash_mask, equihash_mask_s; -logic equihash_mask_val, equihash_mask_val_s; - -// Synchronize resets from interface into each clock domain -synchronizer #(.DAT_BITS ( 1 ), .NUM_CLKS ( 3 )) rst_100_sync ( - .i_clk_a ( i_clk_if ), - .i_clk_b ( i_clk_100 ), - .i_dat_a ( usr_rst ), - .o_dat_b ( usr_rst_100 ) -); -always_ff @ (posedge i_clk_200) rst_200 <= i_rst_200 || usr_rst_200; - -synchronizer #(.DAT_BITS ( 1 ), .NUM_CLKS ( 3 )) rst_200_sync ( - .i_clk_a ( i_clk_if ), - .i_clk_b ( i_clk_200 ), - .i_dat_a ( usr_rst ), - .o_dat_b ( usr_rst_200 ) -); -always_ff @ (posedge i_clk_100) rst_100 <= i_rst_100 || usr_rst_100; - -synchronizer #(.DAT_BITS ( 1 ), .NUM_CLKS ( 3 )) rst_300_sync ( - .i_clk_a ( i_clk_if ), - .i_clk_b ( i_clk_300 ), - .i_dat_a ( usr_rst ), - .o_dat_b ( usr_rst_300 ) -); -always_ff @ (posedge i_clk_300) rst_300 <= i_rst_300 || usr_rst_300; - -// This block takes in the interface signals and interfaces with other blocks -// This runs on the same clock as the interface but we might need to change data width - -if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BITS(1)) tx_int_if [1:0] (i_clk_if); - -control_top #( - .DAT_BYTS ( DAT_BYTS ) -) -control_top ( - .i_clk ( i_clk_if ), - .i_rst ( i_rst_if ), - .o_usr_rst ( usr_rst ), - .rx_if ( rx_if ), - .tx_if ( tx_int_if[0] ), - .o_equihash_if ( equihash_axi ), - .i_equihash_mask ( equihash_mask ), - .i_equihash_mask_val ( equihash_mask_val ), - .o_secp256k1_if ( secp256k1_out_if ), - .i_secp256k1_if ( secp256k1_in_if ) -); - - -// This block is used to verify a equihash solution -cdc_fifo_if #( - .SIZE ( 16 ), - .USE_BRAM ( 0 ), - .RAM_PERFORMANCE ("HIGH_PERFORMANCE") -) -cdc_fifo_equihash_rx ( - .i_clk_a ( i_clk_if ), - .i_rst_a ( usr_rst || i_rst_if ), - .i_clk_b ( i_clk_100 ), - .i_rst_b ( rst_100 || ENB_VERIFY_EQUIHASH == 0 ), - .i_a ( equihash_axi ), - .o_full_a(), - .o_b ( equihash_axi_s ), - .o_emp_b () -); - -cdc_fifo #( - .SIZE ( 16 ), - .DAT_BITS ( $bits(equihash_bm_t) ), - .USE_BRAM ( 0 ) -) -cdc_fifo_equihash_tx ( - .i_clk_a ( i_clk_100 ), - .i_rst_a ( rst_100 || ENB_VERIFY_EQUIHASH == 0 ), - .i_clk_b ( i_clk_if ), - .i_rst_b ( usr_rst || i_rst_if ), - .i_val_a ( equihash_mask_val_s ), - .i_dat_a ( equihash_mask_s ), - .o_rdy_a (), - .o_full_a(), - .o_val_b ( equihash_mask_val ), - .o_dat_b ( equihash_mask ), - .i_rdy_b ( 1'd1 ), - .o_emp_b (), - .o_rd_wrds_b() -); - -equihash_verif_top #( - .DAT_BYTS( DAT_BYTS ) -) -equihash_verif_top ( - .i_clk ( i_clk_100 ), - .i_rst ( rst_100 || ENB_VERIFY_EQUIHASH == 0 ), - .i_clk_300 ( i_clk_300 ), - .i_rst_300 ( rst_300 || ENB_VERIFY_EQUIHASH == 0 ), - .i_axi ( equihash_axi_s ), - .o_mask ( equihash_mask_s ), - .o_mask_val ( equihash_mask_val_s ) -); - - -// This block is the ECCDSA block for curve secp256k1 - -cdc_fifo_if #( - .SIZE ( 16 ), - .USE_BRAM ( 0 ), - .RAM_PERFORMANCE ("HIGH_PERFORMANCE") -) -cdc_fifo_secp256k1_rx ( - .i_clk_a ( i_clk_if ), - .i_rst_a ( usr_rst || i_rst_if ), - .i_clk_b ( i_clk_200 ), - .i_rst_b ( rst_200 || ENB_VERIFY_SECP256K1_SIG == 0 ), - .i_a ( secp256k1_out_if ), - .o_full_a(), - .o_b ( secp256k1_out_if_s ), - .o_emp_b () -); - -cdc_fifo_if #( - .SIZE ( 16 ), - .USE_BRAM ( 0 ), - .RAM_PERFORMANCE ("HIGH_PERFORMANCE") -) -cdc_fifo_secp256k1_tx ( - .i_clk_a ( i_clk_200 ), - .i_rst_a ( rst_200 || ENB_VERIFY_SECP256K1_SIG == 0 ), - .i_clk_b ( i_clk_if ), - .i_rst_b ( usr_rst || i_rst_if ), - .i_a ( secp256k1_in_if_s ), - .o_full_a(), - .o_b ( secp256k1_in_if ), - .o_emp_b () -); - -// We add pipelining so this block can be on a different SLR -if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BITS(CTL_BITS)) secp256k1_out_if_s_r(i_clk_200); -if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BITS(CTL_BITS)) secp256k1_in_if_s_r(i_clk_200); - -pipeline_if #( - .DAT_BYTS( DAT_BYTS ), - .CTL_BITS( CTL_BITS ), - .NUM_STAGES (2) -) -secp256k1_pipeline_if0 ( - .i_rst ( rst_200 ), - .i_if ( secp256k1_out_if_s ), - .o_if ( secp256k1_out_if_s_r ) -); - -pipeline_if #( - .DAT_BYTS( DAT_BYTS ), - .CTL_BITS( CTL_BITS ), - .NUM_STAGES (2) -) -secp256k1_pipeline_if1 ( - .i_rst ( rst_200 ), - .i_if ( secp256k1_in_if_s_r ), - .o_if ( secp256k1_in_if_s ) -); - - -secp256k1_top secp256k1_top ( - .i_clk ( i_clk_200 ), - .i_rst ( rst_200 || ENB_VERIFY_SECP256K1_SIG == 0 ), - .if_cmd_rx ( secp256k1_out_if_s_r ), - .if_cmd_tx ( secp256k1_in_if_s_r ) -); - -bls12_381_top #( - .USE_KARATSUBA ( BLS12_381_USE_KARATSUBA ) -) -bls12_381_top ( - .i_clk ( i_clk_if ), - .i_rst ( i_rst_if || ENB_BLS12_381 == 0 ), - .tx_if ( tx_int_if[1] ), - .axi_lite_if ( axi_lite_if ) -); - -// Mux output of control block and BLS12_381 block -packet_arb # ( - .DAT_BYTS ( DAT_BYTS ), - .CTL_BITS ( 1 ), - .NUM_IN ( 2 ) -) packet_arb_tx ( - .i_clk ( i_clk_if ), - .i_rst ( i_rst_if ), - .i_axi ( tx_int_if ), - .o_axi ( tx_if ) -); - +/* + This is the top level of the Zcash FPGA acceleration engine. + + We have different interfaces that are all muxed together to provide FPGA + with commands and data. + + Copyright (C) 2019 Benjamin Devlin and Zcash Foundation + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + */ + +module zcash_fpga_top + import zcash_fpga_pkg::*, equihash_pkg::*; +#( + parameter DAT_BYTS = 8 // Only tested at 8 byte data width +)( + // Clocks and resets + input i_clk_100, i_rst_100, // 100 MHz clock + input i_clk_200, i_rst_200, // 200 MHz clock + input i_clk_300, i_rst_300, // 300 MHz clock + input i_clk_if, i_rst_if, // Command interface clock (e.g. UART / PCIe) + // AXI lite interface + if_axi_lite.sink axi_lite_if, + // Command interface input and output + if_axi_stream.sink rx_if, + if_axi_stream.source tx_if + +); + +localparam CTL_BITS = 8; +localparam USE_XILINX_FIFO = "YES"; // If you use this make sure you generate the ip folder in aws/cl_zcash/ip + +// These are the resets combined with the user reset +logic usr_rst_100, rst_100; +logic usr_rst_200, rst_200; +logic usr_rst_300, rst_300; +logic usr_rst; + +if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BITS(CTL_BITS)) equihash_axi(i_clk_if); +if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BITS(CTL_BITS)) equihash_axi_s(i_clk_100); + +if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BITS(CTL_BITS)) secp256k1_out_if(i_clk_if); +if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BITS(CTL_BITS)) secp256k1_in_if(i_clk_if); +if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BITS(CTL_BITS)) secp256k1_out_if_s(i_clk_200); +if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BITS(CTL_BITS)) secp256k1_in_if_s(i_clk_200); + +equihash_bm_t equihash_mask, equihash_mask_s; +logic equihash_mask_val, equihash_mask_val_s; + +// Synchronize resets from interface into each clock domain +synchronizer #(.DAT_BITS ( 1 ), .NUM_CLKS ( 3 )) rst_100_sync ( + .i_clk_a ( i_clk_if ), + .i_clk_b ( i_clk_100 ), + .i_dat_a ( usr_rst ), + .o_dat_b ( usr_rst_100 ) +); +always_ff @ (posedge i_clk_200) rst_200 <= i_rst_200 || usr_rst_200; + +synchronizer #(.DAT_BITS ( 1 ), .NUM_CLKS ( 3 )) rst_200_sync ( + .i_clk_a ( i_clk_if ), + .i_clk_b ( i_clk_200 ), + .i_dat_a ( usr_rst ), + .o_dat_b ( usr_rst_200 ) +); +always_ff @ (posedge i_clk_100) rst_100 <= i_rst_100 || usr_rst_100; + +synchronizer #(.DAT_BITS ( 1 ), .NUM_CLKS ( 3 )) rst_300_sync ( + .i_clk_a ( i_clk_if ), + .i_clk_b ( i_clk_300 ), + .i_dat_a ( usr_rst ), + .o_dat_b ( usr_rst_300 ) +); +always_ff @ (posedge i_clk_300) rst_300 <= i_rst_300 || usr_rst_300; + +// This block takes in the interface signals and interfaces with other blocks +// This runs on the same clock as the interface but we might need to change data width + +if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BITS(1)) tx_int_if [1:0] (i_clk_if); + +control_top #( + .DAT_BYTS ( DAT_BYTS ) +) +control_top ( + .i_clk ( i_clk_if ), + .i_rst ( i_rst_if ), + .o_usr_rst ( usr_rst ), + .rx_if ( rx_if ), + .tx_if ( tx_int_if[0] ), + .o_equihash_if ( equihash_axi ), + .i_equihash_mask ( equihash_mask ), + .i_equihash_mask_val ( equihash_mask_val ), + .o_secp256k1_if ( secp256k1_out_if ), + .i_secp256k1_if ( secp256k1_in_if ) +); + + +// This block is used to verify a equihash solution +cdc_fifo_if #( + .SIZE ( 16 ), + .USE_BRAM ( 0 ), + .RAM_PERFORMANCE ("HIGH_PERFORMANCE") +) +cdc_fifo_equihash_rx ( + .i_clk_a ( i_clk_if ), + .i_rst_a ( usr_rst || i_rst_if ), + .i_clk_b ( i_clk_100 ), + .i_rst_b ( rst_100 || ENB_VERIFY_EQUIHASH == 0 ), + .i_a ( equihash_axi ), + .o_full_a(), + .o_b ( equihash_axi_s ), + .o_emp_b () +); + +cdc_fifo #( + .SIZE ( 16 ), + .DAT_BITS ( $bits(equihash_bm_t) ), + .USE_BRAM ( 0 ) +) +cdc_fifo_equihash_tx ( + .i_clk_a ( i_clk_100 ), + .i_rst_a ( rst_100 || ENB_VERIFY_EQUIHASH == 0 ), + .i_clk_b ( i_clk_if ), + .i_rst_b ( usr_rst || i_rst_if ), + .i_val_a ( equihash_mask_val_s ), + .i_dat_a ( equihash_mask_s ), + .o_rdy_a (), + .o_full_a(), + .o_val_b ( equihash_mask_val ), + .o_dat_b ( equihash_mask ), + .i_rdy_b ( 1'd1 ), + .o_emp_b (), + .o_rd_wrds_b() +); + +equihash_verif_top #( + .DAT_BYTS( DAT_BYTS ) +) +equihash_verif_top ( + .i_clk ( i_clk_100 ), + .i_rst ( rst_100 || ENB_VERIFY_EQUIHASH == 0 ), + .i_clk_300 ( i_clk_300 ), + .i_rst_300 ( rst_300 || ENB_VERIFY_EQUIHASH == 0 ), + .i_axi ( equihash_axi_s ), + .o_mask ( equihash_mask_s ), + .o_mask_val ( equihash_mask_val_s ) +); + + +// This block is the ECCDSA block for curve secp256k1 + +if (USE_XILINX_FIFO == "YES") begin + + logic cdc_fifo_secp256k1_rx_full, cdc_fifo_secp256k1_rx_empty, cdc_fifo_secp256k1_rx_wr_rst_busy, cdc_fifo_secp256k1_rx_rd_rst_busy; + + always_comb begin + secp256k1_out_if.rdy = ~cdc_fifo_secp256k1_rx_full && ~cdc_fifo_secp256k1_rx_wr_rst_busy; + secp256k1_out_if_s.val = ~cdc_fifo_secp256k1_rx_rd_rst_busy && ~cdc_fifo_secp256k1_rx_empty; + secp256k1_out_if_s.ctl = 0; + secp256k1_out_if_s.err = 0; + secp256k1_out_if_s.mod = 0; + end + + fifo_generator_0 cdc_fifo_secp256k1_rx ( + .srst (i_rst_if), + .wr_clk (i_clk_if), + .rd_clk (i_clk_200), + .din ({secp256k1_out_if.dat, secp256k1_out_if.sop, secp256k1_out_if.eop}), + .wr_en (secp256k1_out_if.val), + .rd_en (secp256k1_out_if_s.rdy && secp256k1_out_if_s.val), + .dout ({secp256k1_out_if_s.dat, secp256k1_out_if_s.sop, secp256k1_out_if_s.eop}), + .full (cdc_fifo_secp256k1_rx_full), + .empty (cdc_fifo_secp256k1_rx_empty), + .wr_rst_busy(cdc_fifo_secp256k1_rx_wr_rst_busy), + .rd_rst_busy(cdc_fifo_secp256k1_rx_rd_rst_busy) + ); + + logic cdc_fifo_secp256k1_tx_full, cdc_fifo_secp256k1_tx_empty, cdc_fifo_secp256k1_tx_wr_rst_busy, cdc_fifo_secp256k1_tx_rd_rst_busy; + + always_comb begin + secp256k1_in_if_s.rdy = ~cdc_fifo_secp256k1_tx_full && ~cdc_fifo_secp256k1_tx_wr_rst_busy; + secp256k1_in_if.val = ~cdc_fifo_secp256k1_tx_rd_rst_busy && ~cdc_fifo_secp256k1_tx_empty; + secp256k1_in_if.ctl = 0; + secp256k1_in_if.err = 0; + secp256k1_in_if.mod = 0; + end + + fifo_generator_0 cdc_fifo_secp256k1_tx ( + .srst (i_rst_if), + .wr_clk (i_clk_200), + .rd_clk (i_clk_if), + .din ({secp256k1_in_if_s.dat, secp256k1_in_if_s.sop, secp256k1_in_if_s.eop}), + .wr_en (secp256k1_in_if_s.val), + .rd_en (secp256k1_in_if.rdy && secp256k1_in_if.val), + .dout ({secp256k1_in_if.dat, secp256k1_in_if.sop, secp256k1_in_if.eop}), + .full (cdc_fifo_secp256k1_tx_full), + .empty (cdc_fifo_secp256k1_tx_empty), + .wr_rst_busy(cdc_fifo_secp256k1_tx_wr_rst_busy), + .rd_rst_busy(cdc_fifo_secp256k1_tx_rd_rst_busy) + ); + +end else begin + + cdc_fifo_if #( + .SIZE ( 16 ), + .USE_BRAM ( 0 ), + .RAM_PERFORMANCE ("HIGH_PERFORMANCE") + ) + cdc_fifo_secp256k1_rx ( + .i_clk_a ( i_clk_if ), + .i_rst_a ( usr_rst || i_rst_if ), + .i_clk_b ( i_clk_200 ), + .i_rst_b ( rst_200 || ENB_VERIFY_SECP256K1_SIG == 0 ), + .i_a ( secp256k1_out_if ), + .o_full_a(), + .o_b ( secp256k1_out_if_s ), + .o_emp_b () + ); + + cdc_fifo_if #( + .SIZE ( 16 ), + .USE_BRAM ( 0 ), + .RAM_PERFORMANCE ("HIGH_PERFORMANCE") + ) + cdc_fifo_secp256k1_tx ( + .i_clk_a ( i_clk_200 ), + .i_rst_a ( rst_200 || ENB_VERIFY_SECP256K1_SIG == 0 ), + .i_clk_b ( i_clk_if ), + .i_rst_b ( usr_rst || i_rst_if ), + .i_a ( secp256k1_in_if_s ), + .o_full_a(), + .o_b ( secp256k1_in_if ), + .o_emp_b () + ); + +end + + + +// We add pipelining so this block can be on a different SLR +if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BITS(CTL_BITS)) secp256k1_out_if_s_r(i_clk_200); +if_axi_stream #(.DAT_BYTS(DAT_BYTS), .CTL_BITS(CTL_BITS)) secp256k1_in_if_s_r(i_clk_200); + +pipeline_if #( + .DAT_BYTS( DAT_BYTS ), + .CTL_BITS( CTL_BITS ), + .NUM_STAGES (2) +) +secp256k1_pipeline_if0 ( + .i_rst ( rst_200 ), + .i_if ( secp256k1_out_if_s ), + .o_if ( secp256k1_out_if_s_r ) +); + +pipeline_if #( + .DAT_BYTS( DAT_BYTS ), + .CTL_BITS( CTL_BITS ), + .NUM_STAGES (2) +) +secp256k1_pipeline_if1 ( + .i_rst ( rst_200 ), + .i_if ( secp256k1_in_if_s_r ), + .o_if ( secp256k1_in_if_s ) +); + + +secp256k1_top secp256k1_top ( + .i_clk ( i_clk_200 ), + .i_rst ( rst_200 || ENB_VERIFY_SECP256K1_SIG == 0 ), + .if_cmd_rx ( secp256k1_out_if_s_r ), + .if_cmd_tx ( secp256k1_in_if_s_r ) +); + +bls12_381_top #( + .USE_KARATSUBA ( BLS12_381_USE_KARATSUBA ) +) +bls12_381_top ( + .i_clk ( i_clk_if ), + .i_rst ( i_rst_if || ENB_BLS12_381 == 0 ), + .tx_if ( tx_int_if[1] ), + .axi_lite_if ( axi_lite_if ) +); + +// Mux output of control block and BLS12_381 block +packet_arb # ( + .DAT_BYTS ( DAT_BYTS ), + .CTL_BITS ( 1 ), + .NUM_IN ( 2 ) +) packet_arb_tx ( + .i_clk ( i_clk_if ), + .i_rst ( i_rst_if ), + .i_axi ( tx_int_if ), + .o_axi ( tx_if ) +); + endmodule \ No newline at end of file