From f3d3de9a1000a7cf53a942267cb2de984eaafb21 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Wed, 3 Jul 2024 21:04:52 +0800 Subject: [PATCH 01/53] Initialize base types --- .gitmodules | 3 + lib/blue_wrapper | 1 + src/DmaController.bsv | 7 ++ src/DmaTypes.bsv | 37 +++++++++ src/PcieTypes.bsv | 101 ++++++++++++++++++++++++ test/TestAxiStream.bsv | 172 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 321 insertions(+) create mode 100644 .gitmodules create mode 160000 lib/blue_wrapper create mode 100644 src/DmaController.bsv create mode 100644 src/DmaTypes.bsv create mode 100644 src/PcieTypes.bsv create mode 100644 test/TestAxiStream.bsv diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..a2890b5 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "lib/blue_wrapper"] + path = lib/blue_wrapper + url = https://github.com/wengwz/blue-wrapper.git diff --git a/lib/blue_wrapper b/lib/blue_wrapper new file mode 160000 index 0000000..0845f36 --- /dev/null +++ b/lib/blue_wrapper @@ -0,0 +1 @@ +Subproject commit 0845f36b1dd60b90ba5b9163ed8ca37f493f3355 diff --git a/src/DmaController.bsv b/src/DmaController.bsv new file mode 100644 index 0000000..f8030f4 --- /dev/null +++ b/src/DmaController.bsv @@ -0,0 +1,7 @@ +import PcieTypes::*; +import DmaTypes::*; + +module mkDmaController#() (DmaController ifc); + + +endmodule \ No newline at end of file diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv new file mode 100644 index 0000000..f6d8e42 --- /dev/null +++ b/src/DmaTypes.bsv @@ -0,0 +1,37 @@ + +import SemiFifo :: *; + +typedef 512 DMA_DATA_WIDTH + + +typedef struct { + Bit#(dataWidth) data; + Bit#(TDiv#(dataWidth, 8)) byteEn; + Bool isFirst; + Bool isLast; +} DataFrame#(numeric type dataWidth) deriving(Bits, Bounded, Eq, FShow); + +typedef struct { + Bit#(32) address; + Bit#(32) length; +} CtrlFrame deriving(Bits, Bounded, Eq, FShow); + +typedef struct { + Bit#(32) address; + Bit#(32) value; +} CsrFrame deriving(Bits, Bounded, Eq, FShow); + +interface DmaController#(numeric type dataWidth); + + interface FifoIn#(DataFrame#(dataWidth)) DmaDataC2HPipeIn; + interface FifoIn#(CtrlFrame) DmaCtrlC2HPipeIn; + interface FifoIn#(CtrlFrame) DmaCtrlH2CPipeIn; + interface FifoOut#(DataFrame#(dataWidth)) DmaDataH2CPipeOut; + + interface FifoIn#(CsrFrame) DmaCsrC2HPipeIn; + interface FifoOut#(CsrFrame) DmaCsrC2HPipeOut; + interface FifoOut#(CsrFrame) DmaCsrH2CPipeOut; + + interface RawPcieRequester#(TDiv#(dataWidth, 8), PCIE_USR_WIDTH) PcieRequester; + interface RawPcieCompleter#(TDiv#(dataWidth, 8), PCIE_USR_WIDTH) PcieCompleter; +endinterface diff --git a/src/PcieTypes.bsv b/src/PcieTypes.bsv new file mode 100644 index 0000000..b8d016a --- /dev/null +++ b/src/PcieTypes.bsv @@ -0,0 +1,101 @@ +import AxiStreamTypes :: *; + +// from PG213 + +typedef 512 PCIE_TDATA_WIDTH +typedef 137 PCIE_TUSER_WIDTH + +typedef struct { + Bit#(8) first_be; + Bit#(8) last_be; + Bit#(4) addr_offset; + Bit#(2) is_sop; + Bit#(2) is_sop0_ptr; + Bit#(2) is_sop1_ptr; + Bit#(2) is_eop; + Bit#(4) is_eop0_ptr; + Bit#(4) is_eop1_ptr; + Bit#(1) discontinue; + Bit#(2) tph_present; + Bit#(4) tph_type; + Bit#(2) tph_indirect_tag_en; + Bit#(16) tph_st_tag; + Bit#(6) seq_num0; + Bit#(6) seq_num1; + Bit#(64) parity; +} PcieRRSideBandFrame deriving(Bits, Bounded, Eq); + +typedef struct { + Bit#(64) byte_en; + Bit#(4) is_sop; + Bit#(2) is_sop0_ptr; + Bit#(2) is_sop1_ptr; + Bit#(2) is_sop2_ptr; +} PcieRPSideBandFrame deriving(Bits, Bounded, Eq); + +interface RawPcieRequester#(numeric type keepWidth , numeric type usrWidth); + interface RawAxiStreamMaster#(keepWidth, usrWidth) Request; + interface RawAxiStreamSlave#(keepWidth, usrWidth) Complete; +endinterface + +interface RawPcieCompleter#(numeric type keepWidth, numeric type usrWidth); + interface RawAxiStreamSlave#(keepWidth, usrWidth) Request; + interface RawAxiStreamMaster#(keepWidth, usrWidth) Complete; +endinterface + +(*always_ready, always_enabled*) +interface RawPcieCfgMgmt#(); + (* result = cfg_mgmt_addr *) method Bit#(10) cfgMgmtAddr; + // (* result = cfg_mgmt_byte_enable *) method Bit#(4) cfgMgmtAddr; + // (* result = cfg_mgmt_addr *) method Bit#(10) cfgMgmtAddr; + // (* result = cfg_mgmt_addr *) method Bit#(10) cfgMgmtAddr; + // (* result = cfg_mgmt_addr *) method Bit#(10) cfgMgmtAddr; + // (* result = cfg_mgmt_addr *) method Bit#(10) cfgMgmtAddr; + // (* result = cfg_mgmt_addr *) method Bit#(10) cfgMgmtAddr; + // (* result = cfg_mgmt_addr *) method Bit#(10) cfgMgmtAddr; +endinterface + +(*always_ready, always_enabled*) +interface RawPcieCfgPm#(); + +endinterface + +(*always_ready, always_enabled*) +interface RawPcieCfgMsi#(); + +endinterface + +(*always_ready, always_enabled*) +interface RawPcieCfgInterrupt#(); + +endinterface + +(*always_ready, always_enabled*) +interface RawPcieCfgControl#(); + +endinterface + +(*always_ready, always_enabled*) +interface RawPcieCfgFC#(); + +endinterface + +(*always_ready, always_enabled*) +interface RawPcieCfgFlowMsgTx#(); + +endinterface + +(*always_ready, always_enabled*) +interface RawPcieCfgFlowMsgRx#(); + +endinterface + +(*always_ready, always_enabled*) +interface RawPcieCfgStatus#(); + +endinterface + +(*always_ready, always_enabled*) +interface RawPcieCfgTransmitFC#(); + +endinterface \ No newline at end of file diff --git a/test/TestAxiStream.bsv b/test/TestAxiStream.bsv new file mode 100644 index 0000000..f293d84 --- /dev/null +++ b/test/TestAxiStream.bsv @@ -0,0 +1,172 @@ +import FIFO::*; +import Vector::*; +import AxiStreamTypes::*; +import Counter::*; + +typedef 512 DATA_WIDTH; +typedef TDiv#(DATA_WIDTH, 8) BATCH_BYTES; +typedef 128 USR_WIDTH; +typedef 4321 RD_BYTES_LENGTH; + + +interface AxisFifo#(numeric type keepWidth, numeric type usrWidth); + interface RawAxiStreamMaster#(keepWidth, usrWidth) axisMaster; + interface RawAxiStreamSlave#(keepWidth, usrWidth) axisSlave; +endinterface + + +module mkTbAxisRdWrLoop (Empty); + Reg#(File) fileIn <- mkRegU(); + Reg#(File) fileRef <- mkRegU(); + Reg#(File) fileOut <- mkRegU(); + Reg#(Bool) initFlag <- mkReg(False); + // Read the file + Reg#(Bool) rdDoneFlag <- mkReg(False); + Reg#(UInt#(32)) rdBatchCnt <- mkReg(0); + let rdTotalBytesLen = valueOf(RD_BYTES_LENGTH); + let rdBatchBytesLen = valueOf(BATCH_BYTES); + let rdLastBatchBytesLen = rdTotalBytesLen % rdBatchBytesLen; + let rdBatchesNum = rdTotalBytesLen % rdBatchBytesLen > 0 ? rdTotalBytesLen / rdBatchBytesLen + 1 : rdTotalBytesLen / rdBatchBytesLen; + FIFO#(AxiStream#(BATCH_BYTES, USR_WIDTH)) toDutFifo <- mkSizedFIFO(16); + // DUT + AxisFifo#(BATCH_BYTES, USR_WIDTH) dut <- mkTbAxisWire(); + // Control + Reg#(UInt#(32)) tValidCnt <- mkReg(0); + + rule init(!initFlag); + initFlag <= True; + File in <- $fopen("test.txt", "rb"); + File refer <- $fopen("ref.txt", "wb"); + File out <- $fopen("out.txt", "wb"); + if (in == InvalidFile || refer == InvalidFile || out == InvalidFile) begin + $display("ERROR: couldn't open test file"); + $finish; + end + fileIn <= in; + fileRef <= refer; + fileOut <= out; + endrule + + rule readfile(initFlag && !rdDoneFlag && rdBatchCnt < fromInteger(rdBatchesNum)); + Vector#(BATCH_BYTES, Bit#(8)) getChars = replicate(0); + Bit#(BATCH_BYTES) keep = 0; + Bool last = False; + if(rdBatchCnt == fromInteger(rdBatchesNum) - 1) begin + for(Integer i = 0; i < rdLastBatchBytesLen; i = i + 1) begin + int c <- $fgetc(fileIn); + if(c == -1) begin + $fclose(fileIn); + $fclose(fileRef); + end else begin + $fwrite(fileRef, "%c", c); + getChars[i] = truncate(pack(c)); + keep[i] = 1'b1; + end + end + $fclose(fileIn); + $fclose(fileRef); + rdDoneFlag <= True; + last = True; + $display("INFO: test file read done"); + end else begin + rdBatchCnt <= rdBatchCnt + 1; + for(Integer i = 0; i < rdBatchBytesLen; i = i + 1) begin + int c <- $fgetc(fileIn); + if(c == -1) begin + $fclose(fileRef); + $fclose(fileIn); + last = True; + end else begin + $fwrite(fileRef, "%c", c); + getChars[i] = truncate(pack(c)); + keep[i] = 1'b1; + end + end + end + let axis = AxiStream{ + tData: pack(getChars), + tKeep: keep, + tLast: last, + tUser: 0 + }; + toDutFifo.enq(axis); + endrule + + rule reader2dut if(rdBatchCnt > 0); + if(dut.axisSlave.tReady) begin + // $display("INFO: simulation exec a batch"); + toDutFifo.deq(); + let axis = toDutFifo.first; + dut.axisSlave.tValid( + True, + axis.tData, + axis.tKeep, + axis.tLast, + axis.tUser); + end + endrule + + rule dut2writer; + dut.axisMaster.tReady(True); + if(dut.axisMaster.tValid) begin + tValidCnt <= tValidCnt + 1; + let data = dut.axisMaster.tData; + Vector#(BATCH_BYTES, Bit#(8)) getChars = unpack(data); + let keep = dut.axisMaster.tKeep; + for(Integer i = 0; i < rdBatchBytesLen; i = i + 1) begin + if(keep[i] == 1'b1) begin $fwrite(fileOut, "%c", getChars[i]); end + end + end + if(tValidCnt == rdBatchCnt && rdDoneFlag) begin + $display("INFO: file write done, compare the ref and out") + $fclose(fileOut); + $finish(); + end + endrule + +endmodule + +module mkTbAxisWire(AxisFifo#(keepWidth, usrWidth) ifc); + Wire#(Bit#(TMul#(keepWidth, 8))) data <- mkDWire(0); + Wire#(Bit#(keepWidth)) keep <- mkDWire(0); + Wire#(Bit#(usrWidth)) user <- mkDWire(0); + Wire#(Bit#(1)) last <- mkDWire(0); + Wire#(Bit#(1)) rdy <- mkDWire(0); + Wire#(Bit#(1)) vld <- mkDWire(0); + + interface RawAxiStreamMaster axisMaster; + method Bool tValid = unpack(vld); + method Bool tLast = unpack(last); + method Bit#(TMul#(keepWidth, 8)) tData = data; + method Bit#(keepWidth) tKeep = keep; + method Bit#(usrWidth) tUser = user; + method Action tReady(Bool ready); + rdy <= pack(ready); + endmethod + endinterface + + interface RawAxiStreamSlave axisSlave; + method Bool tReady = True; + method Action tValid( + Bool tvalid, + Bit#(TMul#(keepWidth, 8)) tData, + Bit#(keepWidth) tKeep, + Bool tLast, + Bit#(usrWidth) tUser + ); + data <= tData; + keep <= tKeep; + user <= tUser; + last <= pack(tLast); + vld <= pack(tvalid); + endmethod + endinterface +endmodule + +// module mkTbAxisPipeFifo (AxisFifo#(keepWidth, usrWidth) ifc); +// FIFOF#(AxiStream#(keepWidth, usrWidth)) <- mkSizedFIFOF(10); + +// endmodule + + + From 03eb41082567c84ae012338add232f0f676b95f4 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Thu, 4 Jul 2024 10:10:52 +0800 Subject: [PATCH 02/53] init branch develop --- src/PcieTypes.bsv | 6 +++--- test/TestAxiStream.bsv | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/PcieTypes.bsv b/src/PcieTypes.bsv index b8d016a..f4a5f21 100644 --- a/src/PcieTypes.bsv +++ b/src/PcieTypes.bsv @@ -46,9 +46,9 @@ endinterface (*always_ready, always_enabled*) interface RawPcieCfgMgmt#(); (* result = cfg_mgmt_addr *) method Bit#(10) cfgMgmtAddr; - // (* result = cfg_mgmt_byte_enable *) method Bit#(4) cfgMgmtAddr; - // (* result = cfg_mgmt_addr *) method Bit#(10) cfgMgmtAddr; - // (* result = cfg_mgmt_addr *) method Bit#(10) cfgMgmtAddr; + (* result = cfg_mgmt_byte_enable *) method Bit#(4) cfgMgmtByteEn; + (* result = cfg_mgmt_debug_access *) method Bool cfgMgmtAddr; + (* result = cfg_mgmt_function_number *) method Bit#(8) cfgMgmFuncNum; // (* result = cfg_mgmt_addr *) method Bit#(10) cfgMgmtAddr; // (* result = cfg_mgmt_addr *) method Bit#(10) cfgMgmtAddr; // (* result = cfg_mgmt_addr *) method Bit#(10) cfgMgmtAddr; diff --git a/test/TestAxiStream.bsv b/test/TestAxiStream.bsv index f293d84..e395336 100644 --- a/test/TestAxiStream.bsv +++ b/test/TestAxiStream.bsv @@ -89,7 +89,7 @@ module mkTbAxisRdWrLoop (Empty); tLast: last, tUser: 0 }; - toDutFifo.enq(axis); + toDutFifo.enq(axis); endrule rule reader2dut if(rdBatchCnt > 0); From 765d837f5b4911e650412a7ca9bb73c326643df9 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Thu, 4 Jul 2024 17:50:57 +0800 Subject: [PATCH 03/53] Inplement PCIe Interface Types --- src/DmaTypes.bsv | 28 ++++-- src/PcieTypes.bsv | 222 ++++++++++++++++++++++++++++++++--------- test/TestAxiStream.bsv | 74 +++++++------- 3 files changed, 228 insertions(+), 96 deletions(-) diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index f6d8e42..1904d4a 100644 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -1,25 +1,31 @@ import SemiFifo :: *; +import PcieTypes :: *; -typedef 512 DMA_DATA_WIDTH - +typedef 512 DMA_DATA_WIDTH; +typedef 64 DMA_HOSTMEM_ADDR_WIDTH; +typedef 32 DMA_CSR_ADDR_WIDTH; +typedef 32 DMA_CSR_DATA_WIDTH; +typedef Bit#(DMA_HOSTMEM_ADDR_WIDTH) DmaMemAddr; +typedef Bit#(DMA_CSR_ADDR_WIDTH) DMACsrAddr; +typedef Bit#(DMA_CSR_DATA_WIDTH) DMACsrValue; typedef struct { Bit#(dataWidth) data; - Bit#(TDiv#(dataWidth, 8)) byteEn; + Bit#(TDiv#(dataWidth, BYTE_BITS)) byteEn; Bool isFirst; Bool isLast; } DataFrame#(numeric type dataWidth) deriving(Bits, Bounded, Eq, FShow); typedef struct { - Bit#(32) address; - Bit#(32) length; -} CtrlFrame deriving(Bits, Bounded, Eq, FShow); + DmaMemAddr startAddr; + DmaMemAddr length; +} DmaCtrlFrame deriving(Bits, Bounded, Eq, FShow); typedef struct { - Bit#(32) address; - Bit#(32) value; -} CsrFrame deriving(Bits, Bounded, Eq, FShow); + DMACsrAddr address; + DMACsrValue value; +} DmaCsrFrame deriving(Bits, Bounded, Eq, FShow); interface DmaController#(numeric type dataWidth); @@ -32,6 +38,6 @@ interface DmaController#(numeric type dataWidth); interface FifoOut#(CsrFrame) DmaCsrC2HPipeOut; interface FifoOut#(CsrFrame) DmaCsrH2CPipeOut; - interface RawPcieRequester#(TDiv#(dataWidth, 8), PCIE_USR_WIDTH) PcieRequester; - interface RawPcieCompleter#(TDiv#(dataWidth, 8), PCIE_USR_WIDTH) PcieCompleter; + interface RawPcieRequester PcieRequester; + interface RawPcieCompleter PcieCompleter; endinterface diff --git a/src/PcieTypes.bsv b/src/PcieTypes.bsv index f4a5f21..f2800d0 100644 --- a/src/PcieTypes.bsv +++ b/src/PcieTypes.bsv @@ -1,58 +1,184 @@ import AxiStreamTypes :: *; -// from PG213 - -typedef 512 PCIE_TDATA_WIDTH -typedef 137 PCIE_TUSER_WIDTH - +typedef 8 BYTE_BITS +typedef TMul#(4, BYTE_BITS) DWORD_BITS; + +typedef 512 PCIE_TDATA_WIDTH; +typedef 64 PCIE_TDATA_BYTES; +typedef 16 PCIE_TDATA_DWORDS; +// Indicate DWORD valid of tDATA +typedef PCIE_TDATA_DWORDS PCIE_TKEEP_WIDTH; +// tUser width vary among RR, RC, CR and CC +typedef 183 PCIE_COMPLETER_REQUEST_TUSER_WIDTH; +typedef 81 PCIE_COMPLETER_COMPLETE_TUSER_WIDTH; +typedef 137 PCIE_REQUESTER_REQUEST_TUSER_WIDTH; +typedef 161 PCIE_REQUESTER_COMPLETE_TUSER_WIDTH; + +// PcieTlpCtl**: SideBand Signals delivered in tUser defined by PG213 +typedef 8 PCIE_TLP_FIRST_BE_WIDTH; +typedef 8 PCIE_TLP_LAST_BE_WIDTH; +typedef Bit#(PCIE_TLP_FIRST_BE_WIDTH) PcieTlpCtlFirstByteEn; +typedef Bit#(PCIE_TLP_LAST_BE_WIDTH) PcieTlpCtlLastByteEn; +typedef PCIE_TDATA_BYTES PCIE_TLP_BYTE_EN_WIDTH; +typedef Bit#(PCIE_TLP_BYTE_EN_WIDTH) PcieTlpCtlByteEn; +typedef 2 PCIE_TLP_ISSOP_WIDTH; +typedef 2 PCIE_TLP_ISSOP_PTR_WIDTH; +typedef Bit#(PCIE_TLP_ISSOP_WIDTH) PcieTlpCtlIsSop; +typedef Bit#(PCIE_TLP_ISSOP_PTR_WIDTH) PcieTlpCtlIsSopPtr; +typedef 2 PCIE_TLP_ISEOP_WIDTH; +typedef 4 PCIE_TLP_ISEOP_PTR_WIDTH; +typedef Bit#(PCIE_TLP_ISEOP_WIDTH) PcieTlpCtlIsEop; +typedef Bit#(PCIE_TLP_ISEOP_PTR_WIDTH) PcieTlpCtlIsEopPtr; +typedef 2 PCIE_TPH_PRESENT_WIDTH; +typedef 4 PCIE_TPH_TYPE_WIDTH; +typedef 16 PCIE_TPH_STTAG; +typedef 2 PCIE_TPH_INDIRECT_TAGEN_WIDTH; +typedef Bit#(PCIE_TPH_PRESENT_WIDTH) PcieTlpCtlTphPresent; +typedef Bit#(PCIE_TPH_TYPE_WIDTH) PcieTlpCtlTphType; +typedef Bit#(PCIE_TPH_STTAG) PcieTlpCtlTphSteeringTag; +typedef Bit#(PCIE_TPH_INDIRECT_TAGEN_WIDTH) PcieTlpCtlTphIndirectTagEn; +typedef 64 PCIE_TLP_PARITY +typedef Bit#(PCIE_TLP_PARITY) PcieTlpCtlParity; +typedef 4 PCIE_TLP_ADDR_OFFSET_WIDTH; +typedef Bit#(PCIE_TLP_ADDR_OFFSET_WIDTH) PcieTlpCtlAddrOffset; +typedef 6 PCIE_TLP_SEQ_NUM_WIDTH; +typedef Bit#(PCIE_TLP_SEQ_NUM_WIDTH) PcieTlpCtlSeqNum; +typedef 4 PCIE_TLP_RC_ISSOP_WIDTH; +typedef Bit#(PCIE_TLP_RC_ISSOP_WIDTH) PcieTlpCtlIsSopRC; +typedef 4 PCIE_TLP_RC_ISEOP_WIDTH; +typedef Bit#(PCIE_TLP_RC_ISEOP_WIDTH) PcieTlpCtlIsEopRC; +// Signals the start of a new TLP, 6 bit. typedef struct { - Bit#(8) first_be; - Bit#(8) last_be; - Bit#(4) addr_offset; - Bit#(2) is_sop; - Bit#(2) is_sop0_ptr; - Bit#(2) is_sop1_ptr; - Bit#(2) is_eop; - Bit#(4) is_eop0_ptr; - Bit#(4) is_eop1_ptr; - Bit#(1) discontinue; - Bit#(2) tph_present; - Bit#(4) tph_type; - Bit#(2) tph_indirect_tag_en; - Bit#(16) tph_st_tag; - Bit#(6) seq_num0; - Bit#(6) seq_num1; - Bit#(64) parity; -} PcieRRSideBandFrame deriving(Bits, Bounded, Eq); - + PcieTlpCtlIsSop isSop; + PcieTlpCtlIsSopPtr isSopPtr0; + PcieTlpCtlIsSopPtr isSopPtr1; +} PcieTlpCtlIsSopCommon deriving(Bits, Bounded, Eq); +// Signals the start of a new TLP, 12 bit. +typedef struct { + PcieTlpCtlIsSopRC isSop; + PcieTlpCtlIsSopPtr isSopPtr0; + PcieTlpCtlIsSopPtr isSopPtr1; + PcieTlpCtlIsSopPtr isSopPtr2; + PcieTlpCtlIsSopPtr isSopPtr3; +} PcieTlpCtlIsSopReqCpl deriving(Bits, Bounded, Eq); +// Indicates a TLP is ending in this beat, 10bit. +typedef struct { + PcieTlpCtlIsEop isEop; + PcieTlpCtlIsEopPtr isEopPtr0; + PcieTlpCtlIsEopPtr isEopPtr1; +} PcieTlpCtlIsEopCommon deriving(Bits, Bounded, Eq); +// Indicates a TLP is ending in this beat, 20bit. +typedef struct { + PcieTlpCtlIsEopRC isEop; + PcieTlpCtlIsEopPtr isEopPtr0; + PcieTlpCtlIsEopPtr isEopPtr1; + PcieTlpCtlIsEopPtr isEopPtr2; + PcieTlpCtlIsEopPtr isEopPtr3; +} PcieTlpCtlIsEopReqCpl deriving(Bits, Bounded, Eq); + +// 183bit tUser of PcieCompleterRequeste AXIS-slave typedef struct { - Bit#(64) byte_en; - Bit#(4) is_sop; - Bit#(2) is_sop0_ptr; - Bit#(2) is_sop1_ptr; - Bit#(2) is_sop2_ptr; -} PcieRPSideBandFrame deriving(Bits, Bounded, Eq); - -interface RawPcieRequester#(numeric type keepWidth , numeric type usrWidth); - interface RawAxiStreamMaster#(keepWidth, usrWidth) Request; - interface RawAxiStreamSlave#(keepWidth, usrWidth) Complete; + PcieTlpCtlFirstByteEn firstByteEn; + PcieTlpCtlLastByteEn lastByteEn; + PcieTlpCtlByteEn dataByteEn; + PcieTlpCtlIsSopCommon isSop; + PcieTlpCtlIsEopCommon isEop; + Bool discontinue; + PcieTlpCtlTphPresent tphPresent; + PcieTlpCtlTphType tphType; + PcieTlpCtlTphSteeringTag tphSteeringTag; + PcieTlpCtlParity parity; +} PcieCompleterRequestSideBandFrame deriving(Bits, Bounded, Eq); +// 81bit tUser of PcieCompleterComplete AXIS-master +typedef struct { + PcieTlpCtlIsSopCommon isSop; + PcieTlpCtlIsEopCommon isEop; + Bool discontinue; + PcieTlpCtlParity parity; +} PcieCompleterCompleteSideBandFrame deriving(Bits, Bounded, Eq); +// 137bit tUser of PcieRequesterRequeste AXIS-master +typedef struct { + PcieTlpCtlFirstByteEn firstByteEn; + PcieTlpCtlLastByteEn lastByteEn + PcieTlpCtlAddrOffset addrOffset; + PcieTlpCtlIsSopCommon isSop; + PcieTlpCtlIsEopCommon isEop; + Bool discontinue; + PcieTlpCtlTphPresent tphPresent; + PcieTlpCtlTphType tphType; + PcieTlpCtlTphIndirectTagEn tphIndirectTagEn; + PcieTlpCtlTphSteeringTag tphSteeringTag; + PcieTlpCtlSeqNum seqNum0; + PcieTlpCtlSeqNum seqNum1; + PcieTlpCtlParity parity; +} PcieRequsterRequestSideBandFrame deriving(Bits, Bounded, Eq); +// 161 tUser of PcieRequesterComplete AXIS-slave +typedef struct { + PcieTlpCtlByteEn dataByteEn; + PcieTlpCtlIsSopReqCpl isSop; + PcieTlpCtlIsEopReqCpl isEop; + Bool discontinue; + PcieTlpCtlParity parity; +} PcieRequesterCompleteSideBandFrame deriving(Bits, Bounded, Eq); + + +typedef 2 PCIE_CR_NP_REQ_WIDTH; +typedef 6 PCIE_CR_NP_REQ_COUNT_WIDTH; +typedef Bit#(PCIE_CR_NP_REQ_WIDTH) PcieNonPostedRequst; +typedef Bit#(PCIE_CR_NP_REQ_COUNT_WIDTH) PcieNonPostedRequstCount; +// Interface to PCIe IP Completer Interface +(*always_ready, always_enabled*) +interface RawPcieCompleter; + // TODO: the AxiStream in blue-wrapper has tDataWidth = tKeepWidth * BYTE_BITS, but the PCIe IP has tDataWidth = tKeepWidth * DWORD_BITS + (* prefix = "s_axis_cq_" *) interface RawAxiStreamSlave#(PCIE_TKEEP_WIDTH, PCIE_COMPLETER_REQUEST_TUSER_WIDTH) Request; + // (* result = "pcie_cq_np_req" *) method PcieNonPostedRequst nonPostedReqCreditIncrement; + // (* prefix = "" *) method Action nonPostedReqCreditCnt( + // (* port = "pcie_cq_np_req_count" *) PcieNonPostedRequstCount ); + (* prefix = "m_axis_cc_" *) interface RawAxiStreamMaster#(PCIE_TKEEP_WIDTH, PCIE_COMPLETER_COMPLETE_TUSER_WIDTH) Complete; +endinterface + +// Interface to PCIe IP Requester Interface +(*always_ready, always_enabled*) +interface RawPcieRequester; + (* prefix = "m_axis_rq_" *) interface RawAxiStreamMaster#(PCIE_TKEEP_WIDTH, usrWidth) Request; + (* prefix = "s_axis_rc_" *) interface RawAxiStreamSlave#(PCIE_TKEEP_WIDTH, usrWidth) Complete; endinterface -interface RawPcieCompleter#(numeric type keepWidth, numeric type usrWidth); - interface RawAxiStreamSlave#(keepWidth, usrWidth) Request; - interface RawAxiStreamMaster#(keepWidth, usrWidth) Complete; +typedef 10 PCIE_CFG_MGMT_ADDR_WIDTH; +typedef 4 PCIE_CFG_MGMT_BE_WIDTH; +typedef 8 PCIE_CFG_MGMT_FUNC_NUM_WIDTH; +typedef 32 PCIE_CFG_MGMT_DATA_WIDTH; +typedef Bit#(PCIE_CFG_MGMT_ADDR_WIDTH) PcieCfgMgmtAddr; +typedef Bit#(PCIE_CFG_MGMT_BE_WIDTH) PcieCfgMgmtByteEn; +typedef Bit#(PCIE_CFG_MGMT_FUNC_NUM_WIDTH) PcieCfgMgmtFuncNum; +typedef Bit#(PCIE_CFG_MGMT_DATA_WIDTH) PCieCfgMgmtData; + +interface RawPcieConfiguration; + (* prefix = "cfg_mgmt_" *) interface RawPcieCfgMgmt; + (* prefix = "cfg_pm_" *) interface RawPcieCfgPm; + (* prefix = "cfg_msi_" *) interface RawPcieCfgMsi; + (* prefix = "cfg_interrupt_" *) interface RawPcieCfgInterrupt; + (* prefix = "cfg_" *) interface RawPcieCfgControl; + (* prefix = "cfg_fc_" *) interface RawPcieCfgFC; + (* prefix = "cfg_msg_transmit_" *) interface RawPcieCfgMsgTx; + (* prefix = "cfg_msg_received_" *) interface RawPcieCfgMsgRx; + (* prefix = "" *) interface RawPcieCfgStatus; + (* prefix = "pcie_tfc_" *) interface RawPcieCfgTransmitFC; endinterface (*always_ready, always_enabled*) -interface RawPcieCfgMgmt#(); - (* result = cfg_mgmt_addr *) method Bit#(10) cfgMgmtAddr; - (* result = cfg_mgmt_byte_enable *) method Bit#(4) cfgMgmtByteEn; - (* result = cfg_mgmt_debug_access *) method Bool cfgMgmtAddr; - (* result = cfg_mgmt_function_number *) method Bit#(8) cfgMgmFuncNum; - // (* result = cfg_mgmt_addr *) method Bit#(10) cfgMgmtAddr; - // (* result = cfg_mgmt_addr *) method Bit#(10) cfgMgmtAddr; - // (* result = cfg_mgmt_addr *) method Bit#(10) cfgMgmtAddr; - // (* result = cfg_mgmt_addr *) method Bit#(10) cfgMgmtAddr; +interface RawPcieCfgMgmt; + (* result = addr *) method PcieCfgMgmtAddr cfgMgmtAddr; + (* result = byte_enable *) method PcieCfgMgmtByteEn cfgMgmtByteEn; + (* result = debug_access *) method Bool cfgMgmtDebugAccess; + (* result = function_number *) method PcieCfgMgmtFuncNum cfgMgmtFuncNum; + (* result = read *) method Bool cfgMgmtRead; + (* result = write_data *) method PCieCfgMgmtData cfgMgmtWriteData; + (* result = write *) method Bool cfgMgmtWrite; + (* prefix = "" *) method Action cfgMgmtReadData( + (* port = "read_data" *) PCieCfgMgmtData cfgMgmtRdData); + (* prefix = "" *) method Action cfgMgmtWriteDone( + (* port = "write_done" *) Bool cfgMgmtWrDone); endinterface (*always_ready, always_enabled*) @@ -81,12 +207,12 @@ interface RawPcieCfgFC#(); endinterface (*always_ready, always_enabled*) -interface RawPcieCfgFlowMsgTx#(); +interface RawPcieCfgMsgTx#(); endinterface (*always_ready, always_enabled*) -interface RawPcieCfgFlowMsgRx#(); +interface RawPcieCfgMsgRx#(); endinterface diff --git a/test/TestAxiStream.bsv b/test/TestAxiStream.bsv index e395336..1595a41 100644 --- a/test/TestAxiStream.bsv +++ b/test/TestAxiStream.bsv @@ -16,13 +16,13 @@ endinterface module mkTbAxisRdWrLoop (Empty); - Reg#(File) fileIn <- mkRegU(); - Reg#(File) fileRef <- mkRegU(); - Reg#(File) fileOut <- mkRegU(); - Reg#(Bool) initFlag <- mkReg(False); + Reg#(File) fileInReg <- mkRegU(); + Reg#(File) fileRefReg <- mkRegU(); + Reg#(File) fileOutReg <- mkRegU(); + Reg#(Bool) initFlagReg <- mkReg(False); // Read the file - Reg#(Bool) rdDoneFlag <- mkReg(False); - Reg#(UInt#(32)) rdBatchCnt <- mkReg(0); + Reg#(Bool) rdDoneFlagReg <- mkReg(False); + Reg#(UInt#(32)) rdBatchCntReg <- mkReg(0); let rdTotalBytesLen = valueOf(RD_BYTES_LENGTH); let rdBatchBytesLen = valueOf(BATCH_BYTES); let rdLastBatchBytesLen = rdTotalBytesLen % rdBatchBytesLen; @@ -33,8 +33,8 @@ module mkTbAxisRdWrLoop (Empty); // Control Reg#(UInt#(32)) tValidCnt <- mkReg(0); - rule init(!initFlag); - initFlag <= True; + rule init(!initFlagReg); + initFlagReg <= True; File in <- $fopen("test.txt", "rb"); File refer <- $fopen("ref.txt", "wb"); File out <- $fopen("out.txt", "wb"); @@ -42,44 +42,44 @@ module mkTbAxisRdWrLoop (Empty); $display("ERROR: couldn't open test file"); $finish; end - fileIn <= in; - fileRef <= refer; - fileOut <= out; + fileInReg <= in; + fileRefReg <= refer; + fileOutReg <= out; endrule - rule readfile(initFlag && !rdDoneFlag && rdBatchCnt < fromInteger(rdBatchesNum)); + rule readfile(initFlagReg && !rdDoneFlagReg && rdBatchCntReg < fromInteger(rdBatchesNum)); Vector#(BATCH_BYTES, Bit#(8)) getChars = replicate(0); Bit#(BATCH_BYTES) keep = 0; Bool last = False; - if(rdBatchCnt == fromInteger(rdBatchesNum) - 1) begin - for(Integer i = 0; i < rdLastBatchBytesLen; i = i + 1) begin - int c <- $fgetc(fileIn); + if(rdBatchCntReg == fromInteger(rdBatchesNum) - 1) begin + for(Integer idx = 0; idx < rdLastBatchBytesLen; idx = idx + 1) begin + int c <- $fgetc(fileInReg); if(c == -1) begin - $fclose(fileIn); - $fclose(fileRef); + $fclose(fileInReg); + $fclose(fileRefReg); end else begin - $fwrite(fileRef, "%c", c); - getChars[i] = truncate(pack(c)); - keep[i] = 1'b1; + $fwrite(fileRefReg, "%c", c); + getChars[idx] = truncate(pack(c)); + keep[idx] = 1'b1; end end - $fclose(fileIn); - $fclose(fileRef); - rdDoneFlag <= True; + $fclose(fileInReg); + $fclose(fileRefReg); + rdDoneFlagReg <= True; last = True; $display("INFO: test file read done"); end else begin - rdBatchCnt <= rdBatchCnt + 1; - for(Integer i = 0; i < rdBatchBytesLen; i = i + 1) begin - int c <- $fgetc(fileIn); - if(c == -1) begin - $fclose(fileRef); - $fclose(fileIn); + rdBatchCntReg <= rdBatchCntReg + 1; + for(Integer idx = 0; idx < rdBatchBytesLen; idx = idx + 1) begin + int rdChar <- $fgetc(fileInReg); + if(rdChar == -1) begin + $fclose(fileRefReg); + $fclose(fileInReg); last = True; end else begin - $fwrite(fileRef, "%c", c); - getChars[i] = truncate(pack(c)); - keep[i] = 1'b1; + $fwrite(fileRefReg, "%c", rdChar); + getChars[idx] = truncate(pack(rdChar)); + keep[idx] = 1'b1; end end end @@ -92,7 +92,7 @@ module mkTbAxisRdWrLoop (Empty); toDutFifo.enq(axis); endrule - rule reader2dut if(rdBatchCnt > 0); + rule reader2dut if(rdBatchCntReg > 0); if(dut.axisSlave.tReady) begin // $display("INFO: simulation exec a batch"); toDutFifo.deq(); @@ -113,13 +113,13 @@ module mkTbAxisRdWrLoop (Empty); let data = dut.axisMaster.tData; Vector#(BATCH_BYTES, Bit#(8)) getChars = unpack(data); let keep = dut.axisMaster.tKeep; - for(Integer i = 0; i < rdBatchBytesLen; i = i + 1) begin - if(keep[i] == 1'b1) begin $fwrite(fileOut, "%c", getChars[i]); end + for(Integer idx = 0; idx < rdBatchBytesLen; idx = idx + 1) begin + if(keep[idx] == 1'b1) begin $fwrite(fileOutReg, "%c", getChars[i]); end end end - if(tValidCnt == rdBatchCnt && rdDoneFlag) begin + if(tValidCnt == rdBatchCntReg && rdDoneFlagReg) begin $display("INFO: file write done, compare the ref and out") - $fclose(fileOut); + $fclose(fileOutReg); $finish(); end endrule From f259b917a3de85e3a20dadcca6a14dd08065d484 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Thu, 4 Jul 2024 18:01:15 +0800 Subject: [PATCH 04/53] Inplement PCIe Interface Types --- src/DmaTypes.bsv | 12 +++++++----- test/TestAxiStream.bsv | 19 ++++++++++--------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index 1904d4a..30b60da 100644 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -30,14 +30,16 @@ typedef struct { interface DmaController#(numeric type dataWidth); interface FifoIn#(DataFrame#(dataWidth)) DmaDataC2HPipeIn; - interface FifoIn#(CtrlFrame) DmaCtrlC2HPipeIn; - interface FifoIn#(CtrlFrame) DmaCtrlH2CPipeIn; + interface FifoIn#(DmaCtrlFrame) DmaCtrlC2HPipeIn; + interface FifoIn#(DmaCtrlFrame) DmaCtrlH2CPipeIn; interface FifoOut#(DataFrame#(dataWidth)) DmaDataH2CPipeOut; - interface FifoIn#(CsrFrame) DmaCsrC2HPipeIn; - interface FifoOut#(CsrFrame) DmaCsrC2HPipeOut; - interface FifoOut#(CsrFrame) DmaCsrH2CPipeOut; + interface FifoIn#(DmaCsrFrame) DmaCsrC2HPipeIn; + interface FifoOut#(DMACsrAddr) DmaCsrC2HPipeOut; + interface FifoOut#(DmaCsrFrame) DmaCsrH2CPipeOut; interface RawPcieRequester PcieRequester; interface RawPcieCompleter PcieCompleter; + interface RawPcieConfiguration PcieConfig; + endinterface diff --git a/test/TestAxiStream.bsv b/test/TestAxiStream.bsv index 1595a41..bd30140 100644 --- a/test/TestAxiStream.bsv +++ b/test/TestAxiStream.bsv @@ -4,7 +4,8 @@ import AxiStreamTypes::*; import Counter::*; typedef 512 DATA_WIDTH; -typedef TDiv#(DATA_WIDTH, 8) BATCH_BYTES; +typedef 8 BYTE_BITS; +typedef TDiv#(DATA_WIDTH, BYTE_BITS) BATCH_BYTES; typedef 128 USR_WIDTH; typedef 4321 RD_BYTES_LENGTH; @@ -48,18 +49,18 @@ module mkTbAxisRdWrLoop (Empty); endrule rule readfile(initFlagReg && !rdDoneFlagReg && rdBatchCntReg < fromInteger(rdBatchesNum)); - Vector#(BATCH_BYTES, Bit#(8)) getChars = replicate(0); + Vector#(BATCH_BYTES, Bit#(BYTE_BITS)) getChars = replicate(0); Bit#(BATCH_BYTES) keep = 0; Bool last = False; if(rdBatchCntReg == fromInteger(rdBatchesNum) - 1) begin for(Integer idx = 0; idx < rdLastBatchBytesLen; idx = idx + 1) begin - int c <- $fgetc(fileInReg); - if(c == -1) begin + int readChar <- $fgetc(fileInReg); + if(readChar == -1) begin $fclose(fileInReg); $fclose(fileRefReg); end else begin - $fwrite(fileRefReg, "%c", c); - getChars[idx] = truncate(pack(c)); + $fwrite(fileRefReg, "%c", readChar); + getChars[idx] = truncate(pack(readChar)); keep[idx] = 1'b1; end end @@ -92,10 +93,10 @@ module mkTbAxisRdWrLoop (Empty); toDutFifo.enq(axis); endrule - rule reader2dut if(rdBatchCntReg > 0); + rule reader2dut if (rdBatchCntReg > 0); if(dut.axisSlave.tReady) begin // $display("INFO: simulation exec a batch"); - toDutFifo.deq(); + toDutFifo.deq; let axis = toDutFifo.first; dut.axisSlave.tValid( True, @@ -111,7 +112,7 @@ module mkTbAxisRdWrLoop (Empty); if(dut.axisMaster.tValid) begin tValidCnt <= tValidCnt + 1; let data = dut.axisMaster.tData; - Vector#(BATCH_BYTES, Bit#(8)) getChars = unpack(data); + Vector#(BATCH_BYTES, Bit#(BYTE_BITS)) getChars = unpack(data); let keep = dut.axisMaster.tKeep; for(Integer idx = 0; idx < rdBatchBytesLen; idx = idx + 1) begin if(keep[idx] == 1'b1) begin $fwrite(fileOutReg, "%c", getChars[i]); end From a4a8d12b8bd654b689448d93ca3a6c685463ab0e Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Fri, 5 Jul 2024 20:35:59 +0800 Subject: [PATCH 05/53] update mkChunkComputer --- img/split.drawio | 100 +++++++++++++++++++++++++++++++++++++++++ src/DmaRequestCore.bsv | 99 ++++++++++++++++++++++++++++++++++++++++ src/DmaTypes.bsv | 12 ++--- src/PcieTypes.bsv | 11 +++-- 4 files changed, 212 insertions(+), 10 deletions(-) create mode 100644 img/split.drawio create mode 100644 src/DmaRequestCore.bsv diff --git a/img/split.drawio b/img/split.drawio new file mode 100644 index 0000000..6e78ab1 --- /dev/null +++ b/img/split.drawio @@ -0,0 +1,100 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/DmaRequestCore.bsv b/src/DmaRequestCore.bsv new file mode 100644 index 0000000..e2220ab --- /dev/null +++ b/src/DmaRequestCore.bsv @@ -0,0 +1,99 @@ +import FIFOF::*; +import PcieTypes::*; +import DmaTypes::*; + +typedef 4096 BUS_BOUNDARY +typedef 12 BUS_BOUNDARY_WIDTH + +typedef struct { + DmaRequestFrame dmaRequest; + Maybe#(DmaMemAddr) firstChunkLen; +} ChunkRequestFrame deriving(Bits, Eq); + +Interface ChunkCompute; + FifoIn#(DmaRequestFrame) dmaRequests; + FifoOut#(DmaRequestFrame) chunkRequests; +endinterface + +module mkChunkComputer(ChunkCompute ifc); + FIFOF#(DmaRequestFrame) inputFifo <- mkFIFOF; + FIFOF#(DmaRequestFrame) outputFifo <- mkFIFOF; + FIFOF#(ChunkRequestFrame) splitFifo <- mkFIFOF; + + Reg#(DmaMemAddr) newChunkPtrReg <- mkReg(0); + Reg#(DmaMemAddr) totalLenRemainReg <- mkReg(0); + Reg#(Bool) isSplittigReg <- mkReg(False); + + function Bool hasBoundary(DmaRequestFrame request); + let highIdx = (request.startAddr + request.length) >> BUS_BOUNDARY_WIDTH; + let lowIdx = request.startAddr >> BUS_BOUNDARY_WIDTH; + return (highIdx > lowIdx); + endfunction + + function DmaMemAddr getOffset(DmaRequestFrame request); + DmaMemAddr offset = zeroExtend(fromInteger(valueOf(BUS_BOUNDARY)) - pack(request.startAddr[BUS_BOUNDARY_WIDTH-1:0])); + return offset; + endfunction + + rule getfirstChunkLen if(inputFifo.notEmpty && splitFifo.notFull); + let request = inputFifo.first; + inputFifo.deq; + let offset = getOffSet(request); + // firstChunkLen = offset % PCIE_TLP_BYTES + DmaMemAddr firstLen = zeroExtend(offset[valueOf(PCIE_TLP_BYTES_WIDTH)-1:0]); + ChunkRequestFrame splitRequest = { + dmaRequest: request, + firstChunkLen: hasBoundary(request) ? tagged Valid firstLen : tagged Invalid; + } + splitFifo.enq(splitRequest); + endrule + + rule execSplit if(splitFifo.notEmpty && outFifo.notFull); + let splitRequest = splitFifo.first; + if (isSplittingReg) begin + if (totalLenRemainReg <= PCIE_TLP_BYTES) begin + isSplittingReg <= False; + outputFifo.enq(DmaRequestFrame { + startAddr: newChunkPtrReg; + length: totalLenRemainReg; + }); + splitFifo.deq; + totalLenRemainReg <= 0; + end else begin + isSplittingReg <= True; + outputFifo.enq(DmaRequestFrame { + startAddr: newChunkPtrReg; + length: fromInteger(valueOf(PCIE_TLP_BYTES)); + }); + newChunkPtrReg <= newChunkPtrReg + fromInteger(valueOf(PCIE_TLP_BYTES)); + totalLenRemainReg <= totalLenRemainReg - PCIE_TLP_BYTES; + end + end else begin + let remainderLength = splitRequest.dmaRequest.length - fromMaybe(0, splitRequest.firstChunkLen); + if (isValid(splitRequest.firstChunkLen)) begin + Bool isSplittingNextCycle = (remainderLength > 0); + isSplittingReg <= isSplittingNextCycle; + outputFifo.enq(DmaRequestFrame { + startAddr: splitRequest.dmaRequest.startAddr; + length: fromMaybe(0, splitRequest.firstChunkLen); + }); + if (!isSplittingNextCycle) begin splitFifo.deq; end; + newChunkPtrReg <= splitRequest.dmaRequest + fromMaybe(0, splitRequest.firstChunkLen); + totalLenRemainReg <= remainderLength; + end else begin + Bool isSplittingNextCycle = (remainderLength > PCIE_TLP_BYTES); + isSplittingReg <= isSplittingNextCycle; + outputFifo.enq(DmaRequestFrame { + startAddr: splitRequest.dmaRequest.startAddr; + length: fromInteger(valueOf(PCIE_TLP_BYTES)); + }); + if (!isSplittingNextCycle) begin splitFifo.deq; end + newChunkPtrReg <= newChunkPtrReg + fromInteger(valueOf(PCIE_TLP_BYTES)); + totalLenRemainReg <= remainderLength - PCIE_TLP_BYTES; + end + end + endrule + + interface dmaRequests = convertFifoToFifoOut(inputFifo); + interface chunkRequests = convertFifoToFifoOut(outputFifo); +endmodule \ No newline at end of file diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index 30b60da..27c4035 100644 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -20,7 +20,7 @@ typedef struct { typedef struct { DmaMemAddr startAddr; DmaMemAddr length; -} DmaCtrlFrame deriving(Bits, Bounded, Eq, FShow); +} DmaRequestFrame deriving(Bits, Bounded, Eq, FShow); typedef struct { DMACsrAddr address; @@ -30,13 +30,13 @@ typedef struct { interface DmaController#(numeric type dataWidth); interface FifoIn#(DataFrame#(dataWidth)) DmaDataC2HPipeIn; - interface FifoIn#(DmaCtrlFrame) DmaCtrlC2HPipeIn; - interface FifoIn#(DmaCtrlFrame) DmaCtrlH2CPipeIn; + interface FifoIn#(DmaRequestFrame) DmaCtrlC2HPipeIn; + interface FifoIn#(DmaRequestFrame) DmaCtrlH2CPipeIn; interface FifoOut#(DataFrame#(dataWidth)) DmaDataH2CPipeOut; - interface FifoIn#(DmaCsrFrame) DmaCsrC2HPipeIn; - interface FifoOut#(DMACsrAddr) DmaCsrC2HPipeOut; - interface FifoOut#(DmaCsrFrame) DmaCsrH2CPipeOut; + interface FifoIn#(DmaCsrFrame) DmaCsrC2HPipeIn; + interface FifoOut#(DMACsrAddr) DmaCsrC2HPipeOut; + interface FifoOut#(DmaCsrFrame) DmaCsrH2CPipeOut; interface RawPcieRequester PcieRequester; interface RawPcieCompleter PcieCompleter; diff --git a/src/PcieTypes.bsv b/src/PcieTypes.bsv index f2800d0..d2fddd3 100644 --- a/src/PcieTypes.bsv +++ b/src/PcieTypes.bsv @@ -1,7 +1,10 @@ import AxiStreamTypes :: *; -typedef 8 BYTE_BITS -typedef TMul#(4, BYTE_BITS) DWORD_BITS; +typedef 8 BYTE_WIDTH +typedef TMul#(4, BYTE_WIDTH) DWORD_WIDTH; + +typedef 512 PCIE_TLP_BYTES +typedef #TLog(PCIE_TLP_BYTES) PCIE_TLP_BYTES_WIDTH typedef 512 PCIE_TDATA_WIDTH; typedef 64 PCIE_TDATA_BYTES; @@ -112,7 +115,7 @@ typedef struct { PcieTlpCtlSeqNum seqNum1; PcieTlpCtlParity parity; } PcieRequsterRequestSideBandFrame deriving(Bits, Bounded, Eq); -// 161 tUser of PcieRequesterComplete AXIS-slave +// 161bit tUser of PcieRequesterComplete AXIS-slave typedef struct { PcieTlpCtlByteEn dataByteEn; PcieTlpCtlIsSopReqCpl isSop; @@ -129,7 +132,7 @@ typedef Bit#(PCIE_CR_NP_REQ_COUNT_WIDTH) PcieNonPostedRequstCount; // Interface to PCIe IP Completer Interface (*always_ready, always_enabled*) interface RawPcieCompleter; - // TODO: the AxiStream in blue-wrapper has tDataWidth = tKeepWidth * BYTE_BITS, but the PCIe IP has tDataWidth = tKeepWidth * DWORD_BITS + // TODO: the AxiStream in blue-wrapper has tDataWidth = tKeepWidth * BYTE_WIDTH, but the PCIe IP has tDataWidth = tKeepWidth * DWORD_WIDTH (* prefix = "s_axis_cq_" *) interface RawAxiStreamSlave#(PCIE_TKEEP_WIDTH, PCIE_COMPLETER_REQUEST_TUSER_WIDTH) Request; // (* result = "pcie_cq_np_req" *) method PcieNonPostedRequst nonPostedReqCreditIncrement; // (* prefix = "" *) method Action nonPostedReqCreditCnt( From 2cc532cfc4fb8ee34b41ff338cfda1a455856b13 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Sun, 7 Jul 2024 19:49:35 +0800 Subject: [PATCH 06/53] Add mkChunkComputeTb and test pass --- .gitignore | 3 + Makefile.base | 26 +++++ Makefile.test | 18 +++ backend/Makefile | 40 +++++++ backend/listVlogFiles.tcl | 223 ++++++++++++++++++++++++++++++++++++++ img/concat.drawio | 22 ++++ run.sh | 29 +++++ run_one.sh | 37 +++++++ setup.sh | 22 ++++ src/DmaRequestCore.bsv | 100 +++++++++-------- src/DmaTypes.bsv | 22 ++-- src/PcieTypes.bsv | 74 ++++++------- test/Makefile | 22 ++++ test/TestDmaCore.bsv | 78 +++++++++++++ 14 files changed, 620 insertions(+), 96 deletions(-) create mode 100644 .gitignore create mode 100755 Makefile.base create mode 100755 Makefile.test create mode 100644 backend/Makefile create mode 100644 backend/listVlogFiles.tcl create mode 100644 img/concat.drawio create mode 100755 run.sh create mode 100755 run_one.sh create mode 100755 setup.sh create mode 100644 test/Makefile create mode 100644 test/TestDmaCore.bsv diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9fe87a4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +**/build/** +**/verilog/** +**/*.log diff --git a/Makefile.base b/Makefile.base new file mode 100755 index 0000000..c201f8b --- /dev/null +++ b/Makefile.base @@ -0,0 +1,26 @@ +TRANSFLAGS = -aggressive-conditions -lift # -split-if +RECOMPILEFLAGS = -u -show-compiles +SCHEDFLAGS = -show-schedule -sched-dot # -show-rule-rel dMemInit_request_put doExecute +# -show-elab-progress +DEBUGFLAGS = -check-assert \ + -continue-after-errors \ + -keep-fires \ + -keep-inlined-boundaries \ + -show-method-bvi \ + -show-method-conf \ + -show-module-use \ + -show-range-conflict \ + -show-stats \ + -warn-action-shadowing \ + -warn-method-urgency \ + -promote-warnings ALL +VERILOGFLAGS = -verilog -remove-dollar -remove-unused-modules # -use-dpi -verilog-filter cmd +BLUESIMFLAGS = -parallel-sim-link 16 # -systemc +BUILDDIR = build +OUTDIR = -bdir $(BUILDDIR) -info-dir $(BUILDDIR) -simdir $(BUILDDIR) -vdir $(BUILDDIR) +WORKDIR = -fdir $(abspath .) +LIBSRCDIR = $(abspath ../lib/blue_wrapper/src) +BSVSRCDIR = -p +:$(abspath ../src):$(LIBSRCDIR) +DIRFLAGS = $(BSVSRCDIR) $(OUTDIR) $(WORKDIR) +MISCFLAGS = -print-flags -show-timestamps -show-version -steps 6000000 # -D macro +RUNTIMEFLAGS = +RTS -K4095M -RTS diff --git a/Makefile.test b/Makefile.test new file mode 100755 index 0000000..0be9d4e --- /dev/null +++ b/Makefile.test @@ -0,0 +1,18 @@ +TESTDIR ?= $(abspath ../test) +LOGDIR ?= $(abspath ../tmp) + +TESTBENCHS = \ + TestDmaCore.bsv + +TestDmaCore.bsv = mkChunkComputerTb + +all: $(TESTBENCHS) + +%.bsv: + $(foreach testcase, $($@), $(shell cd $(TESTDIR) && make simulate TESTFILE=$@ TOPMODULE=$(testcase) > $(LOGDIR)/$@-$(testcase).log 2>&1)) + +clean: + rm -f $(LOGDIR)/*.log + +.PHONY: all TESTBENCHS %.bsv clean +.DEFAULT_GOAL := all diff --git a/backend/Makefile b/backend/Makefile new file mode 100644 index 0000000..8634dda --- /dev/null +++ b/backend/Makefile @@ -0,0 +1,40 @@ +include ../Makefile.base + +TCLDIR ?= xdc +CLK ?= rdma_clock +OOC ?= 1 +VLOGDIR ?= verilog +OUTPUTDIR ?= output +LOGFILE ?= run.log +RUNTOPHASE ?= place # synth place route all +PARTNAME = xcvu13p-fhgb2104-2-i +TARGETFILE ?= ../src/DmaRequestCore.bsv +TOPMODULE ?= mkChunkComputer + +export TOP = $(TOPMODULE) +export RTL = $(VLOGDIR) +export XDC = $(TCLDIR) +export IPS = $(SRCDIR)/ip/$(PARTNAME) +export CLOCKS = $(CLK) +export OUTPUT = $(OUTPUTDIR) +export OOCSYNTH = $(OOC) +export RUNTO = $(RUNTOPHASE) +export PART = $(PARTNAME) + +compile: + mkdir -p $(BUILDDIR) + bsc -elab -sim -verbose $(BLUESIMFLAGS) $(DEBUGFLAGS) $(DIRFLAGS) $(MISCFLAGS) $(RECOMPILEFLAGS) $(RUNTIMEFLAGS) $(SCHEDFLAGS) $(TRANSFLAGS) -g $(TOPMODULE) $(TARGETFILE) + +verilog: compile + mkdir -p $(VLOGDIR) + bsc $(VERILOGFLAGS) $(DIRFLAGS) $(MISCFLAGS) $(RECOMPILEFLAGS) $(RUNTIMEFLAGS) $(TRANSFLAGS) -g $(TOPMODULE) $(TARGETFILE) + bluetcl listVlogFiles.tcl -bdir $(BUILDDIR) -vdir $(BUILDDIR) $(TOPMODULE) $(TOPMODULE) | grep -i '\.v' | xargs -I {} cp {} $(VLOGDIR) + +# vivado: verilog +# vivado -mode tcl -nolog -nojournal -source ./non_project_build.tcl 2>&1 | tee $(LOGFILE) + +clean: + rm -rf $(BUILDDIR) $(OUTPUTDIR) $(VLOGDIR) .Xil *.jou *.log + +.PHONY: verilog vivado clean +.DEFAULT_GOAL := verilog \ No newline at end of file diff --git a/backend/listVlogFiles.tcl b/backend/listVlogFiles.tcl new file mode 100644 index 0000000..7e30e6f --- /dev/null +++ b/backend/listVlogFiles.tcl @@ -0,0 +1,223 @@ +#!/bin/sh + +# \ +exec $BLUESPECDIR/bin/bluetcl "$0" "$@" + +package require utils + +proc usage {} { + puts "" + puts "usage: $::argv0 top_package_name top_module" + puts "Options:" + puts " -q Do not print section headers" + puts " -p Bluespec search path" + puts " -bdir Bluespec bdir directory" + puts " -vdir Bluespec vdir directory" + puts " -generated Print synthesized BSV modules" + puts " -primitives Print Bluespec primitive modules" + puts " -imported Print imported modules" + puts " -no-inline-fns Print modules for no-inline functions" + puts " -all Alias for -generated -primitives -imported -no-inline-fns" + puts "" + puts " e.g: -bdir build -p build:+ -vdir rtl mkTop fpga_a" +} + +set boolOptions [list -- -q -generated -primitives -imported -no-inline-fns -all] +set valOptions [list -p -bdir -vdir] + +if { [catch [list ::utils::scanOptions $boolOptions $valOptions true OPT "$argv"] opts] } { + puts stderr $opts + usage + exit 1 +} + +if {[llength $opts] == 0} { + puts stderr "A package name argument is required" + usage + exit 1 +} + +if {[llength $opts] == 1} { + puts stderr "A top module name is required" + usage + exit 1 +} + +if {[llength $opts] > 2} { + puts stderr "Too many arguments" + usage + exit 1 +} + +if { [info exists OPT(-p)] } { + Bluetcl::flags set -p $OPT(-p) +} +if { [info exists OPT(-bdir)] } { + Bluetcl::flags set -bdir $OPT(-bdir) +} +if { [info exists OPT(-vdir)] } { + Bluetcl::flags set -vdir $OPT(-vdir) +} + +if {![info exists OPT(-all)] && ![info exists OPT(-generated)] && + ![info exists OPT(-no-inline-fns)] && ![info exists OPT(-primitives)] && + ![info exists OPT(-imported)]} { + set OPT(-all) 1 +} + +set top_pkg [lindex $opts 0] +set top_mod [lindex $opts 1] + +# Assume -verilog +Bluetcl::flags set -verilog + +# Load the module information +Bluetcl::module load $top_pkg + +# Walk the hierarchy extracting module information +set mods_to_process [list $top_pkg] +set already_done [list] +set is_noinline 0 +while {[llength $mods_to_process] > 0} { + set this_mod [utils::head $mods_to_process] + set mods_to_process [utils::tail $mods_to_process] + set res [Bluetcl::module submods $this_mod] + set this_mod_type [lindex $res 0] + if {$this_mod_type == "user" && $is_noinline != 0} { + set this_mod_type "no-inline-fn" + } + array set mod_info [list $this_mod $this_mod_type] + lappend already_done $this_mod + set sub_mods [lindex $res 1] + set no_inlines [lindex $res 2] + foreach mod $sub_mods { + set this_sub_mod [utils::snd $mod] + if {[lsearch -exact $already_done $this_sub_mod] == -1 && + [lsearch -exact $mods_to_process $this_sub_mod] == -1 } { + lappend mods_to_process $this_sub_mod + } + } + set is_noinline 1 + foreach mod $no_inlines { + set this_sub_mod [utils::snd $mod] + if {[lsearch -exact $already_done $this_sub_mod] == -1 && + [lsearch -exact $mods_to_process $this_sub_mod] == -1 } { + lappend mods_to_process $this_sub_mod + } + } + set is_noinline 0 +} + +# Procedure to locate a file for a given module +proc lookupfile {name path exts} { + foreach dir $path { + foreach ext $exts { + set fname [join [list $name $ext] "."] + set fpath [file join $dir $fname] + if {[file exist $fpath]} { + return [file normalize $fpath] + } + } + } + return "" +} + +# Procedure to add a file to a list, avoiding duplication +proc addfile {name flName} { + upvar 1 $flName file_list + + set matched 0 + foreach f $file_list { + if {$f == $name} { + set matched 1 + break + } + } + if {$matched == 0} { + lappend file_list $name + } +} + +# Identify the location of each module's file +set user_mods [list] +set noinline_fns [list] +set primitives [list] +set imported [list] + +set vdir [lindex [Bluetcl::flags show vdir] 1] +set bsdir $::env(BLUESPECDIR) + +set libs [list [file join $bsdir "Verilog"] [file join $bsdir "Libraries"]] +set vsearch [split [lindex [Bluetcl::flags show p] 1] ":"] +set vdir_and_libs [concat $vdir $libs] +set vsearch_and_libs [concat $vsearch $libs] + +foreach mod [array names mod_info] { + set mod_type $mod_info($mod) + + # The Probe primitive has no associated Verilog module + if {$mod_type == "primitive" && $mod == "Probe"} { + continue + } + + # Add the module info to the correct list + switch -exact $mod_type { + "user" {addfile [lookupfile $mod $vdir_and_libs {v}] user_mods} + "no-inline-fn" {addfile [lookupfile $mod $vdir {v}] noinline_fns} + "primitive" {addfile [lookupfile $mod $libs {v}] primitives} + "import" {addfile [lookupfile $mod $vsearch_and_libs {v vhd vhdl}] imported} + } + + # Some primitives use other primitives + if {$mod_type == "primitive"} { + switch -exact $mod { + "MakeReset" {addfile [lookupfile "SyncReset" $libs {v}] primitives} + "MakeResetA" {addfile [lookupfile "SyncResetA" $libs {v}] primitives} + "SyncFIFOLevel" {addfile [lookupfile "ClockGen" $libs {v}] primitives + addfile [lookupfile "SyncHandshake" $libs {v}] primitives + } + "SyncFIFO" {addfile [lookupfile "ClockGen" $libs {v}] primitives} + "SyncRegister " {addfile [lookupfile "ClockGen" $libs {v}] primitives + addfile [lookupfile "SyncHandshake" $libs {v}] primitives + } + } + } +} + +if {[llength $user_mods] > 0 && ([info exists OPT(-generated)] || [info exists OPT(-all)])} { + if {![info exists OPT(-q)]} { + puts "# Synthesized user modules:" + } + foreach file $user_mods { + puts $file + } +} + +if {[llength $noinline_fns] > 0 && ([info exists OPT(-no-inline-fns)] || [info exists OPT(-all)])} { + if {![info exists OPT(-q)]} { + puts "# No-inlined functions:" + } + foreach file $noinline_fns { + puts $file + } +} + +if {[llength $imported] > 0 && ([info exists OPT(-imported)] || [info exists OPT(-all)])} { + if {![info exists OPT(-q)]} { + puts "# Imported modules:" + } + foreach file $imported { + puts $file + } +} + +if {[llength $primitives] > 0 && ([info exists OPT(-primitives)] || [info exists OPT(-all)])} { + if {![info exists OPT(-q)]} { + puts "# Bluespec library primitives:" + } + foreach file $primitives { + puts $file + } +} + +exit \ No newline at end of file diff --git a/img/concat.drawio b/img/concat.drawio new file mode 100644 index 0000000..1d4939c --- /dev/null +++ b/img/concat.drawio @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..86169fa --- /dev/null +++ b/run.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +set -o errexit +set -o nounset +set -o xtrace + +BASH_PROFILE=$HOME/.bash_profile +if [ -f "$BASH_PROFILE" ]; then + source $BASH_PROFILE +fi + +TEST_DIR=`realpath ./test` +LOG_DIR=`realpath ./tmp` +ALL_LOG=$TEST_DIR/run.log + +mkdir -p $LOG_DIR + +make -j8 -f Makefile.test all TESTDIR=$TEST_DIR LOGDIR=$LOG_DIR +cat $LOG_DIR/*.log | tee $ALL_LOG + +FAIL_KEYWORKS='Error\|ImmAssert' +grep -w $FAIL_KEYWORKS $LOG_DIR/*.log | cat +ERR_NUM=`grep -c -w $FAIL_KEYWORKS $ALL_LOG | cat` +if [ $ERR_NUM -gt 0 ]; then + echo "FAIL" + false +else + echo "PASS" +fi diff --git a/run_one.sh b/run_one.sh new file mode 100755 index 0000000..f049c0d --- /dev/null +++ b/run_one.sh @@ -0,0 +1,37 @@ +#! /usr/bin/env bash + +set -o errexit +set -o nounset +set -o xtrace + +BASH_PROFILE=$HOME/.bash_profile +if [ -f "$BASH_PROFILE" ]; then + source $BASH_PROFILE +fi + +TEST_LOG=run.log +TEST_DIR=test +cd $TEST_DIR +truncate -s 0 $TEST_LOG +FILES=`ls TestDmaCore.bsv` +########################################################################### +for FILE in $FILES; do + # echo $FILE + TESTCASES=`grep -Phzo 'doc.*?\nmodule\s+\S+(?=\()' $FILE | xargs -0 -I {} echo "{}" | grep module | cut -d ' ' -f 2` + for TESTCASE in $TESTCASES; do + make -j8 TESTFILE=$FILE TOPMODULE=$TESTCASE 2>&1 | tee -a $TEST_LOG + done +########################################################################### +done +########################################################################### + +FAIL_KEYWORKS='Error\|ImmAssert' +grep -w $FAIL_KEYWORKS $TEST_LOG | cat +ERR_NUM=`grep -c -w $FAIL_KEYWORKS $TEST_LOG | cat` +if [ $ERR_NUM -gt 0 ]; then + echo "FAIL" + false +else + echo "PASS" +fi + diff --git a/setup.sh b/setup.sh new file mode 100755 index 0000000..137cded --- /dev/null +++ b/setup.sh @@ -0,0 +1,22 @@ +#! /bin/sh + +set -o errexit +set -o nounset +set -o xtrace + +rm -rf bsc-* +# wget https://github.com/B-Lang-org/bsc/releases/download/2022.01/bsc-2022.01-ubuntu-20.04.tar.gz +wget https://github.com/B-Lang-org/bsc/releases/download/2023.01/bsc-2023.01-ubuntu-22.04.tar.gz +tar zxf bsc-* + +BSC_FILE_NAME=`ls bsc-*.tar.gz` +BSC_DIR_NAME=`basename $BSC_FILE_NAME .tar.gz` +BLUESPEC_HOME=`realpath $BSC_DIR_NAME` + +BASH_PROFILE=$HOME/.bash_profile +touch $BASH_PROFILE +cat <> $BASH_PROFILE +# BSV required env +export BLUESPECDIR=$BLUESPEC_HOME/lib +export PATH=$PATH:$BLUESPEC_HOME/bin +EOF diff --git a/src/DmaRequestCore.bsv b/src/DmaRequestCore.bsv index e2220ab..71fce0b 100644 --- a/src/DmaRequestCore.bsv +++ b/src/DmaRequestCore.bsv @@ -1,18 +1,20 @@ import FIFOF::*; +import SemiFifo::*; import PcieTypes::*; import DmaTypes::*; -typedef 4096 BUS_BOUNDARY -typedef 12 BUS_BOUNDARY_WIDTH + +typedef 4096 BUS_BOUNDARY; +typedef 12 BUS_BOUNDARY_WIDTH; typedef struct { DmaRequestFrame dmaRequest; Maybe#(DmaMemAddr) firstChunkLen; } ChunkRequestFrame deriving(Bits, Eq); -Interface ChunkCompute; - FifoIn#(DmaRequestFrame) dmaRequests; - FifoOut#(DmaRequestFrame) chunkRequests; +interface ChunkCompute; + interface FifoIn#(DmaRequestFrame) dmaRequests; + interface FifoOut#(DmaRequestFrame) chunkRequests; endinterface module mkChunkComputer(ChunkCompute ifc); @@ -22,78 +24,80 @@ module mkChunkComputer(ChunkCompute ifc); Reg#(DmaMemAddr) newChunkPtrReg <- mkReg(0); Reg#(DmaMemAddr) totalLenRemainReg <- mkReg(0); - Reg#(Bool) isSplittigReg <- mkReg(False); + Reg#(Bool) isSplittingReg <- mkReg(False); function Bool hasBoundary(DmaRequestFrame request); - let highIdx = (request.startAddr + request.length) >> BUS_BOUNDARY_WIDTH; - let lowIdx = request.startAddr >> BUS_BOUNDARY_WIDTH; + let highIdx = (request.startAddr + request.length - 1) >> valueOf(BUS_BOUNDARY_WIDTH); + let lowIdx = request.startAddr >> valueOf(BUS_BOUNDARY_WIDTH); return (highIdx > lowIdx); endfunction function DmaMemAddr getOffset(DmaRequestFrame request); - DmaMemAddr offset = zeroExtend(fromInteger(valueOf(BUS_BOUNDARY)) - pack(request.startAddr[BUS_BOUNDARY_WIDTH-1:0])); - return offset; + // 4096 - startAddr % 4096 + Bit#(BUS_BOUNDARY_WIDTH) remainder = truncate(request.startAddr); + Bit#(BUS_BOUNDARY_WIDTH) offset = fromInteger(valueOf(BUS_BOUNDARY)-1) - zeroExtend(remainder) + 1; + return zeroExtend(offset); endfunction - rule getfirstChunkLen if(inputFifo.notEmpty && splitFifo.notFull); + rule getfirstChunkLen; let request = inputFifo.first; inputFifo.deq; - let offset = getOffSet(request); + let offset = getOffset(request); // firstChunkLen = offset % PCIE_TLP_BYTES - DmaMemAddr firstLen = zeroExtend(offset[valueOf(PCIE_TLP_BYTES_WIDTH)-1:0]); - ChunkRequestFrame splitRequest = { + Bit#(PCIE_TLP_BYTES_WIDTH) offsetModTlpBytes = truncate(offset); + DmaMemAddr firstLen = zeroExtend(offsetModTlpBytes); + splitFifo.enq(ChunkRequestFrame { dmaRequest: request, - firstChunkLen: hasBoundary(request) ? tagged Valid firstLen : tagged Invalid; - } - splitFifo.enq(splitRequest); - endrule + firstChunkLen: hasBoundary(request) ? tagged Valid firstLen : tagged Invalid + }); +endrule - rule execSplit if(splitFifo.notEmpty && outFifo.notFull); + rule execChunkSplit; let splitRequest = splitFifo.first; - if (isSplittingReg) begin - if (totalLenRemainReg <= PCIE_TLP_BYTES) begin - isSplittingReg <= False; - outputFifo.enq(DmaRequestFrame { - startAddr: newChunkPtrReg; - length: totalLenRemainReg; - }); - splitFifo.deq; - totalLenRemainReg <= 0; - end else begin - isSplittingReg <= True; - outputFifo.enq(DmaRequestFrame { - startAddr: newChunkPtrReg; - length: fromInteger(valueOf(PCIE_TLP_BYTES)); - }); - newChunkPtrReg <= newChunkPtrReg + fromInteger(valueOf(PCIE_TLP_BYTES)); - totalLenRemainReg <= totalLenRemainReg - PCIE_TLP_BYTES; - end - end else begin + if (isSplittingReg) begin // !isFirst + if (totalLenRemainReg <= fromInteger(valueOf(PCIE_TLP_BYTES))) begin + isSplittingReg <= False; + outputFifo.enq(DmaRequestFrame { + startAddr: newChunkPtrReg, + length: totalLenRemainReg + }); + splitFifo.deq; + totalLenRemainReg <= 0; + end else begin + isSplittingReg <= True; + outputFifo.enq(DmaRequestFrame { + startAddr: newChunkPtrReg, + length: fromInteger(valueOf(PCIE_TLP_BYTES)) + }); + newChunkPtrReg <= newChunkPtrReg + fromInteger(valueOf(PCIE_TLP_BYTES)); + totalLenRemainReg <= totalLenRemainReg - fromInteger(valueOf(PCIE_TLP_BYTES)); + end + end else begin // isFirst let remainderLength = splitRequest.dmaRequest.length - fromMaybe(0, splitRequest.firstChunkLen); if (isValid(splitRequest.firstChunkLen)) begin Bool isSplittingNextCycle = (remainderLength > 0); isSplittingReg <= isSplittingNextCycle; outputFifo.enq(DmaRequestFrame { - startAddr: splitRequest.dmaRequest.startAddr; - length: fromMaybe(0, splitRequest.firstChunkLen); + startAddr: splitRequest.dmaRequest.startAddr, + length: fromMaybe(0, splitRequest.firstChunkLen) }); - if (!isSplittingNextCycle) begin splitFifo.deq; end; - newChunkPtrReg <= splitRequest.dmaRequest + fromMaybe(0, splitRequest.firstChunkLen); + if (!isSplittingNextCycle) begin splitFifo.deq; end + newChunkPtrReg <= splitRequest.dmaRequest.startAddr + fromMaybe(0, splitRequest.firstChunkLen); totalLenRemainReg <= remainderLength; end else begin - Bool isSplittingNextCycle = (remainderLength > PCIE_TLP_BYTES); + Bool isSplittingNextCycle = (remainderLength > fromInteger(valueOf(PCIE_TLP_BYTES))); isSplittingReg <= isSplittingNextCycle; outputFifo.enq(DmaRequestFrame { - startAddr: splitRequest.dmaRequest.startAddr; - length: fromInteger(valueOf(PCIE_TLP_BYTES)); + startAddr: splitRequest.dmaRequest.startAddr, + length: fromInteger(valueOf(PCIE_TLP_BYTES)) }); if (!isSplittingNextCycle) begin splitFifo.deq; end - newChunkPtrReg <= newChunkPtrReg + fromInteger(valueOf(PCIE_TLP_BYTES)); - totalLenRemainReg <= remainderLength - PCIE_TLP_BYTES; + newChunkPtrReg <= splitRequest.dmaRequest.startAddr + fromInteger(valueOf(PCIE_TLP_BYTES)); + totalLenRemainReg <= remainderLength - fromInteger(valueOf(PCIE_TLP_BYTES)); end end endrule - interface dmaRequests = convertFifoToFifoOut(inputFifo); + interface dmaRequests = convertFifoToFifoIn(inputFifo); interface chunkRequests = convertFifoToFifoOut(outputFifo); endmodule \ No newline at end of file diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index 27c4035..b8a9b51 100644 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -12,7 +12,7 @@ typedef Bit#(DMA_CSR_DATA_WIDTH) DMACsrValue; typedef struct { Bit#(dataWidth) data; - Bit#(TDiv#(dataWidth, BYTE_BITS)) byteEn; + Bit#(TDiv#(dataWidth, BYTE_WIDTH)) byteEn; Bool isFirst; Bool isLast; } DataFrame#(numeric type dataWidth) deriving(Bits, Bounded, Eq, FShow); @@ -29,17 +29,17 @@ typedef struct { interface DmaController#(numeric type dataWidth); - interface FifoIn#(DataFrame#(dataWidth)) DmaDataC2HPipeIn; - interface FifoIn#(DmaRequestFrame) DmaCtrlC2HPipeIn; - interface FifoIn#(DmaRequestFrame) DmaCtrlH2CPipeIn; - interface FifoOut#(DataFrame#(dataWidth)) DmaDataH2CPipeOut; + interface FifoIn#(DataFrame#(dataWidth)) dataC2HPipeIn; + interface FifoIn#(DmaRequestFrame) reqC2HPipeIn; + interface FifoIn#(DmaRequestFrame) reqH2CPipeIn; + interface FifoOut#(DataFrame#(dataWidth)) dataH2CPipeOut; - interface FifoIn#(DmaCsrFrame) DmaCsrC2HPipeIn; - interface FifoOut#(DMACsrAddr) DmaCsrC2HPipeOut; - interface FifoOut#(DmaCsrFrame) DmaCsrH2CPipeOut; + interface FifoIn#(DmaCsrFrame) csrC2HPipeIn; + interface FifoOut#(DMACsrAddr) csrC2HPipeOut; // read reg in the card from Host + interface FifoOut#(DmaCsrFrame) csrH2CPipeOut; - interface RawPcieRequester PcieRequester; - interface RawPcieCompleter PcieCompleter; - interface RawPcieConfiguration PcieConfig; + interface RawPcieRequester pcieRequester; + interface RawPcieCompleter pcieCompleter; + interface RawPcieConfiguration pcieConfig; endinterface diff --git a/src/PcieTypes.bsv b/src/PcieTypes.bsv index d2fddd3..3fda845 100644 --- a/src/PcieTypes.bsv +++ b/src/PcieTypes.bsv @@ -1,10 +1,10 @@ import AxiStreamTypes :: *; -typedef 8 BYTE_WIDTH +typedef 8 BYTE_WIDTH; typedef TMul#(4, BYTE_WIDTH) DWORD_WIDTH; -typedef 512 PCIE_TLP_BYTES -typedef #TLog(PCIE_TLP_BYTES) PCIE_TLP_BYTES_WIDTH +typedef 512 PCIE_TLP_BYTES; +typedef TLog#(PCIE_TLP_BYTES) PCIE_TLP_BYTES_WIDTH; typedef 512 PCIE_TDATA_WIDTH; typedef 64 PCIE_TDATA_BYTES; @@ -40,7 +40,7 @@ typedef Bit#(PCIE_TPH_PRESENT_WIDTH) PcieTlpCtlTphPresent; typedef Bit#(PCIE_TPH_TYPE_WIDTH) PcieTlpCtlTphType; typedef Bit#(PCIE_TPH_STTAG) PcieTlpCtlTphSteeringTag; typedef Bit#(PCIE_TPH_INDIRECT_TAGEN_WIDTH) PcieTlpCtlTphIndirectTagEn; -typedef 64 PCIE_TLP_PARITY +typedef 64 PCIE_TLP_PARITY; typedef Bit#(PCIE_TLP_PARITY) PcieTlpCtlParity; typedef 4 PCIE_TLP_ADDR_OFFSET_WIDTH; typedef Bit#(PCIE_TLP_ADDR_OFFSET_WIDTH) PcieTlpCtlAddrOffset; @@ -102,7 +102,7 @@ typedef struct { // 137bit tUser of PcieRequesterRequeste AXIS-master typedef struct { PcieTlpCtlFirstByteEn firstByteEn; - PcieTlpCtlLastByteEn lastByteEn + PcieTlpCtlLastByteEn lastByteEn; PcieTlpCtlAddrOffset addrOffset; PcieTlpCtlIsSopCommon isSop; PcieTlpCtlIsEopCommon isEop; @@ -133,18 +133,18 @@ typedef Bit#(PCIE_CR_NP_REQ_COUNT_WIDTH) PcieNonPostedRequstCount; (*always_ready, always_enabled*) interface RawPcieCompleter; // TODO: the AxiStream in blue-wrapper has tDataWidth = tKeepWidth * BYTE_WIDTH, but the PCIe IP has tDataWidth = tKeepWidth * DWORD_WIDTH - (* prefix = "s_axis_cq_" *) interface RawAxiStreamSlave#(PCIE_TKEEP_WIDTH, PCIE_COMPLETER_REQUEST_TUSER_WIDTH) Request; + (* prefix = "s_axis_cq_" *) interface RawAxiStreamSlave#(PCIE_TKEEP_WIDTH, PCIE_COMPLETER_REQUEST_TUSER_WIDTH) request; // (* result = "pcie_cq_np_req" *) method PcieNonPostedRequst nonPostedReqCreditIncrement; // (* prefix = "" *) method Action nonPostedReqCreditCnt( // (* port = "pcie_cq_np_req_count" *) PcieNonPostedRequstCount ); - (* prefix = "m_axis_cc_" *) interface RawAxiStreamMaster#(PCIE_TKEEP_WIDTH, PCIE_COMPLETER_COMPLETE_TUSER_WIDTH) Complete; + (* prefix = "m_axis_cc_" *) interface RawAxiStreamMaster#(PCIE_TKEEP_WIDTH, PCIE_COMPLETER_COMPLETE_TUSER_WIDTH) complete; endinterface // Interface to PCIe IP Requester Interface (*always_ready, always_enabled*) interface RawPcieRequester; - (* prefix = "m_axis_rq_" *) interface RawAxiStreamMaster#(PCIE_TKEEP_WIDTH, usrWidth) Request; - (* prefix = "s_axis_rc_" *) interface RawAxiStreamSlave#(PCIE_TKEEP_WIDTH, usrWidth) Complete; + (* prefix = "m_axis_rq_" *) interface RawAxiStreamMaster#(PCIE_TKEEP_WIDTH, usrWidth) request; + (* prefix = "s_axis_rc_" *) interface RawAxiStreamSlave#(PCIE_TKEEP_WIDTH, usrWidth) complete; endinterface typedef 10 PCIE_CFG_MGMT_ADDR_WIDTH; @@ -157,74 +157,74 @@ typedef Bit#(PCIE_CFG_MGMT_FUNC_NUM_WIDTH) PcieCfgMgmtFuncNum; typedef Bit#(PCIE_CFG_MGMT_DATA_WIDTH) PCieCfgMgmtData; interface RawPcieConfiguration; - (* prefix = "cfg_mgmt_" *) interface RawPcieCfgMgmt; - (* prefix = "cfg_pm_" *) interface RawPcieCfgPm; - (* prefix = "cfg_msi_" *) interface RawPcieCfgMsi; - (* prefix = "cfg_interrupt_" *) interface RawPcieCfgInterrupt; - (* prefix = "cfg_" *) interface RawPcieCfgControl; - (* prefix = "cfg_fc_" *) interface RawPcieCfgFC; - (* prefix = "cfg_msg_transmit_" *) interface RawPcieCfgMsgTx; - (* prefix = "cfg_msg_received_" *) interface RawPcieCfgMsgRx; - (* prefix = "" *) interface RawPcieCfgStatus; - (* prefix = "pcie_tfc_" *) interface RawPcieCfgTransmitFC; + (* prefix = "cfg_mgmt_" *) interface RawPcieCfgMgmt mgmt; + (* prefix = "cfg_pm_" *) interface RawPcieCfgPm pm; + (* prefix = "cfg_msi_" *) interface RawPcieCfgMsi msi; + (* prefix = "cfg_interrupt_" *) interface RawPcieCfgInterrupt interrupt; + (* prefix = "cfg_" *) interface RawPcieCfgControl control; + (* prefix = "cfg_fc_" *) interface RawPcieCfgFC flowControl; + (* prefix = "cfg_msg_transmit_" *) interface RawPcieCfgMsgTx msgTx; + (* prefix = "cfg_msg_received_" *) interface RawPcieCfgMsgRx msgRx; + (* prefix = "" *) interface RawPcieCfgStatus status; + (* prefix = "pcie_tfc_" *) interface RawPcieCfgTransmitFC txFlowControl; endinterface (*always_ready, always_enabled*) interface RawPcieCfgMgmt; - (* result = addr *) method PcieCfgMgmtAddr cfgMgmtAddr; - (* result = byte_enable *) method PcieCfgMgmtByteEn cfgMgmtByteEn; - (* result = debug_access *) method Bool cfgMgmtDebugAccess; - (* result = function_number *) method PcieCfgMgmtFuncNum cfgMgmtFuncNum; - (* result = read *) method Bool cfgMgmtRead; - (* result = write_data *) method PCieCfgMgmtData cfgMgmtWriteData; - (* result = write *) method Bool cfgMgmtWrite; - (* prefix = "" *) method Action cfgMgmtReadData( + (* result = "addr" *) method PcieCfgMgmtAddr addr; + (* result = "byte_enable" *) method PcieCfgMgmtByteEn byteEn; + (* result = "debug_access" *) method Bool debugAccess; + (* result = "function_number" *) method PcieCfgMgmtFuncNum funcNum; + (* result = "read" *) method Bool read; + (* result = "write_data" *) method PCieCfgMgmtData writeData; + (* result = "write" *) method Bool write; + (* prefix = "" *) method Action readData( (* port = "read_data" *) PCieCfgMgmtData cfgMgmtRdData); - (* prefix = "" *) method Action cfgMgmtWriteDone( + (* prefix = "" *) method Action writeDone( (* port = "write_done" *) Bool cfgMgmtWrDone); endinterface (*always_ready, always_enabled*) -interface RawPcieCfgPm#(); +interface RawPcieCfgPm; endinterface (*always_ready, always_enabled*) -interface RawPcieCfgMsi#(); +interface RawPcieCfgMsi; endinterface (*always_ready, always_enabled*) -interface RawPcieCfgInterrupt#(); +interface RawPcieCfgInterrupt; endinterface (*always_ready, always_enabled*) -interface RawPcieCfgControl#(); +interface RawPcieCfgControl; endinterface (*always_ready, always_enabled*) -interface RawPcieCfgFC#(); +interface RawPcieCfgFC; endinterface (*always_ready, always_enabled*) -interface RawPcieCfgMsgTx#(); +interface RawPcieCfgMsgTx; endinterface (*always_ready, always_enabled*) -interface RawPcieCfgMsgRx#(); +interface RawPcieCfgMsgRx; endinterface (*always_ready, always_enabled*) -interface RawPcieCfgStatus#(); +interface RawPcieCfgStatus; endinterface (*always_ready, always_enabled*) -interface RawPcieCfgTransmitFC#(); +interface RawPcieCfgTransmitFC; endinterface \ No newline at end of file diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000..4667929 --- /dev/null +++ b/test/Makefile @@ -0,0 +1,22 @@ +include ../Makefile.base + +TESTFILE ?= TestDmaCore.bsv +TOPMODULE ?= mkChunkComputerTb + +SIMSCRIPT = $(BUILDDIR)/$(TOPMODULE).sh + +compile: + mkdir -p $(BUILDDIR) + bsc -elab -sim -verbose $(BLUESIMFLAGS) $(DEBUGFLAGS) $(DIRFLAGS) $(MISCFLAGS) $(RECOMPILEFLAGS) $(RUNTIMEFLAGS) $(SCHEDFLAGS) $(TRANSFLAGS) -g $(TOPMODULE) $(TESTFILE) + +link: compile + bsc -sim $(BLUESIMFLAGS) $(DIRFLAGS) $(RECOMPILEFLAGS) $(SCHEDFLAGS) $(TRANSFLAGS) -e $(TOPMODULE) -o $(SIMSCRIPT) + +simulate: link + $(SIMSCRIPT) + +clean: + rm -rf $(BUILDDIR) + +.PHONY: compile link simulate clean +.DEFAULT_GOAL := simulate \ No newline at end of file diff --git a/test/TestDmaCore.bsv b/test/TestDmaCore.bsv new file mode 100644 index 0000000..0698e83 --- /dev/null +++ b/test/TestDmaCore.bsv @@ -0,0 +1,78 @@ +import SemiFifo::*; +import Randomizable::*; +import DmaTypes::*; +import DmaRequestCore::*; + +typedef 50 TEST_NUM; +typedef 64'hFFFFFFFFFFFFFFFF MAX_ADDRESS; +typedef 32'hFFFFFFFF MAX_TEST_LENGTH; + +(* doc = "testcase" *) +module mkChunkComputerTb (Empty); + + ChunkCompute dut <- mkChunkComputer; + + Reg#(Bool) isInitReg <- mkReg(False); + Reg#(UInt#(32)) testCntReg <- mkReg(0); + Reg#(DmaMemAddr) lenRemainReg <- mkReg(0); + Reg#(DmaRequestFrame) testRequest <- mkRegU; + Randomize#(DmaMemAddr) startAddrRandomVal <- mkConstrainedRandomizer(0, fromInteger(valueOf(MAX_ADDRESS)-1)); + Randomize#(DmaMemAddr) lengthRandomVal <- mkConstrainedRandomizer(1, fromInteger(valueOf(MAX_TEST_LENGTH))); + + function Bool hasBoundary(DmaRequestFrame request); + let highIdx = (request.startAddr + request.length - 1) >> valueOf(BUS_BOUNDARY_WIDTH); + let lowIdx = request.startAddr >> valueOf(BUS_BOUNDARY_WIDTH); + return (highIdx > lowIdx); + endfunction + + function Action showRequest (DmaRequestFrame request); + return action + $display("startAddr: ", request.startAddr, " length: ", request.length); + endaction; + endfunction + + rule testInit if (!isInitReg); + startAddrRandomVal.cntrl.init; + lengthRandomVal.cntrl.init; + isInitReg <= True; + $display("Start Test of mkChunkComputerTb"); + endrule + + rule testInput if (isInitReg && lenRemainReg == 0); + DmaMemAddr testAddr <- startAddrRandomVal.next; + DmaMemAddr testLength <- lengthRandomVal.next; + let testEnd = testAddr + testLength - 1; + if (testEnd > testAddr && testEnd <= fromInteger(valueOf(MAX_ADDRESS))) begin + let request = DmaRequestFrame{ + startAddr: testAddr, + length: testLength + }; + lenRemainReg <= testLength; + dut.dmaRequests.enq(request); + showRequest(request); + end else begin + lenRemainReg <= 0; + end + endrule + + rule testOutput if (isInitReg && lenRemainReg > 0); + let newRequest = dut.chunkRequests.first; + dut.chunkRequests.deq; + if (hasBoundary(newRequest)) begin + $display("Error, has 4KB boundary!"); + showRequest(newRequest); + $finish(); + end else begin + // showRequest(newRequest); + let newRemain = lenRemainReg - newRequest.length; + lenRemainReg <= newRemain; + if(newRemain == 0) begin + testCntReg <= testCntReg + 1; + end + end + endrule + + rule testFinish ; + if (testCntReg == fromInteger(valueOf(TEST_NUM))) $finish(); + endrule +endmodule \ No newline at end of file From 89dfb11710fc0384d2719ddd56a6dcd6d405d206 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Mon, 8 Jul 2024 22:48:25 +0800 Subject: [PATCH 07/53] Dynamic TLP Max Payload Size --- src/DmaRequestCore.bsv | 82 ++++++++++++++++++++++++++++-------------- src/PcieTypes.bsv | 49 +++++++++++++++++-------- test/TestDmaCore.bsv | 5 ++- 3 files changed, 94 insertions(+), 42 deletions(-) diff --git a/src/DmaRequestCore.bsv b/src/DmaRequestCore.bsv index 71fce0b..b1782ae 100644 --- a/src/DmaRequestCore.bsv +++ b/src/DmaRequestCore.bsv @@ -1,27 +1,38 @@ import FIFOF::*; +import GetPut :: *; import SemiFifo::*; import PcieTypes::*; import DmaTypes::*; -typedef 4096 BUS_BOUNDARY; -typedef 12 BUS_BOUNDARY_WIDTH; +typedef 4096 BUS_BOUNDARY; +typedef TLog#(BUS_BOUNDARY) BUS_BOUNDARY_WIDTH; +typedef Bit#(BUS_BOUNDARY_WIDTH) PcieTlpMaxMaxSize; +typedef Bit#(TLog#(BUS_BOUNDARY_WIDTH)) PcieTlpSizeWidth; +typedef 128 DEFAULT_TLP_SIZE; +typedef TLog#(DEFAULT_TLP_SIZE) DEFAULT_TLP_SIZE_WIDTH; +typedef 3 PCIE_TLP_SIZE_SETTING_WIDTH; +typedef Bit#(PCIE_TLP_SIZE_SETTING_WIDTH) PcieTlpSizeSetting; +typedef enum {DMA_RX, DMA_TX} TRXDirection deriving(Bits, Eq); + typedef struct { DmaRequestFrame dmaRequest; - Maybe#(DmaMemAddr) firstChunkLen; + Maybe#(DmaMemAddr) firstChunkLenMaybe; } ChunkRequestFrame deriving(Bits, Eq); interface ChunkCompute; - interface FifoIn#(DmaRequestFrame) dmaRequests; + interface FifoIn#(DmaRequestFrame) dmaRequests; interface FifoOut#(DmaRequestFrame) chunkRequests; + interface Put#(PcieTlpSizeSetting) setTlpMaxSize; endinterface -module mkChunkComputer(ChunkCompute ifc); +module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); FIFOF#(DmaRequestFrame) inputFifo <- mkFIFOF; FIFOF#(DmaRequestFrame) outputFifo <- mkFIFOF; FIFOF#(ChunkRequestFrame) splitFifo <- mkFIFOF; - + Reg#(DmaMemAddr) tlpMaxSize <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE))); //MPS if isTX, MRRS else + Reg#(PcieTlpSizeWidth) tlpMaxSizeWidth <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH))); Reg#(DmaMemAddr) newChunkPtrReg <- mkReg(0); Reg#(DmaMemAddr) totalLenRemainReg <- mkReg(0); Reg#(Bool) isSplittingReg <- mkReg(False); @@ -35,7 +46,7 @@ module mkChunkComputer(ChunkCompute ifc); function DmaMemAddr getOffset(DmaRequestFrame request); // 4096 - startAddr % 4096 Bit#(BUS_BOUNDARY_WIDTH) remainder = truncate(request.startAddr); - Bit#(BUS_BOUNDARY_WIDTH) offset = fromInteger(valueOf(BUS_BOUNDARY)-1) - zeroExtend(remainder) + 1; + Bit#(BUS_BOUNDARY_WIDTH) offset = fromInteger(valueOf(BUS_BOUNDARY) - 1) - zeroExtend(remainder) + 1; return zeroExtend(offset); endfunction @@ -44,18 +55,17 @@ module mkChunkComputer(ChunkCompute ifc); inputFifo.deq; let offset = getOffset(request); // firstChunkLen = offset % PCIE_TLP_BYTES - Bit#(PCIE_TLP_BYTES_WIDTH) offsetModTlpBytes = truncate(offset); - DmaMemAddr firstLen = zeroExtend(offsetModTlpBytes); + DmaMemAddr firstLen = zeroExtend(PcieTlpMaxMaxSize'(offset[tlpMaxSizeWidth-1:0])); splitFifo.enq(ChunkRequestFrame { dmaRequest: request, - firstChunkLen: hasBoundary(request) ? tagged Valid firstLen : tagged Invalid + firstChunkLenMaybe: hasBoundary(request) ? tagged Valid firstLen : tagged Invalid }); endrule rule execChunkSplit; let splitRequest = splitFifo.first; if (isSplittingReg) begin // !isFirst - if (totalLenRemainReg <= fromInteger(valueOf(PCIE_TLP_BYTES))) begin + if (totalLenRemainReg <= tlpMaxSize) begin isSplittingReg <= False; outputFifo.enq(DmaRequestFrame { startAddr: newChunkPtrReg, @@ -63,41 +73,59 @@ endrule }); splitFifo.deq; totalLenRemainReg <= 0; - end else begin + end + else begin isSplittingReg <= True; outputFifo.enq(DmaRequestFrame { startAddr: newChunkPtrReg, - length: fromInteger(valueOf(PCIE_TLP_BYTES)) + length: tlpMaxSize }); - newChunkPtrReg <= newChunkPtrReg + fromInteger(valueOf(PCIE_TLP_BYTES)); - totalLenRemainReg <= totalLenRemainReg - fromInteger(valueOf(PCIE_TLP_BYTES)); + newChunkPtrReg <= newChunkPtrReg + tlpMaxSize; + totalLenRemainReg <= totalLenRemainReg - tlpMaxSize; end - end else begin // isFirst - let remainderLength = splitRequest.dmaRequest.length - fromMaybe(0, splitRequest.firstChunkLen); - if (isValid(splitRequest.firstChunkLen)) begin + end + else begin // isFirst + let remainderLength = splitRequest.dmaRequest.length - fromMaybe(0, splitRequest.firstChunkLenMaybe); + if (isValid(splitRequest.firstChunkLenMaybe)) begin Bool isSplittingNextCycle = (remainderLength > 0); isSplittingReg <= isSplittingNextCycle; outputFifo.enq(DmaRequestFrame { startAddr: splitRequest.dmaRequest.startAddr, - length: fromMaybe(0, splitRequest.firstChunkLen) + length: fromMaybe(0, splitRequest.firstChunkLenMaybe) }); - if (!isSplittingNextCycle) begin splitFifo.deq; end - newChunkPtrReg <= splitRequest.dmaRequest.startAddr + fromMaybe(0, splitRequest.firstChunkLen); + if (!isSplittingNextCycle) begin + splitFifo.deq; + end + newChunkPtrReg <= splitRequest.dmaRequest.startAddr + fromMaybe(0, splitRequest.firstChunkLenMaybe); totalLenRemainReg <= remainderLength; - end else begin - Bool isSplittingNextCycle = (remainderLength > fromInteger(valueOf(PCIE_TLP_BYTES))); + end + else begin + Bool isSplittingNextCycle = (remainderLength > tlpMaxSize); isSplittingReg <= isSplittingNextCycle; outputFifo.enq(DmaRequestFrame { startAddr: splitRequest.dmaRequest.startAddr, - length: fromInteger(valueOf(PCIE_TLP_BYTES)) + length: tlpMaxSize }); - if (!isSplittingNextCycle) begin splitFifo.deq; end - newChunkPtrReg <= splitRequest.dmaRequest.startAddr + fromInteger(valueOf(PCIE_TLP_BYTES)); - totalLenRemainReg <= remainderLength - fromInteger(valueOf(PCIE_TLP_BYTES)); + if (!isSplittingNextCycle) begin + splitFifo.deq; + end + newChunkPtrReg <= splitRequest.dmaRequest.startAddr + tlpMaxSize; + totalLenRemainReg <= remainderLength - tlpMaxSize; end end endrule interface dmaRequests = convertFifoToFifoIn(inputFifo); interface chunkRequests = convertFifoToFifoOut(outputFifo); + + interface Put setTlpMaxSize; + method Action put (PcieTlpSizeSetting tlpSizeSetting); + let setting = tlpSizeSetting; + setting[valueOf(PCIE_TLP_SIZE_SETTING_WIDTH)-1] = (direction == DMA_TX) ? 0 : setting[valueOf(PCIE_TLP_SIZE_SETTING_WIDTH)-1]; + DmaMemAddr defaultTlpMaxSize = fromInteger(valueOf(DEFAULT_TLP_SIZE)); + tlpMaxSize <= DmaMemAddr'(defaultTlpMaxSize << setting); + PcieTlpSizeWidth defaultTlpMaxSizeWidth = fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH)); + tlpMaxSizeWidth <= PcieTlpSizeWidth'(defaultTlpMaxSizeWidth << setting); + endmethod + endinterface endmodule \ No newline at end of file diff --git a/src/PcieTypes.bsv b/src/PcieTypes.bsv index 3fda845..3e780b8 100644 --- a/src/PcieTypes.bsv +++ b/src/PcieTypes.bsv @@ -156,19 +156,6 @@ typedef Bit#(PCIE_CFG_MGMT_BE_WIDTH) PcieCfgMgmtByteEn; typedef Bit#(PCIE_CFG_MGMT_FUNC_NUM_WIDTH) PcieCfgMgmtFuncNum; typedef Bit#(PCIE_CFG_MGMT_DATA_WIDTH) PCieCfgMgmtData; -interface RawPcieConfiguration; - (* prefix = "cfg_mgmt_" *) interface RawPcieCfgMgmt mgmt; - (* prefix = "cfg_pm_" *) interface RawPcieCfgPm pm; - (* prefix = "cfg_msi_" *) interface RawPcieCfgMsi msi; - (* prefix = "cfg_interrupt_" *) interface RawPcieCfgInterrupt interrupt; - (* prefix = "cfg_" *) interface RawPcieCfgControl control; - (* prefix = "cfg_fc_" *) interface RawPcieCfgFC flowControl; - (* prefix = "cfg_msg_transmit_" *) interface RawPcieCfgMsgTx msgTx; - (* prefix = "cfg_msg_received_" *) interface RawPcieCfgMsgRx msgRx; - (* prefix = "" *) interface RawPcieCfgStatus status; - (* prefix = "pcie_tfc_" *) interface RawPcieCfgTransmitFC txFlowControl; -endinterface - (*always_ready, always_enabled*) interface RawPcieCfgMgmt; (* result = "addr" *) method PcieCfgMgmtAddr addr; @@ -219,12 +206,46 @@ interface RawPcieCfgMsgRx; endinterface +typedef 1 PCIE_CFG_PHY_LINK_DOWN_WIDTH; +typedef 2 PCIE_CFG_PHY_LINK_STATUS_WIDTH; +typedef Bit#(PCIE_CFG_PHY_LINK_DOWN_WIDTH) PcieCfgPhyLinkDown; +typedef Bit#(PCIE_CFG_PHY_LINK_STATUS_WIDTH) PcieCfgPhyLinkStatus; +typedef 3 PCIE_CFG_NEGOTIATED_WIDTH_WIDTH; +typedef 3 PCIE_CFG_CURRENT_SPEED_WIDTH; +typedef 2 PCIE_CFG_MAX_PAYLOAD_WIDTH; +typedef 3 PCIE_CFG_MAX_READ_REQ_WIDTH; +typedef Bit#(PCIE_CFG_NEGOTIATED_WIDTH_WIDTH) PcieCfgNegotiatedWidth; +typedef Bit#(PCIE_CFG_CURRENT_SPEED_WIDTH) PCieCfgCurrentSpeed; +typedef Bit#(PCIE_CFG_MAX_PAYLOAD_WIDTH) PcieCfgMaxPayloadSize; +typedef Bit#(PCIE_CFG_MAX_READ_REQ_WIDTH) PCieCfgMaxReadReqSize; +typedef 16 PCIE_FUNCTIONS_STATUS_WIDTH; +typedef Bit#(PCIE_FUNCTIONS_STATUS_WIDTH) PcieCfgFunctionStatus; + (*always_ready, always_enabled*) interface RawPcieCfgStatus; - + (* result = "phy_link_down" *) method PcieCfgPhyLinkDown phyLinkDown; + (* result = "phy_link_status" *) method PcieCfgPhyLinkStatus phyLinkStatus; + (* result = "negotiated_width" *) method PcieCfgNegotiatedWidth negotiatedWidth; + (* result = "current_speed" *) method PCieCfgCurrentSpeed currentSpeed; + (* result = "max_payload" *) method PcieCfgMaxPayloadSize maxPayloadSize; + (* result = "max_read_req" *) method PCieCfgMaxReadReqSize maxReadReqSize; + (* result = "function_status" *) method PcieCfgFunctionStatus functionStatus; endinterface (*always_ready, always_enabled*) interface RawPcieCfgTransmitFC; +endinterface + +interface RawPcieConfiguration; + (* prefix = "cfg_mgmt_" *) interface RawPcieCfgMgmt mgmt; + (* prefix = "cfg_pm_" *) interface RawPcieCfgPm pm; + (* prefix = "cfg_msi_" *) interface RawPcieCfgMsi msi; + (* prefix = "cfg_interrupt_" *) interface RawPcieCfgInterrupt interrupt; + (* prefix = "cfg_" *) interface RawPcieCfgControl control; + (* prefix = "cfg_fc_" *) interface RawPcieCfgFC flowControl; + (* prefix = "cfg_msg_transmit_" *) interface RawPcieCfgMsgTx msgTx; + (* prefix = "cfg_msg_received_" *) interface RawPcieCfgMsgRx msgRx; + (* prefix = "" *) interface RawPcieCfgStatus status; + (* prefix = "pcie_tfc_" *) interface RawPcieCfgTransmitFC txFlowControl; endinterface \ No newline at end of file diff --git a/test/TestDmaCore.bsv b/test/TestDmaCore.bsv index 0698e83..0f0c2cf 100644 --- a/test/TestDmaCore.bsv +++ b/test/TestDmaCore.bsv @@ -1,4 +1,5 @@ import SemiFifo::*; +import GetPut::*; import Randomizable::*; import DmaTypes::*; import DmaRequestCore::*; @@ -6,11 +7,12 @@ import DmaRequestCore::*; typedef 50 TEST_NUM; typedef 64'hFFFFFFFFFFFFFFFF MAX_ADDRESS; typedef 32'hFFFFFFFF MAX_TEST_LENGTH; +typedef 2'b10 TLP_SIZE_512_SETTING; (* doc = "testcase" *) module mkChunkComputerTb (Empty); - ChunkCompute dut <- mkChunkComputer; + ChunkCompute dut <- mkChunkComputer(DMA_TX); Reg#(Bool) isInitReg <- mkReg(False); Reg#(UInt#(32)) testCntReg <- mkReg(0); @@ -35,6 +37,7 @@ module mkChunkComputerTb (Empty); startAddrRandomVal.cntrl.init; lengthRandomVal.cntrl.init; isInitReg <= True; + dut.setTlpMaxSize.put(fromInteger(valueOf(TLP_SIZE_512_SETTING))); $display("Start Test of mkChunkComputerTb"); endrule From 85baea72b7df8241a955d9e2791eb149d3d71253 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Mon, 8 Jul 2024 23:21:48 +0800 Subject: [PATCH 08/53] Dynamic TLP Max Payload Size --- src/DmaRequestCore.bsv | 8 ++++---- test/TestDmaCore.bsv | 6 ++++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/DmaRequestCore.bsv b/src/DmaRequestCore.bsv index b1782ae..193cda7 100644 --- a/src/DmaRequestCore.bsv +++ b/src/DmaRequestCore.bsv @@ -6,11 +6,11 @@ import DmaTypes::*; typedef 4096 BUS_BOUNDARY; -typedef TLog#(BUS_BOUNDARY) BUS_BOUNDARY_WIDTH; +typedef TLog#(TAdd#(1, BUS_BOUNDARY)) BUS_BOUNDARY_WIDTH; typedef Bit#(BUS_BOUNDARY_WIDTH) PcieTlpMaxMaxSize; typedef Bit#(TLog#(BUS_BOUNDARY_WIDTH)) PcieTlpSizeWidth; typedef 128 DEFAULT_TLP_SIZE; -typedef TLog#(DEFAULT_TLP_SIZE) DEFAULT_TLP_SIZE_WIDTH; +typedef TLog#(TAdd#(1, DEFAULT_TLP_SIZE)) DEFAULT_TLP_SIZE_WIDTH; typedef 3 PCIE_TLP_SIZE_SETTING_WIDTH; typedef Bit#(PCIE_TLP_SIZE_SETTING_WIDTH) PcieTlpSizeSetting; typedef enum {DMA_RX, DMA_TX} TRXDirection deriving(Bits, Eq); @@ -60,7 +60,7 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); dmaRequest: request, firstChunkLenMaybe: hasBoundary(request) ? tagged Valid firstLen : tagged Invalid }); -endrule + endrule rule execChunkSplit; let splitRequest = splitFifo.first; @@ -125,7 +125,7 @@ endrule DmaMemAddr defaultTlpMaxSize = fromInteger(valueOf(DEFAULT_TLP_SIZE)); tlpMaxSize <= DmaMemAddr'(defaultTlpMaxSize << setting); PcieTlpSizeWidth defaultTlpMaxSizeWidth = fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH)); - tlpMaxSizeWidth <= PcieTlpSizeWidth'(defaultTlpMaxSizeWidth << setting); + tlpMaxSizeWidth <= PcieTlpSizeWidth'(defaultTlpMaxSizeWidth + zeroExtend(setting)); endmethod endinterface endmodule \ No newline at end of file diff --git a/test/TestDmaCore.bsv b/test/TestDmaCore.bsv index 0f0c2cf..48eb260 100644 --- a/test/TestDmaCore.bsv +++ b/test/TestDmaCore.bsv @@ -53,7 +53,8 @@ module mkChunkComputerTb (Empty); lenRemainReg <= testLength; dut.dmaRequests.enq(request); showRequest(request); - end else begin + end + else begin lenRemainReg <= 0; end endrule @@ -65,7 +66,8 @@ module mkChunkComputerTb (Empty); $display("Error, has 4KB boundary!"); showRequest(newRequest); $finish(); - end else begin + end + else begin // showRequest(newRequest); let newRemain = lenRemainReg - newRequest.length; lenRemainReg <= newRemain; From 00a6386b8e54c84ecd9b389e5a136eb69bd0949a Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Tue, 9 Jul 2024 16:55:01 +0800 Subject: [PATCH 09/53] Test various MPS settings and verify timing --- src/DmaRequestCore.bsv | 60 +++++++++++++++--------------------------- src/DmaTypes.bsv | 20 +++++++------- src/PcieTypes.bsv | 5 ++-- src/StreamUtils.bsv | 47 +++++++++++++++++++++++++++++++++ test/TestDmaCore.bsv | 33 ++++++++++++++++------- 5 files changed, 103 insertions(+), 62 deletions(-) create mode 100644 src/StreamUtils.bsv diff --git a/src/DmaRequestCore.bsv b/src/DmaRequestCore.bsv index 193cda7..e102bce 100644 --- a/src/DmaRequestCore.bsv +++ b/src/DmaRequestCore.bsv @@ -6,19 +6,17 @@ import DmaTypes::*; typedef 4096 BUS_BOUNDARY; -typedef TLog#(TAdd#(1, BUS_BOUNDARY)) BUS_BOUNDARY_WIDTH; -typedef Bit#(BUS_BOUNDARY_WIDTH) PcieTlpMaxMaxSize; +typedef TAdd#(1, TLog#(BUS_BOUNDARY)) BUS_BOUNDARY_WIDTH; +typedef Bit#(BUS_BOUNDARY_WIDTH) PcieTlpMaxMaxPayloadSize; typedef Bit#(TLog#(BUS_BOUNDARY_WIDTH)) PcieTlpSizeWidth; typedef 128 DEFAULT_TLP_SIZE; -typedef TLog#(TAdd#(1, DEFAULT_TLP_SIZE)) DEFAULT_TLP_SIZE_WIDTH; +typedef TAdd#(1, TLog#(DEFAULT_TLP_SIZE)) DEFAULT_TLP_SIZE_WIDTH; typedef 3 PCIE_TLP_SIZE_SETTING_WIDTH; typedef Bit#(PCIE_TLP_SIZE_SETTING_WIDTH) PcieTlpSizeSetting; -typedef enum {DMA_RX, DMA_TX} TRXDirection deriving(Bits, Eq); - typedef struct { DmaRequestFrame dmaRequest; - Maybe#(DmaMemAddr) firstChunkLenMaybe; + DmaMemAddr firstChunkLen; } ChunkRequestFrame deriving(Bits, Eq); interface ChunkCompute; @@ -44,21 +42,19 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); endfunction function DmaMemAddr getOffset(DmaRequestFrame request); - // 4096 - startAddr % 4096 - Bit#(BUS_BOUNDARY_WIDTH) remainder = truncate(request.startAddr); - Bit#(BUS_BOUNDARY_WIDTH) offset = fromInteger(valueOf(BUS_BOUNDARY) - 1) - zeroExtend(remainder) + 1; - return zeroExtend(offset); + // MPS - startAddr % MPS, MPS means MRRS when the module is set to RX mode + DmaMemAddr remainderOfMps = zeroExtend(PcieTlpMaxMaxPayloadSize'(request.startAddr[tlpMaxSizeWidth-1:0])); + DmaMemAddr offsetOfMps = tlpMaxSize - remainderOfMps; + return offsetOfMps; endfunction rule getfirstChunkLen; let request = inputFifo.first; inputFifo.deq; let offset = getOffset(request); - // firstChunkLen = offset % PCIE_TLP_BYTES - DmaMemAddr firstLen = zeroExtend(PcieTlpMaxMaxSize'(offset[tlpMaxSizeWidth-1:0])); splitFifo.enq(ChunkRequestFrame { dmaRequest: request, - firstChunkLenMaybe: hasBoundary(request) ? tagged Valid firstLen : tagged Invalid + firstChunkLen: hasBoundary(request) ? offset : tlpMaxSize }); endrule @@ -85,33 +81,18 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); end end else begin // isFirst - let remainderLength = splitRequest.dmaRequest.length - fromMaybe(0, splitRequest.firstChunkLenMaybe); - if (isValid(splitRequest.firstChunkLenMaybe)) begin - Bool isSplittingNextCycle = (remainderLength > 0); - isSplittingReg <= isSplittingNextCycle; - outputFifo.enq(DmaRequestFrame { - startAddr: splitRequest.dmaRequest.startAddr, - length: fromMaybe(0, splitRequest.firstChunkLenMaybe) - }); - if (!isSplittingNextCycle) begin - splitFifo.deq; - end - newChunkPtrReg <= splitRequest.dmaRequest.startAddr + fromMaybe(0, splitRequest.firstChunkLenMaybe); - totalLenRemainReg <= remainderLength; - end - else begin - Bool isSplittingNextCycle = (remainderLength > tlpMaxSize); - isSplittingReg <= isSplittingNextCycle; - outputFifo.enq(DmaRequestFrame { - startAddr: splitRequest.dmaRequest.startAddr, - length: tlpMaxSize - }); - if (!isSplittingNextCycle) begin - splitFifo.deq; - end - newChunkPtrReg <= splitRequest.dmaRequest.startAddr + tlpMaxSize; - totalLenRemainReg <= remainderLength - tlpMaxSize; + let remainderLength = splitRequest.dmaRequest.length - splitRequest.firstChunkLen; + Bool isSplittingNextCycle = (remainderLength > 0); + isSplittingReg <= isSplittingNextCycle; + outputFifo.enq(DmaRequestFrame { + startAddr: splitRequest.dmaRequest.startAddr, + length: splitRequest.firstChunkLen + }); + if (!isSplittingNextCycle) begin + splitFifo.deq; end + newChunkPtrReg <= splitRequest.dmaRequest.startAddr + splitRequest.firstChunkLen; + totalLenRemainReg <= remainderLength; end endrule @@ -128,4 +109,5 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); tlpMaxSizeWidth <= PcieTlpSizeWidth'(defaultTlpMaxSizeWidth + zeroExtend(setting)); endmethod endinterface + endmodule \ No newline at end of file diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index b8a9b51..120c9ef 100644 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -1,6 +1,7 @@ -import SemiFifo :: *; -import PcieTypes :: *; +import SemiFifo::*; +import StreamUtils::*; +import PcieTypes::*; typedef 512 DMA_DATA_WIDTH; typedef 64 DMA_HOSTMEM_ADDR_WIDTH; @@ -10,13 +11,6 @@ typedef Bit#(DMA_HOSTMEM_ADDR_WIDTH) DmaMemAddr; typedef Bit#(DMA_CSR_ADDR_WIDTH) DMACsrAddr; typedef Bit#(DMA_CSR_DATA_WIDTH) DMACsrValue; -typedef struct { - Bit#(dataWidth) data; - Bit#(TDiv#(dataWidth, BYTE_WIDTH)) byteEn; - Bool isFirst; - Bool isLast; -} DataFrame#(numeric type dataWidth) deriving(Bits, Bounded, Eq, FShow); - typedef struct { DmaMemAddr startAddr; DmaMemAddr length; @@ -27,12 +21,16 @@ typedef struct { DMACsrValue value; } DmaCsrFrame deriving(Bits, Bounded, Eq, FShow); +typedef enum { + DMA_RX, DMA_TX +} TRXDirection deriving(Bits, Eq); + interface DmaController#(numeric type dataWidth); - interface FifoIn#(DataFrame#(dataWidth)) dataC2HPipeIn; + interface FifoIn#(DataStream#(dataWidth)) dataC2HPipeIn; interface FifoIn#(DmaRequestFrame) reqC2HPipeIn; interface FifoIn#(DmaRequestFrame) reqH2CPipeIn; - interface FifoOut#(DataFrame#(dataWidth)) dataH2CPipeOut; + interface FifoOut#(DataStream#(dataWidth)) dataH2CPipeOut; interface FifoIn#(DmaCsrFrame) csrC2HPipeIn; interface FifoOut#(DMACsrAddr) csrC2HPipeOut; // read reg in the card from Host diff --git a/src/PcieTypes.bsv b/src/PcieTypes.bsv index 3e780b8..af09da9 100644 --- a/src/PcieTypes.bsv +++ b/src/PcieTypes.bsv @@ -1,7 +1,6 @@ -import AxiStreamTypes :: *; +import AxiStreamTypes::*; +import StreamUtils::*; -typedef 8 BYTE_WIDTH; -typedef TMul#(4, BYTE_WIDTH) DWORD_WIDTH; typedef 512 PCIE_TLP_BYTES; typedef TLog#(PCIE_TLP_BYTES) PCIE_TLP_BYTES_WIDTH; diff --git a/src/StreamUtils.bsv b/src/StreamUtils.bsv new file mode 100644 index 0000000..a8fc721 --- /dev/null +++ b/src/StreamUtils.bsv @@ -0,0 +1,47 @@ +import Vector::*; +import FIFOF::*; +import SemiFifo::*; + +typedef 8 BYTE_WIDTH; +typedef TMul#(4, BYTE_WIDTH) DWORD_WIDTH; + +typedef 2 CONCAT_STREAM_NUM; + +typedef struct { + Bit#(dataWidth) data; + Bit#(TDiv#(dataWidth, BYTE_WIDTH)) byteEn; + Bool isFirst; + Bool isLast; +} DataStream#(numeric type dataWidth) deriving(Bits, Bounded, Eq, FShow); + +interface StreamConcat#(numeric type dataWidth); + interface FifoIn#(DataStream#(dataWidth)) inputStreamFirst; + interface FifoIn#(DataStream#(dataWidth)) inputStreamSecond; + interface FifoOut#(DataStream#(dataWidth)) outputStream; +endinterface + +module mkStreamConcat (StreamConcat#(dataWidth) ifc); + + FIFOF#(DataStream#(dataWidth)) firstInputFifo <- mkFIFOF; + FIFOF#(DataStream#(dataWidth)) secondInputFifo <- mkFIFOF; + FIFOF#(DataStream#(dataWidth)) outputFifo <- mkFIFOF; + + Vector#(TMul#(CONCAT_STREAM_NUM, dataWidth), Reg#(Bit#(1))) concatDataReg <- replicateM(mkReg(0)); + Vector#(TDiv#(TMul#(CONCAT_STREAM_NUM, dataWidth), BYTE_WIDTH), Reg#(Bit#(1))) concatByteEnReg <- replicateM(mkReg(0)); + + Reg#(DataStream#(dataWidth)) firstStreamReg <- mkRegU; + Reg#(DataStream#(dataWidth)) secondStreamReg <- mkRegU; + + + rule readStreamFirst; + let stream = firstInputFifo.first; + // concatDataReg[valueOf(dataWidth)-1:0] <= stream.data; + // concatByteEnReg[valueOf(dataWidth)-1:0] <= stream.byteEn; + endrule + + interface inputStreamFirst = convertFifoToFifoIn(firstInputFifo); + interface inputStreamSecond = convertFifoToFifoIn(secondInputFifo); + interface outputStream = convertFifoToFifoOut(outputFifo); + + +endmodule \ No newline at end of file diff --git a/test/TestDmaCore.bsv b/test/TestDmaCore.bsv index 48eb260..6bdbe6b 100644 --- a/test/TestDmaCore.bsv +++ b/test/TestDmaCore.bsv @@ -4,10 +4,12 @@ import Randomizable::*; import DmaTypes::*; import DmaRequestCore::*; -typedef 50 TEST_NUM; +typedef 1000 CHUNK_PER_EPOCH_TEST_NUM; typedef 64'hFFFFFFFFFFFFFFFF MAX_ADDRESS; typedef 32'hFFFFFFFF MAX_TEST_LENGTH; -typedef 2'b10 TLP_SIZE_512_SETTING; +typedef 2'b00 DEFAULT_TLP_SIZE_SETTING; +typedef 4 CHUNK_TX_TEST_SETTING_NUM; +typedef 6 CHUNK_RX_TEST_SETTING_NUM; (* doc = "testcase" *) module mkChunkComputerTb (Empty); @@ -16,6 +18,7 @@ module mkChunkComputerTb (Empty); Reg#(Bool) isInitReg <- mkReg(False); Reg#(UInt#(32)) testCntReg <- mkReg(0); + Reg#(UInt#(32)) epochCntReg <- mkReg(0); Reg#(DmaMemAddr) lenRemainReg <- mkReg(0); Reg#(DmaRequestFrame) testRequest <- mkRegU; Randomize#(DmaMemAddr) startAddrRandomVal <- mkConstrainedRandomizer(0, fromInteger(valueOf(MAX_ADDRESS)-1)); @@ -37,8 +40,9 @@ module mkChunkComputerTb (Empty); startAddrRandomVal.cntrl.init; lengthRandomVal.cntrl.init; isInitReg <= True; - dut.setTlpMaxSize.put(fromInteger(valueOf(TLP_SIZE_512_SETTING))); + dut.setTlpMaxSize.put(fromInteger(valueOf(DEFAULT_TLP_SIZE_SETTING))); $display("Start Test of mkChunkComputerTb"); + $display("INFO: Set Max Payload Size to ", valueOf(DEFAULT_TLP_SIZE)); endrule rule testInput if (isInitReg && lenRemainReg == 0); @@ -68,16 +72,27 @@ module mkChunkComputerTb (Empty); $finish(); end else begin - // showRequest(newRequest); let newRemain = lenRemainReg - newRequest.length; lenRemainReg <= newRemain; - if(newRemain == 0) begin - testCntReg <= testCntReg + 1; + if (newRemain == 0) begin + if (epochCntReg < fromInteger(valueOf(CHUNK_PER_EPOCH_TEST_NUM)-1)) begin + epochCntReg <= epochCntReg + 1; + end + else begin + epochCntReg <= 0; + testCntReg <= testCntReg + 1; + if (testCntReg == fromInteger(valueOf(CHUNK_TX_TEST_SETTING_NUM)-1)) begin + $display("INFO: ChunkComputer Test End."); + $finish(); + end + else begin + PcieTlpSizeSetting newSetting = fromInteger(valueOf(DEFAULT_TLP_SIZE_SETTING)) + truncate(pack(testCntReg)) + 1; + dut.setTlpMaxSize.put(newSetting); + $display("INFO: Set Max Payload Size to ", pack(fromInteger(valueOf(DEFAULT_TLP_SIZE)) << newSetting)); + end + end end end endrule - rule testFinish ; - if (testCntReg == fromInteger(valueOf(TEST_NUM))) $finish(); - endrule endmodule \ No newline at end of file From 5e6b0092dbd6ea97a0475c07313141660fc5e376 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Wed, 10 Jul 2024 19:58:18 +0800 Subject: [PATCH 10/53] Add StreamUtils::StreamConcat --- img/concat.drawio | 96 +++++++++++++++++++++++--- src/DmaTypes.bsv | 4 +- src/StreamUtils.bsv | 162 ++++++++++++++++++++++++++++++++++++++------ 3 files changed, 230 insertions(+), 32 deletions(-) diff --git a/img/concat.drawio b/img/concat.drawio index 1d4939c..607cf6a 100644 --- a/img/concat.drawio +++ b/img/concat.drawio @@ -1,20 +1,98 @@ - + - - + + - - + + - - + + + + + + + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index 120c9ef..8d9da30 100644 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -27,10 +27,10 @@ typedef enum { interface DmaController#(numeric type dataWidth); - interface FifoIn#(DataStream#(dataWidth)) dataC2HPipeIn; + interface FifoIn#(DataStream) dataC2HPipeIn; interface FifoIn#(DmaRequestFrame) reqC2HPipeIn; interface FifoIn#(DmaRequestFrame) reqH2CPipeIn; - interface FifoOut#(DataStream#(dataWidth)) dataH2CPipeOut; + interface FifoOut#(DataStream) dataH2CPipeOut; interface FifoIn#(DmaCsrFrame) csrC2HPipeIn; interface FifoOut#(DMACsrAddr) csrC2HPipeOut; // read reg in the card from Host diff --git a/src/StreamUtils.bsv b/src/StreamUtils.bsv index a8fc721..3bf86db 100644 --- a/src/StreamUtils.bsv +++ b/src/StreamUtils.bsv @@ -3,44 +3,164 @@ import FIFOF::*; import SemiFifo::*; typedef 8 BYTE_WIDTH; +typedef TAdd#(1, TLog#(BYTE_WIDTH)) BYTE_WIDTH_WIDTH; typedef TMul#(4, BYTE_WIDTH) DWORD_WIDTH; typedef 2 CONCAT_STREAM_NUM; +typedef 512 DATA_WIDTH; +typedef TDiv#(DATA_WIDTH, BYTE_WIDTH) BYTE_EN_WIDTH; + +typedef Bit#(DATA_WIDTH) Data; +typedef Bit#(BYTE_EN_WIDTH) ByteEn; +typedef Bit#(TAdd#(1, TLog#(DATA_WIDTH))) BitPtr; +typedef Bit#(TAdd#(1, TLog#(BYTE_EN_WIDTH))) BytePtr; + typedef struct { - Bit#(dataWidth) data; - Bit#(TDiv#(dataWidth, BYTE_WIDTH)) byteEn; + Data data; + ByteEn byteEn; Bool isFirst; Bool isLast; -} DataStream#(numeric type dataWidth) deriving(Bits, Bounded, Eq, FShow); +} DataStream deriving(Bits, Bounded, Eq, FShow); -interface StreamConcat#(numeric type dataWidth); - interface FifoIn#(DataStream#(dataWidth)) inputStreamFirst; - interface FifoIn#(DataStream#(dataWidth)) inputStreamSecond; - interface FifoOut#(DataStream#(dataWidth)) outputStream; +interface StreamConcat; + interface FifoIn#(DataStream) inputStreamFirst; + interface FifoIn#(DataStream) inputStreamSecond; + interface FifoOut#(DataStream) outputStream; endinterface -module mkStreamConcat (StreamConcat#(dataWidth) ifc); +module mkStreamConcat (StreamConcat ifc); + + FIFOF#(DataStream) inputFifoA <- mkFIFOF; + FIFOF#(DataStream) inputFifoB <- mkFIFOF; + FIFOF#(DataStream) outputFifo <- mkFIFOF; + + FIFOF#(DataStream) prepareFifoA <- mkFIFOF; + FIFOF#(DataStream) prepareFifoB <- mkFIFOF; - FIFOF#(DataStream#(dataWidth)) firstInputFifo <- mkFIFOF; - FIFOF#(DataStream#(dataWidth)) secondInputFifo <- mkFIFOF; - FIFOF#(DataStream#(dataWidth)) outputFifo <- mkFIFOF; + Reg#(BytePtr) bytePtrRegA <- mkReg(0); + Reg#(BytePtr) bytePtrRegB <- mkReg(0); + Reg#(BytePtr) remainBytePtrReg <- mkReg(0); + Reg#(Bool) hasRemainReg <- mkReg(False); + Reg#(DataStream) remainStreamReg <- mkRegU; - Vector#(TMul#(CONCAT_STREAM_NUM, dataWidth), Reg#(Bit#(1))) concatDataReg <- replicateM(mkReg(0)); - Vector#(TDiv#(TMul#(CONCAT_STREAM_NUM, dataWidth), BYTE_WIDTH), Reg#(Bit#(1))) concatByteEnReg <- replicateM(mkReg(0)); + DataStream emptyStream = DataStream{ + data: 0, + byteEn: 0, + isFirst: False, + isLast: False + }; - Reg#(DataStream#(dataWidth)) firstStreamReg <- mkRegU; - Reg#(DataStream#(dataWidth)) secondStreamReg <- mkRegU; + BytePtr maxBytePtr = fromInteger(valueOf(BYTE_EN_WIDTH)); + BitPtr maxBitPtr = fromInteger(valueOf(DATA_WIDTH)); + function BytePtr getByteConcatPtr (ByteEn byteEn); + ByteEn byteEnTemp = byteEn; + BytePtr ptr = 0; + while (byteEnTemp > 0) begin + byteEnTemp = byteEnTemp >> 1; + ptr = ptr + 1; + end + return ptr; + endfunction + + function Tuple3#(DataStream, DataStream, BytePtr) getConcatStream + (DataStream streamA, DataStream streamB, BytePtr bytePtrA, BytePtr bytePtrB); + Bool isCallLegally = (streamA.isLast && bytePtrA < maxBytePtr && bytePtrA > 0); + BitPtr bitPtrA = zeroExtend(bytePtrA) << fromInteger(valueOf(BYTE_WIDTH_WIDTH)); + // Fill the low PtrA bytes by streamA data + Data concatDataA = streamA.data; + ByteEn concatByteEnA = streamA.byteEn; + // Fill the high bytes by streamB data + Data concatDataB = streamB.data << bitPtrA; + ByteEn concatByteEnB = streamB.byteEn << bitPtrA; + Data concatData = concatDataA & concatDataB; + ByteEn concatByteEn = concatByteEnA & concatByteEnB; + // Get the remain bytes of streamB data + BitPtr resBitPtr = maxBitPtr - bitPtrA; + BytePtr resBytePtr = maxBytePtr - bytePtrA; + Data remainData = streamB.data >> resBitPtr; + ByteEn remainByteEn = streamB.byteEn >> resBytePtr; + // Get if the concat frame is the last + Bool isConcatStreamLast = streamB.isLast; + BytePtr remainBytePtr = 0; + if (resBytePtr < bytePtrB ) begin + isConcatStreamLast = False; + remainBytePtr = bytePtrB - resBytePtr; + end + DataStream concatStream = emptyStream; + DataStream remainStream = emptyStream; + // package the return concatStream and remainStream + if(isCallLegally) begin + concatStream = DataStream{ + data: concatData, + byteEn: concatByteEn, + isFirst: False, + isLast: isConcatStreamLast + }; + remainStream = DataStream{ + data: remainData, + byteEn: remainByteEn, + isFirst: False, + isLast: True + }; + end + return tuple3(concatStream, remainStream, remainBytePtr); + endfunction + + rule prepareStream; + let streamA = inputFifoA.first; + let streamB = inputFifoB.first; + inputFifoA.deq; + inputFifoB.deq; + prepareFifoA.enq(streamA); + prepareFifoB.enq(streamB); + bytePtrRegA <= streamA.isLast ? getByteConcatPtr(streamA.byteEn) : bytePtrRegA; + bytePtrRegB <= streamB.isLast ? getByteConcatPtr(streamB.byteEn) : maxBytePtr; + endrule - rule readStreamFirst; - let stream = firstInputFifo.first; - // concatDataReg[valueOf(dataWidth)-1:0] <= stream.data; - // concatByteEnReg[valueOf(dataWidth)-1:0] <= stream.byteEn; + rule concatStream; + let streamA = prepareFifoA.first; + let streamB = prepareFifoB.first; + // Only StreamA + if (!hasRemainReg && !streamA.isLast && streamB.isFirst) begin + outputFifo.enq(streamA); + prepareFifoA.deq; + end + // the last StreamA + the first StreamB + else if (!hasRemainReg && streamA.isLast && streamB.isFirst) begin + match{.concatStream, .remainStream, .remainBytePtr} = getConcatStream(streamA, streamB, bytePtrRegA, bytePtrRegB); + Bool hasRemain = unpack(remainStream.byteEn[0]); + hasRemainReg <= hasRemain; + remainStreamReg <= remainStream; + remainBytePtrReg <= remainBytePtr; + if (concatStream.byteEn[0] == 1) begin + outputFifo.enq(concatStream); + end + prepareFifoA.deq; + prepareFifoB.deq; + end + // streamB + the remain data + else if (hasRemainReg && !streamB.isFirst) begin + match{.concatStream, .remainStream, .remainBytePtr} = getConcatStream(remainStreamReg, streamB, remainBytePtrReg, bytePtrRegB); + Bool hasRemain = unpack(remainStream.byteEn[0]); + hasRemainReg <= hasRemain; + remainStreamReg <= remainStream; + remainBytePtrReg <= remainBytePtr; + if (concatStream.byteEn[0] == 1) begin + outputFifo.enq(concatStream); + end + prepareFifoB.deq; + end + // Only the remain data + else if (hasRemainReg) begin + outputFifo.enq(remainStreamReg); + hasRemainReg <= False; + end endrule - interface inputStreamFirst = convertFifoToFifoIn(firstInputFifo); - interface inputStreamSecond = convertFifoToFifoIn(secondInputFifo); + interface inputStreamFirst = convertFifoToFifoIn(inputFifoA); + interface inputStreamSecond = convertFifoToFifoIn(inputFifoB); interface outputStream = convertFifoToFifoOut(outputFifo); From 75ebadb05f016435d7357734b284f6bf9b4b6e4d Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Thu, 11 Jul 2024 02:20:42 +0800 Subject: [PATCH 11/53] a simple test: --- LICENSE | 0 Makefile.test | 2 + README.md | 0 run_one.sh | 2 +- src/DmaController.bsv | 0 src/DmaRequestCore.bsv | 0 src/DmaTypes.bsv | 0 src/PcieTypes.bsv | 0 src/StreamUtils.bsv | 20 ++++++---- test/Makefile | 0 test/TestAxiStream.bsv | 0 test/TestDmaCore.bsv | 2 +- test/TestStreamUtils.bsv | 80 ++++++++++++++++++++++++++++++++++++++++ 13 files changed, 97 insertions(+), 9 deletions(-) mode change 100644 => 100755 LICENSE mode change 100644 => 100755 README.md mode change 100644 => 100755 src/DmaController.bsv mode change 100644 => 100755 src/DmaRequestCore.bsv mode change 100644 => 100755 src/DmaTypes.bsv mode change 100644 => 100755 src/PcieTypes.bsv mode change 100644 => 100755 src/StreamUtils.bsv mode change 100644 => 100755 test/Makefile mode change 100644 => 100755 test/TestAxiStream.bsv mode change 100644 => 100755 test/TestDmaCore.bsv create mode 100755 test/TestStreamUtils.bsv diff --git a/LICENSE b/LICENSE old mode 100644 new mode 100755 diff --git a/Makefile.test b/Makefile.test index 0be9d4e..e60c219 100755 --- a/Makefile.test +++ b/Makefile.test @@ -2,8 +2,10 @@ TESTDIR ?= $(abspath ../test) LOGDIR ?= $(abspath ../tmp) TESTBENCHS = \ + TestStreamUtils.bsv \ TestDmaCore.bsv +TestStreamUtils = mkStreamConcatTb TestDmaCore.bsv = mkChunkComputerTb all: $(TESTBENCHS) diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/run_one.sh b/run_one.sh index f049c0d..ad1eac1 100755 --- a/run_one.sh +++ b/run_one.sh @@ -13,7 +13,7 @@ TEST_LOG=run.log TEST_DIR=test cd $TEST_DIR truncate -s 0 $TEST_LOG -FILES=`ls TestDmaCore.bsv` +FILES=`ls TestStreamUtils.bsv` ########################################################################### for FILE in $FILES; do # echo $FILE diff --git a/src/DmaController.bsv b/src/DmaController.bsv old mode 100644 new mode 100755 diff --git a/src/DmaRequestCore.bsv b/src/DmaRequestCore.bsv old mode 100644 new mode 100755 diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv old mode 100644 new mode 100755 diff --git a/src/PcieTypes.bsv b/src/PcieTypes.bsv old mode 100644 new mode 100755 diff --git a/src/StreamUtils.bsv b/src/StreamUtils.bsv old mode 100644 new mode 100755 index 3bf86db..0b609a5 --- a/src/StreamUtils.bsv +++ b/src/StreamUtils.bsv @@ -3,7 +3,7 @@ import FIFOF::*; import SemiFifo::*; typedef 8 BYTE_WIDTH; -typedef TAdd#(1, TLog#(BYTE_WIDTH)) BYTE_WIDTH_WIDTH; +typedef TLog#(BYTE_WIDTH) BYTE_WIDTH_WIDTH; typedef TMul#(4, BYTE_WIDTH) DWORD_WIDTH; typedef 2 CONCAT_STREAM_NUM; @@ -64,23 +64,28 @@ module mkStreamConcat (StreamConcat ifc); return ptr; endfunction - function Tuple3#(DataStream, DataStream, BytePtr) getConcatStream - (DataStream streamA, DataStream streamB, BytePtr bytePtrA, BytePtr bytePtrB); + function Tuple3#(DataStream, DataStream, BytePtr) getConcatStream ( + DataStream streamA, DataStream streamB, BytePtr bytePtrA, BytePtr bytePtrB + ); Bool isCallLegally = (streamA.isLast && bytePtrA < maxBytePtr && bytePtrA > 0); BitPtr bitPtrA = zeroExtend(bytePtrA) << fromInteger(valueOf(BYTE_WIDTH_WIDTH)); + // Fill the low PtrA bytes by streamA data Data concatDataA = streamA.data; ByteEn concatByteEnA = streamA.byteEn; + // Fill the high bytes by streamB data Data concatDataB = streamB.data << bitPtrA; - ByteEn concatByteEnB = streamB.byteEn << bitPtrA; - Data concatData = concatDataA & concatDataB; - ByteEn concatByteEn = concatByteEnA & concatByteEnB; + ByteEn concatByteEnB = streamB.byteEn << bytePtrA; + Data concatData = concatDataA | concatDataB; + ByteEn concatByteEn = concatByteEnA | concatByteEnB; + // Get the remain bytes of streamB data BitPtr resBitPtr = maxBitPtr - bitPtrA; BytePtr resBytePtr = maxBytePtr - bytePtrA; Data remainData = streamB.data >> resBitPtr; ByteEn remainByteEn = streamB.byteEn >> resBytePtr; + // Get if the concat frame is the last Bool isConcatStreamLast = streamB.isLast; BytePtr remainBytePtr = 0; @@ -90,6 +95,7 @@ module mkStreamConcat (StreamConcat ifc); end DataStream concatStream = emptyStream; DataStream remainStream = emptyStream; + // package the return concatStream and remainStream if(isCallLegally) begin concatStream = DataStream{ @@ -129,6 +135,7 @@ module mkStreamConcat (StreamConcat ifc); end // the last StreamA + the first StreamB else if (!hasRemainReg && streamA.isLast && streamB.isFirst) begin + $display(bytePtrRegA); match{.concatStream, .remainStream, .remainBytePtr} = getConcatStream(streamA, streamB, bytePtrRegA, bytePtrRegB); Bool hasRemain = unpack(remainStream.byteEn[0]); hasRemainReg <= hasRemain; @@ -163,5 +170,4 @@ module mkStreamConcat (StreamConcat ifc); interface inputStreamSecond = convertFifoToFifoIn(inputFifoB); interface outputStream = convertFifoToFifoOut(outputFifo); - endmodule \ No newline at end of file diff --git a/test/Makefile b/test/Makefile old mode 100644 new mode 100755 diff --git a/test/TestAxiStream.bsv b/test/TestAxiStream.bsv old mode 100644 new mode 100755 diff --git a/test/TestDmaCore.bsv b/test/TestDmaCore.bsv old mode 100644 new mode 100755 index 6bdbe6b..582d27f --- a/test/TestDmaCore.bsv +++ b/test/TestDmaCore.bsv @@ -12,7 +12,7 @@ typedef 4 CHUNK_TX_TEST_SETTING_NUM; typedef 6 CHUNK_RX_TEST_SETTING_NUM; (* doc = "testcase" *) -module mkChunkComputerTb (Empty); +module mkChunkComputerTb(Empty); ChunkCompute dut <- mkChunkComputer(DMA_TX); diff --git a/test/TestStreamUtils.bsv b/test/TestStreamUtils.bsv new file mode 100755 index 0000000..5e716e1 --- /dev/null +++ b/test/TestStreamUtils.bsv @@ -0,0 +1,80 @@ +import SemiFifo::*; +import Randomizable::*; +import StreamUtils::*; + +typedef UInt#(32) StreamSize; +typedef 'hABABAB PSUEDO_DATA; +typedef 'hFFFF MAX_STREAM_SIZE; +typedef 'h1 MIN_STREAM_SIZE; +typedef 50 TEST_NUM; +typedef 'hFFFFFFFFFFFFFFFF MAX_BYTE_EN; + + +function Action showDataStream (DataStream stream); + return action + $display("Data = %b", stream.data); + $display("byteEn = %b", stream.byteEn); + $display("isFirst = %b, isLast = %b", stream.isFirst, stream.isLast); + endaction; +endfunction + +(* doc = "testcase" *) +module mkStreamConcatTb(Empty); + + StreamConcat dut <- mkStreamConcat; + + Randomize#(StreamSize) streamASizeRandomValue <- mkConstrainedRandomizer(MIN_STREAM_SIZE, MAX_STREAM_SIZE); + Randomize#(StreamSize) streamBSizeRandomValue <- mkConstrainedRandomizer(MIN_STREAM_SIZE, MAX_STREAM_SIZE); + + Reg#(StreamSize) streamASizeReg <- mkReg(0); + Reg#(StreamSize) streamBSizeReg <- mkReg(0); + Reg#(StreamSize) stramAframeCntReg <- mkReg(0); + Reg#(StreamSize) stramBframeCntReg <- mkReg(0); + + Reg#(Bool) isInitReg <- mkReg(False); + Reg#(UInt#(32)) testCntReg <- mkReg(0); + + DataStream testStream = DataStream{ + data: 'b1010101010101010, + byteEn: 'b11, + isFirst: True, + isLast: True + }; + + function DataStream generatePsuedoStream (StreamSize size, Bool isFirst); + if (size < BYTE_EN_WIDTH) begin + return DataStream{ + data: PSUEDO_DATA, + byteEn: (1 << size) - 1, + isFirst: isFirst, + isLast: True + }; + end + else begin + return DataStream{ + data: PSUEDO_DATA, + byteEn: MAX_BYTE_EN, + isFirst: isFirst, + isLast: False + }; + end + endfunction + + rule testInit if (!isInitReg); + $display("INFO: start StreamConcatTb!"); + streamSizeRandomValue.cntrl.init; + isInitReg <= True; + endrule + + rule testInput if (isInitReg && testCntReg < TEST_NUM); + dut.inputStreamFirst.enq(testStreamA); + dut.inputStreamSecond.enq(testStreamA); + testCntReg <= testCntReg + 1; + endrule + + rule testOutput; + let outStream = dut.outputStream.first; + showDataStream(outStream); + dut.outputStream.deq; + endrule +endmodule \ No newline at end of file From 1b763352b5440571243cd5221a405cda4ce328a3 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Thu, 11 Jul 2024 02:29:33 +0800 Subject: [PATCH 12/53] a simple test --- src/StreamUtils.bsv | 2 +- test/TestStreamUtils.bsv | 21 +++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/StreamUtils.bsv b/src/StreamUtils.bsv index 0b609a5..20531ef 100755 --- a/src/StreamUtils.bsv +++ b/src/StreamUtils.bsv @@ -101,7 +101,7 @@ module mkStreamConcat (StreamConcat ifc); concatStream = DataStream{ data: concatData, byteEn: concatByteEn, - isFirst: False, + isFirst: streamA.isLast, isLast: isConcatStreamLast }; remainStream = DataStream{ diff --git a/test/TestStreamUtils.bsv b/test/TestStreamUtils.bsv index 5e716e1..f970eb2 100755 --- a/test/TestStreamUtils.bsv +++ b/test/TestStreamUtils.bsv @@ -23,8 +23,8 @@ module mkStreamConcatTb(Empty); StreamConcat dut <- mkStreamConcat; - Randomize#(StreamSize) streamASizeRandomValue <- mkConstrainedRandomizer(MIN_STREAM_SIZE, MAX_STREAM_SIZE); - Randomize#(StreamSize) streamBSizeRandomValue <- mkConstrainedRandomizer(MIN_STREAM_SIZE, MAX_STREAM_SIZE); + Randomize#(StreamSize) streamASizeRandomValue <- mkConstrainedRandomizer(fromInteger(valueOf(MIN_STREAM_SIZE)), fromInteger(valueOf(MAX_STREAM_SIZE))); + Randomize#(StreamSize) streamBSizeRandomValue <- mkConstrainedRandomizer(fromInteger(valueOf(MIN_STREAM_SIZE)), fromInteger(valueOf(MAX_STREAM_SIZE))); Reg#(StreamSize) streamASizeReg <- mkReg(0); Reg#(StreamSize) streamBSizeReg <- mkReg(0); @@ -42,9 +42,9 @@ module mkStreamConcatTb(Empty); }; function DataStream generatePsuedoStream (StreamSize size, Bool isFirst); - if (size < BYTE_EN_WIDTH) begin + if (size < fromInteger(valueOf(BYTE_EN_WIDTH))) begin return DataStream{ - data: PSUEDO_DATA, + data: fromInteger(valueOf(PSUEDO_DATA)), byteEn: (1 << size) - 1, isFirst: isFirst, isLast: True @@ -52,8 +52,8 @@ module mkStreamConcatTb(Empty); end else begin return DataStream{ - data: PSUEDO_DATA, - byteEn: MAX_BYTE_EN, + data: fromInteger(valueOf(PSUEDO_DATA)), + byteEn: fromInteger(valueOf(MAX_BYTE_EN)), isFirst: isFirst, isLast: False }; @@ -62,13 +62,14 @@ module mkStreamConcatTb(Empty); rule testInit if (!isInitReg); $display("INFO: start StreamConcatTb!"); - streamSizeRandomValue.cntrl.init; + streamASizeRandomValue.cntrl.init; + streamBSizeRandomValue.cntrl.init; isInitReg <= True; endrule - rule testInput if (isInitReg && testCntReg < TEST_NUM); - dut.inputStreamFirst.enq(testStreamA); - dut.inputStreamSecond.enq(testStreamA); + rule testInput if (isInitReg && testCntReg < fromInteger(valueOf(TEST_NUM))); + dut.inputStreamFirst.enq(testStream); + dut.inputStreamSecond.enq(testStream); testCntReg <= testCntReg + 1; endrule From ebc614f695097041deccaaec6c4723c22af37630 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Thu, 11 Jul 2024 19:03:51 +0800 Subject: [PATCH 13/53] Test Pass --- src/StreamUtils.bsv | 137 +++++++++++++++++++++++++-------------- test/TestStreamUtils.bsv | 135 +++++++++++++++++++++++++------------- 2 files changed, 178 insertions(+), 94 deletions(-) diff --git a/src/StreamUtils.bsv b/src/StreamUtils.bsv index 20531ef..9288219 100755 --- a/src/StreamUtils.bsv +++ b/src/StreamUtils.bsv @@ -23,20 +23,43 @@ typedef struct { Bool isLast; } DataStream deriving(Bits, Bounded, Eq, FShow); +typedef struct { + DataStream stream; + BytePtr bytePtr; +} StreamWithPtr deriving(Bits, Bounded, Eq, FShow); + interface StreamConcat; interface FifoIn#(DataStream) inputStreamFirst; interface FifoIn#(DataStream) inputStreamSecond; interface FifoOut#(DataStream) outputStream; endinterface +function BytePtr convertByteEn2BytePtr (ByteEn byteEn); + ByteEn byteEnTemp = byteEn; + BytePtr ptr = 0; + while (byteEnTemp > 0) begin + byteEnTemp = byteEnTemp >> 1; + ptr = ptr + 1; + end + return ptr; +endfunction + +function Action showDataStream (DataStream stream); + return action + $display(" Data = %h", stream.data); + $display(" byteEn = %b", stream.byteEn); + $display(" isFirst = %b, isLast = %b", stream.isFirst, stream.isLast); + endaction; +endfunction + module mkStreamConcat (StreamConcat ifc); FIFOF#(DataStream) inputFifoA <- mkFIFOF; FIFOF#(DataStream) inputFifoB <- mkFIFOF; FIFOF#(DataStream) outputFifo <- mkFIFOF; - FIFOF#(DataStream) prepareFifoA <- mkFIFOF; - FIFOF#(DataStream) prepareFifoB <- mkFIFOF; + FIFOF#(StreamWithPtr) prepareFifoA <- mkFIFOF; + FIFOF#(StreamWithPtr) prepareFifoB <- mkFIFOF; Reg#(BytePtr) bytePtrRegA <- mkReg(0); Reg#(BytePtr) bytePtrRegB <- mkReg(0); @@ -47,27 +70,15 @@ module mkStreamConcat (StreamConcat ifc); DataStream emptyStream = DataStream{ data: 0, byteEn: 0, - isFirst: False, - isLast: False + isFirst: True, + isLast: True }; BytePtr maxBytePtr = fromInteger(valueOf(BYTE_EN_WIDTH)); BitPtr maxBitPtr = fromInteger(valueOf(DATA_WIDTH)); - function BytePtr getByteConcatPtr (ByteEn byteEn); - ByteEn byteEnTemp = byteEn; - BytePtr ptr = 0; - while (byteEnTemp > 0) begin - byteEnTemp = byteEnTemp >> 1; - ptr = ptr + 1; - end - return ptr; - endfunction - - function Tuple3#(DataStream, DataStream, BytePtr) getConcatStream ( - DataStream streamA, DataStream streamB, BytePtr bytePtrA, BytePtr bytePtrB - ); - Bool isCallLegally = (streamA.isLast && bytePtrA < maxBytePtr && bytePtrA > 0); + function Tuple3#(DataStream, DataStream, BytePtr) getConcatStream (DataStream streamA, DataStream streamB, BytePtr bytePtrA, BytePtr bytePtrB); + Bool isCallLegally = (streamA.isLast && bytePtrA <= maxBytePtr && bytePtrA > 0); BitPtr bitPtrA = zeroExtend(bytePtrA) << fromInteger(valueOf(BYTE_WIDTH_WIDTH)); // Fill the low PtrA bytes by streamA data @@ -114,51 +125,77 @@ module mkStreamConcat (StreamConcat ifc); return tuple3(concatStream, remainStream, remainBytePtr); endfunction - rule prepareStream; + rule prepareStreamA; let streamA = inputFifoA.first; - let streamB = inputFifoB.first; inputFifoA.deq; + BytePtr bytePtr = convertByteEn2BytePtr(streamA.byteEn); + prepareFifoA.enq(StreamWithPtr { + stream: streamA, + bytePtr: bytePtr + }); + endrule + + rule prepareStreamB; + let streamB = inputFifoB.first; inputFifoB.deq; - prepareFifoA.enq(streamA); - prepareFifoB.enq(streamB); - bytePtrRegA <= streamA.isLast ? getByteConcatPtr(streamA.byteEn) : bytePtrRegA; - bytePtrRegB <= streamB.isLast ? getByteConcatPtr(streamB.byteEn) : maxBytePtr; + BytePtr bytePtr = convertByteEn2BytePtr(streamB.byteEn); + prepareFifoB.enq(StreamWithPtr { + stream: streamB, + bytePtr: bytePtr + }); endrule rule concatStream; - let streamA = prepareFifoA.first; - let streamB = prepareFifoB.first; - // Only StreamA - if (!hasRemainReg && !streamA.isLast && streamB.isFirst) begin - outputFifo.enq(streamA); - prepareFifoA.deq; - end - // the last StreamA + the first StreamB - else if (!hasRemainReg && streamA.isLast && streamB.isFirst) begin - $display(bytePtrRegA); - match{.concatStream, .remainStream, .remainBytePtr} = getConcatStream(streamA, streamB, bytePtrRegA, bytePtrRegB); - Bool hasRemain = unpack(remainStream.byteEn[0]); - hasRemainReg <= hasRemain; - remainStreamReg <= remainStream; - remainBytePtrReg <= remainBytePtr; - if (concatStream.byteEn[0] == 1) begin + // StreamA or StreamA + first StreamB + if (prepareFifoA.notEmpty && prepareFifoB.notEmpty && !hasRemainReg) begin + let streamA = prepareFifoA.first.stream; + let streamB = prepareFifoB.first.stream; + let bytePtrA = prepareFifoA.first.bytePtr; + let bytePtrB = prepareFifoB.first.bytePtr; + // Only StreamA frame + if (!streamA.isLast && streamB.isFirst) begin + outputFifo.enq(streamA); + prepareFifoA.deq; + end + // the last StreamA + the first StreamB + else if (streamA.isLast && streamB.isFirst) begin + match{.concatStream, .remainStream, .remainBytePtr} = getConcatStream(streamA, streamB, bytePtrA, bytePtrB); + Bool hasRemain = unpack(remainStream.byteEn[0]); + hasRemainReg <= hasRemain; + remainStreamReg <= remainStream; + remainBytePtrReg <= remainBytePtr; outputFifo.enq(concatStream); + prepareFifoA.deq; + prepareFifoB.deq; end - prepareFifoA.deq; - prepareFifoB.deq; end + // streamB + the remain data - else if (hasRemainReg && !streamB.isFirst) begin - match{.concatStream, .remainStream, .remainBytePtr} = getConcatStream(remainStreamReg, streamB, remainBytePtrReg, bytePtrRegB); - Bool hasRemain = unpack(remainStream.byteEn[0]); - hasRemainReg <= hasRemain; - remainStreamReg <= remainStream; - remainBytePtrReg <= remainBytePtr; - if (concatStream.byteEn[0] == 1) begin + else if (prepareFifoB.notEmpty && hasRemainReg) begin + let streamB = prepareFifoB.first.stream; + let bytePtrB = prepareFifoB.first.bytePtr; + if (!streamB.isFirst) begin + match{.concatStream, .remainStream, .remainBytePtr} = getConcatStream(remainStreamReg, streamB, remainBytePtrReg, bytePtrB); + Bool hasRemain = unpack(remainStream.byteEn[0]); + hasRemainReg <= hasRemain; + remainStreamReg <= remainStream; + remainBytePtrReg <= remainBytePtr; + if (concatStream.byteEn == 0) begin + $display("B + remain", remainBytePtrReg, bytePtrB); + $display("StreamB"); + showDataStream(streamB); + $display("remain"); + showDataStream(remainStreamReg); + end outputFifo.enq(concatStream); + prepareFifoB.deq; + end + else begin + outputFifo.enq(remainStreamReg); + hasRemainReg <= False; end - prepareFifoB.deq; end + // Only the remain data else if (hasRemainReg) begin outputFifo.enq(remainStreamReg); diff --git a/test/TestStreamUtils.bsv b/test/TestStreamUtils.bsv index f970eb2..ca723be 100755 --- a/test/TestStreamUtils.bsv +++ b/test/TestStreamUtils.bsv @@ -1,22 +1,24 @@ +import FIFOF::*; import SemiFifo::*; import Randomizable::*; import StreamUtils::*; typedef UInt#(32) StreamSize; -typedef 'hABABAB PSUEDO_DATA; -typedef 'hFFFF MAX_STREAM_SIZE; -typedef 'h1 MIN_STREAM_SIZE; -typedef 50 TEST_NUM; typedef 'hFFFFFFFFFFFFFFFF MAX_BYTE_EN; +typedef 'hAB PSEUDO_DATA; +typedef 8 PSEUDO_DATA_WIDTH; + +typedef 'h1 MIN_STREAM_SIZE; + +// TEST HYPER PARAMETERS CASE 1 +// typedef 100 MAX_STREAM_SIZE; +// typedef 10 TEST_NUM; + +// TEST HYPER PARAMETERS CASE 2 +typedef 'hFFFF MAX_STREAM_SIZE; +typedef 1000 TEST_NUM; -function Action showDataStream (DataStream stream); - return action - $display("Data = %b", stream.data); - $display("byteEn = %b", stream.byteEn); - $display("isFirst = %b, isLast = %b", stream.isFirst, stream.isLast); - endaction; -endfunction (* doc = "testcase" *) module mkStreamConcatTb(Empty); @@ -26,56 +28,101 @@ module mkStreamConcatTb(Empty); Randomize#(StreamSize) streamASizeRandomValue <- mkConstrainedRandomizer(fromInteger(valueOf(MIN_STREAM_SIZE)), fromInteger(valueOf(MAX_STREAM_SIZE))); Randomize#(StreamSize) streamBSizeRandomValue <- mkConstrainedRandomizer(fromInteger(valueOf(MIN_STREAM_SIZE)), fromInteger(valueOf(MAX_STREAM_SIZE))); - Reg#(StreamSize) streamASizeReg <- mkReg(0); - Reg#(StreamSize) streamBSizeReg <- mkReg(0); - Reg#(StreamSize) stramAframeCntReg <- mkReg(0); - Reg#(StreamSize) stramBframeCntReg <- mkReg(0); + Reg#(StreamSize) streamARemainSizeReg <- mkReg(0); + Reg#(StreamSize) streamBRemainSizeReg <- mkReg(0); + Reg#(StreamSize) concatSizeReg <- mkReg(0); + + FIFOF#(StreamSize) ideaConcatSizeFifo <- mkSizedFIFOF(10); Reg#(Bool) isInitReg <- mkReg(False); Reg#(UInt#(32)) testCntReg <- mkReg(0); + Reg#(UInt#(32)) testRoundReg <- mkReg(0); + Reg#(UInt#(32)) testFinishCntReg <- mkReg(0); - DataStream testStream = DataStream{ - data: 'b1010101010101010, - byteEn: 'b11, - isFirst: True, - isLast: True - }; - - function DataStream generatePsuedoStream (StreamSize size, Bool isFirst); - if (size < fromInteger(valueOf(BYTE_EN_WIDTH))) begin - return DataStream{ - data: fromInteger(valueOf(PSUEDO_DATA)), - byteEn: (1 << size) - 1, - isFirst: isFirst, - isLast: True - }; - end - else begin - return DataStream{ - data: fromInteger(valueOf(PSUEDO_DATA)), - byteEn: fromInteger(valueOf(MAX_BYTE_EN)), - isFirst: isFirst, - isLast: False - }; - end + Data pseudoData = fromInteger(valueOf(PSEUDO_DATA)); + for (Integer idx = 0; idx < valueOf(TDiv#(DATA_WIDTH, PSEUDO_DATA_WIDTH)); idx = idx + 1) begin + pseudoData = pseudoData | (pseudoData << idx*valueOf(PSEUDO_DATA_WIDTH)); + end + + function DataStream generatePsuedoStream (StreamSize size, Bool isFirst, Bool isLast); + let offsetPtr = (fromInteger(valueOf(BYTE_EN_WIDTH)) - size) << valueOf(BYTE_WIDTH_WIDTH); + Data streamData = (pseudoData << offsetPtr) >> offsetPtr; + return DataStream{ + data: streamData, + byteEn: (1 << size) - 1, + isFirst: isFirst, + isLast: isLast + }; endfunction rule testInit if (!isInitReg); - $display("INFO: start StreamConcatTb!"); + $display("INFO: ================start StreamConcatTb!=================="); + $display(valueOf(BYTE_WIDTH_WIDTH)); streamASizeRandomValue.cntrl.init; streamBSizeRandomValue.cntrl.init; isInitReg <= True; endrule rule testInput if (isInitReg && testCntReg < fromInteger(valueOf(TEST_NUM))); - dut.inputStreamFirst.enq(testStream); - dut.inputStreamSecond.enq(testStream); - testCntReg <= testCntReg + 1; + + if (testRoundReg == 0 && dut.inputStreamFirst.notFull && dut.inputStreamSecond.notFull) begin + StreamSize sizeA <- streamASizeRandomValue.next; + StreamSize sizeB <- streamASizeRandomValue.next; + ideaConcatSizeFifo.enq(sizeA + sizeB); + testRoundReg <= (sizeA + sizeB) / fromInteger(valueOf(BYTE_EN_WIDTH)); + let isLast = sizeA <= fromInteger(valueOf(BYTE_EN_WIDTH)); + let firstSizeA = isLast ? sizeA : fromInteger(valueOf(BYTE_EN_WIDTH)); + let firstSizeB = isLast ? sizeB : fromInteger(valueOf(BYTE_EN_WIDTH)); + dut.inputStreamFirst.enq(generatePsuedoStream(firstSizeA, True, isLast)); + dut.inputStreamSecond.enq(generatePsuedoStream(firstSizeB, True, isLast)); + streamARemainSizeReg <= sizeA - firstSizeA; + streamBRemainSizeReg <= sizeB - firstSizeB; + testCntReg <= testCntReg + 1; + $display("INFO: Add Input of %d Epoch", testCntReg + 1); + $display("INFO: streamASize = %d, streamBSize = %d, ideaSize = %d", sizeA, sizeB, sizeA+sizeB); + end + + else if (testRoundReg > 0) begin + if (streamARemainSizeReg > 0 && dut.inputStreamFirst.notFull) begin + dut.inputStreamFirst.enq(generatePsuedoStream(streamARemainSizeReg, False, (streamARemainSizeReg <= fromInteger(valueOf(BYTE_EN_WIDTH))))); + streamARemainSizeReg <= (streamARemainSizeReg > fromInteger(valueOf(BYTE_EN_WIDTH))) ? streamARemainSizeReg - fromInteger(valueOf(BYTE_EN_WIDTH)) : 0; + end + if (streamBRemainSizeReg > 0 && dut.inputStreamSecond.notFull) begin + dut.inputStreamSecond.enq(generatePsuedoStream(streamBRemainSizeReg, False, (streamARemainSizeReg <= fromInteger(valueOf(BYTE_EN_WIDTH))))); + streamBRemainSizeReg <= (streamBRemainSizeReg > fromInteger(valueOf(BYTE_EN_WIDTH))) ? streamBRemainSizeReg - fromInteger(valueOf(BYTE_EN_WIDTH)) : 0; + end + testRoundReg <= testRoundReg - 1; + end + endrule rule testOutput; let outStream = dut.outputStream.first; - showDataStream(outStream); + StreamSize concatSize = concatSizeReg + unpack(zeroExtend(convertByteEn2BytePtr(outStream.byteEn))); + if (outStream.isLast) begin + let ideaSize = ideaConcatSizeFifo.first; + showDataStream(outStream); + if (concatSize != ideaSize) begin + $display("Error: ideaSize=%d, realSize=%d", ideaSize, concatSize); + $finish(); + end + else begin + $display("INFO: verify output ideaSize=%d, realSize=%d, ideaLastSize=%d", ideaSize, concatSize, ideaSize%fromInteger(valueOf(BYTE_EN_WIDTH))); + ideaConcatSizeFifo.deq; + testFinishCntReg <= testFinishCntReg + 1; + end + concatSizeReg <= 0; + end + else begin + concatSizeReg <= concatSize; + end dut.outputStream.deq; endrule + + rule testFinish; + if (testFinishCntReg == fromInteger(valueOf(TEST_NUM)-1)) begin + $finish(); + end + endrule + endmodule \ No newline at end of file From d20503f1790afafaae94b57bbfd0f5f8c1305008 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Fri, 12 Jul 2024 01:02:19 +0800 Subject: [PATCH 14/53] Update StreamUtils --- backend/Makefile | 4 +-- src/StreamUtils.bsv | 53 ++++++++++++++++++++++++++++++++++------ test/TestDmaCore.bsv | 4 +-- test/TestStreamUtils.bsv | 25 +++++++++++++------ 4 files changed, 67 insertions(+), 19 deletions(-) diff --git a/backend/Makefile b/backend/Makefile index 8634dda..01c68f1 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -8,8 +8,8 @@ OUTPUTDIR ?= output LOGFILE ?= run.log RUNTOPHASE ?= place # synth place route all PARTNAME = xcvu13p-fhgb2104-2-i -TARGETFILE ?= ../src/DmaRequestCore.bsv -TOPMODULE ?= mkChunkComputer +TARGETFILE ?= ../src/StreamUtils.bsv +TOPMODULE ?= mkStreamConcat export TOP = $(TOPMODULE) export RTL = $(VLOGDIR) diff --git a/src/StreamUtils.bsv b/src/StreamUtils.bsv index 9288219..d311101 100755 --- a/src/StreamUtils.bsv +++ b/src/StreamUtils.bsv @@ -1,5 +1,6 @@ import Vector::*; import FIFOF::*; +import GetPut::*; import SemiFifo::*; typedef 8 BYTE_WIDTH; @@ -10,12 +11,15 @@ typedef 2 CONCAT_STREAM_NUM; typedef 512 DATA_WIDTH; typedef TDiv#(DATA_WIDTH, BYTE_WIDTH) BYTE_EN_WIDTH; +typedef 'hFFFFFFFFFFFFFFFF MAX_BYTE_EN; typedef Bit#(DATA_WIDTH) Data; typedef Bit#(BYTE_EN_WIDTH) ByteEn; typedef Bit#(TAdd#(1, TLog#(DATA_WIDTH))) BitPtr; typedef Bit#(TAdd#(1, TLog#(BYTE_EN_WIDTH))) BytePtr; +typedef UInt#(32) StreamSize; + typedef struct { Data data; ByteEn byteEn; @@ -34,6 +38,12 @@ interface StreamConcat; interface FifoOut#(DataStream) outputStream; endinterface +interface StreamSplit; + interface FifoIn#(DataStream) inputStream; + interface Put#(StreamSize) setSplitPtr; + interface FifoOut#(DataStream) outputStream; +endinterface + function BytePtr convertByteEn2BytePtr (ByteEn byteEn); ByteEn byteEnTemp = byteEn; BytePtr ptr = 0; @@ -52,6 +62,21 @@ function Action showDataStream (DataStream stream); endaction; endfunction +function Action checkDataStream (DataStream stream, String name); + if (stream.byteEn == 0 || stream.data == 0) begin + return action + $display("Error: wrong dataStream ", name); + showDataStream(stream); + $finish(); + endaction; + end + else begin + return action + endaction; + end +endfunction + +(* synthesize *) module mkStreamConcat (StreamConcat ifc); FIFOF#(DataStream) inputFifoA <- mkFIFOF; @@ -112,7 +137,7 @@ module mkStreamConcat (StreamConcat ifc); concatStream = DataStream{ data: concatData, byteEn: concatByteEn, - isFirst: streamA.isLast, + isFirst: streamA.isFirst, isLast: isConcatStreamLast }; remainStream = DataStream{ @@ -180,13 +205,6 @@ module mkStreamConcat (StreamConcat ifc); hasRemainReg <= hasRemain; remainStreamReg <= remainStream; remainBytePtrReg <= remainBytePtr; - if (concatStream.byteEn == 0) begin - $display("B + remain", remainBytePtrReg, bytePtrB); - $display("StreamB"); - showDataStream(streamB); - $display("remain"); - showDataStream(remainStreamReg); - end outputFifo.enq(concatStream); prepareFifoB.deq; end @@ -207,4 +225,23 @@ module mkStreamConcat (StreamConcat ifc); interface inputStreamSecond = convertFifoToFifoIn(inputFifoB); interface outputStream = convertFifoToFifoOut(outputFifo); +endmodule + +(* synthesize *) +module mkStreamSplit(StreamSplit ifc); + + Reg#(Maybe#(StreamSize)) splitLocationMaybeReg <- mkReg(Invalid); + + FIFOF#(DataStream) inputFifo <- mkFIFOF; + FIFOF#(DataStream) outputFifo <- mkFIFOF; + + interface Put setSplitPtr; + method Action put(StreamSize splitPtr); + + endmethod + endinterface + + interface inputStream = convertFifoToFifoIn(inputFifo); + interface outputStream = convertFifoToFifoOut(outputFifo); + endmodule \ No newline at end of file diff --git a/test/TestDmaCore.bsv b/test/TestDmaCore.bsv index 582d27f..0a9f7f2 100755 --- a/test/TestDmaCore.bsv +++ b/test/TestDmaCore.bsv @@ -4,9 +4,9 @@ import Randomizable::*; import DmaTypes::*; import DmaRequestCore::*; -typedef 1000 CHUNK_PER_EPOCH_TEST_NUM; +typedef 10 CHUNK_PER_EPOCH_TEST_NUM; typedef 64'hFFFFFFFFFFFFFFFF MAX_ADDRESS; -typedef 32'hFFFFFFFF MAX_TEST_LENGTH; +typedef 16'hFFFF MAX_TEST_LENGTH; typedef 2'b00 DEFAULT_TLP_SIZE_SETTING; typedef 4 CHUNK_TX_TEST_SETTING_NUM; typedef 6 CHUNK_RX_TEST_SETTING_NUM; diff --git a/test/TestStreamUtils.bsv b/test/TestStreamUtils.bsv index ca723be..321ee6f 100755 --- a/test/TestStreamUtils.bsv +++ b/test/TestStreamUtils.bsv @@ -3,9 +3,6 @@ import SemiFifo::*; import Randomizable::*; import StreamUtils::*; -typedef UInt#(32) StreamSize; -typedef 'hFFFFFFFFFFFFFFFF MAX_BYTE_EN; - typedef 'hAB PSEUDO_DATA; typedef 8 PSEUDO_DATA_WIDTH; @@ -19,6 +16,11 @@ typedef 'h1 MIN_STREAM_SIZE; typedef 'hFFFF MAX_STREAM_SIZE; typedef 1000 TEST_NUM; +interface TestStreamConcat; + interface FifoOut#(DataStream) stream4concatFirst; + interface FifoOut#(DataStream) stream4concatSecond; + interface FifoIn#(DataStream) outputStream; +endinterface (* doc = "testcase" *) module mkStreamConcatTb(Empty); @@ -84,12 +86,16 @@ module mkStreamConcatTb(Empty); else if (testRoundReg > 0) begin if (streamARemainSizeReg > 0 && dut.inputStreamFirst.notFull) begin - dut.inputStreamFirst.enq(generatePsuedoStream(streamARemainSizeReg, False, (streamARemainSizeReg <= fromInteger(valueOf(BYTE_EN_WIDTH))))); - streamARemainSizeReg <= (streamARemainSizeReg > fromInteger(valueOf(BYTE_EN_WIDTH))) ? streamARemainSizeReg - fromInteger(valueOf(BYTE_EN_WIDTH)) : 0; + Bool isLast = streamARemainSizeReg <= fromInteger(valueOf(BYTE_EN_WIDTH)); + StreamSize size = isLast ? streamARemainSizeReg : fromInteger(valueOf(BYTE_EN_WIDTH)); + dut.inputStreamFirst.enq(generatePsuedoStream(size, False, isLast)); + streamARemainSizeReg <= streamARemainSizeReg - size; end if (streamBRemainSizeReg > 0 && dut.inputStreamSecond.notFull) begin - dut.inputStreamSecond.enq(generatePsuedoStream(streamBRemainSizeReg, False, (streamARemainSizeReg <= fromInteger(valueOf(BYTE_EN_WIDTH))))); - streamBRemainSizeReg <= (streamBRemainSizeReg > fromInteger(valueOf(BYTE_EN_WIDTH))) ? streamBRemainSizeReg - fromInteger(valueOf(BYTE_EN_WIDTH)) : 0; + Bool isLast = streamBRemainSizeReg <= fromInteger(valueOf(BYTE_EN_WIDTH)); + StreamSize size = isLast ? streamBRemainSizeReg : fromInteger(valueOf(BYTE_EN_WIDTH)); + dut.inputStreamSecond.enq(generatePsuedoStream(size, False, isLast)); + streamBRemainSizeReg <= streamBRemainSizeReg - size; end testRoundReg <= testRoundReg - 1; end @@ -115,6 +121,11 @@ module mkStreamConcatTb(Empty); end else begin concatSizeReg <= concatSize; + if (outStream.data != pseudoData) begin + $display("Error: Wrong data in round %d", testRoundReg); + showDataStream(outStream); + $finish(); + end end dut.outputStream.deq; endrule From 9c0ac450e38e11c1ac779c2865f43c4c21418292 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Fri, 12 Jul 2024 21:29:45 +0800 Subject: [PATCH 15/53] Add StreamSplit --- img/StreamSplit.drawio | 65 ++++++ img/{split.drawio => chunkSplit.drawio} | 0 img/dmac.drawio | 56 +++++ img/{concat.drawio => streamConcat.drawio} | 0 src/StreamUtils.bsv | 255 ++++++++++++++------- test/TestStreamUtils.bsv | 96 +++++--- 6 files changed, 364 insertions(+), 108 deletions(-) create mode 100644 img/StreamSplit.drawio rename img/{split.drawio => chunkSplit.drawio} (100%) create mode 100644 img/dmac.drawio rename img/{concat.drawio => streamConcat.drawio} (100%) diff --git a/img/StreamSplit.drawio b/img/StreamSplit.drawio new file mode 100644 index 0000000..8fe59ab --- /dev/null +++ b/img/StreamSplit.drawio @@ -0,0 +1,65 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/img/split.drawio b/img/chunkSplit.drawio similarity index 100% rename from img/split.drawio rename to img/chunkSplit.drawio diff --git a/img/dmac.drawio b/img/dmac.drawio new file mode 100644 index 0000000..003b8b7 --- /dev/null +++ b/img/dmac.drawio @@ -0,0 +1,56 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/img/concat.drawio b/img/streamConcat.drawio similarity index 100% rename from img/concat.drawio rename to img/streamConcat.drawio diff --git a/src/StreamUtils.bsv b/src/StreamUtils.bsv index d311101..ff2f719 100755 --- a/src/StreamUtils.bsv +++ b/src/StreamUtils.bsv @@ -33,15 +33,15 @@ typedef struct { } StreamWithPtr deriving(Bits, Bounded, Eq, FShow); interface StreamConcat; - interface FifoIn#(DataStream) inputStreamFirst; - interface FifoIn#(DataStream) inputStreamSecond; - interface FifoOut#(DataStream) outputStream; + interface FifoIn#(DataStream) inputStreamFirstFifoIn; + interface FifoIn#(DataStream) inputStreamSecondFifoIn; + interface FifoOut#(DataStream) outputStreamFifoOut; endinterface interface StreamSplit; - interface FifoIn#(DataStream) inputStream; - interface Put#(StreamSize) setSplitPtr; - interface FifoOut#(DataStream) outputStream; + interface FifoIn#(DataStream) inputStreamFifoIn; + interface FifoIn#(StreamSize) splitLocationFifoIn; + interface FifoOut#(DataStream) outputStreamFifoOut; endinterface function BytePtr convertByteEn2BytePtr (ByteEn byteEn); @@ -54,6 +54,72 @@ function BytePtr convertByteEn2BytePtr (ByteEn byteEn); return ptr; endfunction +function DataStream getEmptyStream (); + return DataStream{ + data: 0, + byteEn: 0, + isFirst: False, + isLast: True + }; +endfunction + +function BitPtr getMaxBitPtr (); + return fromInteger(valueOf(DATA_WIDTH)); +endfunction + +function BytePtr getMaxBytePtr (); + return fromInteger(valueOf(BYTE_EN_WIDTH)); +endfunction + +// Concat two DataStream frames into one +function Tuple3#(DataStream, DataStream, BytePtr) getConcatStream (DataStream streamA, DataStream streamB, BytePtr bytePtrA, BytePtr bytePtrB); + Bool isCallLegally = (streamA.isLast && bytePtrA <= getMaxBytePtr() && bytePtrA > 0); + BitPtr bitPtrA = zeroExtend(bytePtrA) << fromInteger(valueOf(BYTE_WIDTH_WIDTH)); + + // Fill the low PtrA bytes by streamA data + Data concatDataA = streamA.data; + ByteEn concatByteEnA = streamA.byteEn; + + // Fill the high bytes by streamB data + Data concatDataB = streamB.data << bitPtrA; + ByteEn concatByteEnB = streamB.byteEn << bytePtrA; + Data concatData = concatDataA | concatDataB; + ByteEn concatByteEn = concatByteEnA | concatByteEnB; + + // Get the remain bytes of streamB data + BitPtr resBitPtr = getMaxBitPtr() - bitPtrA; + BytePtr resBytePtr = getMaxBytePtr() - bytePtrA; + Data remainData = streamB.data >> resBitPtr; + ByteEn remainByteEn = streamB.byteEn >> resBytePtr; + + // Get if the concat frame is the last + Bool isConcatStreamLast = streamB.isLast; + BytePtr remainBytePtr = 0; + if (resBytePtr < bytePtrB ) begin + isConcatStreamLast = False; + remainBytePtr = bytePtrB - resBytePtr; + end + DataStream concatStream = getEmptyStream; + DataStream remainStream = getEmptyStream; + + // package the return concatStream and remainStream + if(isCallLegally) begin + concatStream = DataStream{ + data: concatData, + byteEn: concatByteEn, + isFirst: streamA.isFirst, + isLast: isConcatStreamLast + }; + remainStream = DataStream{ + data: remainData, + byteEn: remainByteEn, + isFirst: False, + isLast: True + }; + end + return tuple3(concatStream, remainStream, remainBytePtr); +endfunction + function Action showDataStream (DataStream stream); return action $display(" Data = %h", stream.data); @@ -92,64 +158,6 @@ module mkStreamConcat (StreamConcat ifc); Reg#(Bool) hasRemainReg <- mkReg(False); Reg#(DataStream) remainStreamReg <- mkRegU; - DataStream emptyStream = DataStream{ - data: 0, - byteEn: 0, - isFirst: True, - isLast: True - }; - - BytePtr maxBytePtr = fromInteger(valueOf(BYTE_EN_WIDTH)); - BitPtr maxBitPtr = fromInteger(valueOf(DATA_WIDTH)); - - function Tuple3#(DataStream, DataStream, BytePtr) getConcatStream (DataStream streamA, DataStream streamB, BytePtr bytePtrA, BytePtr bytePtrB); - Bool isCallLegally = (streamA.isLast && bytePtrA <= maxBytePtr && bytePtrA > 0); - BitPtr bitPtrA = zeroExtend(bytePtrA) << fromInteger(valueOf(BYTE_WIDTH_WIDTH)); - - // Fill the low PtrA bytes by streamA data - Data concatDataA = streamA.data; - ByteEn concatByteEnA = streamA.byteEn; - - // Fill the high bytes by streamB data - Data concatDataB = streamB.data << bitPtrA; - ByteEn concatByteEnB = streamB.byteEn << bytePtrA; - Data concatData = concatDataA | concatDataB; - ByteEn concatByteEn = concatByteEnA | concatByteEnB; - - // Get the remain bytes of streamB data - BitPtr resBitPtr = maxBitPtr - bitPtrA; - BytePtr resBytePtr = maxBytePtr - bytePtrA; - Data remainData = streamB.data >> resBitPtr; - ByteEn remainByteEn = streamB.byteEn >> resBytePtr; - - // Get if the concat frame is the last - Bool isConcatStreamLast = streamB.isLast; - BytePtr remainBytePtr = 0; - if (resBytePtr < bytePtrB ) begin - isConcatStreamLast = False; - remainBytePtr = bytePtrB - resBytePtr; - end - DataStream concatStream = emptyStream; - DataStream remainStream = emptyStream; - - // package the return concatStream and remainStream - if(isCallLegally) begin - concatStream = DataStream{ - data: concatData, - byteEn: concatByteEn, - isFirst: streamA.isFirst, - isLast: isConcatStreamLast - }; - remainStream = DataStream{ - data: remainData, - byteEn: remainByteEn, - isFirst: False, - isLast: True - }; - end - return tuple3(concatStream, remainStream, remainBytePtr); - endfunction - rule prepareStreamA; let streamA = inputFifoA.first; inputFifoA.deq; @@ -185,8 +193,7 @@ module mkStreamConcat (StreamConcat ifc); // the last StreamA + the first StreamB else if (streamA.isLast && streamB.isFirst) begin match{.concatStream, .remainStream, .remainBytePtr} = getConcatStream(streamA, streamB, bytePtrA, bytePtrB); - Bool hasRemain = unpack(remainStream.byteEn[0]); - hasRemainReg <= hasRemain; + hasRemainReg <= unpack(remainStream.byteEn[0]); remainStreamReg <= remainStream; remainBytePtrReg <= remainBytePtr; outputFifo.enq(concatStream); @@ -201,8 +208,7 @@ module mkStreamConcat (StreamConcat ifc); let bytePtrB = prepareFifoB.first.bytePtr; if (!streamB.isFirst) begin match{.concatStream, .remainStream, .remainBytePtr} = getConcatStream(remainStreamReg, streamB, remainBytePtrReg, bytePtrB); - Bool hasRemain = unpack(remainStream.byteEn[0]); - hasRemainReg <= hasRemain; + hasRemainReg <= unpack(remainStream.byteEn[0]); remainStreamReg <= remainStream; remainBytePtrReg <= remainBytePtr; outputFifo.enq(concatStream); @@ -221,27 +227,118 @@ module mkStreamConcat (StreamConcat ifc); end endrule - interface inputStreamFirst = convertFifoToFifoIn(inputFifoA); - interface inputStreamSecond = convertFifoToFifoIn(inputFifoB); - interface outputStream = convertFifoToFifoOut(outputFifo); + interface inputStreamFirstFifoIn = convertFifoToFifoIn(inputFifoA); + interface inputStreamSecondFifoIn = convertFifoToFifoIn(inputFifoB); + interface outputStreamFifoOut = convertFifoToFifoOut(outputFifo); endmodule (* synthesize *) module mkStreamSplit(StreamSplit ifc); - Reg#(Maybe#(StreamSize)) splitLocationMaybeReg <- mkReg(Invalid); - + Reg#(StreamSize) streamByteCntReg <- mkReg(0); + FIFOF#(StreamSize) splitLocationFifo <- mkFIFOF; FIFOF#(DataStream) inputFifo <- mkFIFOF; FIFOF#(DataStream) outputFifo <- mkFIFOF; + FIFOF#(StreamWithPtr) prepareFifo <- mkFIFOF; + FIFOF#(StreamWithPtr) assertFifo <- mkFIFOF; + FIFOF#(Tuple2#(BytePtr,BytePtr)) splitPtrFifo <- mkFIFOF; + + Reg#(DataStream) remainStreamReg <- mkRegU; + Reg#(Bool) hasRemainReg <- mkReg(False); + Reg#(Bool) isSplitted <- mkReg(False); + Reg#(BytePtr) remainBytePtrReg <- mkReg(0); + + rule prepareStream; + let stream = inputFifo.first; + inputFifo.deq; + StreamWithPtr streamWithPtr = StreamWithPtr{ + stream: stream, + bytePtr: convertByteEn2BytePtr(stream.byteEn) + }; + prepareFifo.enq(streamWithPtr); + endrule + + rule assertSplitStream; + let stream = prepareFifo.first.stream; + let bytePtr = prepareFifo.first.bytePtr; + let splitLocation = splitLocationFifo.first; + BytePtr truncateBytePtr = 0; + if (!isSplitted && unpack(zeroExtend(bytePtr)) + streamByteCntReg >= splitLocation) begin + truncateBytePtr = truncate(pack(splitLocation - streamByteCntReg)); + end + BytePtr resBytePtr = getMaxBytePtr() - truncateBytePtr; + splitPtrFifo.enq(tuple2(truncateBytePtr, resBytePtr)); + if (truncateBytePtr > 0 && !stream.isLast) begin + isSplitted <= True; + end + else begin + isSplitted <= False; + end + assertFifo.enq(prepareFifo.first); + prepareFifo.deq; + if (stream.isLast) begin + splitLocationFifo.deq; + end + endrule - interface Put setSplitPtr; - method Action put(StreamSize splitPtr); - endmethod - endinterface + rule execSplitStream; + if (assertFifo.notEmpty && splitPtrFifo.notEmpty) begin + let stream = assertFifo.first.stream; + let frameBytePtr = assertFifo.first.bytePtr; + assertFifo.deq; + match {.truncateBytePtr, .resBytePtr} = splitPtrFifo.first; + splitPtrFifo.deq; + + // no operatation + if (!hasRemainReg && truncateBytePtr == 0) begin + outputFifo.enq(stream); + end + + // split the frame in this cycle to a last frame and a remain frame + else if (!hasRemainReg && truncateBytePtr > 0) begin + BitPtr truncateBitPtr = zeroExtend(truncateBytePtr) << valueOf(BYTE_WIDTH_WIDTH); + BitPtr resBitPtr = zeroExtend(resBytePtr) << valueOf(BYTE_WIDTH_WIDTH); + outputFifo.enq(DataStream{ + data: (stream.data << resBitPtr) >> resBitPtr, + byteEn: (stream.byteEn << resBytePtr) >> resBytePtr, + isFirst: stream.isFirst, + isLast: True + }); + DataStream remainStream = DataStream{ + data: stream.data >> truncateBitPtr, + byteEn: stream.byteEn >> truncateBytePtr, + isFirst: True, + isLast: True + }; + hasRemainReg <= (remainStream.byteEn != 0); + remainBytePtrReg <= frameBytePtr - truncateBytePtr; + remainStreamReg <= remainStream; + end + + // concat the new frame with the remainReg + else if (hasRemainReg && !stream.isFirst) begin + match {.concatStream, .remainStream, .remainBytePtr} = getConcatStream(stream, remainStreamReg, frameBytePtr, remainBytePtrReg); + hasRemainReg <= unpack(remainStream.byteEn[0]); + remainStreamReg <= remainStream; + remainBytePtrReg <= remainBytePtr; + end + + else if (hasRemainReg) begin + outputFifo.enq(remainStreamReg); + hasRemainReg <= False; + end + + end + else if (hasRemainReg) begin + outputFifo.enq(remainStreamReg); + hasRemainReg <= False; + end + endrule - interface inputStream = convertFifoToFifoIn(inputFifo); - interface outputStream = convertFifoToFifoOut(outputFifo); + interface inputStreamFifoIn = convertFifoToFifoIn(inputFifo); + interface splitLocationFifoIn = convertFifoToFifoIn(splitLocationFifo); + interface outputStreamFifoOut = convertFifoToFifoOut(outputFifo); endmodule \ No newline at end of file diff --git a/test/TestStreamUtils.bsv b/test/TestStreamUtils.bsv index 321ee6f..7b917b3 100755 --- a/test/TestStreamUtils.bsv +++ b/test/TestStreamUtils.bsv @@ -16,11 +16,30 @@ typedef 'h1 MIN_STREAM_SIZE; typedef 'hFFFF MAX_STREAM_SIZE; typedef 1000 TEST_NUM; -interface TestStreamConcat; - interface FifoOut#(DataStream) stream4concatFirst; - interface FifoOut#(DataStream) stream4concatSecond; - interface FifoIn#(DataStream) outputStream; -endinterface +function Data getPseudoData(); + Data pseudoData = fromInteger(valueOf(PSEUDO_DATA)); + for (Integer idx = 0; idx < valueOf(TDiv#(DATA_WIDTH, PSEUDO_DATA_WIDTH)); idx = idx + 1) begin + pseudoData = pseudoData | (pseudoData << idx*valueOf(PSEUDO_DATA_WIDTH)); + end + return pseudoData; +endfunction + + +function DataStream generatePsuedoStream (StreamSize size, Bool isFirst, Bool isLast); + let pseudoData = getPseudoData(); + let offsetPtr = (unpack(zeroExtend(getMaxBytePtr())) - size) << valueOf(BYTE_WIDTH_WIDTH); + Data streamData = (pseudoData << offsetPtr) >> offsetPtr; + return DataStream{ + data: streamData, + byteEn: (1 << size) - 1, + isFirst: isFirst, + isLast: isLast + }; +endfunction + +function StreamSize getMaxFrameSize (); + return fromInteger(valueOf(BYTE_EN_WIDTH)); +endfunction (* doc = "testcase" *) module mkStreamConcatTb(Empty); @@ -41,21 +60,7 @@ module mkStreamConcatTb(Empty); Reg#(UInt#(32)) testRoundReg <- mkReg(0); Reg#(UInt#(32)) testFinishCntReg <- mkReg(0); - Data pseudoData = fromInteger(valueOf(PSEUDO_DATA)); - for (Integer idx = 0; idx < valueOf(TDiv#(DATA_WIDTH, PSEUDO_DATA_WIDTH)); idx = idx + 1) begin - pseudoData = pseudoData | (pseudoData << idx*valueOf(PSEUDO_DATA_WIDTH)); - end - function DataStream generatePsuedoStream (StreamSize size, Bool isFirst, Bool isLast); - let offsetPtr = (fromInteger(valueOf(BYTE_EN_WIDTH)) - size) << valueOf(BYTE_WIDTH_WIDTH); - Data streamData = (pseudoData << offsetPtr) >> offsetPtr; - return DataStream{ - data: streamData, - byteEn: (1 << size) - 1, - isFirst: isFirst, - isLast: isLast - }; - endfunction rule testInit if (!isInitReg); $display("INFO: ================start StreamConcatTb!=================="); @@ -67,7 +72,7 @@ module mkStreamConcatTb(Empty); rule testInput if (isInitReg && testCntReg < fromInteger(valueOf(TEST_NUM))); - if (testRoundReg == 0 && dut.inputStreamFirst.notFull && dut.inputStreamSecond.notFull) begin + if (testRoundReg == 0 && dut.inputStreamFirstFifoIn.notFull && dut.inputStreamSecondFifoIn.notFull) begin StreamSize sizeA <- streamASizeRandomValue.next; StreamSize sizeB <- streamASizeRandomValue.next; ideaConcatSizeFifo.enq(sizeA + sizeB); @@ -75,8 +80,8 @@ module mkStreamConcatTb(Empty); let isLast = sizeA <= fromInteger(valueOf(BYTE_EN_WIDTH)); let firstSizeA = isLast ? sizeA : fromInteger(valueOf(BYTE_EN_WIDTH)); let firstSizeB = isLast ? sizeB : fromInteger(valueOf(BYTE_EN_WIDTH)); - dut.inputStreamFirst.enq(generatePsuedoStream(firstSizeA, True, isLast)); - dut.inputStreamSecond.enq(generatePsuedoStream(firstSizeB, True, isLast)); + dut.inputStreamFirstFifoIn.enq(generatePsuedoStream(firstSizeA, True, isLast)); + dut.inputStreamSecondFifoIn.enq(generatePsuedoStream(firstSizeB, True, isLast)); streamARemainSizeReg <= sizeA - firstSizeA; streamBRemainSizeReg <= sizeB - firstSizeB; testCntReg <= testCntReg + 1; @@ -85,16 +90,16 @@ module mkStreamConcatTb(Empty); end else if (testRoundReg > 0) begin - if (streamARemainSizeReg > 0 && dut.inputStreamFirst.notFull) begin + if (streamARemainSizeReg > 0 && dut.inputStreamFirstFifoIn.notFull) begin Bool isLast = streamARemainSizeReg <= fromInteger(valueOf(BYTE_EN_WIDTH)); StreamSize size = isLast ? streamARemainSizeReg : fromInteger(valueOf(BYTE_EN_WIDTH)); - dut.inputStreamFirst.enq(generatePsuedoStream(size, False, isLast)); + dut.inputStreamFirstFifoIn.enq(generatePsuedoStream(size, False, isLast)); streamARemainSizeReg <= streamARemainSizeReg - size; end - if (streamBRemainSizeReg > 0 && dut.inputStreamSecond.notFull) begin + if (streamBRemainSizeReg > 0 && dut.inputStreamSecondFifoIn.notFull) begin Bool isLast = streamBRemainSizeReg <= fromInteger(valueOf(BYTE_EN_WIDTH)); StreamSize size = isLast ? streamBRemainSizeReg : fromInteger(valueOf(BYTE_EN_WIDTH)); - dut.inputStreamSecond.enq(generatePsuedoStream(size, False, isLast)); + dut.inputStreamSecondFifoIn.enq(generatePsuedoStream(size, False, isLast)); streamBRemainSizeReg <= streamBRemainSizeReg - size; end testRoundReg <= testRoundReg - 1; @@ -103,7 +108,7 @@ module mkStreamConcatTb(Empty); endrule rule testOutput; - let outStream = dut.outputStream.first; + let outStream = dut.outputStreamFifoOut.first; StreamSize concatSize = concatSizeReg + unpack(zeroExtend(convertByteEn2BytePtr(outStream.byteEn))); if (outStream.isLast) begin let ideaSize = ideaConcatSizeFifo.first; @@ -121,13 +126,13 @@ module mkStreamConcatTb(Empty); end else begin concatSizeReg <= concatSize; - if (outStream.data != pseudoData) begin + if (outStream.data != getPseudoData()) begin $display("Error: Wrong data in round %d", testRoundReg); showDataStream(outStream); $finish(); end end - dut.outputStream.deq; + dut.outputStreamFifoOut.deq; endrule rule testFinish; @@ -136,4 +141,37 @@ module mkStreamConcatTb(Empty); end endrule +endmodule + + +module mkStreamSplitTb(Empty); + + StreamSplit dut <- mkStreamSplit; + Randomize#(StreamSize) streamSizeRandomValue <- mkConstrainedRandomizer(fromInteger(valueOf(MIN_STREAM_SIZE)), fromInteger(valueOf(MAX_STREAM_SIZE))); + Reg#(Bool) isInitReg <- mkReg(False); + Reg#(UInt#(32)) testCntReg <- mkReg(0); + Reg#(StreamSize) streamSizeReg <- mkReg(0); + Reg#(StreamSize) streamSize2PutReg <- mkReg(0); + Reg#(UInt#(32)) testRoundReg <- mkReg(0); + + rule testInit if (!isInitReg); + isInitReg <= True; + streamSizeRandomValue.cntrl.init; + endrule + + rule testInput if (isInitReg); + if (testRoundReg == 0) begin + let size <- streamSizeRandomValue.next; + if (size <= getMaxFrameSize()) begin + let stream = generatePsuedoStream(size, True, True); + dut.inputStreamFifoIn.enq(stream); + streamSizeReg <= size; + end + end + endrule + + rule testOutput if (isInitReg); + + endrule + endmodule \ No newline at end of file From 4f9221255d78b9a41201cff14105a2f9ac1132b5 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Sat, 13 Jul 2024 22:48:05 +0800 Subject: [PATCH 16/53] Test StreamUtils Pass --- src/StreamUtils.bsv | 167 ++++++++++++++++++++---------------- test/TestStreamUtils.bsv | 180 ++++++++++++++++++++++++++++++--------- 2 files changed, 233 insertions(+), 114 deletions(-) diff --git a/src/StreamUtils.bsv b/src/StreamUtils.bsv index ff2f719..78034bc 100755 --- a/src/StreamUtils.bsv +++ b/src/StreamUtils.bsv @@ -15,10 +15,12 @@ typedef 'hFFFFFFFFFFFFFFFF MAX_BYTE_EN; typedef Bit#(DATA_WIDTH) Data; typedef Bit#(BYTE_EN_WIDTH) ByteEn; -typedef Bit#(TAdd#(1, TLog#(DATA_WIDTH))) BitPtr; -typedef Bit#(TAdd#(1, TLog#(BYTE_EN_WIDTH))) BytePtr; +typedef Bit#(TAdd#(1, TLog#(DATA_WIDTH))) DataBitPtr; +typedef Bit#(TAdd#(1, TLog#(BYTE_EN_WIDTH))) DataBytePtr; -typedef UInt#(32) StreamSize; +typedef 32 STREAM_SIZE_WIDTH; +typedef UInt#(STREAM_SIZE_WIDTH) StreamSize; +typedef Bit#(TAdd#(1, TLog#(STREAM_SIZE_WIDTH))) StreamSizeBitPtr; typedef struct { Data data; @@ -29,7 +31,7 @@ typedef struct { typedef struct { DataStream stream; - BytePtr bytePtr; + DataBytePtr bytePtr; } StreamWithPtr deriving(Bits, Bounded, Eq, FShow); interface StreamConcat; @@ -44,9 +46,9 @@ interface StreamSplit; interface FifoOut#(DataStream) outputStreamFifoOut; endinterface -function BytePtr convertByteEn2BytePtr (ByteEn byteEn); +function DataBytePtr convertByteEn2BytePtr (ByteEn byteEn); ByteEn byteEnTemp = byteEn; - BytePtr ptr = 0; + DataBytePtr ptr = 0; while (byteEnTemp > 0) begin byteEnTemp = byteEnTemp >> 1; ptr = ptr + 1; @@ -63,18 +65,18 @@ function DataStream getEmptyStream (); }; endfunction -function BitPtr getMaxBitPtr (); +function DataBitPtr getMaxBitPtr (); return fromInteger(valueOf(DATA_WIDTH)); endfunction -function BytePtr getMaxBytePtr (); +function DataBytePtr getMaxBytePtr (); return fromInteger(valueOf(BYTE_EN_WIDTH)); endfunction -// Concat two DataStream frames into one -function Tuple3#(DataStream, DataStream, BytePtr) getConcatStream (DataStream streamA, DataStream streamB, BytePtr bytePtrA, BytePtr bytePtrB); +// Concat two DataStream frames into one. StreamA.isLast must be True, otherwise the function will return a empty frame to end the stream. +function Tuple3#(DataStream, DataStream, DataBytePtr) getConcatStream (DataStream streamA, DataStream streamB, DataBytePtr bytePtrA, DataBytePtr bytePtrB); Bool isCallLegally = (streamA.isLast && bytePtrA <= getMaxBytePtr() && bytePtrA > 0); - BitPtr bitPtrA = zeroExtend(bytePtrA) << fromInteger(valueOf(BYTE_WIDTH_WIDTH)); + DataBitPtr bitPtrA = zeroExtend(bytePtrA) << fromInteger(valueOf(BYTE_WIDTH_WIDTH)); // Fill the low PtrA bytes by streamA data Data concatDataA = streamA.data; @@ -87,14 +89,14 @@ function Tuple3#(DataStream, DataStream, BytePtr) getConcatStream (DataStream st ByteEn concatByteEn = concatByteEnA | concatByteEnB; // Get the remain bytes of streamB data - BitPtr resBitPtr = getMaxBitPtr() - bitPtrA; - BytePtr resBytePtr = getMaxBytePtr() - bytePtrA; + DataBitPtr resBitPtr = getMaxBitPtr() - bitPtrA; + DataBytePtr resBytePtr = getMaxBytePtr() - bytePtrA; Data remainData = streamB.data >> resBitPtr; ByteEn remainByteEn = streamB.byteEn >> resBytePtr; // Get if the concat frame is the last Bool isConcatStreamLast = streamB.isLast; - BytePtr remainBytePtr = 0; + DataBytePtr remainBytePtr = 0; if (resBytePtr < bytePtrB ) begin isConcatStreamLast = False; remainBytePtr = bytePtrB - resBytePtr; @@ -131,7 +133,7 @@ endfunction function Action checkDataStream (DataStream stream, String name); if (stream.byteEn == 0 || stream.data == 0) begin return action - $display("Error: wrong dataStream ", name); + $display("Error: empty dataStream ", name); showDataStream(stream); $finish(); endaction; @@ -152,16 +154,21 @@ module mkStreamConcat (StreamConcat ifc); FIFOF#(StreamWithPtr) prepareFifoA <- mkFIFOF; FIFOF#(StreamWithPtr) prepareFifoB <- mkFIFOF; - Reg#(BytePtr) bytePtrRegA <- mkReg(0); - Reg#(BytePtr) bytePtrRegB <- mkReg(0); - Reg#(BytePtr) remainBytePtrReg <- mkReg(0); + Reg#(DataBytePtr) bytePtrRegA <- mkReg(0); + Reg#(DataBytePtr) bytePtrRegB <- mkReg(0); + Reg#(DataBytePtr) remainBytePtrReg <- mkReg(0); + Reg#(Bool) hasRemainReg <- mkReg(False); + Reg#(Bool) hasLastRemainReg <- mkReg(False); + Reg#(Bool) isStreamAEnd <- mkReg(False); + Reg#(DataStream) remainStreamReg <- mkRegU; + rule prepareStreamA; let streamA = inputFifoA.first; inputFifoA.deq; - BytePtr bytePtr = convertByteEn2BytePtr(streamA.byteEn); + DataBytePtr bytePtr = convertByteEn2BytePtr(streamA.byteEn); prepareFifoA.enq(StreamWithPtr { stream: streamA, bytePtr: bytePtr @@ -171,7 +178,7 @@ module mkStreamConcat (StreamConcat ifc); rule prepareStreamB; let streamB = inputFifoB.first; inputFifoB.deq; - BytePtr bytePtr = convertByteEn2BytePtr(streamB.byteEn); + DataBytePtr bytePtr = convertByteEn2BytePtr(streamB.byteEn); prepareFifoB.enq(StreamWithPtr { stream: streamB, bytePtr: bytePtr @@ -179,52 +186,58 @@ module mkStreamConcat (StreamConcat ifc); endrule rule concatStream; + // Only the remain data + if (hasRemainReg && hasLastRemainReg) begin + outputFifo.enq(remainStreamReg); + hasRemainReg <= False; + isStreamAEnd <= False; + end + + // StreamB or streamB + the remain data + else if (prepareFifoB.notEmpty && isStreamAEnd) begin + let streamB = prepareFifoB.first.stream; + let bytePtrB = prepareFifoB.first.bytePtr; + prepareFifoB.deq; + streamB.isFirst = False; + if (hasRemainReg) begin + match{.concatStream, .remainStream, .remainBytePtr} = getConcatStream(remainStreamReg, streamB, remainBytePtrReg, bytePtrB); + hasRemainReg <= unpack(remainStream.byteEn[0]); + hasLastRemainReg <= streamB.isLast; + remainStreamReg <= remainStream; + remainBytePtrReg <= remainBytePtr; + outputFifo.enq(concatStream); + end + else begin + outputFifo.enq(streamB); + end + isStreamAEnd <= !streamB.isLast; + end + // StreamA or StreamA + first StreamB - if (prepareFifoA.notEmpty && prepareFifoB.notEmpty && !hasRemainReg) begin + else if (prepareFifoA.notEmpty) begin let streamA = prepareFifoA.first.stream; - let streamB = prepareFifoB.first.stream; let bytePtrA = prepareFifoA.first.bytePtr; - let bytePtrB = prepareFifoB.first.bytePtr; // Only StreamA frame - if (!streamA.isLast && streamB.isFirst) begin + if (!streamA.isLast) begin outputFifo.enq(streamA); prepareFifoA.deq; + isStreamAEnd <= False; end // the last StreamA + the first StreamB - else if (streamA.isLast && streamB.isFirst) begin + else if (streamA.isLast && prepareFifoB.notEmpty) begin + let streamB = prepareFifoB.first.stream; + let bytePtrB = prepareFifoB.first.bytePtr; match{.concatStream, .remainStream, .remainBytePtr} = getConcatStream(streamA, streamB, bytePtrA, bytePtrB); hasRemainReg <= unpack(remainStream.byteEn[0]); + hasLastRemainReg <= streamB.isLast; remainStreamReg <= remainStream; remainBytePtrReg <= remainBytePtr; + isStreamAEnd <= !streamB.isLast; outputFifo.enq(concatStream); prepareFifoA.deq; prepareFifoB.deq; end end - - // streamB + the remain data - else if (prepareFifoB.notEmpty && hasRemainReg) begin - let streamB = prepareFifoB.first.stream; - let bytePtrB = prepareFifoB.first.bytePtr; - if (!streamB.isFirst) begin - match{.concatStream, .remainStream, .remainBytePtr} = getConcatStream(remainStreamReg, streamB, remainBytePtrReg, bytePtrB); - hasRemainReg <= unpack(remainStream.byteEn[0]); - remainStreamReg <= remainStream; - remainBytePtrReg <= remainBytePtr; - outputFifo.enq(concatStream); - prepareFifoB.deq; - end - else begin - outputFifo.enq(remainStreamReg); - hasRemainReg <= False; - end - end - - // Only the remain data - else if (hasRemainReg) begin - outputFifo.enq(remainStreamReg); - hasRemainReg <= False; - end endrule interface inputStreamFirstFifoIn = convertFifoToFifoIn(inputFifoA); @@ -237,18 +250,21 @@ endmodule module mkStreamSplit(StreamSplit ifc); Reg#(StreamSize) streamByteCntReg <- mkReg(0); + FIFOF#(StreamSize) splitLocationFifo <- mkFIFOF; FIFOF#(DataStream) inputFifo <- mkFIFOF; FIFOF#(DataStream) outputFifo <- mkFIFOF; FIFOF#(StreamWithPtr) prepareFifo <- mkFIFOF; FIFOF#(StreamWithPtr) assertFifo <- mkFIFOF; - FIFOF#(Tuple2#(BytePtr,BytePtr)) splitPtrFifo <- mkFIFOF; + FIFOF#(Tuple2#(DataBytePtr,DataBytePtr)) splitPtrFifo <- mkFIFOF; Reg#(DataStream) remainStreamReg <- mkRegU; Reg#(Bool) hasRemainReg <- mkReg(False); - Reg#(Bool) isSplitted <- mkReg(False); - Reg#(BytePtr) remainBytePtrReg <- mkReg(0); + Reg#(Bool) hasLastRemainReg <- mkReg(False); + Reg#(DataBytePtr) remainBytePtrReg <- mkReg(0); + Reg#(Bool) isSplitted <- mkReg(False); + rule prepareStream; let stream = inputFifo.first; inputFifo.deq; @@ -263,18 +279,20 @@ module mkStreamSplit(StreamSplit ifc); let stream = prepareFifo.first.stream; let bytePtr = prepareFifo.first.bytePtr; let splitLocation = splitLocationFifo.first; - BytePtr truncateBytePtr = 0; + DataBytePtr truncateBytePtr = 0; if (!isSplitted && unpack(zeroExtend(bytePtr)) + streamByteCntReg >= splitLocation) begin truncateBytePtr = truncate(pack(splitLocation - streamByteCntReg)); end - BytePtr resBytePtr = getMaxBytePtr() - truncateBytePtr; + DataBytePtr resBytePtr = getMaxBytePtr() - truncateBytePtr; splitPtrFifo.enq(tuple2(truncateBytePtr, resBytePtr)); if (truncateBytePtr > 0 && !stream.isLast) begin isSplitted <= True; end - else begin + else if (stream.isLast) begin isSplitted <= False; end + streamByteCntReg <= stream.isLast ? 0 : streamByteCntReg + unpack(zeroExtend(bytePtr)); + assertFifo.enq(prepareFifo.first); prepareFifo.deq; if (stream.isLast) begin @@ -284,11 +302,21 @@ module mkStreamSplit(StreamSplit ifc); rule execSplitStream; - if (assertFifo.notEmpty && splitPtrFifo.notEmpty) begin + // Only output remainStreamReg + if (hasRemainReg && hasLastRemainReg) begin + if (remainStreamReg.byteEn == 0) begin + showDataStream(remainStreamReg); + end + outputFifo.enq(remainStreamReg); + hasRemainReg <= False; + hasLastRemainReg <= False; + end + + else if (assertFifo.notEmpty && splitPtrFifo.notEmpty) begin let stream = assertFifo.first.stream; let frameBytePtr = assertFifo.first.bytePtr; - assertFifo.deq; match {.truncateBytePtr, .resBytePtr} = splitPtrFifo.first; + assertFifo.deq; splitPtrFifo.deq; // no operatation @@ -296,10 +324,10 @@ module mkStreamSplit(StreamSplit ifc); outputFifo.enq(stream); end - // split the frame in this cycle to a last frame and a remain frame + // split the frame in this cycle to a last frame and a remain frame else if (!hasRemainReg && truncateBytePtr > 0) begin - BitPtr truncateBitPtr = zeroExtend(truncateBytePtr) << valueOf(BYTE_WIDTH_WIDTH); - BitPtr resBitPtr = zeroExtend(resBytePtr) << valueOf(BYTE_WIDTH_WIDTH); + DataBitPtr truncateBitPtr = zeroExtend(truncateBytePtr) << valueOf(BYTE_WIDTH_WIDTH); + DataBitPtr resBitPtr = zeroExtend(resBytePtr) << valueOf(BYTE_WIDTH_WIDTH); outputFifo.enq(DataStream{ data: (stream.data << resBitPtr) >> resBitPtr, byteEn: (stream.byteEn << resBytePtr) >> resBytePtr, @@ -313,27 +341,22 @@ module mkStreamSplit(StreamSplit ifc); isLast: True }; hasRemainReg <= (remainStream.byteEn != 0); + hasLastRemainReg <= stream.isLast; remainBytePtrReg <= frameBytePtr - truncateBytePtr; remainStreamReg <= remainStream; end // concat the new frame with the remainReg - else if (hasRemainReg && !stream.isFirst) begin - match {.concatStream, .remainStream, .remainBytePtr} = getConcatStream(stream, remainStreamReg, frameBytePtr, remainBytePtrReg); + else if (hasRemainReg) begin + match {.concatStream, .remainStream, .remainBytePtr} = getConcatStream(remainStreamReg, stream, remainBytePtrReg, frameBytePtr); + checkDataStream(concatStream, "concat remain"); + outputFifo.enq(concatStream); hasRemainReg <= unpack(remainStream.byteEn[0]); + hasLastRemainReg <= stream.isLast; remainStreamReg <= remainStream; remainBytePtrReg <= remainBytePtr; + end - - else if (hasRemainReg) begin - outputFifo.enq(remainStreamReg); - hasRemainReg <= False; - end - - end - else if (hasRemainReg) begin - outputFifo.enq(remainStreamReg); - hasRemainReg <= False; end endrule diff --git a/test/TestStreamUtils.bsv b/test/TestStreamUtils.bsv index 7b917b3..a78bc3e 100755 --- a/test/TestStreamUtils.bsv +++ b/test/TestStreamUtils.bsv @@ -1,20 +1,31 @@ import FIFOF::*; import SemiFifo::*; -import Randomizable::*; +import LFSR::*; import StreamUtils::*; typedef 'hAB PSEUDO_DATA; typedef 8 PSEUDO_DATA_WIDTH; -typedef 'h1 MIN_STREAM_SIZE; +typedef 10 TEST_IDEAL_FIFO_DEPTH; + +typedef 'h12345678 SEED_1; +typedef 'hABCDEF01 SEED_2; // TEST HYPER PARAMETERS CASE 1 -// typedef 100 MAX_STREAM_SIZE; +// typedef 3 MAX_STREAM_SIZE_PTR; // typedef 10 TEST_NUM; // TEST HYPER PARAMETERS CASE 2 -typedef 'hFFFF MAX_STREAM_SIZE; -typedef 1000 TEST_NUM; +typedef 16 MAX_STREAM_SIZE_PTR; +typedef 10000 TEST_NUM; + +typedef enum { + WAITING, FirstChunk, SecondChunk +} StreamSplitOutStatus deriving(Bits, Eq); + +interface RandomStreamSize; + method ActionValue#(StreamSize) next(); +endinterface function Data getPseudoData(); Data pseudoData = fromInteger(valueOf(PSEUDO_DATA)); @@ -24,7 +35,6 @@ function Data getPseudoData(); return pseudoData; endfunction - function DataStream generatePsuedoStream (StreamSize size, Bool isFirst, Bool isLast); let pseudoData = getPseudoData(); let offsetPtr = (unpack(zeroExtend(getMaxBytePtr())) - size) << valueOf(BYTE_WIDTH_WIDTH); @@ -41,84 +51,104 @@ function StreamSize getMaxFrameSize (); return fromInteger(valueOf(BYTE_EN_WIDTH)); endfunction +module mkRandomStreamSize(StreamSize seed, StreamSizeBitPtr maxSizeBitPtr, RandomStreamSize ifc); + LFSR#(Bit#(STREAM_SIZE_WIDTH)) lfsr <- mkLFSR_32 ; + FIFOF#(StreamSize) outputFifo <- mkFIFOF ; + Reg#(Bool) isInitReg <- mkReg(False) ; + + rule run if (isInitReg); + let value = lfsr.value >> (fromInteger(valueOf(STREAM_SIZE_WIDTH)) - maxSizeBitPtr); + if (value > 0) begin + outputFifo.enq(unpack(value)); + end + lfsr.next; + endrule + + rule init if (!isInitReg); + isInitReg <= True; + lfsr.seed(pack(seed)); + endrule + + method ActionValue#(StreamSize) next(); + outputFifo.deq; + return outputFifo.first; + endmethod +endmodule + (* doc = "testcase" *) module mkStreamConcatTb(Empty); StreamConcat dut <- mkStreamConcat; - Randomize#(StreamSize) streamASizeRandomValue <- mkConstrainedRandomizer(fromInteger(valueOf(MIN_STREAM_SIZE)), fromInteger(valueOf(MAX_STREAM_SIZE))); - Randomize#(StreamSize) streamBSizeRandomValue <- mkConstrainedRandomizer(fromInteger(valueOf(MIN_STREAM_SIZE)), fromInteger(valueOf(MAX_STREAM_SIZE))); + RandomStreamSize streamASizeRandomValue <- mkRandomStreamSize(fromInteger(valueOf(SEED_1)), fromInteger(valueOf(MAX_STREAM_SIZE_PTR))); + RandomStreamSize streamBSizeRandomValue <- mkRandomStreamSize(fromInteger(valueOf(SEED_2)), fromInteger(valueOf(MAX_STREAM_SIZE_PTR))); Reg#(StreamSize) streamARemainSizeReg <- mkReg(0); Reg#(StreamSize) streamBRemainSizeReg <- mkReg(0); Reg#(StreamSize) concatSizeReg <- mkReg(0); - FIFOF#(StreamSize) ideaConcatSizeFifo <- mkSizedFIFOF(10); + FIFOF#(StreamSize) ideaConcatSizeFifo <- mkSizedFIFOF(valueOf(TEST_IDEAL_FIFO_DEPTH)); Reg#(Bool) isInitReg <- mkReg(False); Reg#(UInt#(32)) testCntReg <- mkReg(0); Reg#(UInt#(32)) testRoundReg <- mkReg(0); Reg#(UInt#(32)) testFinishCntReg <- mkReg(0); - - rule testInit if (!isInitReg); $display("INFO: ================start StreamConcatTb!=================="); - $display(valueOf(BYTE_WIDTH_WIDTH)); - streamASizeRandomValue.cntrl.init; - streamBSizeRandomValue.cntrl.init; isInitReg <= True; endrule rule testInput if (isInitReg && testCntReg < fromInteger(valueOf(TEST_NUM))); - if (testRoundReg == 0 && dut.inputStreamFirstFifoIn.notFull && dut.inputStreamSecondFifoIn.notFull) begin StreamSize sizeA <- streamASizeRandomValue.next; - StreamSize sizeB <- streamASizeRandomValue.next; + StreamSize sizeB <- streamBSizeRandomValue.next; ideaConcatSizeFifo.enq(sizeA + sizeB); - testRoundReg <= (sizeA + sizeB) / fromInteger(valueOf(BYTE_EN_WIDTH)); - let isLast = sizeA <= fromInteger(valueOf(BYTE_EN_WIDTH)); - let firstSizeA = isLast ? sizeA : fromInteger(valueOf(BYTE_EN_WIDTH)); - let firstSizeB = isLast ? sizeB : fromInteger(valueOf(BYTE_EN_WIDTH)); - dut.inputStreamFirstFifoIn.enq(generatePsuedoStream(firstSizeA, True, isLast)); - dut.inputStreamSecondFifoIn.enq(generatePsuedoStream(firstSizeB, True, isLast)); + testRoundReg <= (sizeA + sizeB) / getMaxFrameSize(); + + let isLastA = (sizeA <= getMaxFrameSize()); + let isLastB = (sizeB <= getMaxFrameSize()); + let firstSizeA = isLastA ? sizeA : getMaxFrameSize(); + let firstSizeB = isLastB ? sizeB : getMaxFrameSize(); + + dut.inputStreamFirstFifoIn.enq(generatePsuedoStream(firstSizeA, True, isLastA)); + dut.inputStreamSecondFifoIn.enq(generatePsuedoStream(firstSizeB, True, isLastB)); streamARemainSizeReg <= sizeA - firstSizeA; streamBRemainSizeReg <= sizeB - firstSizeB; testCntReg <= testCntReg + 1; - $display("INFO: Add Input of %d Epoch", testCntReg + 1); - $display("INFO: streamASize = %d, streamBSize = %d, ideaSize = %d", sizeA, sizeB, sizeA+sizeB); + // $display("INFO: Add Input of %d Epoch", testCntReg + 1); + // $display("INFO: streamASize = %d, streamBSize = %d, ideaSize = %d", sizeA, sizeB, sizeA+sizeB); end else if (testRoundReg > 0) begin if (streamARemainSizeReg > 0 && dut.inputStreamFirstFifoIn.notFull) begin - Bool isLast = streamARemainSizeReg <= fromInteger(valueOf(BYTE_EN_WIDTH)); - StreamSize size = isLast ? streamARemainSizeReg : fromInteger(valueOf(BYTE_EN_WIDTH)); + Bool isLast = streamARemainSizeReg <= getMaxFrameSize(); + StreamSize size = isLast ? streamARemainSizeReg : getMaxFrameSize(); dut.inputStreamFirstFifoIn.enq(generatePsuedoStream(size, False, isLast)); streamARemainSizeReg <= streamARemainSizeReg - size; end if (streamBRemainSizeReg > 0 && dut.inputStreamSecondFifoIn.notFull) begin - Bool isLast = streamBRemainSizeReg <= fromInteger(valueOf(BYTE_EN_WIDTH)); - StreamSize size = isLast ? streamBRemainSizeReg : fromInteger(valueOf(BYTE_EN_WIDTH)); + Bool isLast = streamBRemainSizeReg <= getMaxFrameSize(); + StreamSize size = isLast ? streamBRemainSizeReg : getMaxFrameSize(); dut.inputStreamSecondFifoIn.enq(generatePsuedoStream(size, False, isLast)); streamBRemainSizeReg <= streamBRemainSizeReg - size; end testRoundReg <= testRoundReg - 1; end - endrule rule testOutput; let outStream = dut.outputStreamFifoOut.first; + checkDataStream(outStream, "Output Stream"); StreamSize concatSize = concatSizeReg + unpack(zeroExtend(convertByteEn2BytePtr(outStream.byteEn))); if (outStream.isLast) begin let ideaSize = ideaConcatSizeFifo.first; - showDataStream(outStream); if (concatSize != ideaSize) begin $display("Error: ideaSize=%d, realSize=%d", ideaSize, concatSize); $finish(); end else begin - $display("INFO: verify output ideaSize=%d, realSize=%d, ideaLastSize=%d", ideaSize, concatSize, ideaSize%fromInteger(valueOf(BYTE_EN_WIDTH))); + // $display("INFO: verify output ideaSize=%d, realSize=%d, ideaLastSize=%d", ideaSize, concatSize, ideaSize%getMaxFrameSize()); ideaConcatSizeFifo.deq; testFinishCntReg <= testFinishCntReg + 1; end @@ -127,7 +157,7 @@ module mkStreamConcatTb(Empty); else begin concatSizeReg <= concatSize; if (outStream.data != getPseudoData()) begin - $display("Error: Wrong data in round %d", testRoundReg); + $display("Error: Wrong output data"); showDataStream(outStream); $finish(); end @@ -143,35 +173,101 @@ module mkStreamConcatTb(Empty); endmodule - +(* doc = "testcase" *) module mkStreamSplitTb(Empty); StreamSplit dut <- mkStreamSplit; - Randomize#(StreamSize) streamSizeRandomValue <- mkConstrainedRandomizer(fromInteger(valueOf(MIN_STREAM_SIZE)), fromInteger(valueOf(MAX_STREAM_SIZE))); + + RandomStreamSize streamSizeRandomValue <- mkRandomStreamSize(fromInteger(valueOf(SEED_1)), fromInteger(valueOf(MAX_STREAM_SIZE_PTR))); + RandomStreamSize splitLocationRandomValue <- mkRandomStreamSize(fromInteger(valueOf(SEED_2)), fromInteger(valueOf(MAX_STREAM_SIZE_PTR)-1)); + Reg#(Bool) isInitReg <- mkReg(False); Reg#(UInt#(32)) testCntReg <- mkReg(0); - Reg#(StreamSize) streamSizeReg <- mkReg(0); - Reg#(StreamSize) streamSize2PutReg <- mkReg(0); Reg#(UInt#(32)) testRoundReg <- mkReg(0); + FIFOF#(StreamSize) ideaTotalSizeFifo <- mkSizedFIFOF(valueOf(TEST_IDEAL_FIFO_DEPTH)); + FIFOF#(StreamSize) ideaSplitSizeFifo <- mkSizedFIFOF(valueOf(TEST_IDEAL_FIFO_DEPTH)); + + Reg#(StreamSize) streamSize2PutReg <- mkReg(0); + + Reg#(Bool) hasRecvFirstChunkReg <- mkReg(False); + Reg#(StreamSize) totalRecvSizeReg <- mkReg(0); + rule testInit if (!isInitReg); isInitReg <= True; - streamSizeRandomValue.cntrl.init; + $display("INFO: ================start StreamSplitTb!=================="); endrule - rule testInput if (isInitReg); - if (testRoundReg == 0) begin + rule testInput if (isInitReg && testCntReg < fromInteger(valueOf(TEST_NUM))); + // First Frame + if (streamSize2PutReg == 0) begin let size <- streamSizeRandomValue.next; - if (size <= getMaxFrameSize()) begin - let stream = generatePsuedoStream(size, True, True); + let splitLocation <- splitLocationRandomValue.next; + if (splitLocation < size) begin + let isLast = size <= getMaxFrameSize(); + let firstSize = isLast ? size : getMaxFrameSize(); + let stream = generatePsuedoStream(firstSize, True, isLast); + dut.splitLocationFifoIn.enq(splitLocation); dut.inputStreamFifoIn.enq(stream); - streamSizeReg <= size; + ideaTotalSizeFifo.enq(size); + ideaSplitSizeFifo.enq(splitLocation); + streamSize2PutReg <= size - firstSize; + // $display("INFO: Add input stream size %d, split at %d", size, splitLocation); end end + else begin + let isLast = streamSize2PutReg <= getMaxFrameSize(); + let size = isLast ? streamSize2PutReg : getMaxFrameSize(); + let stream = generatePsuedoStream(size, False, isLast); + dut.inputStreamFifoIn.enq(stream); + streamSize2PutReg <= streamSize2PutReg - size; + end endrule rule testOutput if (isInitReg); + let outStream = dut.outputStreamFifoOut.first; + dut.outputStreamFifoOut.deq; + checkDataStream(outStream, "split output stream"); + StreamSize totalSize = totalRecvSizeReg + unpack(zeroExtend(convertByteEn2BytePtr(outStream.byteEn))); + + if (outStream.isLast) begin + if (hasRecvFirstChunkReg) begin + if (totalSize != ideaTotalSizeFifo.first) begin + $display("Error: wrong total size, idea = %d, real = %d", ideaTotalSizeFifo.first, totalSize); + showDataStream(outStream); + $finish(); + end + else begin + // $display("INFO: receive total size", totalSize); + ideaTotalSizeFifo.deq; + testCntReg <= testCntReg + 1; + hasRecvFirstChunkReg <= False; + totalRecvSizeReg <= 0; + end + end + else begin + if (totalSize != ideaSplitSizeFifo.first) begin + $display("Error: wrong split location, idea = %d, real = %d", ideaSplitSizeFifo.first, totalSize); + showDataStream(outStream); + $finish(); + end + else begin + // $display("INFO: receive first chunk at %d, total size %d", ideaSplitSizeFifo.first, ideaTotalSizeFifo.first); + ideaSplitSizeFifo.deq; + hasRecvFirstChunkReg <= True; + totalRecvSizeReg <= totalSize; + end + end + end + else begin + totalRecvSizeReg <= totalSize; + end + endrule + rule testFinish; + if (testCntReg == fromInteger(valueOf(TEST_NUM)-1)) begin + $finish(); + end endrule endmodule \ No newline at end of file From 44e0f8a005fbe1611be8329c7f0c76be514c7140 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Mon, 15 Jul 2024 02:59:40 +0800 Subject: [PATCH 17/53] Update all modules according to the review --- .github/workflows/ci.yml | 24 ++++++ .gitignore | 1 + Makefile.test | 5 +- img/StreamSplit.drawio | 65 --------------- img/chunkSplit.drawio | 100 ---------------------- img/dmac.drawio | 56 ------------- img/streamConcat.drawio | 100 ---------------------- run_one.sh | 2 +- src/DmaController.bsv | 17 ++++ src/DmaRequestCore.bsv | 32 +++++--- src/DmaTypes.bsv | 71 +++++++++++----- src/PcieTypes.bsv | 1 - src/PrimUtils.bsv | 15 ++++ src/StreamUtils.bsv | 127 +++++++++++----------------- test/TestAxiStream.bsv | 173 --------------------------------------- test/TestDmaCore.bsv | 75 ++++++++--------- test/TestStreamUtils.bsv | 85 +++++++++---------- 17 files changed, 252 insertions(+), 697 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 img/StreamSplit.drawio delete mode 100644 img/chunkSplit.drawio delete mode 100644 img/dmac.drawio delete mode 100644 img/streamConcat.drawio create mode 100644 src/PrimUtils.bsv delete mode 100755 test/TestAxiStream.bsv diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..c4685cb --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,24 @@ +name: CI +on: + pull_request: + branches: [master] + push: + branches: [master] + # CI runs every 12 hours + schedule: [cron: "0 */12 * * *"] + +jobs: + ci-check: + name: CI Build and Simulate + runs-on: ubuntu-latest + strategy: + fail-fast: false + steps: + - uses: actions/checkout@v2 + - name: build and simulate + run : | + ./setup.sh + ./run.sh + - name: Setup tmate session + if: ${{ failure() }} + uses: mxschmitt/action-tmate@v3 \ No newline at end of file diff --git a/.gitignore b/.gitignore index 9fe87a4..0190e22 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ **/build/** **/verilog/** **/*.log +img/*.drawio \ No newline at end of file diff --git a/Makefile.test b/Makefile.test index e60c219..af59283 100755 --- a/Makefile.test +++ b/Makefile.test @@ -5,8 +5,9 @@ TESTBENCHS = \ TestStreamUtils.bsv \ TestDmaCore.bsv -TestStreamUtils = mkStreamConcatTb -TestDmaCore.bsv = mkChunkComputerTb +TestStreamUtils.bsv = mkStreamConcatTb \ + mkStreamSplitTb +TestDmaCore.bsv = mkChunkComputerTb all: $(TESTBENCHS) diff --git a/img/StreamSplit.drawio b/img/StreamSplit.drawio deleted file mode 100644 index 8fe59ab..0000000 --- a/img/StreamSplit.drawio +++ /dev/null @@ -1,65 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/img/chunkSplit.drawio b/img/chunkSplit.drawio deleted file mode 100644 index 6e78ab1..0000000 --- a/img/chunkSplit.drawio +++ /dev/null @@ -1,100 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/img/dmac.drawio b/img/dmac.drawio deleted file mode 100644 index 003b8b7..0000000 --- a/img/dmac.drawio +++ /dev/null @@ -1,56 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/img/streamConcat.drawio b/img/streamConcat.drawio deleted file mode 100644 index 607cf6a..0000000 --- a/img/streamConcat.drawio +++ /dev/null @@ -1,100 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/run_one.sh b/run_one.sh index ad1eac1..f049c0d 100755 --- a/run_one.sh +++ b/run_one.sh @@ -13,7 +13,7 @@ TEST_LOG=run.log TEST_DIR=test cd $TEST_DIR truncate -s 0 $TEST_LOG -FILES=`ls TestStreamUtils.bsv` +FILES=`ls TestDmaCore.bsv` ########################################################################### for FILE in $FILES; do # echo $FILE diff --git a/src/DmaController.bsv b/src/DmaController.bsv index f8030f4..5d8df4b 100755 --- a/src/DmaController.bsv +++ b/src/DmaController.bsv @@ -1,6 +1,23 @@ import PcieTypes::*; import DmaTypes::*; +interface DmaController#(numeric type dataWidth); + + interface FifoIn#(DataStream) dataC2HPipeIn; + interface FifoIn#(DmaRequestFrame) reqC2HPipeIn; + interface FifoIn#(DmaRequestFrame) reqH2CPipeIn; + interface FifoOut#(DataStream) dataH2CPipeOut; + + interface FifoIn#(DmaCsrFrame) csrC2HPipeIn; + interface FifoOut#(DMACsrAddr) csrC2HPipeOut; // read reg in the card from Host + interface FifoOut#(DmaCsrFrame) csrH2CPipeOut; + + interface RawPcieRequester pcieRequester; + interface RawPcieCompleter pcieCompleter; + interface RawPcieConfiguration pcieConfig; + +endinterface + module mkDmaController#() (DmaController ifc); diff --git a/src/DmaRequestCore.bsv b/src/DmaRequestCore.bsv index e102bce..8c7df68 100755 --- a/src/DmaRequestCore.bsv +++ b/src/DmaRequestCore.bsv @@ -1,5 +1,6 @@ import FIFOF::*; import GetPut :: *; + import SemiFifo::*; import PcieTypes::*; import DmaTypes::*; @@ -7,10 +8,13 @@ import DmaTypes::*; typedef 4096 BUS_BOUNDARY; typedef TAdd#(1, TLog#(BUS_BOUNDARY)) BUS_BOUNDARY_WIDTH; + typedef Bit#(BUS_BOUNDARY_WIDTH) PcieTlpMaxMaxPayloadSize; typedef Bit#(TLog#(BUS_BOUNDARY_WIDTH)) PcieTlpSizeWidth; + typedef 128 DEFAULT_TLP_SIZE; typedef TAdd#(1, TLog#(DEFAULT_TLP_SIZE)) DEFAULT_TLP_SIZE_WIDTH; + typedef 3 PCIE_TLP_SIZE_SETTING_WIDTH; typedef Bit#(PCIE_TLP_SIZE_SETTING_WIDTH) PcieTlpSizeSetting; @@ -20,20 +24,23 @@ typedef struct { } ChunkRequestFrame deriving(Bits, Eq); interface ChunkCompute; - interface FifoIn#(DmaRequestFrame) dmaRequests; - interface FifoOut#(DmaRequestFrame) chunkRequests; + interface FifoIn#(DmaRequestFrame) dmaRequestFifoIn; + interface FifoOut#(DmaRequestFrame) chunkRequestFifoOut; interface Put#(PcieTlpSizeSetting) setTlpMaxSize; endinterface module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); - FIFOF#(DmaRequestFrame) inputFifo <- mkFIFOF; - FIFOF#(DmaRequestFrame) outputFifo <- mkFIFOF; - FIFOF#(ChunkRequestFrame) splitFifo <- mkFIFOF; - Reg#(DmaMemAddr) tlpMaxSize <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE))); //MPS if isTX, MRRS else + + FIFOF#(DmaRequestFrame) inputFifo <- mkFIFOF; + FIFOF#(DmaRequestFrame) outputFifo <- mkFIFOF; + FIFOF#(ChunkRequestFrame) splitFifo <- mkFIFOF; + + Reg#(DmaMemAddr) newChunkPtrReg <- mkReg(0); + Reg#(DmaMemAddr) totalLenRemainReg <- mkReg(0); + Reg#(Bool) isSplittingReg <- mkReg(False); + + Reg#(DmaMemAddr) tlpMaxSize <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE))); Reg#(PcieTlpSizeWidth) tlpMaxSizeWidth <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH))); - Reg#(DmaMemAddr) newChunkPtrReg <- mkReg(0); - Reg#(DmaMemAddr) totalLenRemainReg <- mkReg(0); - Reg#(Bool) isSplittingReg <- mkReg(False); function Bool hasBoundary(DmaRequestFrame request); let highIdx = (request.startAddr + request.length - 1) >> valueOf(BUS_BOUNDARY_WIDTH); @@ -52,9 +59,10 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); let request = inputFifo.first; inputFifo.deq; let offset = getOffset(request); + let firstLen = (request.length > tlpMaxSize) ? tlpMaxSize : request.length; splitFifo.enq(ChunkRequestFrame { dmaRequest: request, - firstChunkLen: hasBoundary(request) ? offset : tlpMaxSize + firstChunkLen: hasBoundary(request) ? offset : firstLen }); endrule @@ -96,8 +104,8 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); end endrule - interface dmaRequests = convertFifoToFifoIn(inputFifo); - interface chunkRequests = convertFifoToFifoOut(outputFifo); + interface dmaRequestFifoIn = convertFifoToFifoIn(inputFifo); + interface chunkRequestFifoOut = convertFifoToFifoOut(outputFifo); interface Put setTlpMaxSize; method Action put (PcieTlpSizeSetting tlpSizeSetting); diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index 8d9da30..59d4ec2 100755 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -1,43 +1,72 @@ +import FShow::*; import SemiFifo::*; -import StreamUtils::*; import PcieTypes::*; -typedef 512 DMA_DATA_WIDTH; -typedef 64 DMA_HOSTMEM_ADDR_WIDTH; +typedef 512 DATA_WIDTH; +typedef 64 DMA_MEM_ADDR_WIDTH; + typedef 32 DMA_CSR_ADDR_WIDTH; typedef 32 DMA_CSR_DATA_WIDTH; -typedef Bit#(DMA_HOSTMEM_ADDR_WIDTH) DmaMemAddr; + +typedef Bit#(DMA_MEM_ADDR_WIDTH) DmaMemAddr; typedef Bit#(DMA_CSR_ADDR_WIDTH) DMACsrAddr; typedef Bit#(DMA_CSR_DATA_WIDTH) DMACsrValue; +typedef 8 BYTE_WIDTH; +typedef TLog#(BYTE_WIDTH) BYTE_WIDTH_WIDTH; +typedef TMul#(4, BYTE_WIDTH) DWORD_WIDTH; + +typedef 2 CONCAT_STREAM_NUM; + +typedef TDiv#(DATA_WIDTH, BYTE_WIDTH) BYTE_EN_WIDTH; +typedef 'hFFFFFFFFFFFFFFFF MAX_BYTE_EN; + +typedef Bit#(DATA_WIDTH) Data; +typedef Bit#(BYTE_EN_WIDTH) ByteEn; +typedef Bit#(TAdd#(1, TLog#(DATA_WIDTH))) DataBitPtr; +typedef Bit#(TAdd#(1, TLog#(BYTE_EN_WIDTH))) DataBytePtr; + typedef struct { DmaMemAddr startAddr; DmaMemAddr length; -} DmaRequestFrame deriving(Bits, Bounded, Eq, FShow); +} DmaRequestFrame deriving(Bits, Bounded, Eq); typedef struct { DMACsrAddr address; DMACsrValue value; -} DmaCsrFrame deriving(Bits, Bounded, Eq, FShow); +} DmaCsrFrame deriving(Bits, Bounded, Eq); typedef enum { - DMA_RX, DMA_TX -} TRXDirection deriving(Bits, Eq); + DMA_RX, + DMA_TX +} TRXDirection deriving(Bits, Eq, FShow); -interface DmaController#(numeric type dataWidth); - - interface FifoIn#(DataStream) dataC2HPipeIn; - interface FifoIn#(DmaRequestFrame) reqC2HPipeIn; - interface FifoIn#(DmaRequestFrame) reqH2CPipeIn; - interface FifoOut#(DataStream) dataH2CPipeOut; +typedef struct { + Data data; + ByteEn byteEn; + Bool isFirst; + Bool isLast; +} DataStream deriving(Bits, Bounded, Eq); - interface FifoIn#(DmaCsrFrame) csrC2HPipeIn; - interface FifoOut#(DMACsrAddr) csrC2HPipeOut; // read reg in the card from Host - interface FifoOut#(DmaCsrFrame) csrH2CPipeOut; +instance FShow#(DmaRequestFrame); + function Fmt fshow(DmaRequestFrame request); + return ($format(" 0); DataBitPtr bitPtrA = zeroExtend(bytePtrA) << fromInteger(valueOf(BYTE_WIDTH_WIDTH)); // Fill the low PtrA bytes by streamA data - Data concatDataA = streamA.data; + Data concatDataA = streamA.data; ByteEn concatByteEnA = streamA.byteEn; // Fill the high bytes by streamB data - Data concatDataB = streamB.data << bitPtrA; + Data concatDataB = streamB.data << bitPtrA; ByteEn concatByteEnB = streamB.byteEn << bytePtrA; - Data concatData = concatDataA | concatDataB; - ByteEn concatByteEn = concatByteEnA | concatByteEnB; + Data concatData = concatDataA | concatDataB; + ByteEn concatByteEn = concatByteEnA | concatByteEnB; // Get the remain bytes of streamB data - DataBitPtr resBitPtr = getMaxBitPtr() - bitPtrA; - DataBytePtr resBytePtr = getMaxBytePtr() - bytePtrA; - Data remainData = streamB.data >> resBitPtr; - ByteEn remainByteEn = streamB.byteEn >> resBytePtr; + DataBitPtr resBitPtr = getMaxBitPtr() - bitPtrA; + DataBytePtr resBytePtr = getMaxBytePtr() - bytePtrA; + Data remainData = streamB.data >> resBitPtr; + ByteEn remainByteEn = streamB.byteEn >> resBytePtr; // Get if the concat frame is the last - Bool isConcatStreamLast = streamB.isLast; - DataBytePtr remainBytePtr = 0; + Bool isConcatStreamLast = streamB.isLast; + DataBytePtr remainBytePtr = 0; if (resBytePtr < bytePtrB ) begin isConcatStreamLast = False; - remainBytePtr = bytePtrB - resBytePtr; + remainBytePtr = bytePtrB - resBytePtr; end DataStream concatStream = getEmptyStream; DataStream remainStream = getEmptyStream; @@ -119,29 +100,20 @@ function Tuple3#(DataStream, DataStream, DataBytePtr) getConcatStream (DataStrea isLast: True }; end - return tuple3(concatStream, remainStream, remainBytePtr); -endfunction - -function Action showDataStream (DataStream stream); - return action - $display(" Data = %h", stream.data); - $display(" byteEn = %b", stream.byteEn); - $display(" isFirst = %b, isLast = %b", stream.isFirst, stream.isLast); - endaction; -endfunction - -function Action checkDataStream (DataStream stream, String name); - if (stream.byteEn == 0 || stream.data == 0) begin - return action - $display("Error: empty dataStream ", name); - showDataStream(stream); - $finish(); - endaction; - end - else begin - return action - endaction; - end + return ( + actionvalue + immAssert( + (streamA.isLast && bytePtrA <= getMaxBytePtr() && bytePtrA > 0), + "request check @ getConcatStream", + $format( + "bytePtrA=%d should in range of 1~%d", bytePtrA, getMaxBytePtr(), + "bytePtrB=%d should in range of 1~%d", bytePtrB, getMaxBytePtr(), + "streamA.isLast=", fshow(streamA.isLast), "should be False" + ) + ); + return tuple3(concatStream, remainStream, remainBytePtr); + endactionvalue + ); endfunction (* synthesize *) @@ -200,10 +172,10 @@ module mkStreamConcat (StreamConcat ifc); prepareFifoB.deq; streamB.isFirst = False; if (hasRemainReg) begin - match{.concatStream, .remainStream, .remainBytePtr} = getConcatStream(remainStreamReg, streamB, remainBytePtrReg, bytePtrB); - hasRemainReg <= unpack(remainStream.byteEn[0]); + match{.concatStream, .remainStream, .remainBytePtr} <- getConcatStream(remainStreamReg, streamB, remainBytePtrReg, bytePtrB); + hasRemainReg <= unpack(remainStream.byteEn[0]); hasLastRemainReg <= streamB.isLast; - remainStreamReg <= remainStream; + remainStreamReg <= remainStream; remainBytePtrReg <= remainBytePtr; outputFifo.enq(concatStream); end @@ -227,12 +199,12 @@ module mkStreamConcat (StreamConcat ifc); else if (streamA.isLast && prepareFifoB.notEmpty) begin let streamB = prepareFifoB.first.stream; let bytePtrB = prepareFifoB.first.bytePtr; - match{.concatStream, .remainStream, .remainBytePtr} = getConcatStream(streamA, streamB, bytePtrA, bytePtrB); - hasRemainReg <= unpack(remainStream.byteEn[0]); + match{.concatStream, .remainStream, .remainBytePtr} <- getConcatStream(streamA, streamB, bytePtrA, bytePtrB); + hasRemainReg <= unpack(remainStream.byteEn[0]); hasLastRemainReg <= streamB.isLast; - remainStreamReg <= remainStream; + remainStreamReg <= remainStream; remainBytePtrReg <= remainBytePtr; - isStreamAEnd <= !streamB.isLast; + isStreamAEnd <= !streamB.isLast; outputFifo.enq(concatStream); prepareFifoA.deq; prepareFifoB.deq; @@ -240,9 +212,9 @@ module mkStreamConcat (StreamConcat ifc); end endrule - interface inputStreamFirstFifoIn = convertFifoToFifoIn(inputFifoA); + interface inputStreamFirstFifoIn = convertFifoToFifoIn(inputFifoA); interface inputStreamSecondFifoIn = convertFifoToFifoIn(inputFifoB); - interface outputStreamFifoOut = convertFifoToFifoOut(outputFifo); + interface outputStreamFifoOut = convertFifoToFifoOut(outputFifo); endmodule @@ -251,19 +223,19 @@ module mkStreamSplit(StreamSplit ifc); Reg#(StreamSize) streamByteCntReg <- mkReg(0); - FIFOF#(StreamSize) splitLocationFifo <- mkFIFOF; - FIFOF#(DataStream) inputFifo <- mkFIFOF; - FIFOF#(DataStream) outputFifo <- mkFIFOF; - FIFOF#(StreamWithPtr) prepareFifo <- mkFIFOF; - FIFOF#(StreamWithPtr) assertFifo <- mkFIFOF; + FIFOF#(StreamSize) splitLocationFifo <- mkFIFOF; + FIFOF#(DataStream) inputFifo <- mkFIFOF; + FIFOF#(DataStream) outputFifo <- mkFIFOF; + FIFOF#(StreamWithPtr) prepareFifo <- mkFIFOF; + FIFOF#(StreamWithPtr) assertFifo <- mkFIFOF; FIFOF#(Tuple2#(DataBytePtr,DataBytePtr)) splitPtrFifo <- mkFIFOF; - Reg#(DataStream) remainStreamReg <- mkRegU; - Reg#(Bool) hasRemainReg <- mkReg(False); - Reg#(Bool) hasLastRemainReg <- mkReg(False); + Reg#(DataStream) remainStreamReg <- mkRegU; Reg#(DataBytePtr) remainBytePtrReg <- mkReg(0); - Reg#(Bool) isSplitted <- mkReg(False); + Reg#(Bool) hasRemainReg <- mkReg(False); + Reg#(Bool) hasLastRemainReg <- mkReg(False); + Reg#(Bool) isSplitted <- mkReg(False); rule prepareStream; let stream = inputFifo.first; @@ -292,7 +264,6 @@ module mkStreamSplit(StreamSplit ifc); isSplitted <= False; end streamByteCntReg <= stream.isLast ? 0 : streamByteCntReg + unpack(zeroExtend(bytePtr)); - assertFifo.enq(prepareFifo.first); prepareFifo.deq; if (stream.isLast) begin @@ -304,9 +275,6 @@ module mkStreamSplit(StreamSplit ifc); rule execSplitStream; // Only output remainStreamReg if (hasRemainReg && hasLastRemainReg) begin - if (remainStreamReg.byteEn == 0) begin - showDataStream(remainStreamReg); - end outputFifo.enq(remainStreamReg); hasRemainReg <= False; hasLastRemainReg <= False; @@ -348,8 +316,7 @@ module mkStreamSplit(StreamSplit ifc); // concat the new frame with the remainReg else if (hasRemainReg) begin - match {.concatStream, .remainStream, .remainBytePtr} = getConcatStream(remainStreamReg, stream, remainBytePtrReg, frameBytePtr); - checkDataStream(concatStream, "concat remain"); + match {.concatStream, .remainStream, .remainBytePtr} <- getConcatStream(remainStreamReg, stream, remainBytePtrReg, frameBytePtr); outputFifo.enq(concatStream); hasRemainReg <= unpack(remainStream.byteEn[0]); hasLastRemainReg <= stream.isLast; diff --git a/test/TestAxiStream.bsv b/test/TestAxiStream.bsv deleted file mode 100755 index bd30140..0000000 --- a/test/TestAxiStream.bsv +++ /dev/null @@ -1,173 +0,0 @@ -import FIFO::*; -import Vector::*; -import AxiStreamTypes::*; -import Counter::*; - -typedef 512 DATA_WIDTH; -typedef 8 BYTE_BITS; -typedef TDiv#(DATA_WIDTH, BYTE_BITS) BATCH_BYTES; -typedef 128 USR_WIDTH; -typedef 4321 RD_BYTES_LENGTH; - - -interface AxisFifo#(numeric type keepWidth, numeric type usrWidth); - interface RawAxiStreamMaster#(keepWidth, usrWidth) axisMaster; - interface RawAxiStreamSlave#(keepWidth, usrWidth) axisSlave; -endinterface - - -module mkTbAxisRdWrLoop (Empty); - Reg#(File) fileInReg <- mkRegU(); - Reg#(File) fileRefReg <- mkRegU(); - Reg#(File) fileOutReg <- mkRegU(); - Reg#(Bool) initFlagReg <- mkReg(False); - // Read the file - Reg#(Bool) rdDoneFlagReg <- mkReg(False); - Reg#(UInt#(32)) rdBatchCntReg <- mkReg(0); - let rdTotalBytesLen = valueOf(RD_BYTES_LENGTH); - let rdBatchBytesLen = valueOf(BATCH_BYTES); - let rdLastBatchBytesLen = rdTotalBytesLen % rdBatchBytesLen; - let rdBatchesNum = rdTotalBytesLen % rdBatchBytesLen > 0 ? rdTotalBytesLen / rdBatchBytesLen + 1 : rdTotalBytesLen / rdBatchBytesLen; - FIFO#(AxiStream#(BATCH_BYTES, USR_WIDTH)) toDutFifo <- mkSizedFIFO(16); - // DUT - AxisFifo#(BATCH_BYTES, USR_WIDTH) dut <- mkTbAxisWire(); - // Control - Reg#(UInt#(32)) tValidCnt <- mkReg(0); - - rule init(!initFlagReg); - initFlagReg <= True; - File in <- $fopen("test.txt", "rb"); - File refer <- $fopen("ref.txt", "wb"); - File out <- $fopen("out.txt", "wb"); - if (in == InvalidFile || refer == InvalidFile || out == InvalidFile) begin - $display("ERROR: couldn't open test file"); - $finish; - end - fileInReg <= in; - fileRefReg <= refer; - fileOutReg <= out; - endrule - - rule readfile(initFlagReg && !rdDoneFlagReg && rdBatchCntReg < fromInteger(rdBatchesNum)); - Vector#(BATCH_BYTES, Bit#(BYTE_BITS)) getChars = replicate(0); - Bit#(BATCH_BYTES) keep = 0; - Bool last = False; - if(rdBatchCntReg == fromInteger(rdBatchesNum) - 1) begin - for(Integer idx = 0; idx < rdLastBatchBytesLen; idx = idx + 1) begin - int readChar <- $fgetc(fileInReg); - if(readChar == -1) begin - $fclose(fileInReg); - $fclose(fileRefReg); - end else begin - $fwrite(fileRefReg, "%c", readChar); - getChars[idx] = truncate(pack(readChar)); - keep[idx] = 1'b1; - end - end - $fclose(fileInReg); - $fclose(fileRefReg); - rdDoneFlagReg <= True; - last = True; - $display("INFO: test file read done"); - end else begin - rdBatchCntReg <= rdBatchCntReg + 1; - for(Integer idx = 0; idx < rdBatchBytesLen; idx = idx + 1) begin - int rdChar <- $fgetc(fileInReg); - if(rdChar == -1) begin - $fclose(fileRefReg); - $fclose(fileInReg); - last = True; - end else begin - $fwrite(fileRefReg, "%c", rdChar); - getChars[idx] = truncate(pack(rdChar)); - keep[idx] = 1'b1; - end - end - end - let axis = AxiStream{ - tData: pack(getChars), - tKeep: keep, - tLast: last, - tUser: 0 - }; - toDutFifo.enq(axis); - endrule - - rule reader2dut if (rdBatchCntReg > 0); - if(dut.axisSlave.tReady) begin - // $display("INFO: simulation exec a batch"); - toDutFifo.deq; - let axis = toDutFifo.first; - dut.axisSlave.tValid( - True, - axis.tData, - axis.tKeep, - axis.tLast, - axis.tUser); - end - endrule - - rule dut2writer; - dut.axisMaster.tReady(True); - if(dut.axisMaster.tValid) begin - tValidCnt <= tValidCnt + 1; - let data = dut.axisMaster.tData; - Vector#(BATCH_BYTES, Bit#(BYTE_BITS)) getChars = unpack(data); - let keep = dut.axisMaster.tKeep; - for(Integer idx = 0; idx < rdBatchBytesLen; idx = idx + 1) begin - if(keep[idx] == 1'b1) begin $fwrite(fileOutReg, "%c", getChars[i]); end - end - end - if(tValidCnt == rdBatchCntReg && rdDoneFlagReg) begin - $display("INFO: file write done, compare the ref and out") - $fclose(fileOutReg); - $finish(); - end - endrule - -endmodule - -module mkTbAxisWire(AxisFifo#(keepWidth, usrWidth) ifc); - Wire#(Bit#(TMul#(keepWidth, 8))) data <- mkDWire(0); - Wire#(Bit#(keepWidth)) keep <- mkDWire(0); - Wire#(Bit#(usrWidth)) user <- mkDWire(0); - Wire#(Bit#(1)) last <- mkDWire(0); - Wire#(Bit#(1)) rdy <- mkDWire(0); - Wire#(Bit#(1)) vld <- mkDWire(0); - - interface RawAxiStreamMaster axisMaster; - method Bool tValid = unpack(vld); - method Bool tLast = unpack(last); - method Bit#(TMul#(keepWidth, 8)) tData = data; - method Bit#(keepWidth) tKeep = keep; - method Bit#(usrWidth) tUser = user; - method Action tReady(Bool ready); - rdy <= pack(ready); - endmethod - endinterface - - interface RawAxiStreamSlave axisSlave; - method Bool tReady = True; - method Action tValid( - Bool tvalid, - Bit#(TMul#(keepWidth, 8)) tData, - Bit#(keepWidth) tKeep, - Bool tLast, - Bit#(usrWidth) tUser - ); - data <= tData; - keep <= tKeep; - user <= tUser; - last <= pack(tLast); - vld <= pack(tvalid); - endmethod - endinterface -endmodule - -// module mkTbAxisPipeFifo (AxisFifo#(keepWidth, usrWidth) ifc); -// FIFOF#(AxiStream#(keepWidth, usrWidth)) <- mkSizedFIFOF(10); - -// endmodule - - - diff --git a/test/TestDmaCore.bsv b/test/TestDmaCore.bsv index 0a9f7f2..3d26e2b 100755 --- a/test/TestDmaCore.bsv +++ b/test/TestDmaCore.bsv @@ -1,10 +1,12 @@ -import SemiFifo::*; import GetPut::*; import Randomizable::*; + +import SemiFifo::*; +import PrimUtils::*; import DmaTypes::*; import DmaRequestCore::*; -typedef 10 CHUNK_PER_EPOCH_TEST_NUM; +typedef 100000 CHUNK_PER_EPOCH_TEST_NUM; typedef 64'hFFFFFFFFFFFFFFFF MAX_ADDRESS; typedef 16'hFFFF MAX_TEST_LENGTH; typedef 2'b00 DEFAULT_TLP_SIZE_SETTING; @@ -16,13 +18,14 @@ module mkChunkComputerTb(Empty); ChunkCompute dut <- mkChunkComputer(DMA_TX); - Reg#(Bool) isInitReg <- mkReg(False); - Reg#(UInt#(32)) testCntReg <- mkReg(0); - Reg#(UInt#(32)) epochCntReg <- mkReg(0); + Reg#(Bool) isInitReg <- mkReg(False); + Reg#(UInt#(32)) testCntReg <- mkReg(0); + Reg#(UInt#(32)) epochCntReg <- mkReg(0); + Reg#(DmaMemAddr) lenRemainReg <- mkReg(0); - Reg#(DmaRequestFrame) testRequest <- mkRegU; + Randomize#(DmaMemAddr) startAddrRandomVal <- mkConstrainedRandomizer(0, fromInteger(valueOf(MAX_ADDRESS)-1)); - Randomize#(DmaMemAddr) lengthRandomVal <- mkConstrainedRandomizer(1, fromInteger(valueOf(MAX_TEST_LENGTH))); + Randomize#(DmaMemAddr) lengthRandomVal <- mkConstrainedRandomizer(1, fromInteger(valueOf(MAX_TEST_LENGTH))); function Bool hasBoundary(DmaRequestFrame request); let highIdx = (request.startAddr + request.length - 1) >> valueOf(BUS_BOUNDARY_WIDTH); @@ -30,18 +33,12 @@ module mkChunkComputerTb(Empty); return (highIdx > lowIdx); endfunction - function Action showRequest (DmaRequestFrame request); - return action - $display("startAddr: ", request.startAddr, " length: ", request.length); - endaction; - endfunction - rule testInit if (!isInitReg); startAddrRandomVal.cntrl.init; lengthRandomVal.cntrl.init; isInitReg <= True; dut.setTlpMaxSize.put(fromInteger(valueOf(DEFAULT_TLP_SIZE_SETTING))); - $display("Start Test of mkChunkComputerTb"); + $display("INFO: Start Test of mkChunkComputerTb"); $display("INFO: Set Max Payload Size to ", valueOf(DEFAULT_TLP_SIZE)); endrule @@ -55,8 +52,8 @@ module mkChunkComputerTb(Empty); length: testLength }; lenRemainReg <= testLength; - dut.dmaRequests.enq(request); - showRequest(request); + dut.dmaRequestFifoIn.enq(request); + // $display("INFO: input ", fshow(request)); end else begin lenRemainReg <= 0; @@ -64,32 +61,30 @@ module mkChunkComputerTb(Empty); endrule rule testOutput if (isInitReg && lenRemainReg > 0); - let newRequest = dut.chunkRequests.first; - dut.chunkRequests.deq; - if (hasBoundary(newRequest)) begin - $display("Error, has 4KB boundary!"); - showRequest(newRequest); - $finish(); - end - else begin - let newRemain = lenRemainReg - newRequest.length; - lenRemainReg <= newRemain; - if (newRemain == 0) begin - if (epochCntReg < fromInteger(valueOf(CHUNK_PER_EPOCH_TEST_NUM)-1)) begin - epochCntReg <= epochCntReg + 1; + let newRequest = dut.chunkRequestFifoOut.first; + dut.chunkRequestFifoOut.deq; + immAssert( + !hasBoundary(newRequest), + "has boundary assert @ mkChunkComputerTb", + fshow(newRequest) + ); + let newRemain = lenRemainReg - newRequest.length; + lenRemainReg <= newRemain; + if (newRemain == 0) begin + if (epochCntReg < fromInteger(valueOf(CHUNK_PER_EPOCH_TEST_NUM)-1)) begin + epochCntReg <= epochCntReg + 1; + end + else begin + epochCntReg <= 0; + testCntReg <= testCntReg + 1; + if (testCntReg == fromInteger(valueOf(CHUNK_TX_TEST_SETTING_NUM)-1)) begin + $display("INFO: ChunkComputer Test End."); + $finish(); end else begin - epochCntReg <= 0; - testCntReg <= testCntReg + 1; - if (testCntReg == fromInteger(valueOf(CHUNK_TX_TEST_SETTING_NUM)-1)) begin - $display("INFO: ChunkComputer Test End."); - $finish(); - end - else begin - PcieTlpSizeSetting newSetting = fromInteger(valueOf(DEFAULT_TLP_SIZE_SETTING)) + truncate(pack(testCntReg)) + 1; - dut.setTlpMaxSize.put(newSetting); - $display("INFO: Set Max Payload Size to ", pack(fromInteger(valueOf(DEFAULT_TLP_SIZE)) << newSetting)); - end + PcieTlpSizeSetting newSetting = fromInteger(valueOf(DEFAULT_TLP_SIZE_SETTING)) + truncate(pack(testCntReg)) + 1; + dut.setTlpMaxSize.put(newSetting); + $display("INFO: Set Max Payload Size to ", pack(fromInteger(valueOf(DEFAULT_TLP_SIZE)) << newSetting)); end end end diff --git a/test/TestStreamUtils.bsv b/test/TestStreamUtils.bsv index a78bc3e..3241357 100755 --- a/test/TestStreamUtils.bsv +++ b/test/TestStreamUtils.bsv @@ -1,6 +1,9 @@ import FIFOF::*; import SemiFifo::*; import LFSR::*; + +import PrimUtils::*; +import DmaTypes::*; import StreamUtils::*; typedef 'hAB PSEUDO_DATA; @@ -19,10 +22,6 @@ typedef 'hABCDEF01 SEED_2; typedef 16 MAX_STREAM_SIZE_PTR; typedef 10000 TEST_NUM; -typedef enum { - WAITING, FirstChunk, SecondChunk -} StreamSplitOutStatus deriving(Bits, Eq); - interface RandomStreamSize; method ActionValue#(StreamSize) next(); endinterface @@ -95,7 +94,7 @@ module mkStreamConcatTb(Empty); Reg#(UInt#(32)) testFinishCntReg <- mkReg(0); rule testInit if (!isInitReg); - $display("INFO: ================start StreamConcatTb!=================="); + $display("INFO: start mkStreamConcatTb!"); isInitReg <= True; endrule @@ -139,34 +138,33 @@ module mkStreamConcatTb(Empty); rule testOutput; let outStream = dut.outputStreamFifoOut.first; - checkDataStream(outStream, "Output Stream"); StreamSize concatSize = concatSizeReg + unpack(zeroExtend(convertByteEn2BytePtr(outStream.byteEn))); if (outStream.isLast) begin let ideaSize = ideaConcatSizeFifo.first; - if (concatSize != ideaSize) begin - $display("Error: ideaSize=%d, realSize=%d", ideaSize, concatSize); - $finish(); - end - else begin - // $display("INFO: verify output ideaSize=%d, realSize=%d, ideaLastSize=%d", ideaSize, concatSize, ideaSize%getMaxFrameSize()); - ideaConcatSizeFifo.deq; - testFinishCntReg <= testFinishCntReg + 1; - end + immAssert( + (concatSize == ideaSize), + "outStream length check @ mkStreamConcatTb::testOutput", + $format("ideaSize = %d, realSize = %d \n", ideaSize, concatSize) + ); + // $display("INFO: verify output ideaSize=%d, realSize=%d, ideaLastSize=%d", ideaSize, concatSize, ideaSize%getMaxFrameSize()); + ideaConcatSizeFifo.deq; + testFinishCntReg <= testFinishCntReg + 1; concatSizeReg <= 0; end else begin concatSizeReg <= concatSize; - if (outStream.data != getPseudoData()) begin - $display("Error: Wrong output data"); - showDataStream(outStream); - $finish(); - end + immAssert( + (outStream.data == getPseudoData()), + "outStream Data Check @ mkStreamConcatTb::testOutput", + $format(outStream) + ); end dut.outputStreamFifoOut.deq; endrule rule testFinish; if (testFinishCntReg == fromInteger(valueOf(TEST_NUM)-1)) begin + $display("INFO: end mkStreamConcatTb"); $finish(); end endrule @@ -195,7 +193,7 @@ module mkStreamSplitTb(Empty); rule testInit if (!isInitReg); isInitReg <= True; - $display("INFO: ================start StreamSplitTb!=================="); + $display("INFO: start mkStreamSplitTb!"); endrule rule testInput if (isInitReg && testCntReg < fromInteger(valueOf(TEST_NUM))); @@ -227,36 +225,30 @@ module mkStreamSplitTb(Empty); rule testOutput if (isInitReg); let outStream = dut.outputStreamFifoOut.first; dut.outputStreamFifoOut.deq; - checkDataStream(outStream, "split output stream"); StreamSize totalSize = totalRecvSizeReg + unpack(zeroExtend(convertByteEn2BytePtr(outStream.byteEn))); - if (outStream.isLast) begin if (hasRecvFirstChunkReg) begin - if (totalSize != ideaTotalSizeFifo.first) begin - $display("Error: wrong total size, idea = %d, real = %d", ideaTotalSizeFifo.first, totalSize); - showDataStream(outStream); - $finish(); - end - else begin - // $display("INFO: receive total size", totalSize); - ideaTotalSizeFifo.deq; - testCntReg <= testCntReg + 1; - hasRecvFirstChunkReg <= False; - totalRecvSizeReg <= 0; - end + immAssert( + (totalSize == ideaTotalSizeFifo.first), + "outStream total length check @ mkStreamSplitTb", + $format("Wrong total length, ideaLen=%d, realLen=%d \n", ideaTotalSizeFifo.first, totalSize) + ); + // $display("INFO: receive total size", totalSize); + ideaTotalSizeFifo.deq; + testCntReg <= testCntReg + 1; + hasRecvFirstChunkReg <= False; + totalRecvSizeReg <= 0; end else begin - if (totalSize != ideaSplitSizeFifo.first) begin - $display("Error: wrong split location, idea = %d, real = %d", ideaSplitSizeFifo.first, totalSize); - showDataStream(outStream); - $finish(); - end - else begin - // $display("INFO: receive first chunk at %d, total size %d", ideaSplitSizeFifo.first, ideaTotalSizeFifo.first); - ideaSplitSizeFifo.deq; - hasRecvFirstChunkReg <= True; - totalRecvSizeReg <= totalSize; - end + immAssert( + (totalSize == ideaSplitSizeFifo.first), + "outStream split location check @ mkStreamSplitTb", + $format("Wrong split location, ideaLen=%d, realLen=%d \n", ideaSplitSizeFifo.first, totalSize) + ); + // $display("INFO: receive first chunk at %d, total size %d", ideaSplitSizeFifo.first, ideaTotalSizeFifo.first); + ideaSplitSizeFifo.deq; + hasRecvFirstChunkReg <= True; + totalRecvSizeReg <= totalSize; end end else begin @@ -266,6 +258,7 @@ module mkStreamSplitTb(Empty); rule testFinish; if (testCntReg == fromInteger(valueOf(TEST_NUM)-1)) begin + $display("INFO: end mkStreamSplitTb"); $finish(); end endrule From 32670906ce3b122a3388ff807188340a0d108b15 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Tue, 16 Jul 2024 16:36:36 +0800 Subject: [PATCH 18/53] Update StreaUtils --- src/StreamUtils.bsv | 128 +++++++++++++++++++++++++++++--------------- 1 file changed, 85 insertions(+), 43 deletions(-) diff --git a/src/StreamUtils.bsv b/src/StreamUtils.bsv index 58b13d0..91c9b27 100755 --- a/src/StreamUtils.bsv +++ b/src/StreamUtils.bsv @@ -28,12 +28,74 @@ interface StreamSplit; endinterface function DataBytePtr convertByteEn2BytePtr (ByteEn byteEn); - ByteEn byteEnTemp = byteEn; DataBytePtr ptr = 0; - while (byteEnTemp > 0) begin - byteEnTemp = byteEnTemp >> 1; - ptr = ptr + 1; - end + case(byteEn) + 'h0000000000000001: ptr = 1; + 'h0000000000000003: ptr = 2; + 'h0000000000000007: ptr = 3; + 'h000000000000000F: ptr = 4; + 'h000000000000001F: ptr = 5; + 'h000000000000003F: ptr = 6; + 'h000000000000007F: ptr = 7; + 'h00000000000000FF: ptr = 8; + 'h00000000000001FF: ptr = 9; + 'h00000000000003FF: ptr = 10; + 'h00000000000007FF: ptr = 11; + 'h0000000000000FFF: ptr = 12; + 'h0000000000001FFF: ptr = 13; + 'h0000000000003FFF: ptr = 14; + 'h0000000000007FFF: ptr = 15; + 'h000000000000FFFF: ptr = 16; + 'h000000000001FFFF: ptr = 17; + 'h000000000003FFFF: ptr = 18; + 'h000000000007FFFF: ptr = 19; + 'h00000000000FFFFF: ptr = 20; + 'h00000000001FFFFF: ptr = 21; + 'h00000000003FFFFF: ptr = 22; + 'h00000000007FFFFF: ptr = 23; + 'h0000000000FFFFFF: ptr = 24; + 'h0000000001FFFFFF: ptr = 25; + 'h0000000003FFFFFF: ptr = 26; + 'h0000000007FFFFFF: ptr = 27; + 'h000000000FFFFFFF: ptr = 28; + 'h000000001FFFFFFF: ptr = 29; + 'h000000003FFFFFFF: ptr = 30; + 'h000000007FFFFFFF: ptr = 31; + 'h00000000FFFFFFFF: ptr = 32; + 'h00000001FFFFFFFF: ptr = 33; + 'h00000003FFFFFFFF: ptr = 34; + 'h00000007FFFFFFFF: ptr = 35; + 'h0000000FFFFFFFFF: ptr = 36; + 'h0000001FFFFFFFFF: ptr = 37; + 'h0000003FFFFFFFFF: ptr = 38; + 'h0000007FFFFFFFFF: ptr = 39; + 'h000000FFFFFFFFFF: ptr = 40; + 'h000001FFFFFFFFFF: ptr = 41; + 'h000003FFFFFFFFFF: ptr = 42; + 'h000007FFFFFFFFFF: ptr = 43; + 'h00000FFFFFFFFFFF: ptr = 44; + 'h00001FFFFFFFFFFF: ptr = 45; + 'h00003FFFFFFFFFFF: ptr = 46; + 'h00007FFFFFFFFFFF: ptr = 47; + 'h0000FFFFFFFFFFFF: ptr = 48; + 'h0001FFFFFFFFFFFF: ptr = 49; + 'h0003FFFFFFFFFFFF: ptr = 50; + 'h0007FFFFFFFFFFFF: ptr = 51; + 'h000FFFFFFFFFFFFF: ptr = 52; + 'h001FFFFFFFFFFFFF: ptr = 53; + 'h003FFFFFFFFFFFFF: ptr = 54; + 'h007FFFFFFFFFFFFF: ptr = 55; + 'h00FFFFFFFFFFFFFF: ptr = 56; + 'h01FFFFFFFFFFFFFF: ptr = 57; + 'h03FFFFFFFFFFFFFF: ptr = 58; + 'h07FFFFFFFFFFFFFF: ptr = 59; + 'h0FFFFFFFFFFFFFFF: ptr = 60; + 'h1FFFFFFFFFFFFFFF: ptr = 61; + 'h3FFFFFFFFFFFFFFF: ptr = 62; + 'h7FFFFFFFFFFFFFFF: ptr = 63; + 'hFFFFFFFFFFFFFFFF: ptr = 64; + default : ptr = 0; + endcase return ptr; endfunction @@ -55,7 +117,7 @@ function DataBytePtr getMaxBytePtr (); endfunction // Concat two DataStream frames into one. StreamA.isLast must be True, otherwise the function will return a empty frame to end the stream. -function ActionValue#(Tuple3#(DataStream, DataStream, DataBytePtr)) getConcatStream (DataStream streamA, DataStream streamB, DataBytePtr bytePtrA, DataBytePtr bytePtrB); +function Tuple3#(DataStream, DataStream, DataBytePtr) getConcatStream (DataStream streamA, DataStream streamB, DataBytePtr bytePtrA, DataBytePtr bytePtrB); Bool isCallLegally = (streamA.isLast && bytePtrA <= getMaxBytePtr() && bytePtrA > 0); DataBitPtr bitPtrA = zeroExtend(bytePtrA) << fromInteger(valueOf(BYTE_WIDTH_WIDTH)); @@ -87,33 +149,20 @@ function ActionValue#(Tuple3#(DataStream, DataStream, DataBytePtr)) getConcatStr // package the return concatStream and remainStream if(isCallLegally) begin - concatStream = DataStream{ - data: concatData, - byteEn: concatByteEn, + concatStream = DataStream { + data : concatData, + byteEn : concatByteEn, isFirst: streamA.isFirst, - isLast: isConcatStreamLast + isLast : isConcatStreamLast }; - remainStream = DataStream{ - data: remainData, - byteEn: remainByteEn, + remainStream = DataStream { + data : remainData, + byteEn : remainByteEn, isFirst: False, - isLast: True + isLast : True }; end - return ( - actionvalue - immAssert( - (streamA.isLast && bytePtrA <= getMaxBytePtr() && bytePtrA > 0), - "request check @ getConcatStream", - $format( - "bytePtrA=%d should in range of 1~%d", bytePtrA, getMaxBytePtr(), - "bytePtrB=%d should in range of 1~%d", bytePtrB, getMaxBytePtr(), - "streamA.isLast=", fshow(streamA.isLast), "should be False" - ) - ); - return tuple3(concatStream, remainStream, remainBytePtr); - endactionvalue - ); + return tuple3(concatStream, remainStream, remainBytePtr); endfunction (* synthesize *) @@ -164,7 +213,6 @@ module mkStreamConcat (StreamConcat ifc); hasRemainReg <= False; isStreamAEnd <= False; end - // StreamB or streamB + the remain data else if (prepareFifoB.notEmpty && isStreamAEnd) begin let streamB = prepareFifoB.first.stream; @@ -172,7 +220,7 @@ module mkStreamConcat (StreamConcat ifc); prepareFifoB.deq; streamB.isFirst = False; if (hasRemainReg) begin - match{.concatStream, .remainStream, .remainBytePtr} <- getConcatStream(remainStreamReg, streamB, remainBytePtrReg, bytePtrB); + let {concatStream, remainStream, remainBytePtr} = getConcatStream(remainStreamReg, streamB, remainBytePtrReg, bytePtrB); hasRemainReg <= unpack(remainStream.byteEn[0]); hasLastRemainReg <= streamB.isLast; remainStreamReg <= remainStream; @@ -184,7 +232,6 @@ module mkStreamConcat (StreamConcat ifc); end isStreamAEnd <= !streamB.isLast; end - // StreamA or StreamA + first StreamB else if (prepareFifoA.notEmpty) begin let streamA = prepareFifoA.first.stream; @@ -199,7 +246,7 @@ module mkStreamConcat (StreamConcat ifc); else if (streamA.isLast && prepareFifoB.notEmpty) begin let streamB = prepareFifoB.first.stream; let bytePtrB = prepareFifoB.first.bytePtr; - match{.concatStream, .remainStream, .remainBytePtr} <- getConcatStream(streamA, streamB, bytePtrA, bytePtrB); + let {concatStream, remainStream, remainBytePtr} = getConcatStream(streamA, streamB, bytePtrA, bytePtrB); hasRemainReg <= unpack(remainStream.byteEn[0]); hasLastRemainReg <= streamB.isLast; remainStreamReg <= remainStream; @@ -240,7 +287,7 @@ module mkStreamSplit(StreamSplit ifc); rule prepareStream; let stream = inputFifo.first; inputFifo.deq; - StreamWithPtr streamWithPtr = StreamWithPtr{ + StreamWithPtr streamWithPtr = StreamWithPtr { stream: stream, bytePtr: convertByteEn2BytePtr(stream.byteEn) }; @@ -279,50 +326,45 @@ module mkStreamSplit(StreamSplit ifc); hasRemainReg <= False; hasLastRemainReg <= False; end - else if (assertFifo.notEmpty && splitPtrFifo.notEmpty) begin let stream = assertFifo.first.stream; let frameBytePtr = assertFifo.first.bytePtr; - match {.truncateBytePtr, .resBytePtr} = splitPtrFifo.first; + let {truncateBytePtr, resBytePtr} = splitPtrFifo.first; assertFifo.deq; splitPtrFifo.deq; - // no operatation if (!hasRemainReg && truncateBytePtr == 0) begin outputFifo.enq(stream); end - // split the frame in this cycle to a last frame and a remain frame else if (!hasRemainReg && truncateBytePtr > 0) begin DataBitPtr truncateBitPtr = zeroExtend(truncateBytePtr) << valueOf(BYTE_WIDTH_WIDTH); DataBitPtr resBitPtr = zeroExtend(resBytePtr) << valueOf(BYTE_WIDTH_WIDTH); - outputFifo.enq(DataStream{ + outputFifo.enq(DataStream { data: (stream.data << resBitPtr) >> resBitPtr, byteEn: (stream.byteEn << resBytePtr) >> resBytePtr, isFirst: stream.isFirst, isLast: True }); - DataStream remainStream = DataStream{ + DataStream remainStream = DataStream { data: stream.data >> truncateBitPtr, byteEn: stream.byteEn >> truncateBytePtr, isFirst: True, isLast: True }; - hasRemainReg <= (remainStream.byteEn != 0); + hasRemainReg <= unpack(remainStream.byteEn[0]); hasLastRemainReg <= stream.isLast; remainBytePtrReg <= frameBytePtr - truncateBytePtr; remainStreamReg <= remainStream; end - // concat the new frame with the remainReg else if (hasRemainReg) begin - match {.concatStream, .remainStream, .remainBytePtr} <- getConcatStream(remainStreamReg, stream, remainBytePtrReg, frameBytePtr); + let {concatStream, remainStream, remainBytePtr} = getConcatStream(remainStreamReg, stream, remainBytePtrReg, frameBytePtr); outputFifo.enq(concatStream); hasRemainReg <= unpack(remainStream.byteEn[0]); hasLastRemainReg <= stream.isLast; remainStreamReg <= remainStream; remainBytePtrReg <= remainBytePtr; - end end endrule From 9fb805debbaaa299bf65b91ee0690cc7b42f67ea Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Tue, 16 Jul 2024 16:46:15 +0800 Subject: [PATCH 19/53] update PcieTypes --- src/PcieTypes.bsv | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/PcieTypes.bsv b/src/PcieTypes.bsv index 7e09d71..e617cfd 100755 --- a/src/PcieTypes.bsv +++ b/src/PcieTypes.bsv @@ -20,16 +20,20 @@ typedef 8 PCIE_TLP_FIRST_BE_WIDTH; typedef 8 PCIE_TLP_LAST_BE_WIDTH; typedef Bit#(PCIE_TLP_FIRST_BE_WIDTH) PcieTlpCtlFirstByteEn; typedef Bit#(PCIE_TLP_LAST_BE_WIDTH) PcieTlpCtlLastByteEn; + typedef PCIE_TDATA_BYTES PCIE_TLP_BYTE_EN_WIDTH; typedef Bit#(PCIE_TLP_BYTE_EN_WIDTH) PcieTlpCtlByteEn; + typedef 2 PCIE_TLP_ISSOP_WIDTH; typedef 2 PCIE_TLP_ISSOP_PTR_WIDTH; typedef Bit#(PCIE_TLP_ISSOP_WIDTH) PcieTlpCtlIsSop; typedef Bit#(PCIE_TLP_ISSOP_PTR_WIDTH) PcieTlpCtlIsSopPtr; + typedef 2 PCIE_TLP_ISEOP_WIDTH; typedef 4 PCIE_TLP_ISEOP_PTR_WIDTH; typedef Bit#(PCIE_TLP_ISEOP_WIDTH) PcieTlpCtlIsEop; typedef Bit#(PCIE_TLP_ISEOP_PTR_WIDTH) PcieTlpCtlIsEopPtr; + typedef 2 PCIE_TPH_PRESENT_WIDTH; typedef 4 PCIE_TPH_TYPE_WIDTH; typedef 16 PCIE_TPH_STTAG; @@ -38,22 +42,29 @@ typedef Bit#(PCIE_TPH_PRESENT_WIDTH) PcieTlpCtlTphPresent; typedef Bit#(PCIE_TPH_TYPE_WIDTH) PcieTlpCtlTphType; typedef Bit#(PCIE_TPH_STTAG) PcieTlpCtlTphSteeringTag; typedef Bit#(PCIE_TPH_INDIRECT_TAGEN_WIDTH) PcieTlpCtlTphIndirectTagEn; + typedef 64 PCIE_TLP_PARITY; typedef Bit#(PCIE_TLP_PARITY) PcieTlpCtlParity; + typedef 4 PCIE_TLP_ADDR_OFFSET_WIDTH; typedef Bit#(PCIE_TLP_ADDR_OFFSET_WIDTH) PcieTlpCtlAddrOffset; + typedef 6 PCIE_TLP_SEQ_NUM_WIDTH; typedef Bit#(PCIE_TLP_SEQ_NUM_WIDTH) PcieTlpCtlSeqNum; + typedef 4 PCIE_TLP_RC_ISSOP_WIDTH; typedef Bit#(PCIE_TLP_RC_ISSOP_WIDTH) PcieTlpCtlIsSopRC; + typedef 4 PCIE_TLP_RC_ISEOP_WIDTH; typedef Bit#(PCIE_TLP_RC_ISEOP_WIDTH) PcieTlpCtlIsEopRC; + // Signals the start of a new TLP, 6 bit. typedef struct { PcieTlpCtlIsSop isSop; PcieTlpCtlIsSopPtr isSopPtr0; PcieTlpCtlIsSopPtr isSopPtr1; } PcieTlpCtlIsSopCommon deriving(Bits, Bounded, Eq); + // Signals the start of a new TLP, 12 bit. typedef struct { PcieTlpCtlIsSopRC isSop; @@ -62,12 +73,14 @@ typedef struct { PcieTlpCtlIsSopPtr isSopPtr2; PcieTlpCtlIsSopPtr isSopPtr3; } PcieTlpCtlIsSopReqCpl deriving(Bits, Bounded, Eq); + // Indicates a TLP is ending in this beat, 10bit. typedef struct { PcieTlpCtlIsEop isEop; PcieTlpCtlIsEopPtr isEopPtr0; PcieTlpCtlIsEopPtr isEopPtr1; } PcieTlpCtlIsEopCommon deriving(Bits, Bounded, Eq); + // Indicates a TLP is ending in this beat, 20bit. typedef struct { PcieTlpCtlIsEopRC isEop; @@ -90,6 +103,7 @@ typedef struct { PcieTlpCtlTphSteeringTag tphSteeringTag; PcieTlpCtlParity parity; } PcieCompleterRequestSideBandFrame deriving(Bits, Bounded, Eq); + // 81bit tUser of PcieCompleterComplete AXIS-master typedef struct { PcieTlpCtlIsSopCommon isSop; @@ -97,6 +111,7 @@ typedef struct { Bool discontinue; PcieTlpCtlParity parity; } PcieCompleterCompleteSideBandFrame deriving(Bits, Bounded, Eq); + // 137bit tUser of PcieRequesterRequeste AXIS-master typedef struct { PcieTlpCtlFirstByteEn firstByteEn; @@ -113,6 +128,7 @@ typedef struct { PcieTlpCtlSeqNum seqNum1; PcieTlpCtlParity parity; } PcieRequsterRequestSideBandFrame deriving(Bits, Bounded, Eq); + // 161bit tUser of PcieRequesterComplete AXIS-slave typedef struct { PcieTlpCtlByteEn dataByteEn; @@ -127,14 +143,12 @@ typedef 2 PCIE_CR_NP_REQ_WIDTH; typedef 6 PCIE_CR_NP_REQ_COUNT_WIDTH; typedef Bit#(PCIE_CR_NP_REQ_WIDTH) PcieNonPostedRequst; typedef Bit#(PCIE_CR_NP_REQ_COUNT_WIDTH) PcieNonPostedRequstCount; + // Interface to PCIe IP Completer Interface (*always_ready, always_enabled*) interface RawPcieCompleter; // TODO: the AxiStream in blue-wrapper has tDataWidth = tKeepWidth * BYTE_WIDTH, but the PCIe IP has tDataWidth = tKeepWidth * DWORD_WIDTH (* prefix = "s_axis_cq_" *) interface RawAxiStreamSlave#(PCIE_TKEEP_WIDTH, PCIE_COMPLETER_REQUEST_TUSER_WIDTH) request; - // (* result = "pcie_cq_np_req" *) method PcieNonPostedRequst nonPostedReqCreditIncrement; - // (* prefix = "" *) method Action nonPostedReqCreditCnt( - // (* port = "pcie_cq_np_req_count" *) PcieNonPostedRequstCount ); (* prefix = "m_axis_cc_" *) interface RawAxiStreamMaster#(PCIE_TKEEP_WIDTH, PCIE_COMPLETER_COMPLETE_TUSER_WIDTH) complete; endinterface @@ -149,6 +163,7 @@ typedef 10 PCIE_CFG_MGMT_ADDR_WIDTH; typedef 4 PCIE_CFG_MGMT_BE_WIDTH; typedef 8 PCIE_CFG_MGMT_FUNC_NUM_WIDTH; typedef 32 PCIE_CFG_MGMT_DATA_WIDTH; + typedef Bit#(PCIE_CFG_MGMT_ADDR_WIDTH) PcieCfgMgmtAddr; typedef Bit#(PCIE_CFG_MGMT_BE_WIDTH) PcieCfgMgmtByteEn; typedef Bit#(PCIE_CFG_MGMT_FUNC_NUM_WIDTH) PcieCfgMgmtFuncNum; @@ -208,6 +223,7 @@ typedef 1 PCIE_CFG_PHY_LINK_DOWN_WIDTH; typedef 2 PCIE_CFG_PHY_LINK_STATUS_WIDTH; typedef Bit#(PCIE_CFG_PHY_LINK_DOWN_WIDTH) PcieCfgPhyLinkDown; typedef Bit#(PCIE_CFG_PHY_LINK_STATUS_WIDTH) PcieCfgPhyLinkStatus; + typedef 3 PCIE_CFG_NEGOTIATED_WIDTH_WIDTH; typedef 3 PCIE_CFG_CURRENT_SPEED_WIDTH; typedef 2 PCIE_CFG_MAX_PAYLOAD_WIDTH; @@ -216,6 +232,7 @@ typedef Bit#(PCIE_CFG_NEGOTIATED_WIDTH_WIDTH) PcieCfgNegotiatedWidth; typedef Bit#(PCIE_CFG_CURRENT_SPEED_WIDTH) PCieCfgCurrentSpeed; typedef Bit#(PCIE_CFG_MAX_PAYLOAD_WIDTH) PcieCfgMaxPayloadSize; typedef Bit#(PCIE_CFG_MAX_READ_REQ_WIDTH) PCieCfgMaxReadReqSize; + typedef 16 PCIE_FUNCTIONS_STATUS_WIDTH; typedef Bit#(PCIE_FUNCTIONS_STATUS_WIDTH) PcieCfgFunctionStatus; From 0c9018d2d5aa86a8067e076305fb6347ae1ce3f8 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Sat, 20 Jul 2024 12:07:31 +0800 Subject: [PATCH 20/53] Add CompleterRequest --- img/StreamSplit.drawio.svg | 424 +++++++++++++++++++++++++++++++ img/chunkSplit.drawio.svg | 1 + img/dmac.drawio.svg | 494 ++++++++++++++++++++++++++++++++++++ img/streamConcat.drawio.svg | 1 + src/CompleterRequest.bsv | 92 +++++++ src/PcieAxiStreamTypes.bsv | 97 +++++++ src/PcieDescriptorTypes.bsv | 68 +++++ src/PcieTypes.bsv | 24 +- src/PrimUtils.bsv | 4 +- 9 files changed, 1198 insertions(+), 7 deletions(-) create mode 100644 img/StreamSplit.drawio.svg create mode 100644 img/chunkSplit.drawio.svg create mode 100644 img/dmac.drawio.svg create mode 100644 img/streamConcat.drawio.svg create mode 100644 src/CompleterRequest.bsv create mode 100644 src/PcieAxiStreamTypes.bsv create mode 100644 src/PcieDescriptorTypes.bsv diff --git a/img/StreamSplit.drawio.svg b/img/StreamSplit.drawio.svg new file mode 100644 index 0000000..4ae2123 --- /dev/null +++ b/img/StreamSplit.drawio.svg @@ -0,0 +1,424 @@ + + + + + + + + + +
+
+
+ InputFifo +
+
+
+
+ + InputFifo + +
+
+ + + + + + +
+
+
+ getFrameSize +
+
+
+
+ + getFrameSize + +
+
+ + + + + + +
+
+
+ assertSplit +
+
+
+
+ + assertSplit + +
+
+ + + + + + + + +
+
+
+ dataSplit +
+
+
+
+ + dataSplit + +
+
+ + + + + + +
+
+
+ remainStream +
+
+
+
+ + remainStream + +
+
+ + + + +
+
+
+ OutputFifo +
+
+
+
+ + OutputFifo + +
+
+ + + + +
+
+
+ is Split Location in this frame? +
+
+
+
+ + is Split Location in this f... + +
+
+ + + + + + +
+
+
+ DataStream +
+
+
+
+ + DataStream + +
+
+ + + + +
+
+
+ OutputFifo +
+
+
+
+ + OutputFifo + +
+
+ + + + + + +
+
+
+ Last +
+
+
+
+ + Last + +
+
+ + + + +
+
+
+ Remain +
+
+
+
+ + Remain + +
+
+ + + + +
+
+
+ OutputFifo +
+
+
+
+ + OutputFifo + +
+
+ + + + +
+
+
+ DataStream +
+
+
+
+ + DataStream + +
+
+ + + + +
+
+
+ Remain +
+
+
+
+ + Remain + +
+
+ + + + + + +
+
+
+ ConcatStream +
+
+
+
+ + ConcatStream + +
+
+ + + + +
+
+
+ Remain +
+
+
+
+ + Remain + +
+
+ + + + +
+
+
+ OutputFifo +
+
+
+
+ + OutputFifo + +
+
+ + + + +
+
+
+ ① +
+
+
+
+ + ① + +
+
+ + + + +
+
+
+ ② +
+
+
+
+ + ② + +
+
+ + + + +
+
+
+ ③ +
+
+
+
+ + ③ + +
+
+ + + + + + +
+
+
+ Remain +
+
+
+
+ + Remain + +
+
+ + + + +
+
+
+ OutputFifo +
+
+
+
+ + OutputFifo + +
+
+ + + + +
+
+
+ ④ +
+
+
+
+ + ④ + +
+
+
+ + + + + Text is not SVG - cannot display + + + +
\ No newline at end of file diff --git a/img/chunkSplit.drawio.svg b/img/chunkSplit.drawio.svg new file mode 100644 index 0000000..44d1e49 --- /dev/null +++ b/img/chunkSplit.drawio.svg @@ -0,0 +1 @@ +
FifoIn
FifoIn
getFirstChunkLen
getFirstChunkLen
splitFifo
splitFifo
genSplitChunks
genSplitChunks
  • dmaRequest
    • startAddr
    • length
  • firstChunkLen
dmaRequest...
FifoOut
FifoOut
  • startAddr
  • length
startAddrlength
  • startAddr
  • length
startAddrlength
  • newChunkPtrReg
  • totalLenRemainReg
  • isSplittingReg
newChunkPtrRegtotalLenRemainRe...
4KB
4KB
4KB
4KB
4KB
4KB
offset
offset
512
512...
512
512...
512
512...
firstChunk
firstChunk
512
512...
512
×
512...
512
×
512...
...
...
...
...
Text is not SVG - cannot display
\ No newline at end of file diff --git a/img/dmac.drawio.svg b/img/dmac.drawio.svg new file mode 100644 index 0000000..3d41f42 --- /dev/null +++ b/img/dmac.drawio.svg @@ -0,0 +1,494 @@ + + + + + + + + + +
+
+
+ Stream +
+
+
+
+ + Stream + +
+
+ + + + + + +
+
+
+ cntl +
+
+
+
+ + cntl + +
+
+ + + + + + + + +
+
+
+ ChunkCompute +
+
+
+
+ + ChunkCompute + +
+
+ + + + + + +
+
+
+ StreamSplit +
+
+
+
+ + StreamSplit + +
+
+ + + + + + +
+
+
+ TlpHeaderAdd +
+
+
+
+ + TlpHeaderAdd + +
+
+ + + + +
+
+
+ Stream +
+
+
+
+ + Stream + +
+
+ + + + + + +
+
+
+ cntl +
+
+
+
+ + cntl + +
+
+ + + + + + + + +
+
+
+ ChunkCompute +
+
+
+
+ + ChunkCompute + +
+
+ + + + + + +
+
+
+ TlpHeaderAdd +
+
+
+
+ + TlpHeaderAdd + +
+
+ + + + +
+
+
+ AXIS +
+
+
+
+ + AXIS + +
+
+ + + + + + +
+
+
+ RX engine +
+
+
+
+ + RX engine + +
+
+ + + + + + +
+
+
+ PreserveOder +
+
+
+
+ + PreserveOder + +
+
+ + + + + + +
+
+
+ TX engine +
+
+
+
+ + TX engine + +
+
+ + + + + + +
+
+
+ AXIS +
+
+
+
+ + AXIS + +
+
+ + + + + + +
+
+
+ StreamConcat +
+
+
+
+ + StreamConcat + +
+
+ + + + +
+
+
+ CSR +
+ Write +
+
+
+
+ + CSR... + +
+
+ + + + + + + + + + +
+
+
+ CsrFrameGen +
+
+
+
+ + CsrFrameGen + +
+
+ + + + + + +
+
+
+ AXIS +
+
+
+
+ + AXIS + +
+
+ + + + + + +
+
+
+ RX engine +
+
+
+
+ + RX engine + +
+
+ + + + +
+
+
+ CSR +
+ Read +
+
+
+
+ + CSR... + +
+
+ + + + + + +
+
+
+ CSR +
+ Read +
+
+
+
+ + CSR... + +
+
+ + + + + + +
+
+
+ TlpHeaderAdd +
+
+
+
+ + TlpHeaderAdd + +
+
+ + + + + + +
+
+
+ TX engine +
+
+
+
+ + TX engine + +
+
+ + + + +
+
+
+ AXIS +
+
+
+
+ + AXIS + +
+
+ + + + + + + + +
+
+
+ Configuration +
+
+
+
+ + Configuration + +
+
+
+ + + + + Text is not SVG - cannot display + + + +
\ No newline at end of file diff --git a/img/streamConcat.drawio.svg b/img/streamConcat.drawio.svg new file mode 100644 index 0000000..6901206 --- /dev/null +++ b/img/streamConcat.drawio.svg @@ -0,0 +1 @@ +
stage1
stage1
stage1
stage1
output
output
remainDataReg
remainDataReg
mux
mux
Concat
Concat
isFirstA
isLastB
isFirstB
isLastB
hasRemain
isFirstA...
getPtr
getPtr
getPtr
getPtr
Input
Input
Input
Input
Text is not SVG - cannot display
\ No newline at end of file diff --git a/src/CompleterRequest.bsv b/src/CompleterRequest.bsv new file mode 100644 index 0000000..f009d4a --- /dev/null +++ b/src/CompleterRequest.bsv @@ -0,0 +1,92 @@ +import FIFO::*; + +import PcieAxiStreamTypes::*; +import PcieTypes::*; +import PcieDescriptorTypes::*; +import DmaTypes::*; + +typedef 1 MAX_DWORD_CNT_OF_CSR; +typedef 4'b1111 FIRST_BE_OF_CSR; + +typedef DmaCsrFrame CsrWriteReq; +typedef struct { + DmaCsrAddr rdAddr; + PcieCompleterRequestNonPostedStore npInfo; +} CsrReadReq; + +interface CompleterRequest; + interface RawPcieCompleterRequest rawCompleterComplete; + interface FifoOut#(DmaCsrFrame) csrWriteFifoOut; + interface FifoOut#(DmaCsrAddr) csrReadFifoOut; +endinterface + +interface CompleterRxEngine; + interface FifoIn#(PcieAxiStream) axiStreamFifoIn; + interface FifoOut#(CsrWriteReq) csrWriteFifoOut; + interface FifoOut#(CsrReadReq) csrReadFifoOut; +endinterface + + +// PcieCompleter does not support straddle mode now +// The completer is designed only for CSR Rd/Wr, and will ignore any len>32bit requests +module mkCompleterRxEngine; + FIFOF#(PcieAxiStream) inFifo <- mkFIFOF; + FIFOF#(CsrWriteReq) wrReqFifo <- mkFIFOF; + FIFOF#(CsrReadReq) rdReqFifo <- mkFIFOF; + + Reg#(Bool) isInPacket <- mkReg(False); + + Reg#(Uint#(32)) illegalPcieReqCntReg <- mkReg(0); + Reg#(BarId) barIdReg <- mkReg(0); + + function DmaCsrAddr getAddrFromCqDescriptor(PcieCompleterRequestDescriptor descriptor); + + endfunction + + function PcieCompleterRequestNonPostedStore convertDescriptorToNpStore(PcieCompleterRequestDescriptor descriptor); + + endfunction + + rule parseData; + inFifo.deq; + let axiStream = inFifo.first; + PcieCompleterRequestSideBandFrame sideBand = pack(axiStream.tUser); + isInPacket <= !unpack(axiStream.isLast); + if (!isInPacket) begin + PcieCompleterRequestDescriptor descriptor = pack(axiStream.tData[valueOf(CQ_DESCRIPTOR_WIDTH)-1:0]); + case (descriptor.reqType) begin + MEM_WRITE_REQ: begin + if (descriptor.dwordCnt <= valueOf(MAX_DWORD_CNT_OF_CSR) && sideBand.dataByteEn == 4'b1111) begin + DmaCsrValue wrValue = axiStream.tData[valueOf(DWORD_WIDTH)-1:0]; + DmaCsrAddr wrAddr = getAddrFromCqDescriptor(descriptor); + let wrReq = DmaCsrFrame { + address : wrAddr, + value : wrValue + } + wrReqFifo.enq(wrReq); + end + else begin + illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; + end + end + MEM_READ_REQ: begin + DmaCsrAddr rdAddr = getAddrFromCqDescriptor(descriptor); + let npInfo = PcieCompleterRequestNonPostedStore { + attributes: descriptor.attributes, + trafficClass: descriptor.trafficClass, + + } + end + default: illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; + end + + end + outFifo.enq(stream); + endrule +endmodule + + +module mkCompleterRequest; + + +endmodule diff --git a/src/PcieAxiStreamTypes.bsv b/src/PcieAxiStreamTypes.bsv new file mode 100644 index 0000000..b133c78 --- /dev/null +++ b/src/PcieAxiStreamTypes.bsv @@ -0,0 +1,97 @@ +import FIFOF :: *; +import GetPut :: *; +import PAClib :: *; + +import BusConversion :: *; +import SemiFifo :: *; + +typedef 8 BYTE_WIDTH; +typedef TMul#(2, BYTE_WIDTH) WORD_WIDTH; +typedef TMul#(4, BYTE_WIDTH) DWORD_WIDTH; + +typedef 512 PCIE_AXIS_DATA_WIDTH; +typedef TDiv#(PCIE_AXIS_DATA_WIDTH, DWORD_WIDTH) PCIE_AXIS_KEEP_WIDTH; + +typedef struct { + Bit#(PCIE_AXIS_DATA_WIDTH) tData; + Bit#(PCIE_AXIS_KEEP_WIDTH) tKeep; + Bool tLast; + Bit#(usrWidth) tUser; +} PcieAxiStream#(numeric type usrWidth) deriving(Bits, FShow, Eq, Bounded); + +(*always_ready, always_enabled*) +interface RawPcieAxiStreamMaster#(numeric type usrWidth); + (* result = "tvalid" *) method Bool tValid; + (* result = "tdata" *) method Bit#(PCIE_AXIS_DATA_WIDTH) tData; + (* result = "tkeep" *) method Bit#(PCIE_AXIS_KEEP_WIDTH) tKeep; + (* result = "tlast" *) method Bool tLast; + (* result = "tuser" *) method Bit#(usrWidth) tUser; + (* always_enabled, prefix = "" *) method Action tReady((* port="tready" *) Bool ready); +endinterface + +(* always_ready, always_enabled *) +interface RawPcieAxiStreamSlave#(numeric type usrWidth); + (* prefix = "" *) + method Action tValid ( + (* port="tvalid" *) Bool tValid, + (* port="tdata" *) Bit#(PCIE_AXIS_DATA_WIDTH) tData, + (* port="tkeep" *) Bit#(PCIE_AXIS_KEEP_WIDTH) tKeep, + (* port="tlast" *) Bool tLast, + (* port="tuser" *) Bit#(usrWidth) tUser + ); + (* result="tready" *) method Bool tReady; +endinterface + +module mkFifoOutToRawPcieAxiStreamMaster#(FifoOut#(PcieAxiStream#(usrWidth)) pipe + )(RawPcieAxiStreamMaster#(usrWidth)); + let rawBus <- mkFifoOutToRawBusMaster(pipe); + return convertRawBusToRawPcieAxiStreamMaster(rawBus); +endmodule + +module mkFifoInToRawPcieAxiStreamSlave#(FifoIn#(PcieAxiStream#(usrWidth)) pipe + )(RawPcieAxiStreamSlave#(usrWidth)); + let rawBus <- mkFifoInToRawBusSlave(pipe); + return convertRawBusToRawPcieAxiStreamSlave(rawBus); +endmodule + +function RawPcieAxiStreamMaster#(usrWidth) convertRawBusToRawPcieAxiStreamMaster( + RawBusMaster#(PcieAxiStream#(usrWidth)) rawBus +); + return ( + interface RawPcieAxiStreamMaster; + method Bool tValid = rawBus.valid; + method Bit#(PCIE_AXIS_DATA_WIDTH) tData = rawBus.data.tData; + method Bit#(PCIE_AXIS_KEEP_WIDTH) tKeep = rawBus.data.tKeep; + method Bool tLast = rawBus.data.tLast; + method Bit#(usrWidth) tUser = rawBus.data.tUser; + method Action tReady(Bool rdy); + rawBus.ready(rdy); + endmethod + endinterface + ); +endfunction + +function RawPcieAxiStreamSlave#(usrWidth) convertRawBusToRawPcieAxiStreamSlave( + RawBusSlave#(PcieAxiStream#(usrWidth)) rawBus + ); + return ( + interface RawPcieAxiStreamSlave; + method Bool tReady = rawBus.ready; + method Action tValid( + Bool valid, + Bit#(PCIE_AXIS_DATA_WIDTH) tData, + Bit#(PCIE_AXIS_KEEP_WIDTH) tKeep, + Bool tLast, + Bit#(usrWidth) tUser + ); + PcieAxiStream#(usrWidth) axiStream = PcieAxiStream { + tData: tData, + tKeep: tKeep, + tLast: tLast, + tUser: tUser + }; + rawBus.validData(valid, axiStream); + endmethod + endinterface + ); +endfunction \ No newline at end of file diff --git a/src/PcieDescriptorTypes.bsv b/src/PcieDescriptorTypes.bsv new file mode 100644 index 0000000..9762b67 --- /dev/null +++ b/src/PcieDescriptorTypes.bsv @@ -0,0 +1,68 @@ + +import PcieAxiStreamTypes::*; + +typedef 64 RQ_DESCRIPTOR_WIDTH; +typedef TDiv#(TSub#(PCIE_AXIS_DATA_WIDTH, RQ_DESCRIPTOR_WIDTH), DWORD_WIDTH) MAX_DWORD_CNT_OF_FIRST; + +typedef Bit#(1) ReserveBit1; + +typedef 64 CQ_DESCRIPTOR_WIDTH; +typedef 3 ATTR_WIDTH; +typedef 3 TC_WIDTH; +typedef 6 BAR_APERTURE_WIDTH; +typedef 3 BAR_ID_WIDTH; +typedef 8 TARGET_FUNCTION_WIDTH; +typedef 8 TAG_WIDTH; +typedef 16 BDF_WIDTH; +typedef 4 REQ_TYPE_WIDTH; +typedef 11 DWORD_COUNT_WIDTH; +typedef 62 ADDR_WIDTH; + +typedef Bit#(ATTR_WIDTH) Attributes; +typedef Bit#(TC_WIDTH) TrafficClass; +typedef Bit#(BAR_APERTURE_WIDTH) BarAperture; +typedef Bit#(BAR_ID_WIDTH) BarId; +typedef Bit#(TARGET_FUNCTION_WIDTH) TargetFunction; +typedef Bit#(TAG_WIDTH) Tag; +typedef Bit#(BDF_WIDTH) BusDeviceFunc; +typedef Bit#(REQ_TYPE_WIDTH) ReqType; +typedef Bit#(DWORD_COUNT_WIDTH) DwordCount; +typedef Bit#(ADDR_WIDTH) Address; + +// 16bytes Completer Request Descriptor Format for Memory, I/O, and Atomic Options +typedef struct { + ReserveBit1 reserve0; + Attributes attributes; + TrafficClass trafficClass; + BarAperture barAperture; + BarId barId; + TargetFunction targetFunction; + Tag tag; + BusDeviceFunc requesterId; + ReserveBit1 reserve1; + ReqType reqType; + DwordCount dwordCnt; + Address address; +} PcieCompleterRequestDescriptor deriving(Bits, Eq, Bounded, FShow); + +typedef struct { + Attributes attributes; + TrafficClass trafficClass; + Tag tag; + BusDeviceFunc requesterId; +} PcieCompleterRequestNonPostedStore deriving(Bits, Eq, Bounded, FShow); + + + +// Pcie Tlp types of descriptor +typedef 4'b0000 MEM_READ_REQ; +typedef 4'b0001 MEM_WRITE_REQ; +typedef 4'b0010 IO_READ_REQ; +typedef 4'b0011 IO_WRITE_REQ; +typedef 4'b0100 MEM_FETCHADD_REQ; +typedef 4'b0101 MEM_UNCOND_SWAP_REQ; +typedef 4'b0110 MEM_COMP_SWAP_REQ; +typedef 4'b0111 LOCK_READ_REQ; // allowed only in legacy devices +typedef 4'b1100 COMMON_MESG; +typedef 4'b1101 VENDOR_DEF_MESG; +typedef 4'b1110 ATS_MESG; diff --git a/src/PcieTypes.bsv b/src/PcieTypes.bsv index e617cfd..b374ad3 100755 --- a/src/PcieTypes.bsv +++ b/src/PcieTypes.bsv @@ -1,4 +1,4 @@ -import AxiStreamTypes::*; +import PcieAxiStreamTypes::*; typedef 512 PCIE_TLP_BYTES; @@ -15,6 +15,7 @@ typedef 81 PCIE_COMPLETER_COMPLETE_TUSER_WIDTH; typedef 137 PCIE_REQUESTER_REQUEST_TUSER_WIDTH; typedef 161 PCIE_REQUESTER_COMPLETE_TUSER_WIDTH; + // PcieTlpCtl**: SideBand Signals delivered in tUser defined by PG213 typedef 8 PCIE_TLP_FIRST_BE_WIDTH; typedef 8 PCIE_TLP_LAST_BE_WIDTH; @@ -139,6 +140,7 @@ typedef struct { } PcieRequesterCompleteSideBandFrame deriving(Bits, Bounded, Eq); +// PCIe raw interfaces typedef 2 PCIE_CR_NP_REQ_WIDTH; typedef 6 PCIE_CR_NP_REQ_COUNT_WIDTH; typedef Bit#(PCIE_CR_NP_REQ_WIDTH) PcieNonPostedRequst; @@ -146,10 +148,17 @@ typedef Bit#(PCIE_CR_NP_REQ_COUNT_WIDTH) PcieNonPostedRequstCount; // Interface to PCIe IP Completer Interface (*always_ready, always_enabled*) -interface RawPcieCompleter; - // TODO: the AxiStream in blue-wrapper has tDataWidth = tKeepWidth * BYTE_WIDTH, but the PCIe IP has tDataWidth = tKeepWidth * DWORD_WIDTH - (* prefix = "s_axis_cq_" *) interface RawAxiStreamSlave#(PCIE_TKEEP_WIDTH, PCIE_COMPLETER_REQUEST_TUSER_WIDTH) request; - (* prefix = "m_axis_cc_" *) interface RawAxiStreamMaster#(PCIE_TKEEP_WIDTH, PCIE_COMPLETER_COMPLETE_TUSER_WIDTH) complete; +interface RawPcieCompleterRequest; + (* prefix = "s_axis_cq_" *) interface RawPcieAxiStreamSlave#(PCIE_COMPLETER_REQUEST_TUSER_WIDTH) rawAxiStreamSlave; + (* result = "pcie_cq_np_req" *) method PcieNonPostedRequst nonPostedReqCreditIncrement; + (* prefix = "" *) method Action nonPostedReqCreditCnt( + (* port = "pcie_cq_np_req_count" *) PcieNonPostedRequstCount ); + method +endinterface + +(*always_ready, always_enabled*) +interface RawPcieCompleterComplete; + (* prefix = "m_axis_cc_" *) interface RawAxiStreamMaster#(PCIE_COMPLETER_COMPLETE_TUSER_WIDTH) rawAxiStreamMaster; endinterface // Interface to PCIe IP Requester Interface @@ -159,6 +168,8 @@ interface RawPcieRequester; (* prefix = "s_axis_rc_" *) interface RawAxiStreamSlave#(PCIE_TKEEP_WIDTH, usrWidth) complete; endinterface + +// Pcie Configuration Interfaces typedef 10 PCIE_CFG_MGMT_ADDR_WIDTH; typedef 4 PCIE_CFG_MGMT_BE_WIDTH; typedef 8 PCIE_CFG_MGMT_FUNC_NUM_WIDTH; @@ -263,4 +274,5 @@ interface RawPcieConfiguration; (* prefix = "cfg_msg_received_" *) interface RawPcieCfgMsgRx msgRx; (* prefix = "" *) interface RawPcieCfgStatus status; (* prefix = "pcie_tfc_" *) interface RawPcieCfgTransmitFC txFlowControl; -endinterface \ No newline at end of file +endinterface + diff --git a/src/PrimUtils.bsv b/src/PrimUtils.bsv index 6919aae..169f6ba 100644 --- a/src/PrimUtils.bsv +++ b/src/PrimUtils.bsv @@ -12,4 +12,6 @@ function Action immAssert(Bool condition, String assertName, Fmt assertFmtMsg); $finish(1); end endaction -endfunction \ No newline at end of file +endfunction + +function t getLowBytes(t data, tPtr aperture) provisos(Bits#(t, tSz), ) \ No newline at end of file From ddc177a23bf93245e9a5687de10cf5d473f1c03d Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Sat, 20 Jul 2024 13:05:22 +0800 Subject: [PATCH 21/53] Fix Types --- src/DmaTypes.bsv | 2 +- src/PcieTypes.bsv | 87 +++++++++++++++++++++------------------------ src/PrimUtils.bsv | 74 +++++++++++++++++++++++++++++++++++++- src/StreamUtils.bsv | 8 ++--- 4 files changed, 118 insertions(+), 53 deletions(-) diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index 59d4ec2..f6af719 100755 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -20,7 +20,7 @@ typedef TMul#(4, BYTE_WIDTH) DWORD_WIDTH; typedef 2 CONCAT_STREAM_NUM; typedef TDiv#(DATA_WIDTH, BYTE_WIDTH) BYTE_EN_WIDTH; -typedef 'hFFFFFFFFFFFFFFFF MAX_BYTE_EN; +typedef -1 MAX_BYTE_EN; typedef Bit#(DATA_WIDTH) Data; typedef Bit#(BYTE_EN_WIDTH) ByteEn; diff --git a/src/PcieTypes.bsv b/src/PcieTypes.bsv index b374ad3..7cf68d8 100755 --- a/src/PcieTypes.bsv +++ b/src/PcieTypes.bsv @@ -1,5 +1,6 @@ -import PcieAxiStreamTypes::*; +import Vector::*; +import PcieAxiStreamTypes::*; typedef 512 PCIE_TLP_BYTES; typedef TLog#(PCIE_TLP_BYTES) PCIE_TLP_BYTES_WIDTH; @@ -9,6 +10,7 @@ typedef 64 PCIE_TDATA_BYTES; typedef 16 PCIE_TDATA_DWORDS; // Indicate DWORD valid of tDATA typedef PCIE_TDATA_DWORDS PCIE_TKEEP_WIDTH; + // tUser width vary among RR, RC, CR and CC typedef 183 PCIE_COMPLETER_REQUEST_TUSER_WIDTH; typedef 81 PCIE_COMPLETER_COMPLETE_TUSER_WIDTH; @@ -61,82 +63,74 @@ typedef Bit#(PCIE_TLP_RC_ISEOP_WIDTH) PcieTlpCtlIsEopRC; // Signals the start of a new TLP, 6 bit. typedef struct { + Vector#(PCIE_TLP_ISSOP_WIDTH, PcieTlpCtlIsSopPtr) isSopPtrs; PcieTlpCtlIsSop isSop; - PcieTlpCtlIsSopPtr isSopPtr0; - PcieTlpCtlIsSopPtr isSopPtr1; } PcieTlpCtlIsSopCommon deriving(Bits, Bounded, Eq); // Signals the start of a new TLP, 12 bit. typedef struct { + Vector#(PCIE_TLP_RC_ISSOP_WIDTH, PcieTlpCtlIsSopPtr) isSopPtrs; PcieTlpCtlIsSopRC isSop; - PcieTlpCtlIsSopPtr isSopPtr0; - PcieTlpCtlIsSopPtr isSopPtr1; - PcieTlpCtlIsSopPtr isSopPtr2; - PcieTlpCtlIsSopPtr isSopPtr3; } PcieTlpCtlIsSopReqCpl deriving(Bits, Bounded, Eq); // Indicates a TLP is ending in this beat, 10bit. typedef struct { + Vector#(PCIE_TLP_ISEOP_WIDTH, PcieTlpCtlIsEopPtr) isEopPtrs; PcieTlpCtlIsEop isEop; - PcieTlpCtlIsEopPtr isEopPtr0; - PcieTlpCtlIsEopPtr isEopPtr1; } PcieTlpCtlIsEopCommon deriving(Bits, Bounded, Eq); // Indicates a TLP is ending in this beat, 20bit. typedef struct { + Vector#(PCIE_TLP_RC_ISEOP_WIDTH, PcieTlpCtlIsEopPtr) isEopPtrs; PcieTlpCtlIsEopRC isEop; - PcieTlpCtlIsEopPtr isEopPtr0; - PcieTlpCtlIsEopPtr isEopPtr1; - PcieTlpCtlIsEopPtr isEopPtr2; - PcieTlpCtlIsEopPtr isEopPtr3; } PcieTlpCtlIsEopReqCpl deriving(Bits, Bounded, Eq); // 183bit tUser of PcieCompleterRequeste AXIS-slave typedef struct { - PcieTlpCtlFirstByteEn firstByteEn; - PcieTlpCtlLastByteEn lastByteEn; - PcieTlpCtlByteEn dataByteEn; - PcieTlpCtlIsSopCommon isSop; - PcieTlpCtlIsEopCommon isEop; - Bool discontinue; - PcieTlpCtlTphPresent tphPresent; - PcieTlpCtlTphType tphType; - PcieTlpCtlTphSteeringTag tphSteeringTag; PcieTlpCtlParity parity; + PcieTlpCtlTphSteeringTag tphSteeringTag; + PcieTlpCtlTphType tphType; + PcieTlpCtlTphPresent tphPresent; + Bool discontinue; + PcieTlpCtlIsEopCommon isEop; + PcieTlpCtlIsSopCommon isSop; + PcieTlpCtlByteEn dataByteEn; + PcieTlpCtlLastByteEn lastByteEn; + PcieTlpCtlFirstByteEn firstByteEn; } PcieCompleterRequestSideBandFrame deriving(Bits, Bounded, Eq); // 81bit tUser of PcieCompleterComplete AXIS-master typedef struct { - PcieTlpCtlIsSopCommon isSop; - PcieTlpCtlIsEopCommon isEop; - Bool discontinue; PcieTlpCtlParity parity; + Bool discontinue; + PcieTlpCtlIsEopCommon isEop; + PcieTlpCtlIsSopCommon isSop; } PcieCompleterCompleteSideBandFrame deriving(Bits, Bounded, Eq); // 137bit tUser of PcieRequesterRequeste AXIS-master typedef struct { - PcieTlpCtlFirstByteEn firstByteEn; - PcieTlpCtlLastByteEn lastByteEn; - PcieTlpCtlAddrOffset addrOffset; - PcieTlpCtlIsSopCommon isSop; - PcieTlpCtlIsEopCommon isEop; - Bool discontinue; - PcieTlpCtlTphPresent tphPresent; - PcieTlpCtlTphType tphType; - PcieTlpCtlTphIndirectTagEn tphIndirectTagEn; - PcieTlpCtlTphSteeringTag tphSteeringTag; - PcieTlpCtlSeqNum seqNum0; - PcieTlpCtlSeqNum seqNum1; PcieTlpCtlParity parity; + PcieTlpCtlSeqNum seqNum1; + PcieTlpCtlSeqNum seqNum0; + PcieTlpCtlTphSteeringTag tphSteeringTag; + PcieTlpCtlTphIndirectTagEn tphIndirectTagEn; + PcieTlpCtlTphType tphType; + PcieTlpCtlTphPresent tphPresent; + Bool discontinue; + PcieTlpCtlIsEopCommon isEop; + PcieTlpCtlIsSopCommon isSop; + PcieTlpCtlAddrOffset addrOffset; + PcieTlpCtlLastByteEn lastByteEn; + PcieTlpCtlFirstByteEn firstByteEn; } PcieRequsterRequestSideBandFrame deriving(Bits, Bounded, Eq); // 161bit tUser of PcieRequesterComplete AXIS-slave typedef struct { - PcieTlpCtlByteEn dataByteEn; - PcieTlpCtlIsSopReqCpl isSop; - PcieTlpCtlIsEopReqCpl isEop; - Bool discontinue; - PcieTlpCtlParity parity; +PcieTlpCtlParity parity; +Bool discontinue; +PcieTlpCtlIsEopReqCpl isEop; +PcieTlpCtlIsSopReqCpl isSop; +PcieTlpCtlByteEn dataByteEn; } PcieRequesterCompleteSideBandFrame deriving(Bits, Bounded, Eq); @@ -152,20 +146,19 @@ interface RawPcieCompleterRequest; (* prefix = "s_axis_cq_" *) interface RawPcieAxiStreamSlave#(PCIE_COMPLETER_REQUEST_TUSER_WIDTH) rawAxiStreamSlave; (* result = "pcie_cq_np_req" *) method PcieNonPostedRequst nonPostedReqCreditIncrement; (* prefix = "" *) method Action nonPostedReqCreditCnt( - (* port = "pcie_cq_np_req_count" *) PcieNonPostedRequstCount ); - method + (* port = "pcie_cq_np_req_count" *) PcieNonPostedRequstCount nonPostedpReqCount ); endinterface (*always_ready, always_enabled*) interface RawPcieCompleterComplete; - (* prefix = "m_axis_cc_" *) interface RawAxiStreamMaster#(PCIE_COMPLETER_COMPLETE_TUSER_WIDTH) rawAxiStreamMaster; + (* prefix = "m_axis_cc_" *) interface RawPcieAxiStreamMaster#(PCIE_COMPLETER_COMPLETE_TUSER_WIDTH) rawAxiStreamMaster; endinterface // Interface to PCIe IP Requester Interface (*always_ready, always_enabled*) interface RawPcieRequester; - (* prefix = "m_axis_rq_" *) interface RawAxiStreamMaster#(PCIE_TKEEP_WIDTH, usrWidth) request; - (* prefix = "s_axis_rc_" *) interface RawAxiStreamSlave#(PCIE_TKEEP_WIDTH, usrWidth) complete; + (* prefix = "m_axis_rq_" *) interface RawPcieAxiStreamMaster#(PCIE_REQUESTER_REQUEST_TUSER_WIDTH) request; + (* prefix = "s_axis_rc_" *) interface RawPcieAxiStreamSlave#(PCIE_REQUESTER_COMPLETE_TUSER_WIDTH) complete; endinterface diff --git a/src/PrimUtils.bsv b/src/PrimUtils.bsv index 169f6ba..8ba0863 100644 --- a/src/PrimUtils.bsv +++ b/src/PrimUtils.bsv @@ -1,5 +1,7 @@ import FIFO::*; +import PcieAxiStreamTypes::*; + function Action immAssert(Bool condition, String assertName, Fmt assertFmtMsg); action let pos = printPosition(getStringPosition(assertName)); @@ -14,4 +16,74 @@ function Action immAssert(Bool condition, String assertName, Fmt assertFmtMsg); endaction endfunction -function t getLowBytes(t data, tPtr aperture) provisos(Bits#(t, tSz), ) \ No newline at end of file +function Bit#(n) getLowBytes(Bit#(n) data, Bit#(TLog#(TDiv#(n, BYTE_WIDTH))) ptr); + let temp = data; + case(ptr) + 1 : temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 2 : temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 3 : temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 4 : temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 5 : temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 6 : temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 7 : temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 8 : temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 9 : temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 10: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 11: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 12: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 13: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 14: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 15: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 16: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 17: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 18: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 19: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 20: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 21: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 22: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 23: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 24: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 25: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 26: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 27: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 28: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 29: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 30: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 31: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 32: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 33: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 34: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 35: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 36: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 37: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 38: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 39: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 40: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 41: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 42: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 43: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 44: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 45: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 46: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 47: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 48: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 49: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 50: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 51: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 52: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 53: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 54: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 55: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 56: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 57: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 58: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 59: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 60: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 61: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 62: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 63: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 64: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + default: temp = 0; + endcase + return temp; +endfunction diff --git a/src/StreamUtils.bsv b/src/StreamUtils.bsv index 91c9b27..78cc3d3 100755 --- a/src/StreamUtils.bsv +++ b/src/StreamUtils.bsv @@ -118,7 +118,7 @@ endfunction // Concat two DataStream frames into one. StreamA.isLast must be True, otherwise the function will return a empty frame to end the stream. function Tuple3#(DataStream, DataStream, DataBytePtr) getConcatStream (DataStream streamA, DataStream streamB, DataBytePtr bytePtrA, DataBytePtr bytePtrB); - Bool isCallLegally = (streamA.isLast && bytePtrA <= getMaxBytePtr() && bytePtrA > 0); + Bool isCallLegally = (streamA.isLast && bytePtrA <= getMaxBytePtr && bytePtrA > 0); DataBitPtr bitPtrA = zeroExtend(bytePtrA) << fromInteger(valueOf(BYTE_WIDTH_WIDTH)); // Fill the low PtrA bytes by streamA data @@ -132,8 +132,8 @@ function Tuple3#(DataStream, DataStream, DataBytePtr) getConcatStream (DataStrea ByteEn concatByteEn = concatByteEnA | concatByteEnB; // Get the remain bytes of streamB data - DataBitPtr resBitPtr = getMaxBitPtr() - bitPtrA; - DataBytePtr resBytePtr = getMaxBytePtr() - bytePtrA; + DataBitPtr resBitPtr = getMaxBitPtr - bitPtrA; + DataBytePtr resBytePtr = getMaxBytePtr - bytePtrA; Data remainData = streamB.data >> resBitPtr; ByteEn remainByteEn = streamB.byteEn >> resBytePtr; @@ -302,7 +302,7 @@ module mkStreamSplit(StreamSplit ifc); if (!isSplitted && unpack(zeroExtend(bytePtr)) + streamByteCntReg >= splitLocation) begin truncateBytePtr = truncate(pack(splitLocation - streamByteCntReg)); end - DataBytePtr resBytePtr = getMaxBytePtr() - truncateBytePtr; + DataBytePtr resBytePtr = getMaxBytePtr - truncateBytePtr; splitPtrFifo.enq(tuple2(truncateBytePtr, resBytePtr)); if (truncateBytePtr > 0 && !stream.isLast) begin isSplitted <= True; From c7d7d77bb3a403b84adf6135a865e14b8a31722d Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Sat, 20 Jul 2024 18:03:41 +0800 Subject: [PATCH 22/53] Add PcieCompleter --- img/dmac.drawio.svg | 494 ------------------------------------ src/CompleterRequest.bsv | 92 ------- src/DmaTypes.bsv | 1 - src/PcieCompleter.bsv | 173 +++++++++++++ src/PcieDescriptorTypes.bsv | 43 ++-- src/PrimUtils.bsv | 312 ++++++++++++++++++----- src/StreamUtils.bsv | 18 +- 7 files changed, 451 insertions(+), 682 deletions(-) delete mode 100644 img/dmac.drawio.svg delete mode 100644 src/CompleterRequest.bsv create mode 100644 src/PcieCompleter.bsv diff --git a/img/dmac.drawio.svg b/img/dmac.drawio.svg deleted file mode 100644 index 3d41f42..0000000 --- a/img/dmac.drawio.svg +++ /dev/null @@ -1,494 +0,0 @@ - - - - - - - - - -
-
-
- Stream -
-
-
-
- - Stream - -
-
- - - - - - -
-
-
- cntl -
-
-
-
- - cntl - -
-
- - - - - - - - -
-
-
- ChunkCompute -
-
-
-
- - ChunkCompute - -
-
- - - - - - -
-
-
- StreamSplit -
-
-
-
- - StreamSplit - -
-
- - - - - - -
-
-
- TlpHeaderAdd -
-
-
-
- - TlpHeaderAdd - -
-
- - - - -
-
-
- Stream -
-
-
-
- - Stream - -
-
- - - - - - -
-
-
- cntl -
-
-
-
- - cntl - -
-
- - - - - - - - -
-
-
- ChunkCompute -
-
-
-
- - ChunkCompute - -
-
- - - - - - -
-
-
- TlpHeaderAdd -
-
-
-
- - TlpHeaderAdd - -
-
- - - - -
-
-
- AXIS -
-
-
-
- - AXIS - -
-
- - - - - - -
-
-
- RX engine -
-
-
-
- - RX engine - -
-
- - - - - - -
-
-
- PreserveOder -
-
-
-
- - PreserveOder - -
-
- - - - - - -
-
-
- TX engine -
-
-
-
- - TX engine - -
-
- - - - - - -
-
-
- AXIS -
-
-
-
- - AXIS - -
-
- - - - - - -
-
-
- StreamConcat -
-
-
-
- - StreamConcat - -
-
- - - - -
-
-
- CSR -
- Write -
-
-
-
- - CSR... - -
-
- - - - - - - - - - -
-
-
- CsrFrameGen -
-
-
-
- - CsrFrameGen - -
-
- - - - - - -
-
-
- AXIS -
-
-
-
- - AXIS - -
-
- - - - - - -
-
-
- RX engine -
-
-
-
- - RX engine - -
-
- - - - -
-
-
- CSR -
- Read -
-
-
-
- - CSR... - -
-
- - - - - - -
-
-
- CSR -
- Read -
-
-
-
- - CSR... - -
-
- - - - - - -
-
-
- TlpHeaderAdd -
-
-
-
- - TlpHeaderAdd - -
-
- - - - - - -
-
-
- TX engine -
-
-
-
- - TX engine - -
-
- - - - -
-
-
- AXIS -
-
-
-
- - AXIS - -
-
- - - - - - - - -
-
-
- Configuration -
-
-
-
- - Configuration - -
-
-
- - - - - Text is not SVG - cannot display - - - -
\ No newline at end of file diff --git a/src/CompleterRequest.bsv b/src/CompleterRequest.bsv deleted file mode 100644 index f009d4a..0000000 --- a/src/CompleterRequest.bsv +++ /dev/null @@ -1,92 +0,0 @@ -import FIFO::*; - -import PcieAxiStreamTypes::*; -import PcieTypes::*; -import PcieDescriptorTypes::*; -import DmaTypes::*; - -typedef 1 MAX_DWORD_CNT_OF_CSR; -typedef 4'b1111 FIRST_BE_OF_CSR; - -typedef DmaCsrFrame CsrWriteReq; -typedef struct { - DmaCsrAddr rdAddr; - PcieCompleterRequestNonPostedStore npInfo; -} CsrReadReq; - -interface CompleterRequest; - interface RawPcieCompleterRequest rawCompleterComplete; - interface FifoOut#(DmaCsrFrame) csrWriteFifoOut; - interface FifoOut#(DmaCsrAddr) csrReadFifoOut; -endinterface - -interface CompleterRxEngine; - interface FifoIn#(PcieAxiStream) axiStreamFifoIn; - interface FifoOut#(CsrWriteReq) csrWriteFifoOut; - interface FifoOut#(CsrReadReq) csrReadFifoOut; -endinterface - - -// PcieCompleter does not support straddle mode now -// The completer is designed only for CSR Rd/Wr, and will ignore any len>32bit requests -module mkCompleterRxEngine; - FIFOF#(PcieAxiStream) inFifo <- mkFIFOF; - FIFOF#(CsrWriteReq) wrReqFifo <- mkFIFOF; - FIFOF#(CsrReadReq) rdReqFifo <- mkFIFOF; - - Reg#(Bool) isInPacket <- mkReg(False); - - Reg#(Uint#(32)) illegalPcieReqCntReg <- mkReg(0); - Reg#(BarId) barIdReg <- mkReg(0); - - function DmaCsrAddr getAddrFromCqDescriptor(PcieCompleterRequestDescriptor descriptor); - - endfunction - - function PcieCompleterRequestNonPostedStore convertDescriptorToNpStore(PcieCompleterRequestDescriptor descriptor); - - endfunction - - rule parseData; - inFifo.deq; - let axiStream = inFifo.first; - PcieCompleterRequestSideBandFrame sideBand = pack(axiStream.tUser); - isInPacket <= !unpack(axiStream.isLast); - if (!isInPacket) begin - PcieCompleterRequestDescriptor descriptor = pack(axiStream.tData[valueOf(CQ_DESCRIPTOR_WIDTH)-1:0]); - case (descriptor.reqType) begin - MEM_WRITE_REQ: begin - if (descriptor.dwordCnt <= valueOf(MAX_DWORD_CNT_OF_CSR) && sideBand.dataByteEn == 4'b1111) begin - DmaCsrValue wrValue = axiStream.tData[valueOf(DWORD_WIDTH)-1:0]; - DmaCsrAddr wrAddr = getAddrFromCqDescriptor(descriptor); - let wrReq = DmaCsrFrame { - address : wrAddr, - value : wrValue - } - wrReqFifo.enq(wrReq); - end - else begin - illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; - end - end - MEM_READ_REQ: begin - DmaCsrAddr rdAddr = getAddrFromCqDescriptor(descriptor); - let npInfo = PcieCompleterRequestNonPostedStore { - attributes: descriptor.attributes, - trafficClass: descriptor.trafficClass, - - } - end - default: illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; - end - - end - outFifo.enq(stream); - endrule -endmodule - - -module mkCompleterRequest; - - -endmodule diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index f6af719..66c5b44 100755 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -20,7 +20,6 @@ typedef TMul#(4, BYTE_WIDTH) DWORD_WIDTH; typedef 2 CONCAT_STREAM_NUM; typedef TDiv#(DATA_WIDTH, BYTE_WIDTH) BYTE_EN_WIDTH; -typedef -1 MAX_BYTE_EN; typedef Bit#(DATA_WIDTH) Data; typedef Bit#(BYTE_EN_WIDTH) ByteEn; diff --git a/src/PcieCompleter.bsv b/src/PcieCompleter.bsv new file mode 100644 index 0000000..0288abb --- /dev/null +++ b/src/PcieCompleter.bsv @@ -0,0 +1,173 @@ +import FIFO::*; + +import PcieAxiStreamTypes::*; +import PcieTypes::*; +import PcieDescriptorTypes::*; +import DmaTypes::*; + +typedef 1 IDEA_DWORD_CNT_OF_CSR; +typedef 10 CMPL_NPREQ_INFLIGHT_NUM; +typedef 20 CMPL_NPREQ_WAITING_CLKS; + +typedef DmaCsrFrame CsrWriteReq; +typedef DmaCsrFrame CsrReadResp; +typedef struct { + DmaCsrAddr rdAddr; + PcieCompleterRequestNonPostedStore npInfo; +} CsrReadReq; + +interface Completer; + interface RawPcieCompleterRequest rawCompleterRequest; + interface RawPcieCompleterComplete rawCompleterComplete; + interface FifoOut#(DmaCsrFrame) csrWriteReqFifoOut; + interface FifoIn#(DmaCsrFrame) csrReadRespFifoIn; + interface FifoOut#(DmaCsrAddr) csrReadReqFifoOut; + method DmaCsrValue getRegisterValue(DmaCsrAddr addr); +endinterface + +interface CompleterRequest; + interface FifoIn#(PcieAxiStream) axiStreamFifoIn; + interface FifoOut#(CsrWriteReq) csrWriteReqFifoOut; + interface FifoOut#(CsrReadReq) csrReadReqFifoOut; +endinterface + +interface CompleterComplete; + interface FifoOut#(PcieAxiStream) axiStreamFifoOut; + interface FifoIn#(CsrReadResp) csrReadRespFifoIn; + interface FifoIn#(CsrReadReq) csrReadReqFifoIn; +endinterface + +// PcieCompleter does not support straddle mode now +// The completer is designed only for CSR Rd/Wr, and will ignore any len>32bit requests +(* synthesize *) +module mkCompleterRequest(CompleterRequest); + FIFOF#(PcieAxiStream) inFifo <- mkFIFOF; + FIFOF#(CsrWriteReq) wrReqFifo <- mkFIFOF; + FIFOF#(CsrReadReq) rdReqFifo <- mkFIFOF; + + Reg#(Bool) isInPacket <- mkReg(False); + Reg#(Uint#(32)) illegalPcieReqCntReg <- mkReg(0); + + function PcieCompleterRequestDescriptor getDescriptorFromFirstBeat(PcieAxiStream axiStream); + return pack(axiStream.tDATA[valueOf(CQ_DESCRIPTOR_WIDTH)-1:0]); + endfunction + + function Data getDataFromFirstBeat(PcieAxiStream axiStream); + return axiStream.tData >> valueOf(CQ_DESCRIPTOR_WIDTH); + endfunction + + function Bool isFirstBytesAllValid(PcieCompleterCompleteSideBandFrame sideBand); + return (sideBand.firstByteEn[valueOf(PCIE_TLP_FIRST_BE_WIDTH)-1] == 1); + endfunction + + function DmaCsrAddr getAddrFromCqDescriptor(PcieCompleterRequestDescriptor descriptor); + let addr = getAddrLowBits(zeroExtend(descriptor.address), descriptor.barAperture); + // Only support one BAR now, no operation + if (descriptor.barId == 0) begin + addr = addr; + end + else begin + addr = 0; + end + return truncate(addr); + endfunction + + function PcieCompleterRequestNonPostedStore convertDescriptorToNpStore(PcieCompleterRequestDescriptor descriptor); + return PcieCompleterRequestNonPostedStore { + attributes : descriptor.attributes, + trafficClass: descriptor.trafficClass, + tag : descriptor.tag, + requesterId : descriptor.requesterId + }; + endfunction + + rule parseData; + inFifo.deq; + let axiStream = inFifo.first; + PcieCompleterRequestSideBandFrame sideBand = pack(axiStream.tUser); + isInPacket <= !axiStream.isLast; + if (!isInPacket) begin + let descriptor = getDescriptorFromFirstBeat(axiStream); + case (descriptor.reqType) begin + MEM_WRITE_REQ: begin + if (descriptor.dwordCnt == valueOf(IDEA_DWORD_CNT_OF_CSR) && isFirstBytesAllValid) begin + DmaCsrValue wrValue = getDataFromFirstBeat(axiStream)[valueOf(DMA_CSR_ADDR_WIDTH)-1:0]; + DmaCsrAddr wrAddr = getAddrFromCqDescriptor(descriptor); + let wrReq = CsrWriteReq { + address : wrAddr, + value : wrValue + } + wrReqFifo.enq(wrReq); + end + else begin + illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; + end + end + MEM_READ_REQ: begin + let rdReqAddr = getAddrFromCqDescriptor(descriptor); + let npInfo = convertDescriptorToNpStore(descriptor); + let rdReq = CsrReadReq{ + rdAddr: rdReqAddr, + npInfo: npInfo + } + rdReqFifo.enq(rdReq); + end + default: illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; + end + end + outFifo.enq(stream); + endrule + + interface axiStreamFifoIn = convertFifoToFifoIn(inFifo); + interface csrWriteReqFifoOut = convertFifoToFifoOut(wrReqFifo); + interface csrReadReqFifoOut = convertFifoToFifoOut(rdReqFifo); +endmodule + +(* synthesize *) +module mkCompleterComplete(CompleterComplete); + FIFOF#(PcieAxiStream) outFifo <- mkFIFOF; + FIFOF#(CsrReadResp) rdRespFifo <- mkFIFOF; + FIFOF#(CsrReadReq) rdReqFifo <- mkFIFOF; + + // TODO: the logic of cc + + interface axiStreamFifoOut = convertFifoToFifoOut(outFifo); + interface csrReadRespFifoIn = convertFifoToFifoIn(rdRespFifo); + interface csrWriteReqFifoOut = convertFifoToFifoIn(rdReqFifo); +endmodule + +(* synthesize *) +module mkCompleter(Completer); + CompleterRequest cmplRequest = mkCompleterRequest; + CompleterComplete cmplComplete = mkCompleterComplete; + + FIFOF#(csrReadResp) csrRdRespFifo <- mkFIFOF; + FIFOF#(csrReadReq) csrRdReqOutFifo <- mkFIFOF; + FIFOF#(csrReadReq) csrRdReqWaitingFifo <- mkSizedFIFOF(CMPL_NPREQ_INFLIGHT_NUM); + + Reg#(PcieNonPostedRequstCount) npReqCreditCntReg <- mkReg(0); + + interface RawPcieCompleterRequest; + interface rawAxiStreamSlave = mkFifoInToRawPcieAxiStreamSlave#(cmplRequest.axiStreamFifoIn); + // TODO: back-pressure according to the temperory stored RdReq Num + method PcieNonPostedRequst nonPostedReqCreditIncrement = 2'b11; + method Action nonPostedReqCreditCnt(PcieNonPostedRequstCount nonPostedpReqCount); + npReqCreditCntReg <= nonPostedpReqCount; + endmethod + endinterface + + interface RawPcieCompleterComplete; + interface rawAxiStreamSlave = mkFifoOutToRawPcieAxiStreamMaster#(cmplComplete.axiStreamFifoOut); + endinterface + + interface csrWriteReqFifoOut = cmplRequest.csrWriteReqFifoOut; + + interface csrReadReqFifoOut = convertFifoToFifoOut(csrRdReqOutFifo); + interface csrReadRespFifoIn = convertFifoToFifoIn(csrRdRespFifo); + + // TODO: get internal registers value + method DmaCsrValue getRegisterValue(DmaCsrAddr addr); + return 0; + method + +endmodule diff --git a/src/PcieDescriptorTypes.bsv b/src/PcieDescriptorTypes.bsv index 9762b67..2537159 100644 --- a/src/PcieDescriptorTypes.bsv +++ b/src/PcieDescriptorTypes.bsv @@ -6,28 +6,28 @@ typedef TDiv#(TSub#(PCIE_AXIS_DATA_WIDTH, RQ_DESCRIPTOR_WIDTH), DWORD_WIDTH) MAX typedef Bit#(1) ReserveBit1; -typedef 64 CQ_DESCRIPTOR_WIDTH; -typedef 3 ATTR_WIDTH; -typedef 3 TC_WIDTH; -typedef 6 BAR_APERTURE_WIDTH; -typedef 3 BAR_ID_WIDTH; -typedef 8 TARGET_FUNCTION_WIDTH; -typedef 8 TAG_WIDTH; -typedef 16 BDF_WIDTH; -typedef 4 REQ_TYPE_WIDTH; -typedef 11 DWORD_COUNT_WIDTH; -typedef 62 ADDR_WIDTH; +typedef 64 DES_CQ_DESCRIPTOR_WIDTH; +typedef 3 DES_ATTR_WIDTH; +typedef 3 DES_TC_WIDTH; +typedef 6 DES_BAR_APERTURE_WIDTH; +typedef 3 DES_BAR_ID_WIDTH; +typedef 8 DES_TARGET_FUNCTION_WIDTH; +typedef 8 DES_TAG_WIDTH; +typedef 16 DES_BDF_WIDTH; +typedef 4 DES_REQ_TYPE_WIDTH; +typedef 11 DES_DWORD_COUNT_WIDTH; +typedef 62 DES_ADDR_WIDTH; -typedef Bit#(ATTR_WIDTH) Attributes; -typedef Bit#(TC_WIDTH) TrafficClass; -typedef Bit#(BAR_APERTURE_WIDTH) BarAperture; -typedef Bit#(BAR_ID_WIDTH) BarId; -typedef Bit#(TARGET_FUNCTION_WIDTH) TargetFunction; -typedef Bit#(TAG_WIDTH) Tag; -typedef Bit#(BDF_WIDTH) BusDeviceFunc; -typedef Bit#(REQ_TYPE_WIDTH) ReqType; -typedef Bit#(DWORD_COUNT_WIDTH) DwordCount; -typedef Bit#(ADDR_WIDTH) Address; +typedef Bit#(DES_ATTR_WIDTH) Attributes; +typedef Bit#(DES_TC_WIDTH) TrafficClass; +typedef Bit#(DES_BAR_APERTURE_WIDTH) BarAperture; +typedef Bit#(DES_BAR_ID_WIDTH) BarId; +typedef Bit#(DES_TARGET_FUNCTION_WIDTH) TargetFunction; +typedef Bit#(DES_TAG_WIDTH) Tag; +typedef Bit#(DES_BDF_WIDTH) BusDeviceFunc; +typedef Bit#(DES_REQ_TYPE_WIDTH) ReqType; +typedef Bit#(DES_DWORD_COUNT_WIDTH) DwordCount; +typedef Bit#(DES_ADDR_WIDTH) Address; // 16bytes Completer Request Descriptor Format for Memory, I/O, and Atomic Options typedef struct { @@ -50,6 +50,7 @@ typedef struct { TrafficClass trafficClass; Tag tag; BusDeviceFunc requesterId; + DmaCsrAddr reqAddr; } PcieCompleterRequestNonPostedStore deriving(Bits, Eq, Bounded, FShow); diff --git a/src/PrimUtils.bsv b/src/PrimUtils.bsv index 8ba0863..fe768e8 100644 --- a/src/PrimUtils.bsv +++ b/src/PrimUtils.bsv @@ -1,6 +1,7 @@ -import FIFO::*; +import FIFOF::*; import PcieAxiStreamTypes::*; +import DmaTypes::*; function Action immAssert(Bool condition, String assertName, Fmt assertFmtMsg); action @@ -16,74 +17,251 @@ function Action immAssert(Bool condition, String assertName, Fmt assertFmtMsg); endaction endfunction -function Bit#(n) getLowBytes(Bit#(n) data, Bit#(TLog#(TDiv#(n, BYTE_WIDTH))) ptr); - let temp = data; +function Data getDataLowBytes(Data data, DataBytePtr ptr); + Data temp = 0; case(ptr) - 1 : temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 2 : temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 3 : temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 4 : temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 5 : temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 6 : temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 7 : temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 8 : temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 9 : temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 10: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 11: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 12: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 13: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 14: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 15: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 16: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 17: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 18: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 19: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 20: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 21: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 22: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 23: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 24: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 25: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 26: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 27: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 28: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 29: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 30: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 31: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 32: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 33: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 34: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 35: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 36: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 37: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 38: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 39: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 40: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 41: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 42: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 43: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 44: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 45: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 46: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 47: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 48: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 49: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 50: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 51: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 52: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 53: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 54: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 55: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 56: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 57: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 58: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 59: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 60: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 61: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 62: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 63: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; - 64: temp = data[valueOf(BYTE_WIDTH)*1-1:0]; + 1 : temp[valueOf(BYTE_WIDTH)*1 -1:0] = Data'(data[valueOf(BYTE_WIDTH)*1 -1:0]); + 2 : temp[valueOf(BYTE_WIDTH)*2 -1:0] = Data'(data[valueOf(BYTE_WIDTH)*2 -1:0]); + 3 : temp[valueOf(BYTE_WIDTH)*3 -1:0] = Data'(data[valueOf(BYTE_WIDTH)*3 -1:0]); + 4 : temp[valueOf(BYTE_WIDTH)*4 -1:0] = Data'(data[valueOf(BYTE_WIDTH)*4 -1:0]); + 5 : temp[valueOf(BYTE_WIDTH)*5 -1:0] = Data'(data[valueOf(BYTE_WIDTH)*5 -1:0]); + 6 : temp[valueOf(BYTE_WIDTH)*6 -1:0] = Data'(data[valueOf(BYTE_WIDTH)*6 -1:0]); + 7 : temp[valueOf(BYTE_WIDTH)*7 -1:0] = Data'(data[valueOf(BYTE_WIDTH)*7 -1:0]); + 8 : temp[valueOf(BYTE_WIDTH)*8 -1:0] = Data'(data[valueOf(BYTE_WIDTH)*8 -1:0]); + 9 : temp[valueOf(BYTE_WIDTH)*9 -1:0] = Data'(data[valueOf(BYTE_WIDTH)*9 -1:0]); + 10: temp[valueOf(BYTE_WIDTH)*10-1:0] = Data'(data[valueOf(BYTE_WIDTH)*10-1:0]); + 11: temp[valueOf(BYTE_WIDTH)*11-1:0] = Data'(data[valueOf(BYTE_WIDTH)*11-1:0]); + 12: temp[valueOf(BYTE_WIDTH)*12-1:0] = Data'(data[valueOf(BYTE_WIDTH)*12-1:0]); + 13: temp[valueOf(BYTE_WIDTH)*13-1:0] = Data'(data[valueOf(BYTE_WIDTH)*13-1:0]); + 14: temp[valueOf(BYTE_WIDTH)*14-1:0] = Data'(data[valueOf(BYTE_WIDTH)*14-1:0]); + 15: temp[valueOf(BYTE_WIDTH)*15-1:0] = Data'(data[valueOf(BYTE_WIDTH)*15-1:0]); + 16: temp[valueOf(BYTE_WIDTH)*16-1:0] = Data'(data[valueOf(BYTE_WIDTH)*16-1:0]); + 17: temp[valueOf(BYTE_WIDTH)*17-1:0] = Data'(data[valueOf(BYTE_WIDTH)*17-1:0]); + 18: temp[valueOf(BYTE_WIDTH)*18-1:0] = Data'(data[valueOf(BYTE_WIDTH)*18-1:0]); + 19: temp[valueOf(BYTE_WIDTH)*19-1:0] = Data'(data[valueOf(BYTE_WIDTH)*19-1:0]); + 20: temp[valueOf(BYTE_WIDTH)*20-1:0] = Data'(data[valueOf(BYTE_WIDTH)*20-1:0]); + 21: temp[valueOf(BYTE_WIDTH)*21-1:0] = Data'(data[valueOf(BYTE_WIDTH)*21-1:0]); + 22: temp[valueOf(BYTE_WIDTH)*22-1:0] = Data'(data[valueOf(BYTE_WIDTH)*22-1:0]); + 23: temp[valueOf(BYTE_WIDTH)*23-1:0] = Data'(data[valueOf(BYTE_WIDTH)*23-1:0]); + 24: temp[valueOf(BYTE_WIDTH)*24-1:0] = Data'(data[valueOf(BYTE_WIDTH)*24-1:0]); + 25: temp[valueOf(BYTE_WIDTH)*25-1:0] = Data'(data[valueOf(BYTE_WIDTH)*25-1:0]); + 26: temp[valueOf(BYTE_WIDTH)*26-1:0] = Data'(data[valueOf(BYTE_WIDTH)*26-1:0]); + 27: temp[valueOf(BYTE_WIDTH)*27-1:0] = Data'(data[valueOf(BYTE_WIDTH)*27-1:0]); + 28: temp[valueOf(BYTE_WIDTH)*28-1:0] = Data'(data[valueOf(BYTE_WIDTH)*28-1:0]); + 29: temp[valueOf(BYTE_WIDTH)*29-1:0] = Data'(data[valueOf(BYTE_WIDTH)*29-1:0]); + 30: temp[valueOf(BYTE_WIDTH)*30-1:0] = Data'(data[valueOf(BYTE_WIDTH)*30-1:0]); + 31: temp[valueOf(BYTE_WIDTH)*31-1:0] = Data'(data[valueOf(BYTE_WIDTH)*31-1:0]); + 32: temp[valueOf(BYTE_WIDTH)*32-1:0] = Data'(data[valueOf(BYTE_WIDTH)*32-1:0]); + 33: temp[valueOf(BYTE_WIDTH)*33-1:0] = Data'(data[valueOf(BYTE_WIDTH)*33-1:0]); + 34: temp[valueOf(BYTE_WIDTH)*34-1:0] = Data'(data[valueOf(BYTE_WIDTH)*34-1:0]); + 35: temp[valueOf(BYTE_WIDTH)*35-1:0] = Data'(data[valueOf(BYTE_WIDTH)*35-1:0]); + 36: temp[valueOf(BYTE_WIDTH)*36-1:0] = Data'(data[valueOf(BYTE_WIDTH)*36-1:0]); + 37: temp[valueOf(BYTE_WIDTH)*37-1:0] = Data'(data[valueOf(BYTE_WIDTH)*37-1:0]); + 38: temp[valueOf(BYTE_WIDTH)*38-1:0] = Data'(data[valueOf(BYTE_WIDTH)*38-1:0]); + 39: temp[valueOf(BYTE_WIDTH)*39-1:0] = Data'(data[valueOf(BYTE_WIDTH)*39-1:0]); + 40: temp[valueOf(BYTE_WIDTH)*40-1:0] = Data'(data[valueOf(BYTE_WIDTH)*40-1:0]); + 41: temp[valueOf(BYTE_WIDTH)*41-1:0] = Data'(data[valueOf(BYTE_WIDTH)*41-1:0]); + 42: temp[valueOf(BYTE_WIDTH)*42-1:0] = Data'(data[valueOf(BYTE_WIDTH)*42-1:0]); + 43: temp[valueOf(BYTE_WIDTH)*43-1:0] = Data'(data[valueOf(BYTE_WIDTH)*43-1:0]); + 44: temp[valueOf(BYTE_WIDTH)*44-1:0] = Data'(data[valueOf(BYTE_WIDTH)*44-1:0]); + 45: temp[valueOf(BYTE_WIDTH)*45-1:0] = Data'(data[valueOf(BYTE_WIDTH)*45-1:0]); + 46: temp[valueOf(BYTE_WIDTH)*46-1:0] = Data'(data[valueOf(BYTE_WIDTH)*46-1:0]); + 47: temp[valueOf(BYTE_WIDTH)*47-1:0] = Data'(data[valueOf(BYTE_WIDTH)*47-1:0]); + 48: temp[valueOf(BYTE_WIDTH)*48-1:0] = Data'(data[valueOf(BYTE_WIDTH)*48-1:0]); + 49: temp[valueOf(BYTE_WIDTH)*49-1:0] = Data'(data[valueOf(BYTE_WIDTH)*49-1:0]); + 50: temp[valueOf(BYTE_WIDTH)*50-1:0] = Data'(data[valueOf(BYTE_WIDTH)*50-1:0]); + 51: temp[valueOf(BYTE_WIDTH)*51-1:0] = Data'(data[valueOf(BYTE_WIDTH)*51-1:0]); + 52: temp[valueOf(BYTE_WIDTH)*52-1:0] = Data'(data[valueOf(BYTE_WIDTH)*52-1:0]); + 53: temp[valueOf(BYTE_WIDTH)*53-1:0] = Data'(data[valueOf(BYTE_WIDTH)*53-1:0]); + 54: temp[valueOf(BYTE_WIDTH)*54-1:0] = Data'(data[valueOf(BYTE_WIDTH)*54-1:0]); + 55: temp[valueOf(BYTE_WIDTH)*55-1:0] = Data'(data[valueOf(BYTE_WIDTH)*55-1:0]); + 56: temp[valueOf(BYTE_WIDTH)*56-1:0] = Data'(data[valueOf(BYTE_WIDTH)*56-1:0]); + 57: temp[valueOf(BYTE_WIDTH)*57-1:0] = Data'(data[valueOf(BYTE_WIDTH)*57-1:0]); + 58: temp[valueOf(BYTE_WIDTH)*58-1:0] = Data'(data[valueOf(BYTE_WIDTH)*58-1:0]); + 59: temp[valueOf(BYTE_WIDTH)*59-1:0] = Data'(data[valueOf(BYTE_WIDTH)*59-1:0]); + 60: temp[valueOf(BYTE_WIDTH)*60-1:0] = Data'(data[valueOf(BYTE_WIDTH)*60-1:0]); + 61: temp[valueOf(BYTE_WIDTH)*61-1:0] = Data'(data[valueOf(BYTE_WIDTH)*61-1:0]); + 62: temp[valueOf(BYTE_WIDTH)*62-1:0] = Data'(data[valueOf(BYTE_WIDTH)*62-1:0]); + 63: temp[valueOf(BYTE_WIDTH)*63-1:0] = Data'(data[valueOf(BYTE_WIDTH)*63-1:0]); default: temp = 0; endcase return temp; endfunction + +function Data getDataHighBytes(Data data, DataBytePtr ptr); + Data temp = 0; + case(ptr) + 1 : temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*1 ] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*1 ]); + 2 : temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*2 ] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*2 ]); + 3 : temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*3 ] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*3 ]); + 4 : temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*4 ] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*4 ]); + 5 : temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*5 ] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*5 ]); + 6 : temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*6 ] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*6 ]); + 7 : temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*7 ] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*7 ]); + 8 : temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*8 ] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*8 ]); + 9 : temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*9 ] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*9 ]); + 10: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*10] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*10]); + 11: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*11] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*11]); + 12: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*12] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*12]); + 13: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*13] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*13]); + 14: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*14] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*14]); + 15: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*15] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*15]); + 16: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*16] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*16]); + 17: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*17] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*17]); + 18: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*18] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*18]); + 19: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*19] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*19]); + 20: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*20] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*20]); + 21: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*21] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*21]); + 22: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*22] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*22]); + 23: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*23] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*23]); + 24: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*24] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*24]); + 25: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*25] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*25]); + 26: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*26] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*26]); + 27: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*27] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*27]); + 28: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*28] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*28]); + 29: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*29] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*29]); + 30: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*30] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*30]); + 31: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*31] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*31]); + 32: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*32] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*32]); + 33: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*33] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*33]); + 34: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*34] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*34]); + 35: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*35] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*35]); + 36: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*36] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*36]); + 37: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*37] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*37]); + 38: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*38] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*38]); + 39: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*39] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*39]); + 40: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*40] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*40]); + 41: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*41] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*41]); + 42: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*42] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*42]); + 43: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*43] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*43]); + 44: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*44] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*44]); + 45: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*45] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*45]); + 46: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*46] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*46]); + 47: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*47] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*47]); + 48: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*48] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*48]); + 49: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*49] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*49]); + 50: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*50] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*50]); + 51: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*51] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*51]); + 52: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*52] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*52]); + 53: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*53] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*53]); + 54: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*54] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*54]); + 55: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*55] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*55]); + 56: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*56] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*56]); + 57: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*57] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*57]); + 58: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*58] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*58]); + 59: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*59] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*59]); + 60: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*60] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*60]); + 61: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*61] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*61]); + 62: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*62] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*62]); + 63: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*63] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*63]); + default: temp = data; + endcase + return temp; +endfunction + +function DmaMemAddr getAddrLowBits(DmaMemAddr addr, Bit#(TLog#(DMA_MEM_ADDR_WIDTH)) ptr); + DmaMemAddr temp = 0; + case(ptr) + 1 : temp[1 -1:0] = DmaMemAddr'(addr[1 -1:0]); + 2 : temp[2 -1:0] = DmaMemAddr'(addr[2 -1:0]); + 3 : temp[3 -1:0] = DmaMemAddr'(addr[3 -1:0]); + 4 : temp[4 -1:0] = DmaMemAddr'(addr[4 -1:0]); + 5 : temp[5 -1:0] = DmaMemAddr'(addr[5 -1:0]); + 6 : temp[6 -1:0] = DmaMemAddr'(addr[6 -1:0]); + 7 : temp[7 -1:0] = DmaMemAddr'(addr[7 -1:0]); + 8 : temp[8 -1:0] = DmaMemAddr'(addr[8 -1:0]); + 9 : temp[9 -1:0] = DmaMemAddr'(addr[9 -1:0]); + 10: temp[10-1:0] = DmaMemAddr'(addr[10-1:0]); + 11: temp[11-1:0] = DmaMemAddr'(addr[11-1:0]); + 12: temp[12-1:0] = DmaMemAddr'(addr[12-1:0]); + 13: temp[13-1:0] = DmaMemAddr'(addr[13-1:0]); + 14: temp[14-1:0] = DmaMemAddr'(addr[14-1:0]); + 15: temp[15-1:0] = DmaMemAddr'(addr[15-1:0]); + 16: temp[16-1:0] = DmaMemAddr'(addr[16-1:0]); + 17: temp[17-1:0] = DmaMemAddr'(addr[17-1:0]); + 18: temp[18-1:0] = DmaMemAddr'(addr[18-1:0]); + 19: temp[19-1:0] = DmaMemAddr'(addr[19-1:0]); + 20: temp[20-1:0] = DmaMemAddr'(addr[20-1:0]); + 21: temp[21-1:0] = DmaMemAddr'(addr[21-1:0]); + 22: temp[22-1:0] = DmaMemAddr'(addr[22-1:0]); + 23: temp[23-1:0] = DmaMemAddr'(addr[23-1:0]); + 24: temp[24-1:0] = DmaMemAddr'(addr[24-1:0]); + 25: temp[25-1:0] = DmaMemAddr'(addr[25-1:0]); + 26: temp[26-1:0] = DmaMemAddr'(addr[26-1:0]); + 27: temp[27-1:0] = DmaMemAddr'(addr[27-1:0]); + 28: temp[28-1:0] = DmaMemAddr'(addr[28-1:0]); + 29: temp[29-1:0] = DmaMemAddr'(addr[29-1:0]); + 30: temp[30-1:0] = DmaMemAddr'(addr[30-1:0]); + 31: temp[31-1:0] = DmaMemAddr'(addr[31-1:0]); + 32: temp[32-1:0] = DmaMemAddr'(addr[32-1:0]); + 33: temp[33-1:0] = DmaMemAddr'(addr[33-1:0]); + 34: temp[34-1:0] = DmaMemAddr'(addr[34-1:0]); + 35: temp[35-1:0] = DmaMemAddr'(addr[35-1:0]); + 36: temp[36-1:0] = DmaMemAddr'(addr[36-1:0]); + 37: temp[37-1:0] = DmaMemAddr'(addr[37-1:0]); + 38: temp[38-1:0] = DmaMemAddr'(addr[38-1:0]); + 39: temp[39-1:0] = DmaMemAddr'(addr[39-1:0]); + 40: temp[40-1:0] = DmaMemAddr'(addr[40-1:0]); + 41: temp[41-1:0] = DmaMemAddr'(addr[41-1:0]); + 42: temp[42-1:0] = DmaMemAddr'(addr[42-1:0]); + 43: temp[43-1:0] = DmaMemAddr'(addr[43-1:0]); + 44: temp[44-1:0] = DmaMemAddr'(addr[44-1:0]); + 45: temp[45-1:0] = DmaMemAddr'(addr[45-1:0]); + 46: temp[46-1:0] = DmaMemAddr'(addr[46-1:0]); + 47: temp[47-1:0] = DmaMemAddr'(addr[47-1:0]); + 48: temp[48-1:0] = DmaMemAddr'(addr[48-1:0]); + 49: temp[49-1:0] = DmaMemAddr'(addr[49-1:0]); + 50: temp[50-1:0] = DmaMemAddr'(addr[50-1:0]); + 51: temp[51-1:0] = DmaMemAddr'(addr[51-1:0]); + 52: temp[52-1:0] = DmaMemAddr'(addr[52-1:0]); + 53: temp[53-1:0] = DmaMemAddr'(addr[53-1:0]); + 54: temp[54-1:0] = DmaMemAddr'(addr[54-1:0]); + 55: temp[55-1:0] = DmaMemAddr'(addr[55-1:0]); + 56: temp[56-1:0] = DmaMemAddr'(addr[56-1:0]); + 57: temp[57-1:0] = DmaMemAddr'(addr[57-1:0]); + 58: temp[58-1:0] = DmaMemAddr'(addr[58-1:0]); + 59: temp[59-1:0] = DmaMemAddr'(addr[59-1:0]); + 60: temp[60-1:0] = DmaMemAddr'(addr[60-1:0]); + 61: temp[61-1:0] = DmaMemAddr'(addr[61-1:0]); + 62: temp[62-1:0] = DmaMemAddr'(addr[62-1:0]); + 63: temp[63-1:0] = DmaMemAddr'(addr[63-1:0]); + default: temp = 0; + endcase + return temp; +endfunction + +typedef 32 CNTFIFO_SIZE_WIDTH; +typedef UInt#(CNTFIFO_SIZE_WIDTH) FifoSize; + +interface CounteredFIFOF#(type t); + method Action enq (t x); + method Action deq; + method t first; + method Action clear; + method Bool notFull; + method Bool notEmpty; + method FifoSize getCurSize; +endinterface + +module mkCounteredFIFOF#(Integer depth)(CounteredFIFOF#(t)) provisos(Bits#(t, tSz)); + Reg#(FifoSize) curSize <- mkReg(0); + FIFOF#(t) fifo <- mkSizedFIFOF(depth); + + method Action enq (t x); + fifo.enq(x); + curSize <= curSize + 1; + endmethod + + method Action deq; + fifo.deq; + curSize <= curSize - 1; + endmethod + + method t first = fifo.first; + method Action clear = fifo.clear; + method Bool notFull = fifo.notFull; + method Bool notEmpty = fifo.notEmpty; + + method FifoSize getCurSize = curSize; +endmodule + diff --git a/src/StreamUtils.bsv b/src/StreamUtils.bsv index 78cc3d3..e40addf 100755 --- a/src/StreamUtils.bsv +++ b/src/StreamUtils.bsv @@ -99,6 +99,10 @@ function DataBytePtr convertByteEn2BytePtr (ByteEn byteEn); return ptr; endfunction +function Bool isByteEnZero(ByteEn byteEn) begin + return !unpack(remainStream.byteEn[0]); +end + function DataStream getEmptyStream (); return DataStream{ data: 0, @@ -221,7 +225,7 @@ module mkStreamConcat (StreamConcat ifc); streamB.isFirst = False; if (hasRemainReg) begin let {concatStream, remainStream, remainBytePtr} = getConcatStream(remainStreamReg, streamB, remainBytePtrReg, bytePtrB); - hasRemainReg <= unpack(remainStream.byteEn[0]); + hasRemainReg <= !isByteEnZero(remainStream.byteEn); hasLastRemainReg <= streamB.isLast; remainStreamReg <= remainStream; remainBytePtrReg <= remainBytePtr; @@ -247,7 +251,7 @@ module mkStreamConcat (StreamConcat ifc); let streamB = prepareFifoB.first.stream; let bytePtrB = prepareFifoB.first.bytePtr; let {concatStream, remainStream, remainBytePtr} = getConcatStream(streamA, streamB, bytePtrA, bytePtrB); - hasRemainReg <= unpack(remainStream.byteEn[0]); + hasRemainReg <= !isByteEnZero(remainStream.byteEn); hasLastRemainReg <= streamB.isLast; remainStreamReg <= remainStream; remainBytePtrReg <= remainBytePtr; @@ -352,24 +356,24 @@ module mkStreamSplit(StreamSplit ifc); isFirst: True, isLast: True }; - hasRemainReg <= unpack(remainStream.byteEn[0]); + hasRemainReg <= !isByteEnZero(remainStream.byteEn); hasLastRemainReg <= stream.isLast; remainBytePtrReg <= frameBytePtr - truncateBytePtr; - remainStreamReg <= remainStream; + remainStreamReg <= remainStream; end // concat the new frame with the remainReg else if (hasRemainReg) begin let {concatStream, remainStream, remainBytePtr} = getConcatStream(remainStreamReg, stream, remainBytePtrReg, frameBytePtr); outputFifo.enq(concatStream); - hasRemainReg <= unpack(remainStream.byteEn[0]); + hasRemainReg <= !isByteEnZero(remainStream.byteEn); hasLastRemainReg <= stream.isLast; - remainStreamReg <= remainStream; + remainStreamReg <= remainStream; remainBytePtrReg <= remainBytePtr; end end endrule - interface inputStreamFifoIn = convertFifoToFifoIn(inputFifo); + interface inputStreamFifoIn = convertFifoToFifoIn(inputFifo); interface splitLocationFifoIn = convertFifoToFifoIn(splitLocationFifo); interface outputStreamFifoOut = convertFifoToFifoOut(outputFifo); From b70f57db6e7e65d450606340cbb8c076bcadc061 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Sun, 21 Jul 2024 14:48:06 +0800 Subject: [PATCH 23/53] Add CCDescriptor and modify interface --- img/completer.drawio.svg | 384 ++++++++++++++++++++++++++++++++++++ src/DmaController.bsv | 33 ++-- src/DmaRequestCore.bsv | 20 +- src/DmaTypes.bsv | 31 ++- src/PcieCompleter.bsv | 16 +- src/PcieDescriptorTypes.bsv | 38 ++++ test/TestDmaCore.bsv | 4 +- 7 files changed, 487 insertions(+), 39 deletions(-) create mode 100644 img/completer.drawio.svg diff --git a/img/completer.drawio.svg b/img/completer.drawio.svg new file mode 100644 index 0000000..f9f2ce6 --- /dev/null +++ b/img/completer.drawio.svg @@ -0,0 +1,384 @@ + + + + + + + + + + +
+
+
+ rawPcieCompleterRequest +
+
+
+
+ + rawPcieCompleterRequest + +
+
+ + + + +
+
+
+ DataStream#(PcieAxiStream) +
+
+
+
+ + DataStream#(PcieAxiStream) + +
+
+ + + + + + +
+
+
+ DataStream#(CQSideBand) +
+
+
+
+ + DataStream#(CQSideBand) + +
+
+ + + + + + +
+
+
+ CsrWriteReq +
+
+
+
+ + CsrWriteReq + +
+
+ + + + + + +
+
+
+ CsrReadReq +
+
+
+
+ + CsrReadReq + +
+
+ + + + + + +
+
+
+ CQDescripter +
+
+
+
+ + CQDescripter + +
+
+ + + + +
+
+
+ CompleterRequest +
+
+
+
+ + CompleterRequest + +
+
+ + + + + + + + +
+
+
+ parse +
+
+
+
+ + parse + +
+
+ + + + + + + + +
+
+
+ CQDescripterFIFO +
+
+
+
+ + CQDescripterFIFO + +
+
+ + + + +
+
+
+ rawPcieCompleterComplete +
+
+
+
+ + rawPcieCompleterComplete + +
+
+ + + + +
+
+
+ DmaHostToCardWr +
+
+
+
+ + DmaHostToCardWr + +
+
+ + + + + + +
+
+
+ DmaHostToCardRd +
+
+
+
+ + DmaHostToCardRd + +
+
+ + + + + +
+
+
+ CompleterComplete +
+
+
+
+ + CompleterComplete + +
+
+ + + + +
+
+
+ DataStream#(PcieAxiStream) +
+
+
+
+ + DataStream#(PcieAxiStream) + +
+
+ + + + + + +
+
+
+ DataStream#(CCSideBand) +
+
+
+
+ + DataStream#(CCSideBand) + +
+
+ + + + + + +
+
+
+ generate +
+
+
+
+ + generate + +
+
+ + + + + + +
+
+
+ assert +
+
+
+
+ + assert + +
+
+ + + + + + +
+
+
+ CsrReadResp +
+
+
+
+ + CsrReadResp + +
+
+ + + + + +
+
+
+ CQ_credit +
+
+
+
+ + CQ_credit + +
+
+ + + + +
+
+
+ counter +
+
+
+
+ + counter + +
+
+
+ + + + + Text is not SVG - cannot display + + + +
\ No newline at end of file diff --git a/src/DmaController.bsv b/src/DmaController.bsv index 5d8df4b..db36cf0 100755 --- a/src/DmaController.bsv +++ b/src/DmaController.bsv @@ -1,24 +1,31 @@ +import FIFOF::*; + import PcieTypes::*; -import DmaTypes::*; +import DmaTypes::*; interface DmaController#(numeric type dataWidth); - - interface FifoIn#(DataStream) dataC2HPipeIn; - interface FifoIn#(DmaRequestFrame) reqC2HPipeIn; - interface FifoIn#(DmaRequestFrame) reqH2CPipeIn; - interface FifoOut#(DataStream) dataH2CPipeOut; - - interface FifoIn#(DmaCsrFrame) csrC2HPipeIn; - interface FifoOut#(DMACsrAddr) csrC2HPipeOut; // read reg in the card from Host - interface FifoOut#(DmaCsrFrame) csrH2CPipeOut; - + // Requester interfaces, where the Card serve as the Master + interface FifoIn#(DataStream) c2hDataFifoIn; // Card writes Host Memory + interface FifoIn#(DmaRequest) c2hReqFifoIn; // Card writes Host Memory + interface FifoIn#(DmaRequest) h2cReqFifoIn; // Card reads Host Memory + interface FifoOut#(DataStream) h2cDataFifoOut; // Card reads Host Memory + + // Completer interfaces, where the Card serve as the Slave + interface FifoIn#(DmaCsrFrame) c2hCsrValFifoIn; // Host reads Card Registers + interface FifoOut#(DMACsrAddr) c2hCsrReqFifoOut; // Host reads Card Registers + interface FifoOut#(DmaCsrFrame) h2cCsrValFifoOut; // Host writes Card Registers + + // Raw PCIe interfaces, connected to the Xilinx PCIe IP interface RawPcieRequester pcieRequester; interface RawPcieCompleter pcieCompleter; interface RawPcieConfiguration pcieConfig; - endinterface module mkDmaController#() (DmaController ifc); + FIFOF#(DataStream) c2hDataFifo <- mkFIFOF; + FIFOF#(DataStream) h2cDataFifo <- mkFIFOF; + FIFOF#(DmaRequest) c2hReqFifo <- mkFIFOF; + FIFOF#(DmaRequest) h2cReqFifo <- mkFIFOF; - + FIFOF#(DmaCsrFrame) endmodule \ No newline at end of file diff --git a/src/DmaRequestCore.bsv b/src/DmaRequestCore.bsv index 8c7df68..a712d09 100755 --- a/src/DmaRequestCore.bsv +++ b/src/DmaRequestCore.bsv @@ -19,20 +19,20 @@ typedef 3 PCIE_TLP_SIZE_SETTING_WIDTH; typedef Bit#(PCIE_TLP_SIZE_SETTING_WIDTH) PcieTlpSizeSetting; typedef struct { - DmaRequestFrame dmaRequest; + DmaRequest dmaRequest; DmaMemAddr firstChunkLen; } ChunkRequestFrame deriving(Bits, Eq); interface ChunkCompute; - interface FifoIn#(DmaRequestFrame) dmaRequestFifoIn; - interface FifoOut#(DmaRequestFrame) chunkRequestFifoOut; + interface FifoIn#(DmaRequest) dmaRequestFifoIn; + interface FifoOut#(DmaRequest) chunkRequestFifoOut; interface Put#(PcieTlpSizeSetting) setTlpMaxSize; endinterface module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); - FIFOF#(DmaRequestFrame) inputFifo <- mkFIFOF; - FIFOF#(DmaRequestFrame) outputFifo <- mkFIFOF; + FIFOF#(DmaRequest) inputFifo <- mkFIFOF; + FIFOF#(DmaRequest) outputFifo <- mkFIFOF; FIFOF#(ChunkRequestFrame) splitFifo <- mkFIFOF; Reg#(DmaMemAddr) newChunkPtrReg <- mkReg(0); @@ -42,13 +42,13 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); Reg#(DmaMemAddr) tlpMaxSize <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE))); Reg#(PcieTlpSizeWidth) tlpMaxSizeWidth <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH))); - function Bool hasBoundary(DmaRequestFrame request); + function Bool hasBoundary(DmaRequest request); let highIdx = (request.startAddr + request.length - 1) >> valueOf(BUS_BOUNDARY_WIDTH); let lowIdx = request.startAddr >> valueOf(BUS_BOUNDARY_WIDTH); return (highIdx > lowIdx); endfunction - function DmaMemAddr getOffset(DmaRequestFrame request); + function DmaMemAddr getOffset(DmaRequest request); // MPS - startAddr % MPS, MPS means MRRS when the module is set to RX mode DmaMemAddr remainderOfMps = zeroExtend(PcieTlpMaxMaxPayloadSize'(request.startAddr[tlpMaxSizeWidth-1:0])); DmaMemAddr offsetOfMps = tlpMaxSize - remainderOfMps; @@ -71,7 +71,7 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); if (isSplittingReg) begin // !isFirst if (totalLenRemainReg <= tlpMaxSize) begin isSplittingReg <= False; - outputFifo.enq(DmaRequestFrame { + outputFifo.enq(DmaRequest { startAddr: newChunkPtrReg, length: totalLenRemainReg }); @@ -80,7 +80,7 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); end else begin isSplittingReg <= True; - outputFifo.enq(DmaRequestFrame { + outputFifo.enq(DmaRequest { startAddr: newChunkPtrReg, length: tlpMaxSize }); @@ -92,7 +92,7 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); let remainderLength = splitRequest.dmaRequest.length - splitRequest.firstChunkLen; Bool isSplittingNextCycle = (remainderLength > 0); isSplittingReg <= isSplittingNextCycle; - outputFifo.enq(DmaRequestFrame { + outputFifo.enq(DmaRequest { startAddr: splitRequest.dmaRequest.startAddr, length: splitRequest.firstChunkLen }); diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index 66c5b44..5069eff 100755 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -29,12 +29,7 @@ typedef Bit#(TAdd#(1, TLog#(BYTE_EN_WIDTH))) DataBytePtr; typedef struct { DmaMemAddr startAddr; DmaMemAddr length; -} DmaRequestFrame deriving(Bits, Bounded, Eq); - -typedef struct { - DMACsrAddr address; - DMACsrValue value; -} DmaCsrFrame deriving(Bits, Bounded, Eq); +} DmaRequest deriving(Bits, Bounded, Eq); typedef enum { DMA_RX, @@ -48,8 +43,8 @@ typedef struct { Bool isLast; } DataStream deriving(Bits, Bounded, Eq); -instance FShow#(DmaRequestFrame); - function Fmt fshow(DmaRequestFrame request); +instance FShow#(DmaRequest); + function Fmt fshow(DmaRequest request); return ($format("> valueOf(BUS_BOUNDARY_WIDTH); let lowIdx = request.startAddr >> valueOf(BUS_BOUNDARY_WIDTH); return (highIdx > lowIdx); @@ -47,7 +47,7 @@ module mkChunkComputerTb(Empty); DmaMemAddr testLength <- lengthRandomVal.next; let testEnd = testAddr + testLength - 1; if (testEnd > testAddr && testEnd <= fromInteger(valueOf(MAX_ADDRESS))) begin - let request = DmaRequestFrame{ + let request = DmaRequest{ startAddr: testAddr, length: testLength }; From 31d1b0f1e44da224165ecceec65822092a519be6 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Sun, 21 Jul 2024 21:35:47 +0800 Subject: [PATCH 24/53] Update dmac interfaces --- src/{PcieCompleter.bsv => DmaCompleter.bsv} | 100 +++++++++++++------- src/DmaController.bsv | 41 ++++---- src/DmaRequester.bsv | 17 ++++ src/DmaTypes.bsv | 13 +-- src/PcieDescriptorTypes.bsv | 8 -- src/PcieTypes.bsv | 89 ++++++++++++++--- test/TestDmaCompleter.bsv | 0 7 files changed, 186 insertions(+), 82 deletions(-) rename src/{PcieCompleter.bsv => DmaCompleter.bsv} (64%) create mode 100644 src/DmaRequester.bsv create mode 100644 test/TestDmaCompleter.bsv diff --git a/src/PcieCompleter.bsv b/src/DmaCompleter.bsv similarity index 64% rename from src/PcieCompleter.bsv rename to src/DmaCompleter.bsv index 624e6eb..be69863 100644 --- a/src/PcieCompleter.bsv +++ b/src/DmaCompleter.bsv @@ -6,8 +6,10 @@ import PcieDescriptorTypes::*; import DmaTypes::*; typedef 1 IDEA_DWORD_CNT_OF_CSR; -typedef 10 CMPL_NPREQ_INFLIGHT_NUM; +typedef 64 CMPL_NPREQ_INFLIGHT_NUM; typedef 20 CMPL_NPREQ_WAITING_CLKS; +typedef 2'b11 NP_CREDIT_INCREMENT; +typedef 2'b00 NP_CREDIT_NOCHANGE; typedef struct { DmaCsrAddr addr; @@ -18,14 +20,14 @@ typedef DmaCsrValue CsrReadResp; typedef struct { DmaCsrAddr rdAddr; - PcieCompleterRequestNonPostedStore npInfo; + PcieCompleterRequestDescriptor npInfo; } CsrReadReq deriving(Bits, Eq, Bounded, FShow); -interface Completer; +interface DmaCompleter; interface RawPcieCompleterRequest rawCompleterRequest; interface RawPcieCompleterComplete rawCompleterComplete; - interface DmaHostToCardWrite h2cCsrWrite; - interface DmaHostToCardRead h2cCsrRead; + interface DmaHostToCardWrite h2cWrite; + interface DmaHostToCardRead h2cRead; method DmaCsrValue getRegisterValue(DmaCsrAddr addr); endinterface @@ -64,7 +66,7 @@ module mkCompleterRequest(CompleterRequest); return (sideBand.firstByteEn[valueOf(PCIE_TLP_FIRST_BE_WIDTH)-1] == 1); endfunction - function DmaCsrAddr getAddrFromCqDescriptor(PcieCompleterRequestDescriptor descriptor); + function DmaCsrAddr getCsrAddrFromCqDescriptor(PcieCompleterRequestDescriptor descriptor); let addr = getAddrLowBits(zeroExtend(descriptor.address), descriptor.barAperture); // Only support one BAR now, no operation if (descriptor.barId == 0) begin @@ -76,16 +78,7 @@ module mkCompleterRequest(CompleterRequest); return truncate(addr); endfunction - function PcieCompleterRequestNonPostedStore convertDescriptorToNpStore(PcieCompleterRequestDescriptor descriptor); - return PcieCompleterRequestNonPostedStore { - attributes : descriptor.attributes, - trafficClass: descriptor.trafficClass, - tag : descriptor.tag, - requesterId : descriptor.requesterId - }; - endfunction - - rule parseData; + rule parse; inFifo.deq; let axiStream = inFifo.first; PcieCompleterRequestSideBandFrame sideBand = pack(axiStream.tUser); @@ -95,8 +88,9 @@ module mkCompleterRequest(CompleterRequest); case (descriptor.reqType) begin MEM_WRITE_REQ: begin if (descriptor.dwordCnt == valueOf(IDEA_DWORD_CNT_OF_CSR) && isFirstBytesAllValid) begin - DmaCsrValue wrValue = getDataFromFirstBeat(axiStream)[valueOf(DMA_CSR_ADDR_WIDTH)-1:0]; - DmaCsrAddr wrAddr = getAddrFromCqDescriptor(descriptor); + let firstData = getDataFromFirstBeat(axiStream); + DmaCsrValue wrValue = firstData[valueOf(DMA_CSR_ADDR_WIDTH)-1:0]; + DmaCsrAddr wrAddr = getCsrAddrFromCqDescriptor(descriptor); let wrReq = CsrWriteReq { address : wrAddr, value : wrValue @@ -108,18 +102,16 @@ module mkCompleterRequest(CompleterRequest); end end MEM_READ_REQ: begin - let rdReqAddr = getAddrFromCqDescriptor(descriptor); - let npInfo = convertDescriptorToNpStore(descriptor); + let rdReqAddr = getCsrAddrFromCqDescriptor(descriptor); let rdReq = CsrReadReq{ rdAddr: rdReqAddr, - npInfo: npInfo + npInfo: descriptor } rdReqFifo.enq(rdReq); end default: illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; end end - outFifo.enq(stream); endrule interface axiStreamFifoIn = convertFifoToFifoIn(inFifo); @@ -137,37 +129,75 @@ module mkCompleterComplete(CompleterComplete); interface axiStreamFifoOut = convertFifoToFifoOut(outFifo); interface csrReadRespFifoIn = convertFifoToFifoIn(rdRespFifo); - interface csrWriteReqFifoOut = convertFifoToFifoIn(rdReqFifo); + interface csrReadReqFifoOut = convertFifoToFifoIn(rdReqFifo); endmodule (* synthesize *) -module mkCompleter(Completer); +module mkDmaCompleter(DmaCompleter); CompleterRequest cmplRequest = mkCompleterRequest; CompleterComplete cmplComplete = mkCompleterComplete; - FIFOF#(csrReadResp) csrRdRespFifo <- mkFIFOF; - FIFOF#(csrReadReq) csrRdReqOutFifo <- mkFIFOF; - FIFOF#(csrReadReq) csrRdReqWaitingFifo <- mkSizedFIFOF(CMPL_NPREQ_INFLIGHT_NUM); + FIFOF#(DmaCsrValue) h2cCsrWriteDataFifo <- mkFIFOF; + FIFOF#(DmaCsrAddr) h2cCsrWriteReqFifo <- mkFIFOF; + FIFOF#(DmaCsrAddr) h2cCsrReadReqFifo <- mkFIFOF; + FIFOF#(DmaCsrValue) h2cCsrReadDataFifo <- mkFIFOF; + CounteredFIFOF#(csrReadReq) csrRdReqStoreFifo <- mkCounteredFIFOF(CMPL_NPREQ_INFLIGHT_NUM); + Reg#(PcieNonPostedRequst) npReqCreditCtrlReg <- mkReg(valueOf(NP_CREDIT_INCREMENT)); Reg#(PcieNonPostedRequstCount) npReqCreditCntReg <- mkReg(0); - interface RawPcieCompleterRequest; + + rule genCsrWriteReq; + let wrReq = cmplRequest.csrWriteReqFifoOut.first; + cmplRequest.csrWriteReqFifoOut.deq; + h2cCsrWriteDataFifo.enq(wrReq.value); + h2cCsrWriteReqFifo.enq(wrReq.addr); + endrule + + rule genCsrReadReq; + let rdReq = cmplRequest.csrReadReqFifoOut.first; + cmplRequest.csrReadReqFifoOut.deq; + h2cCsrReadReqFifo.enq(rdReq.rdAddr); + csrRdReqStoreFifo.enq(rdReq); + endrule + + rule procCsrReadResp; + let req = csrRdReqStoreFifo.first; + let resp = h2cCsrReadDataFifo.first; + cmplComplete.csrReadRespFifoIn(resp); + cmplComplete.csrReadReqFifoIn(req); + endrule + + rule npBackPressure; + if (csrRdReqDescriptorFifo.getCurSize == fromInteger(valueOf(TDiv#(CMPL_NPREQ_INFLIGHT_NUM,2)))) begin + npReqCreditCtrlReg <= valueOf(NP_CREDIT_NOCHANGE); + end + else begin + npReqCreditCtrlReg <= valueOf(NP_CREDIT_INCREMENT); + end + endrule + + interface rawCompleterRequest; interface rawAxiStreamSlave = mkFifoInToRawPcieAxiStreamSlave#(cmplRequest.axiStreamFifoIn); - // TODO: back-pressure according to the temperory stored RdReq Num - method PcieNonPostedRequst nonPostedReqCreditIncrement = 2'b11; + method PcieNonPostedRequst nonPostedReqCreditIncrement = npReqCreditCtrlReg; method Action nonPostedReqCreditCnt(PcieNonPostedRequstCount nonPostedpReqCount); npReqCreditCntReg <= nonPostedpReqCount; endmethod endinterface - interface RawPcieCompleterComplete; + interface rawCompleterComplete; interface rawAxiStreamSlave = mkFifoOutToRawPcieAxiStreamMaster#(cmplComplete.axiStreamFifoOut); endinterface - interface csrWriteReqFifoOut = cmplRequest.csrWriteReqFifoOut; + interface h2cWrite; + interface dataFifoOut = convertFifoToFifoOut(h2cCsrWriteDataFifo); + interface reqFifoOut = convertFifoToFifoOut(h2cCsrWriteReqFifo); + endinterface - interface csrReadReqFifoOut = convertFifoToFifoOut(csrRdReqOutFifo); - interface csrReadRespFifoIn = convertFifoToFifoIn(csrRdRespFifo); + interface h2cRead; + interface reqFifoOut = convertFifoToFifoOut(h2cCsrReadReqFifo); + interface dataFifoIn = convertFifoToFifoIn(h2cCsrReadDataFifo); + endinterface // TODO: get internal registers value method DmaCsrValue getRegisterValue(DmaCsrAddr addr); @@ -175,3 +205,5 @@ module mkCompleter(Completer); method endmodule + +module mkWriteReqTo \ No newline at end of file diff --git a/src/DmaController.bsv b/src/DmaController.bsv index db36cf0..dd680d4 100755 --- a/src/DmaController.bsv +++ b/src/DmaController.bsv @@ -2,30 +2,39 @@ import FIFOF::*; import PcieTypes::*; import DmaTypes::*; +import DmaCompleter::*; +import DmaRequester::*; interface DmaController#(numeric type dataWidth); // Requester interfaces, where the Card serve as the Master - interface FifoIn#(DataStream) c2hDataFifoIn; // Card writes Host Memory - interface FifoIn#(DmaRequest) c2hReqFifoIn; // Card writes Host Memory - interface FifoIn#(DmaRequest) h2cReqFifoIn; // Card reads Host Memory - interface FifoOut#(DataStream) h2cDataFifoOut; // Card reads Host Memory + interface DmaCardToHostWrite c2hWrite; + interface DmaCardToHostRead c2hRead; // Completer interfaces, where the Card serve as the Slave - interface FifoIn#(DmaCsrFrame) c2hCsrValFifoIn; // Host reads Card Registers - interface FifoOut#(DMACsrAddr) c2hCsrReqFifoOut; // Host reads Card Registers - interface FifoOut#(DmaCsrFrame) h2cCsrValFifoOut; // Host writes Card Registers + interface DmaHostToCardWrite h2cWrite; + interface DmaHostToCardRead h2cRead; // Raw PCIe interfaces, connected to the Xilinx PCIe IP - interface RawPcieRequester pcieRequester; - interface RawPcieCompleter pcieCompleter; - interface RawPcieConfiguration pcieConfig; + interface RawPcieRequesterRequest pcieRequesterRequest; + interface RawPcieRequesterComplete pcieRequesterComplete; + interface RawPcieCompleterRequest pcieCompleterRequest; + interface RawPcieCompleterComplete pcieCompleterComplete; + interface RawPcieConfiguration pcieConfiguration; endinterface -module mkDmaController#() (DmaController ifc); - FIFOF#(DataStream) c2hDataFifo <- mkFIFOF; - FIFOF#(DataStream) h2cDataFifo <- mkFIFOF; - FIFOF#(DmaRequest) c2hReqFifo <- mkFIFOF; - FIFOF#(DmaRequest) h2cReqFifo <- mkFIFOF; +module mkDmaController(DmaController); + DmaCompleter completer = mkDmaCompleter; + DmaRequester requester = mkDmaRequester; + + interface c2hWrite = requester.c2hWrite; + interface c2hRead = requester.c2hRead; + + interface h2cWrite = completer.h2cWrite; + interface h2cRead = completer.h2cRead; + + interface pcieRequesterRequest = requester.rawRequesterRequest; + interface pcieRequesterComplete = requester.rawRequesterComplete; + interface pcieCompleterRequest = completer.rawCompleterRequest; + interface pcieCompleterComplete = completer.rawCompleterComplete; - FIFOF#(DmaCsrFrame) endmodule \ No newline at end of file diff --git a/src/DmaRequester.bsv b/src/DmaRequester.bsv new file mode 100644 index 0000000..7a86abd --- /dev/null +++ b/src/DmaRequester.bsv @@ -0,0 +1,17 @@ + +import PcieTypes::*; +import PcieAxiStreamTypes::*; +import PcieDescriptorTypes::*; +import DmaTypes::*; + +interface Requester; + interface DmaCardToHostWrite c2hWrite; + interface DmaCardToHostRead c2hRead; + interface RawPcieRequesterRequest rawRequesterRequest; + interface RawPcieRequesterComplete rawRequesterComplete; +endinterface + +module mkRequester(Empty); + + +endmodule \ No newline at end of file diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index 5069eff..ea1645e 100755 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -10,8 +10,8 @@ typedef 32 DMA_CSR_ADDR_WIDTH; typedef 32 DMA_CSR_DATA_WIDTH; typedef Bit#(DMA_MEM_ADDR_WIDTH) DmaMemAddr; -typedef Bit#(DMA_CSR_ADDR_WIDTH) DMACsrAddr; -typedef Bit#(DMA_CSR_DATA_WIDTH) DMACsrValue; +typedef Bit#(DMA_CSR_ADDR_WIDTH) DmaCsrAddr; +typedef Bit#(DMA_CSR_DATA_WIDTH) DmaCsrValue; typedef 8 BYTE_WIDTH; typedef TLog#(BYTE_WIDTH) BYTE_WIDTH_WIDTH; @@ -49,12 +49,6 @@ instance FShow#(DmaRequest); endfunction endinstance -instance FShow#(DmaCsrFrame); - function Fmt fshow(DmaCsrFrame csr); - return ($format(" Date: Mon, 22 Jul 2024 04:13:27 +0800 Subject: [PATCH 25/53] testDmaCompterRequest --- run_one.sh | 2 +- src/DmaCompleter.bsv | 117 ++++++----- src/DmaController.bsv | 4 +- src/DmaRequester.bsv | 23 ++- src/PcieDescriptorTypes.bsv | 8 +- src/PrimUtils.bsv | 378 ++++++++++++++++++------------------ test/TestDmaCompleter.bsv | 97 +++++++++ 7 files changed, 378 insertions(+), 251 deletions(-) diff --git a/run_one.sh b/run_one.sh index f049c0d..7e935a8 100755 --- a/run_one.sh +++ b/run_one.sh @@ -13,7 +13,7 @@ TEST_LOG=run.log TEST_DIR=test cd $TEST_DIR truncate -s 0 $TEST_LOG -FILES=`ls TestDmaCore.bsv` +FILES=`ls TestDmaCompleter.bsv` ########################################################################### for FILE in $FILES; do # echo $FILE diff --git a/src/DmaCompleter.bsv b/src/DmaCompleter.bsv index be69863..f36d9e1 100644 --- a/src/DmaCompleter.bsv +++ b/src/DmaCompleter.bsv @@ -1,20 +1,33 @@ -import FIFO::*; +import FIFOF::*; +import SemiFifo::*; +import PrimUtils::*; import PcieAxiStreamTypes::*; import PcieTypes::*; import PcieDescriptorTypes::*; import DmaTypes::*; -typedef 1 IDEA_DWORD_CNT_OF_CSR; +typedef 1 IDEA_DWORD_CNT_OF_CSR; +typedef 4 IDEA_FIRST_BE_HIGH_VALID_PTR_OF_CSR; + typedef 64 CMPL_NPREQ_INFLIGHT_NUM; typedef 20 CMPL_NPREQ_WAITING_CLKS; typedef 2'b11 NP_CREDIT_INCREMENT; typedef 2'b00 NP_CREDIT_NOCHANGE; +typedef PcieAxiStream#(PCIE_COMPLETER_REQUEST_TUSER_WIDTH) CmplReqAxiStream; +typedef PcieAxiStream#(PCIE_COMPLETER_COMPLETE_TUSER_WIDTH) CmplCmplAxiStream; + typedef struct { DmaCsrAddr addr; DmaCsrValue value; -} CsrWriteReq deriving(Bits, Eq, Bounded, FShow); +} CsrWriteReq deriving(Bits, Eq, Bounded); + +instance FShow#(CsrWriteReq); + function Fmt fshow(CsrWriteReq wrReq); + return ($format("32bit requests (* synthesize *) module mkCompleterRequest(CompleterRequest); - FIFOF#(PcieAxiStream) inFifo <- mkFIFOF; - FIFOF#(CsrWriteReq) wrReqFifo <- mkFIFOF; - FIFOF#(CsrReadReq) rdReqFifo <- mkFIFOF; + FIFOF#(CmplReqAxiStream) inFifo <- mkFIFOF; + FIFOF#(CsrWriteReq) wrReqFifo <- mkFIFOF; + FIFOF#(CsrReadReq) rdReqFifo <- mkFIFOF; Reg#(Bool) isInPacket <- mkReg(False); - Reg#(Uint#(32)) illegalPcieReqCntReg <- mkReg(0); + Reg#(UInt#(32)) illegalPcieReqCntReg <- mkReg(0); - function PcieCompleterRequestDescriptor getDescriptorFromFirstBeat(PcieAxiStream axiStream); - return pack(axiStream.tDATA[valueOf(CQ_DESCRIPTOR_WIDTH)-1:0]); + function PcieCompleterRequestDescriptor getDescriptorFromFirstBeat(CmplReqAxiStream axiStream); + return unpack(axiStream.tData[valueOf(DES_CQ_DESCRIPTOR_WIDTH)-1:0]); endfunction - function Data getDataFromFirstBeat(PcieAxiStream axiStream); - return axiStream.tData >> valueOf(CQ_DESCRIPTOR_WIDTH); + function Data getDataFromFirstBeat(CmplReqAxiStream axiStream); + return axiStream.tData >> valueOf(DES_CQ_DESCRIPTOR_WIDTH); endfunction - function Bool isFirstBytesAllValid(PcieCompleterCompleteSideBandFrame sideBand); - return (sideBand.firstByteEn[valueOf(PCIE_TLP_FIRST_BE_WIDTH)-1] == 1); + function Bool isFirstBytesAllValid(PcieCompleterRequestSideBandFrame sideBand); + return (sideBand.firstByteEn[valueOf(IDEA_FIRST_BE_HIGH_VALID_PTR_OF_CSR)-1] == 1); endfunction function DmaCsrAddr getCsrAddrFromCqDescriptor(PcieCompleterRequestDescriptor descriptor); @@ -81,36 +94,36 @@ module mkCompleterRequest(CompleterRequest); rule parse; inFifo.deq; let axiStream = inFifo.first; - PcieCompleterRequestSideBandFrame sideBand = pack(axiStream.tUser); - isInPacket <= !axiStream.isLast; + PcieCompleterRequestSideBandFrame sideBand = unpack(axiStream.tUser); + isInPacket <= !axiStream.tLast; if (!isInPacket) begin let descriptor = getDescriptorFromFirstBeat(axiStream); - case (descriptor.reqType) begin - MEM_WRITE_REQ: begin - if (descriptor.dwordCnt == valueOf(IDEA_DWORD_CNT_OF_CSR) && isFirstBytesAllValid) begin + case (descriptor.reqType) + fromInteger(valueOf(MEM_WRITE_REQ)): begin + if (descriptor.dwordCnt == fromInteger(valueOf(IDEA_DWORD_CNT_OF_CSR)) && isFirstBytesAllValid(sideBand)) begin let firstData = getDataFromFirstBeat(axiStream); DmaCsrValue wrValue = firstData[valueOf(DMA_CSR_ADDR_WIDTH)-1:0]; DmaCsrAddr wrAddr = getCsrAddrFromCqDescriptor(descriptor); let wrReq = CsrWriteReq { - address : wrAddr, + addr : wrAddr, value : wrValue - } + }; wrReqFifo.enq(wrReq); end else begin illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; end end - MEM_READ_REQ: begin + fromInteger(valueOf(MEM_READ_REQ)): begin let rdReqAddr = getCsrAddrFromCqDescriptor(descriptor); let rdReq = CsrReadReq{ rdAddr: rdReqAddr, npInfo: descriptor - } + }; rdReqFifo.enq(rdReq); end - default: illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; - end + default: begin $display("INFO"); illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; end + endcase end endrule @@ -121,31 +134,33 @@ endmodule (* synthesize *) module mkCompleterComplete(CompleterComplete); - FIFOF#(PcieAxiStream) outFifo <- mkFIFOF; - FIFOF#(CsrReadResp) rdRespFifo <- mkFIFOF; - FIFOF#(CsrReadReq) rdReqFifo <- mkFIFOF; + FIFOF#(CmplCmplAxiStream) outFifo <- mkFIFOF; + FIFOF#(CsrReadResp) rdRespFifo <- mkFIFOF; + FIFOF#(CsrReadReq) rdReqFifo <- mkFIFOF; // TODO: the logic of cc interface axiStreamFifoOut = convertFifoToFifoOut(outFifo); interface csrReadRespFifoIn = convertFifoToFifoIn(rdRespFifo); - interface csrReadReqFifoOut = convertFifoToFifoIn(rdReqFifo); + interface csrReadReqFifoIn = convertFifoToFifoIn(rdReqFifo); endmodule (* synthesize *) module mkDmaCompleter(DmaCompleter); - CompleterRequest cmplRequest = mkCompleterRequest; - CompleterComplete cmplComplete = mkCompleterComplete; + CompleterRequest cmplRequest <- mkCompleterRequest; + CompleterComplete cmplComplete <- mkCompleterComplete; FIFOF#(DmaCsrValue) h2cCsrWriteDataFifo <- mkFIFOF; FIFOF#(DmaCsrAddr) h2cCsrWriteReqFifo <- mkFIFOF; FIFOF#(DmaCsrAddr) h2cCsrReadReqFifo <- mkFIFOF; FIFOF#(DmaCsrValue) h2cCsrReadDataFifo <- mkFIFOF; - CounteredFIFOF#(csrReadReq) csrRdReqStoreFifo <- mkCounteredFIFOF(CMPL_NPREQ_INFLIGHT_NUM); + CounteredFIFOF#(CsrReadReq) csrRdReqStoreFifo <- mkCounteredFIFOF(valueOf(CMPL_NPREQ_INFLIGHT_NUM)); - Reg#(PcieNonPostedRequst) npReqCreditCtrlReg <- mkReg(valueOf(NP_CREDIT_INCREMENT)); + Reg#(PcieNonPostedRequst) npReqCreditCtrlReg <- mkReg(fromInteger(valueOf(NP_CREDIT_INCREMENT))); Reg#(PcieNonPostedRequstCount) npReqCreditCntReg <- mkReg(0); + let rawAxiStreamSlaveIfc <- mkFifoInToRawPcieAxiStreamSlave(cmplRequest.axiStreamFifoIn); + let rawAxiStreamMasterIfc <- mkFifoOutToRawPcieAxiStreamMaster(cmplComplete.axiStreamFifoOut); rule genCsrWriteReq; let wrReq = cmplRequest.csrWriteReqFifoOut.first; @@ -164,37 +179,37 @@ module mkDmaCompleter(DmaCompleter); rule procCsrReadResp; let req = csrRdReqStoreFifo.first; let resp = h2cCsrReadDataFifo.first; - cmplComplete.csrReadRespFifoIn(resp); - cmplComplete.csrReadReqFifoIn(req); + cmplComplete.csrReadRespFifoIn.enq(resp); + cmplComplete.csrReadReqFifoIn.enq(req); endrule rule npBackPressure; - if (csrRdReqDescriptorFifo.getCurSize == fromInteger(valueOf(TDiv#(CMPL_NPREQ_INFLIGHT_NUM,2)))) begin - npReqCreditCtrlReg <= valueOf(NP_CREDIT_NOCHANGE); + if (csrRdReqStoreFifo.getCurSize == fromInteger(valueOf(TDiv#(CMPL_NPREQ_INFLIGHT_NUM,2)))) begin + npReqCreditCtrlReg <= fromInteger(valueOf(NP_CREDIT_NOCHANGE)); end else begin - npReqCreditCtrlReg <= valueOf(NP_CREDIT_INCREMENT); + npReqCreditCtrlReg <= fromInteger(valueOf(NP_CREDIT_INCREMENT)); end endrule - interface rawCompleterRequest; - interface rawAxiStreamSlave = mkFifoInToRawPcieAxiStreamSlave#(cmplRequest.axiStreamFifoIn); + interface RawPcieCompleterRequest rawCompleterRequest; + interface rawAxiStreamSlave = rawAxiStreamSlaveIfc; method PcieNonPostedRequst nonPostedReqCreditIncrement = npReqCreditCtrlReg; method Action nonPostedReqCreditCnt(PcieNonPostedRequstCount nonPostedpReqCount); npReqCreditCntReg <= nonPostedpReqCount; endmethod endinterface - interface rawCompleterComplete; - interface rawAxiStreamSlave = mkFifoOutToRawPcieAxiStreamMaster#(cmplComplete.axiStreamFifoOut); + interface RawPcieCompleterComplete rawCompleterComplete; + interface rawAxiStreamMaster = rawAxiStreamMasterIfc; endinterface - interface h2cWrite; + interface DmaHostToCardWrite h2cWrite; interface dataFifoOut = convertFifoToFifoOut(h2cCsrWriteDataFifo); interface reqFifoOut = convertFifoToFifoOut(h2cCsrWriteReqFifo); endinterface - interface h2cRead; + interface DmaHostToCardRead h2cRead; interface reqFifoOut = convertFifoToFifoOut(h2cCsrReadReqFifo); interface dataFifoIn = convertFifoToFifoIn(h2cCsrReadDataFifo); endinterface @@ -202,8 +217,6 @@ module mkDmaCompleter(DmaCompleter); // TODO: get internal registers value method DmaCsrValue getRegisterValue(DmaCsrAddr addr); return 0; - method + endmethod endmodule - -module mkWriteReqTo \ No newline at end of file diff --git a/src/DmaController.bsv b/src/DmaController.bsv index dd680d4..c846ed4 100755 --- a/src/DmaController.bsv +++ b/src/DmaController.bsv @@ -23,8 +23,8 @@ interface DmaController#(numeric type dataWidth); endinterface module mkDmaController(DmaController); - DmaCompleter completer = mkDmaCompleter; - DmaRequester requester = mkDmaRequester; + DmaCompleter completer <- mkDmaCompleter; + DmaRequester requester <- mkDmaRequester; interface c2hWrite = requester.c2hWrite; interface c2hRead = requester.c2hRead; diff --git a/src/DmaRequester.bsv b/src/DmaRequester.bsv index 7a86abd..ef54591 100644 --- a/src/DmaRequester.bsv +++ b/src/DmaRequester.bsv @@ -11,7 +11,26 @@ interface Requester; interface RawPcieRequesterComplete rawRequesterComplete; endinterface -module mkRequester(Empty); +module mkRequester(Requester); + FIFOF#(DataStream) c2hWriteDataFifo <- mkFIFOF; + FIFOF#(DmaRequest) c2hWriteReqFifo <- mkFIFOF; + FIFOF#(DataStream) c2hReadDataFifo <- mkFIFOF; + FIFOF#(DmaRequest) c2hReadReqFifo <- mkFIFOF; + interface c2hWrite; + interface dataFifoOut = convertFifoToFifoOut(c2hWriteDataFifo); + interface reqFifoOut = convertFifoToFifoOut(c2hWriteReqFifo); + endinterface -endmodule \ No newline at end of file + interface c2hRead; + interface reqFifoOut = convertFifoToFifoOut(c2hReadReqFifo); + interface dataFifoIn = convertFifoToFifoIn(c2hReadDataFifo); + endinterface + + interface rawRequesterRequest; + endinterface + + interface rawRequesterComplete; + endinterface + +endmodule diff --git a/src/PcieDescriptorTypes.bsv b/src/PcieDescriptorTypes.bsv index 0435f3d..bcd9b22 100644 --- a/src/PcieDescriptorTypes.bsv +++ b/src/PcieDescriptorTypes.bsv @@ -8,7 +8,7 @@ typedef Bit#(1) ReserveBit1; typedef Bit#(2) ReserveBit2; typedef Bit#(6) ReserveBit6; -typedef 64 DES_CQ_DESCRIPTOR_WIDTH; +typedef 128 DES_CQ_DESCRIPTOR_WIDTH; typedef 3 DES_ATTR_WIDTH; typedef 3 DES_TC_WIDTH; typedef 6 DES_BAR_APERTURE_WIDTH; @@ -65,7 +65,7 @@ typedef struct { ReserveBit1 reserve0; Attributes attributes; TrafficClass trafficClass; - Bool completerIdEn + Bool completerIdEn; BusDeviceFunc completerId; Tag tag; // DW + 1 @@ -81,9 +81,7 @@ typedef struct { ReserveBit6 reserve3; AddrType addrType; LowerAddr lowerAddr; -} - - +} PcieCompleterCompleteDescriptor deriving(Bits, Eq, Bounded, FShow); // Pcie Tlp types of descriptor typedef 4'b0000 MEM_READ_REQ; diff --git a/src/PrimUtils.bsv b/src/PrimUtils.bsv index fe768e8..a2f6c40 100644 --- a/src/PrimUtils.bsv +++ b/src/PrimUtils.bsv @@ -20,69 +20,69 @@ endfunction function Data getDataLowBytes(Data data, DataBytePtr ptr); Data temp = 0; case(ptr) - 1 : temp[valueOf(BYTE_WIDTH)*1 -1:0] = Data'(data[valueOf(BYTE_WIDTH)*1 -1:0]); - 2 : temp[valueOf(BYTE_WIDTH)*2 -1:0] = Data'(data[valueOf(BYTE_WIDTH)*2 -1:0]); - 3 : temp[valueOf(BYTE_WIDTH)*3 -1:0] = Data'(data[valueOf(BYTE_WIDTH)*3 -1:0]); - 4 : temp[valueOf(BYTE_WIDTH)*4 -1:0] = Data'(data[valueOf(BYTE_WIDTH)*4 -1:0]); - 5 : temp[valueOf(BYTE_WIDTH)*5 -1:0] = Data'(data[valueOf(BYTE_WIDTH)*5 -1:0]); - 6 : temp[valueOf(BYTE_WIDTH)*6 -1:0] = Data'(data[valueOf(BYTE_WIDTH)*6 -1:0]); - 7 : temp[valueOf(BYTE_WIDTH)*7 -1:0] = Data'(data[valueOf(BYTE_WIDTH)*7 -1:0]); - 8 : temp[valueOf(BYTE_WIDTH)*8 -1:0] = Data'(data[valueOf(BYTE_WIDTH)*8 -1:0]); - 9 : temp[valueOf(BYTE_WIDTH)*9 -1:0] = Data'(data[valueOf(BYTE_WIDTH)*9 -1:0]); - 10: temp[valueOf(BYTE_WIDTH)*10-1:0] = Data'(data[valueOf(BYTE_WIDTH)*10-1:0]); - 11: temp[valueOf(BYTE_WIDTH)*11-1:0] = Data'(data[valueOf(BYTE_WIDTH)*11-1:0]); - 12: temp[valueOf(BYTE_WIDTH)*12-1:0] = Data'(data[valueOf(BYTE_WIDTH)*12-1:0]); - 13: temp[valueOf(BYTE_WIDTH)*13-1:0] = Data'(data[valueOf(BYTE_WIDTH)*13-1:0]); - 14: temp[valueOf(BYTE_WIDTH)*14-1:0] = Data'(data[valueOf(BYTE_WIDTH)*14-1:0]); - 15: temp[valueOf(BYTE_WIDTH)*15-1:0] = Data'(data[valueOf(BYTE_WIDTH)*15-1:0]); - 16: temp[valueOf(BYTE_WIDTH)*16-1:0] = Data'(data[valueOf(BYTE_WIDTH)*16-1:0]); - 17: temp[valueOf(BYTE_WIDTH)*17-1:0] = Data'(data[valueOf(BYTE_WIDTH)*17-1:0]); - 18: temp[valueOf(BYTE_WIDTH)*18-1:0] = Data'(data[valueOf(BYTE_WIDTH)*18-1:0]); - 19: temp[valueOf(BYTE_WIDTH)*19-1:0] = Data'(data[valueOf(BYTE_WIDTH)*19-1:0]); - 20: temp[valueOf(BYTE_WIDTH)*20-1:0] = Data'(data[valueOf(BYTE_WIDTH)*20-1:0]); - 21: temp[valueOf(BYTE_WIDTH)*21-1:0] = Data'(data[valueOf(BYTE_WIDTH)*21-1:0]); - 22: temp[valueOf(BYTE_WIDTH)*22-1:0] = Data'(data[valueOf(BYTE_WIDTH)*22-1:0]); - 23: temp[valueOf(BYTE_WIDTH)*23-1:0] = Data'(data[valueOf(BYTE_WIDTH)*23-1:0]); - 24: temp[valueOf(BYTE_WIDTH)*24-1:0] = Data'(data[valueOf(BYTE_WIDTH)*24-1:0]); - 25: temp[valueOf(BYTE_WIDTH)*25-1:0] = Data'(data[valueOf(BYTE_WIDTH)*25-1:0]); - 26: temp[valueOf(BYTE_WIDTH)*26-1:0] = Data'(data[valueOf(BYTE_WIDTH)*26-1:0]); - 27: temp[valueOf(BYTE_WIDTH)*27-1:0] = Data'(data[valueOf(BYTE_WIDTH)*27-1:0]); - 28: temp[valueOf(BYTE_WIDTH)*28-1:0] = Data'(data[valueOf(BYTE_WIDTH)*28-1:0]); - 29: temp[valueOf(BYTE_WIDTH)*29-1:0] = Data'(data[valueOf(BYTE_WIDTH)*29-1:0]); - 30: temp[valueOf(BYTE_WIDTH)*30-1:0] = Data'(data[valueOf(BYTE_WIDTH)*30-1:0]); - 31: temp[valueOf(BYTE_WIDTH)*31-1:0] = Data'(data[valueOf(BYTE_WIDTH)*31-1:0]); - 32: temp[valueOf(BYTE_WIDTH)*32-1:0] = Data'(data[valueOf(BYTE_WIDTH)*32-1:0]); - 33: temp[valueOf(BYTE_WIDTH)*33-1:0] = Data'(data[valueOf(BYTE_WIDTH)*33-1:0]); - 34: temp[valueOf(BYTE_WIDTH)*34-1:0] = Data'(data[valueOf(BYTE_WIDTH)*34-1:0]); - 35: temp[valueOf(BYTE_WIDTH)*35-1:0] = Data'(data[valueOf(BYTE_WIDTH)*35-1:0]); - 36: temp[valueOf(BYTE_WIDTH)*36-1:0] = Data'(data[valueOf(BYTE_WIDTH)*36-1:0]); - 37: temp[valueOf(BYTE_WIDTH)*37-1:0] = Data'(data[valueOf(BYTE_WIDTH)*37-1:0]); - 38: temp[valueOf(BYTE_WIDTH)*38-1:0] = Data'(data[valueOf(BYTE_WIDTH)*38-1:0]); - 39: temp[valueOf(BYTE_WIDTH)*39-1:0] = Data'(data[valueOf(BYTE_WIDTH)*39-1:0]); - 40: temp[valueOf(BYTE_WIDTH)*40-1:0] = Data'(data[valueOf(BYTE_WIDTH)*40-1:0]); - 41: temp[valueOf(BYTE_WIDTH)*41-1:0] = Data'(data[valueOf(BYTE_WIDTH)*41-1:0]); - 42: temp[valueOf(BYTE_WIDTH)*42-1:0] = Data'(data[valueOf(BYTE_WIDTH)*42-1:0]); - 43: temp[valueOf(BYTE_WIDTH)*43-1:0] = Data'(data[valueOf(BYTE_WIDTH)*43-1:0]); - 44: temp[valueOf(BYTE_WIDTH)*44-1:0] = Data'(data[valueOf(BYTE_WIDTH)*44-1:0]); - 45: temp[valueOf(BYTE_WIDTH)*45-1:0] = Data'(data[valueOf(BYTE_WIDTH)*45-1:0]); - 46: temp[valueOf(BYTE_WIDTH)*46-1:0] = Data'(data[valueOf(BYTE_WIDTH)*46-1:0]); - 47: temp[valueOf(BYTE_WIDTH)*47-1:0] = Data'(data[valueOf(BYTE_WIDTH)*47-1:0]); - 48: temp[valueOf(BYTE_WIDTH)*48-1:0] = Data'(data[valueOf(BYTE_WIDTH)*48-1:0]); - 49: temp[valueOf(BYTE_WIDTH)*49-1:0] = Data'(data[valueOf(BYTE_WIDTH)*49-1:0]); - 50: temp[valueOf(BYTE_WIDTH)*50-1:0] = Data'(data[valueOf(BYTE_WIDTH)*50-1:0]); - 51: temp[valueOf(BYTE_WIDTH)*51-1:0] = Data'(data[valueOf(BYTE_WIDTH)*51-1:0]); - 52: temp[valueOf(BYTE_WIDTH)*52-1:0] = Data'(data[valueOf(BYTE_WIDTH)*52-1:0]); - 53: temp[valueOf(BYTE_WIDTH)*53-1:0] = Data'(data[valueOf(BYTE_WIDTH)*53-1:0]); - 54: temp[valueOf(BYTE_WIDTH)*54-1:0] = Data'(data[valueOf(BYTE_WIDTH)*54-1:0]); - 55: temp[valueOf(BYTE_WIDTH)*55-1:0] = Data'(data[valueOf(BYTE_WIDTH)*55-1:0]); - 56: temp[valueOf(BYTE_WIDTH)*56-1:0] = Data'(data[valueOf(BYTE_WIDTH)*56-1:0]); - 57: temp[valueOf(BYTE_WIDTH)*57-1:0] = Data'(data[valueOf(BYTE_WIDTH)*57-1:0]); - 58: temp[valueOf(BYTE_WIDTH)*58-1:0] = Data'(data[valueOf(BYTE_WIDTH)*58-1:0]); - 59: temp[valueOf(BYTE_WIDTH)*59-1:0] = Data'(data[valueOf(BYTE_WIDTH)*59-1:0]); - 60: temp[valueOf(BYTE_WIDTH)*60-1:0] = Data'(data[valueOf(BYTE_WIDTH)*60-1:0]); - 61: temp[valueOf(BYTE_WIDTH)*61-1:0] = Data'(data[valueOf(BYTE_WIDTH)*61-1:0]); - 62: temp[valueOf(BYTE_WIDTH)*62-1:0] = Data'(data[valueOf(BYTE_WIDTH)*62-1:0]); - 63: temp[valueOf(BYTE_WIDTH)*63-1:0] = Data'(data[valueOf(BYTE_WIDTH)*63-1:0]); + 1 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*1 -1:0])); + 2 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*2 -1:0])); + 3 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*3 -1:0])); + 4 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*4 -1:0])); + 5 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*5 -1:0])); + 6 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*6 -1:0])); + 7 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*7 -1:0])); + 8 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*8 -1:0])); + 9 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*9 -1:0])); + 10: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*10-1:0])); + 11: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*11-1:0])); + 12: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*12-1:0])); + 13: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*13-1:0])); + 14: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*14-1:0])); + 15: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*15-1:0])); + 16: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*16-1:0])); + 17: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*17-1:0])); + 18: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*18-1:0])); + 19: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*19-1:0])); + 20: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*20-1:0])); + 21: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*21-1:0])); + 22: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*22-1:0])); + 23: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*23-1:0])); + 24: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*24-1:0])); + 25: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*25-1:0])); + 26: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*26-1:0])); + 27: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*27-1:0])); + 28: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*28-1:0])); + 29: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*29-1:0])); + 30: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*30-1:0])); + 31: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*31-1:0])); + 32: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*32-1:0])); + 33: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*33-1:0])); + 34: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*34-1:0])); + 35: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*35-1:0])); + 36: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*36-1:0])); + 37: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*37-1:0])); + 38: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*38-1:0])); + 39: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*39-1:0])); + 40: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*40-1:0])); + 41: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*41-1:0])); + 42: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*42-1:0])); + 43: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*43-1:0])); + 44: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*44-1:0])); + 45: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*45-1:0])); + 46: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*46-1:0])); + 47: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*47-1:0])); + 48: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*48-1:0])); + 49: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*49-1:0])); + 50: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*50-1:0])); + 51: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*51-1:0])); + 52: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*52-1:0])); + 53: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*53-1:0])); + 54: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*54-1:0])); + 55: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*55-1:0])); + 56: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*56-1:0])); + 57: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*57-1:0])); + 58: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*58-1:0])); + 59: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*59-1:0])); + 60: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*60-1:0])); + 61: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*61-1:0])); + 62: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*62-1:0])); + 63: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*63-1:0])); default: temp = 0; endcase return temp; @@ -91,69 +91,69 @@ endfunction function Data getDataHighBytes(Data data, DataBytePtr ptr); Data temp = 0; case(ptr) - 1 : temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*1 ] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*1 ]); - 2 : temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*2 ] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*2 ]); - 3 : temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*3 ] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*3 ]); - 4 : temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*4 ] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*4 ]); - 5 : temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*5 ] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*5 ]); - 6 : temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*6 ] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*6 ]); - 7 : temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*7 ] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*7 ]); - 8 : temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*8 ] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*8 ]); - 9 : temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*9 ] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*9 ]); - 10: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*10] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*10]); - 11: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*11] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*11]); - 12: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*12] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*12]); - 13: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*13] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*13]); - 14: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*14] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*14]); - 15: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*15] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*15]); - 16: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*16] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*16]); - 17: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*17] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*17]); - 18: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*18] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*18]); - 19: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*19] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*19]); - 20: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*20] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*20]); - 21: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*21] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*21]); - 22: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*22] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*22]); - 23: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*23] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*23]); - 24: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*24] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*24]); - 25: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*25] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*25]); - 26: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*26] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*26]); - 27: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*27] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*27]); - 28: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*28] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*28]); - 29: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*29] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*29]); - 30: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*30] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*30]); - 31: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*31] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*31]); - 32: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*32] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*32]); - 33: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*33] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*33]); - 34: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*34] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*34]); - 35: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*35] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*35]); - 36: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*36] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*36]); - 37: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*37] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*37]); - 38: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*38] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*38]); - 39: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*39] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*39]); - 40: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*40] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*40]); - 41: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*41] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*41]); - 42: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*42] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*42]); - 43: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*43] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*43]); - 44: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*44] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*44]); - 45: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*45] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*45]); - 46: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*46] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*46]); - 47: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*47] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*47]); - 48: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*48] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*48]); - 49: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*49] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*49]); - 50: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*50] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*50]); - 51: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*51] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*51]); - 52: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*52] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*52]); - 53: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*53] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*53]); - 54: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*54] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*54]); - 55: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*55] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*55]); - 56: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*56] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*56]); - 57: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*57] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*57]); - 58: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*58] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*58]); - 59: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*59] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*59]); - 60: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*60] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*60]); - 61: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*61] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*61]); - 62: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*62] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*62]); - 63: temp[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*63] = Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*63]); + 1 : temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*1 ])); + 2 : temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*2 ])); + 3 : temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*3 ])); + 4 : temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*4 ])); + 5 : temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*5 ])); + 6 : temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*6 ])); + 7 : temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*7 ])); + 8 : temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*8 ])); + 9 : temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*9 ])); + 10: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*10])); + 11: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*11])); + 12: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*12])); + 13: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*13])); + 14: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*14])); + 15: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*15])); + 16: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*16])); + 17: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*17])); + 18: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*18])); + 19: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*19])); + 20: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*20])); + 21: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*21])); + 22: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*22])); + 23: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*23])); + 24: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*24])); + 25: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*25])); + 26: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*26])); + 27: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*27])); + 28: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*28])); + 29: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*29])); + 30: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*30])); + 31: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*31])); + 32: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*32])); + 33: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*33])); + 34: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*34])); + 35: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*35])); + 36: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*36])); + 37: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*37])); + 38: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*38])); + 39: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*39])); + 40: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*40])); + 41: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*41])); + 42: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*42])); + 43: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*43])); + 44: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*44])); + 45: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*45])); + 46: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*46])); + 47: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*47])); + 48: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*48])); + 49: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*49])); + 50: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*50])); + 51: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*51])); + 52: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*52])); + 53: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*53])); + 54: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*54])); + 55: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*55])); + 56: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*56])); + 57: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*57])); + 58: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*58])); + 59: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*59])); + 60: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*60])); + 61: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*61])); + 62: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*62])); + 63: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*63])); default: temp = data; endcase return temp; @@ -162,69 +162,69 @@ endfunction function DmaMemAddr getAddrLowBits(DmaMemAddr addr, Bit#(TLog#(DMA_MEM_ADDR_WIDTH)) ptr); DmaMemAddr temp = 0; case(ptr) - 1 : temp[1 -1:0] = DmaMemAddr'(addr[1 -1:0]); - 2 : temp[2 -1:0] = DmaMemAddr'(addr[2 -1:0]); - 3 : temp[3 -1:0] = DmaMemAddr'(addr[3 -1:0]); - 4 : temp[4 -1:0] = DmaMemAddr'(addr[4 -1:0]); - 5 : temp[5 -1:0] = DmaMemAddr'(addr[5 -1:0]); - 6 : temp[6 -1:0] = DmaMemAddr'(addr[6 -1:0]); - 7 : temp[7 -1:0] = DmaMemAddr'(addr[7 -1:0]); - 8 : temp[8 -1:0] = DmaMemAddr'(addr[8 -1:0]); - 9 : temp[9 -1:0] = DmaMemAddr'(addr[9 -1:0]); - 10: temp[10-1:0] = DmaMemAddr'(addr[10-1:0]); - 11: temp[11-1:0] = DmaMemAddr'(addr[11-1:0]); - 12: temp[12-1:0] = DmaMemAddr'(addr[12-1:0]); - 13: temp[13-1:0] = DmaMemAddr'(addr[13-1:0]); - 14: temp[14-1:0] = DmaMemAddr'(addr[14-1:0]); - 15: temp[15-1:0] = DmaMemAddr'(addr[15-1:0]); - 16: temp[16-1:0] = DmaMemAddr'(addr[16-1:0]); - 17: temp[17-1:0] = DmaMemAddr'(addr[17-1:0]); - 18: temp[18-1:0] = DmaMemAddr'(addr[18-1:0]); - 19: temp[19-1:0] = DmaMemAddr'(addr[19-1:0]); - 20: temp[20-1:0] = DmaMemAddr'(addr[20-1:0]); - 21: temp[21-1:0] = DmaMemAddr'(addr[21-1:0]); - 22: temp[22-1:0] = DmaMemAddr'(addr[22-1:0]); - 23: temp[23-1:0] = DmaMemAddr'(addr[23-1:0]); - 24: temp[24-1:0] = DmaMemAddr'(addr[24-1:0]); - 25: temp[25-1:0] = DmaMemAddr'(addr[25-1:0]); - 26: temp[26-1:0] = DmaMemAddr'(addr[26-1:0]); - 27: temp[27-1:0] = DmaMemAddr'(addr[27-1:0]); - 28: temp[28-1:0] = DmaMemAddr'(addr[28-1:0]); - 29: temp[29-1:0] = DmaMemAddr'(addr[29-1:0]); - 30: temp[30-1:0] = DmaMemAddr'(addr[30-1:0]); - 31: temp[31-1:0] = DmaMemAddr'(addr[31-1:0]); - 32: temp[32-1:0] = DmaMemAddr'(addr[32-1:0]); - 33: temp[33-1:0] = DmaMemAddr'(addr[33-1:0]); - 34: temp[34-1:0] = DmaMemAddr'(addr[34-1:0]); - 35: temp[35-1:0] = DmaMemAddr'(addr[35-1:0]); - 36: temp[36-1:0] = DmaMemAddr'(addr[36-1:0]); - 37: temp[37-1:0] = DmaMemAddr'(addr[37-1:0]); - 38: temp[38-1:0] = DmaMemAddr'(addr[38-1:0]); - 39: temp[39-1:0] = DmaMemAddr'(addr[39-1:0]); - 40: temp[40-1:0] = DmaMemAddr'(addr[40-1:0]); - 41: temp[41-1:0] = DmaMemAddr'(addr[41-1:0]); - 42: temp[42-1:0] = DmaMemAddr'(addr[42-1:0]); - 43: temp[43-1:0] = DmaMemAddr'(addr[43-1:0]); - 44: temp[44-1:0] = DmaMemAddr'(addr[44-1:0]); - 45: temp[45-1:0] = DmaMemAddr'(addr[45-1:0]); - 46: temp[46-1:0] = DmaMemAddr'(addr[46-1:0]); - 47: temp[47-1:0] = DmaMemAddr'(addr[47-1:0]); - 48: temp[48-1:0] = DmaMemAddr'(addr[48-1:0]); - 49: temp[49-1:0] = DmaMemAddr'(addr[49-1:0]); - 50: temp[50-1:0] = DmaMemAddr'(addr[50-1:0]); - 51: temp[51-1:0] = DmaMemAddr'(addr[51-1:0]); - 52: temp[52-1:0] = DmaMemAddr'(addr[52-1:0]); - 53: temp[53-1:0] = DmaMemAddr'(addr[53-1:0]); - 54: temp[54-1:0] = DmaMemAddr'(addr[54-1:0]); - 55: temp[55-1:0] = DmaMemAddr'(addr[55-1:0]); - 56: temp[56-1:0] = DmaMemAddr'(addr[56-1:0]); - 57: temp[57-1:0] = DmaMemAddr'(addr[57-1:0]); - 58: temp[58-1:0] = DmaMemAddr'(addr[58-1:0]); - 59: temp[59-1:0] = DmaMemAddr'(addr[59-1:0]); - 60: temp[60-1:0] = DmaMemAddr'(addr[60-1:0]); - 61: temp[61-1:0] = DmaMemAddr'(addr[61-1:0]); - 62: temp[62-1:0] = DmaMemAddr'(addr[62-1:0]); - 63: temp[63-1:0] = DmaMemAddr'(addr[63-1:0]); + 1 : temp = zeroExtend(DmaMemAddr'(addr[1 -1:0])); + 2 : temp = zeroExtend(DmaMemAddr'(addr[2 -1:0])); + 3 : temp = zeroExtend(DmaMemAddr'(addr[3 -1:0])); + 4 : temp = zeroExtend(DmaMemAddr'(addr[4 -1:0])); + 5 : temp = zeroExtend(DmaMemAddr'(addr[5 -1:0])); + 6 : temp = zeroExtend(DmaMemAddr'(addr[6 -1:0])); + 7 : temp = zeroExtend(DmaMemAddr'(addr[7 -1:0])); + 8 : temp = zeroExtend(DmaMemAddr'(addr[8 -1:0])); + 9 : temp = zeroExtend(DmaMemAddr'(addr[9 -1:0])); + 10: temp = zeroExtend(DmaMemAddr'(addr[10-1:0])); + 11: temp = zeroExtend(DmaMemAddr'(addr[11-1:0])); + 12: temp = zeroExtend(DmaMemAddr'(addr[12-1:0])); + 13: temp = zeroExtend(DmaMemAddr'(addr[13-1:0])); + 14: temp = zeroExtend(DmaMemAddr'(addr[14-1:0])); + 15: temp = zeroExtend(DmaMemAddr'(addr[15-1:0])); + 16: temp = zeroExtend(DmaMemAddr'(addr[16-1:0])); + 17: temp = zeroExtend(DmaMemAddr'(addr[17-1:0])); + 18: temp = zeroExtend(DmaMemAddr'(addr[18-1:0])); + 19: temp = zeroExtend(DmaMemAddr'(addr[19-1:0])); + 20: temp = zeroExtend(DmaMemAddr'(addr[20-1:0])); + 21: temp = zeroExtend(DmaMemAddr'(addr[21-1:0])); + 22: temp = zeroExtend(DmaMemAddr'(addr[22-1:0])); + 23: temp = zeroExtend(DmaMemAddr'(addr[23-1:0])); + 24: temp = zeroExtend(DmaMemAddr'(addr[24-1:0])); + 25: temp = zeroExtend(DmaMemAddr'(addr[25-1:0])); + 26: temp = zeroExtend(DmaMemAddr'(addr[26-1:0])); + 27: temp = zeroExtend(DmaMemAddr'(addr[27-1:0])); + 28: temp = zeroExtend(DmaMemAddr'(addr[28-1:0])); + 29: temp = zeroExtend(DmaMemAddr'(addr[29-1:0])); + 30: temp = zeroExtend(DmaMemAddr'(addr[30-1:0])); + 31: temp = zeroExtend(DmaMemAddr'(addr[31-1:0])); + 32: temp = zeroExtend(DmaMemAddr'(addr[32-1:0])); + 33: temp = zeroExtend(DmaMemAddr'(addr[33-1:0])); + 34: temp = zeroExtend(DmaMemAddr'(addr[34-1:0])); + 35: temp = zeroExtend(DmaMemAddr'(addr[35-1:0])); + 36: temp = zeroExtend(DmaMemAddr'(addr[36-1:0])); + 37: temp = zeroExtend(DmaMemAddr'(addr[37-1:0])); + 38: temp = zeroExtend(DmaMemAddr'(addr[38-1:0])); + 39: temp = zeroExtend(DmaMemAddr'(addr[39-1:0])); + 40: temp = zeroExtend(DmaMemAddr'(addr[40-1:0])); + 41: temp = zeroExtend(DmaMemAddr'(addr[41-1:0])); + 42: temp = zeroExtend(DmaMemAddr'(addr[42-1:0])); + 43: temp = zeroExtend(DmaMemAddr'(addr[43-1:0])); + 44: temp = zeroExtend(DmaMemAddr'(addr[44-1:0])); + 45: temp = zeroExtend(DmaMemAddr'(addr[45-1:0])); + 46: temp = zeroExtend(DmaMemAddr'(addr[46-1:0])); + 47: temp = zeroExtend(DmaMemAddr'(addr[47-1:0])); + 48: temp = zeroExtend(DmaMemAddr'(addr[48-1:0])); + 49: temp = zeroExtend(DmaMemAddr'(addr[49-1:0])); + 50: temp = zeroExtend(DmaMemAddr'(addr[50-1:0])); + 51: temp = zeroExtend(DmaMemAddr'(addr[51-1:0])); + 52: temp = zeroExtend(DmaMemAddr'(addr[52-1:0])); + 53: temp = zeroExtend(DmaMemAddr'(addr[53-1:0])); + 54: temp = zeroExtend(DmaMemAddr'(addr[54-1:0])); + 55: temp = zeroExtend(DmaMemAddr'(addr[55-1:0])); + 56: temp = zeroExtend(DmaMemAddr'(addr[56-1:0])); + 57: temp = zeroExtend(DmaMemAddr'(addr[57-1:0])); + 58: temp = zeroExtend(DmaMemAddr'(addr[58-1:0])); + 59: temp = zeroExtend(DmaMemAddr'(addr[59-1:0])); + 60: temp = zeroExtend(DmaMemAddr'(addr[60-1:0])); + 61: temp = zeroExtend(DmaMemAddr'(addr[61-1:0])); + 62: temp = zeroExtend(DmaMemAddr'(addr[62-1:0])); + 63: temp = zeroExtend(DmaMemAddr'(addr[63-1:0])); default: temp = 0; endcase return temp; diff --git a/test/TestDmaCompleter.bsv b/test/TestDmaCompleter.bsv index e69de29..c2b36a2 100644 --- a/test/TestDmaCompleter.bsv +++ b/test/TestDmaCompleter.bsv @@ -0,0 +1,97 @@ + +import FIFOF::*; +import Vector::*; +import FShow::*; + +import SemiFifo::*; +import PrimUtils::*; +import PcieAxiStreamTypes::*; +import PcieTypes::*; +import PcieDescriptorTypes::*; +import DmaTypes::*; +import DmaCompleter::*; + +typedef 'hABCD TEST_DATA; +typedef 'h1234 TEST_ADDR; + +typedef 2'b10 TRANSLATED_ADDR_TYPE; + +function PcieTlpCtlIsEopCommon getEmptyEop(); + return PcieTlpCtlIsEopCommon { + isEopPtrs: replicate(0), + isEop : 0 + }; +endfunction + +function PcieTlpCtlIsSopCommon getEmptySop(); + return PcieTlpCtlIsSopCommon { + isSopPtrs: replicate(0), + isSop : 0 + }; +endfunction + +function CmplReqAxiStream genPseudoHostWriteRequest(); + let descriptor = PcieCompleterRequestDescriptor { + reserve0 : 0, + attributes : 0, + trafficClass : 0, + barAperture : fromInteger(valueOf(DMA_CSR_ADDR_WIDTH)), + barId : 0, + targetFunction: 0, + tag : 0, + requesterId : fromInteger(valueOf(TEST_DATA)), + reserve1 : 0, + reqType : fromInteger(valueOf(MEM_WRITE_REQ)), + dwordCnt : 1, + address : fromInteger(valueOf(TEST_ADDR)), + addrType : fromInteger(valueOf(TRANSLATED_ADDR_TYPE)) + }; + Data data = 0; + data = data | zeroExtend(pack(descriptor)); + Data value = fromInteger(valueOf(TEST_DATA)); + data = data | (value << valueOf(DES_CQ_DESCRIPTOR_WIDTH)); + let sideBand = PcieCompleterRequestSideBandFrame { + parity : 0, + tphSteeringTag : 0, + tphType : 0, + tphPresent : 0, + discontinue : False, + isEop : getEmptyEop, + isSop : getEmptySop, + dataByteEn : 'hFFF, + lastByteEn : 'hF, + firstByteEn : 'hF + }; + return CmplReqAxiStream { + tData : data, + tKeep : 'h3FF, + tLast : True, + tUser : pack(sideBand) + }; +endfunction + +(* doc = "testcase" *) +module mkTestDmaCompleterRequest(Empty); + CompleterRequest dut <- mkCompleterRequest; + Reg#(Bool) isInitReg <- mkReg(False); + + rule testInit if (!isInitReg); + $display("INFO: Start CompleterRequest test"); + let testAxiStram = genPseudoHostWriteRequest; + dut.axiStreamFifoIn.enq(testAxiStram); + isInitReg <= True; + endrule + + rule testOutput if (isInitReg); + dut.csrWriteReqFifoOut.deq; + let wrReq = dut.csrWriteReqFifoOut.first; + immAssert( + (wrReq.addr == fromInteger(valueOf(TEST_ADDR)) && wrReq.value == fromInteger(valueOf(TEST_DATA))), + "wrReq test @ mkTestDmaCompleterRequest", + fshow(wrReq) + ); + $display("INFO: Pass CompleterRequest test"); + $finish(); + endrule + +endmodule \ No newline at end of file From 27d3953a792bde1e62f2695fd3d633d8f7aba994 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Tue, 23 Jul 2024 17:36:39 +0800 Subject: [PATCH 26/53] Add TestDmacVivado for simulation with IP --- backend/Makefile | 4 +- src/DmaCompleter.bsv | 7 +- src/DmaController.bsv | 25 +-- src/DmaRequester.bsv | 91 ++++++++-- src/PcieConfigurator.bsv | 372 ++++++++++++++++++++++++++++++++++++++ src/PcieTypes.bsv | 221 +++++++++++++++++++--- test/TestDmaCompleter.bsv | 51 +++++- test/TestDmacVivado.bsv | 63 +++++++ 8 files changed, 777 insertions(+), 57 deletions(-) create mode 100644 src/PcieConfigurator.bsv create mode 100644 test/TestDmacVivado.bsv diff --git a/backend/Makefile b/backend/Makefile index 01c68f1..b629e29 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -8,8 +8,8 @@ OUTPUTDIR ?= output LOGFILE ?= run.log RUNTOPHASE ?= place # synth place route all PARTNAME = xcvu13p-fhgb2104-2-i -TARGETFILE ?= ../src/StreamUtils.bsv -TOPMODULE ?= mkStreamConcat +TARGETFILE ?= ../test/TestDmacVivado.bsv +TOPMODULE ?= mkTestDmacCsrWrRdLoop export TOP = $(TOPMODULE) export RTL = $(VLOGDIR) diff --git a/src/DmaCompleter.bsv b/src/DmaCompleter.bsv index f36d9e1..aa2afdf 100644 --- a/src/DmaCompleter.bsv +++ b/src/DmaCompleter.bsv @@ -37,8 +37,8 @@ typedef struct { } CsrReadReq deriving(Bits, Eq, Bounded, FShow); interface DmaCompleter; - interface RawPcieCompleterRequest rawCompleterRequest; - interface RawPcieCompleterComplete rawCompleterComplete; + (* prefix = "" *) interface RawPcieCompleterRequest rawCompleterRequest; + (* prefix = "" *) interface RawPcieCompleterComplete rawCompleterComplete; interface DmaHostToCardWrite h2cWrite; interface DmaHostToCardRead h2cRead; method DmaCsrValue getRegisterValue(DmaCsrAddr addr); @@ -58,7 +58,6 @@ endinterface // PcieCompleter does not support straddle mode now // The completer is designed only for CSR Rd/Wr, and will ignore any len>32bit requests -(* synthesize *) module mkCompleterRequest(CompleterRequest); FIFOF#(CmplReqAxiStream) inFifo <- mkFIFOF; FIFOF#(CsrWriteReq) wrReqFifo <- mkFIFOF; @@ -98,6 +97,7 @@ module mkCompleterRequest(CompleterRequest); isInPacket <= !axiStream.tLast; if (!isInPacket) begin let descriptor = getDescriptorFromFirstBeat(axiStream); + // TODO: parity check! case (descriptor.reqType) fromInteger(valueOf(MEM_WRITE_REQ)): begin if (descriptor.dwordCnt == fromInteger(valueOf(IDEA_DWORD_CNT_OF_CSR)) && isFirstBytesAllValid(sideBand)) begin @@ -132,7 +132,6 @@ module mkCompleterRequest(CompleterRequest); interface csrReadReqFifoOut = convertFifoToFifoOut(rdReqFifo); endmodule -(* synthesize *) module mkCompleterComplete(CompleterComplete); FIFOF#(CmplCmplAxiStream) outFifo <- mkFIFOF; FIFOF#(CsrReadResp) rdRespFifo <- mkFIFOF; diff --git a/src/DmaController.bsv b/src/DmaController.bsv index c846ed4..e38bfb1 100755 --- a/src/DmaController.bsv +++ b/src/DmaController.bsv @@ -1,11 +1,12 @@ import FIFOF::*; import PcieTypes::*; +import PcieConfigurator::*; import DmaTypes::*; import DmaCompleter::*; import DmaRequester::*; -interface DmaController#(numeric type dataWidth); +interface DmaController; // Requester interfaces, where the Card serve as the Master interface DmaCardToHostWrite c2hWrite; interface DmaCardToHostRead c2hRead; @@ -15,16 +16,14 @@ interface DmaController#(numeric type dataWidth); interface DmaHostToCardRead h2cRead; // Raw PCIe interfaces, connected to the Xilinx PCIe IP - interface RawPcieRequesterRequest pcieRequesterRequest; - interface RawPcieRequesterComplete pcieRequesterComplete; - interface RawPcieCompleterRequest pcieCompleterRequest; - interface RawPcieCompleterComplete pcieCompleterComplete; - interface RawPcieConfiguration pcieConfiguration; + interface RawXilinxPcieIp rawPcie; endinterface +(* synthesize *) module mkDmaController(DmaController); DmaCompleter completer <- mkDmaCompleter; DmaRequester requester <- mkDmaRequester; + PcieConfigurator pcieConfigurator <- mkPcieConfigurator; interface c2hWrite = requester.c2hWrite; interface c2hRead = requester.c2hRead; @@ -32,9 +31,11 @@ module mkDmaController(DmaController); interface h2cWrite = completer.h2cWrite; interface h2cRead = completer.h2cRead; - interface pcieRequesterRequest = requester.rawRequesterRequest; - interface pcieRequesterComplete = requester.rawRequesterComplete; - interface pcieCompleterRequest = completer.rawCompleterRequest; - interface pcieCompleterComplete = completer.rawCompleterComplete; - -endmodule \ No newline at end of file + interface RawXilinxPcieIp rawPcie; + interface requesterRequest = requester.rawRequesterRequest; + interface requesterComplete = requester.rawRequesterComplete; + interface completerRequest = completer.rawCompleterRequest; + interface completerComplete = completer.rawCompleterComplete; + interface configuration = pcieConfigurator.rawConfiguration; + endinterface +endmodule diff --git a/src/DmaRequester.bsv b/src/DmaRequester.bsv index ef54591..10e56b0 100644 --- a/src/DmaRequester.bsv +++ b/src/DmaRequester.bsv @@ -1,36 +1,103 @@ +import FIFOF::*; +import SemiFifo::*; import PcieTypes::*; import PcieAxiStreamTypes::*; import PcieDescriptorTypes::*; import DmaTypes::*; -interface Requester; +typedef PcieAxiStream#(PCIE_REQUESTER_REQUEST_TUSER_WIDTH) ReqReqAxiStream; +typedef PcieAxiStream#(PCIE_REQUESTER_COMPLETE_TUSER_WIDTH) ReqCmplAxiStream; + +interface DmaRequester; interface DmaCardToHostWrite c2hWrite; interface DmaCardToHostRead c2hRead; - interface RawPcieRequesterRequest rawRequesterRequest; - interface RawPcieRequesterComplete rawRequesterComplete; + (* prefix = "" *) interface RawPcieRequesterRequest rawRequesterRequest; + (* prefix = "" *) interface RawPcieRequesterComplete rawRequesterComplete; +endinterface + +interface RequesterRequest; + interface FifoIn#(DataStream) wrDataFifoIn; + interface FifoIn#(DmaRequest) wrReqFifoIn; + interface FifoIn#(DmaRequest) rdReqFifoIn; + interface FifoOut#(ReqReqAxiStream) axiStreamFifoOut; +endinterface + +interface RequesterComplete; + interface FifoIn#(DmaRequest) rdReqFifoIn; + interface FifoOut#(DataStream) rdDataFifoOut; + interface FifoIn#(ReqCmplAxiStream) axiStreamFifoIn; endinterface -module mkRequester(Requester); +module mkRequesterRequest(RequesterRequest); + FIFOF#(DataStream) wrDataInFifo <- mkFIFOF; + FIFOF#(DmaRequest) wrReqInFifo <- mkFIFOF; + FIFOF#(DmaRequest) rdReqInFifo <- mkFIFOF; + FIFOF#(ReqReqAxiStream) axiStreamOutFifo <- mkFIFOF; + + // TODO: RQ Logic + + interface wrDataFifoIn = convertFifoToFifoIn(wrDataInFifo); + interface wrReqFifoIn = convertFifoToFifoIn(wrReqInFifo); + interface rdReqFifoIn = convertFifoToFifoIn(rdReqInFifo); + interface axiStreamFifoOut = convertFifoToFifoOut(axiStreamOutFifo); +endmodule + +module mkRequesterComplete(RequesterComplete); + FIFOF#(DataStream) rdDataOutFifo <- mkFIFOF; + FIFOF#(DmaRequest) rdReqInFifo <- mkFIFOF; + FIFOF#(ReqCmplAxiStream) axiStreamInFifo <- mkFIFOF; + + // TODO: RC Logic + + interface rdReqFifoIn = convertFifoToFifoIn(rdReqInFifo); + interface rdDataFifoOut = convertFifoToFifoOut(rdDataOutFifo); + interface axiStreamFifoIn = convertFifoToFifoIn(axiStreamInFifo); +endmodule + +(* synthesize *) +module mkDmaRequester(DmaRequester); + RequesterRequest reqRequest <- mkRequesterRequest; + RequesterComplete reqComplete <- mkRequesterComplete; + FIFOF#(DataStream) c2hWriteDataFifo <- mkFIFOF; FIFOF#(DmaRequest) c2hWriteReqFifo <- mkFIFOF; FIFOF#(DataStream) c2hReadDataFifo <- mkFIFOF; FIFOF#(DmaRequest) c2hReadReqFifo <- mkFIFOF; - interface c2hWrite; - interface dataFifoOut = convertFifoToFifoOut(c2hWriteDataFifo); - interface reqFifoOut = convertFifoToFifoOut(c2hWriteReqFifo); + let rawAxiStreamSlaveIfc <- mkFifoInToRawPcieAxiStreamSlave(reqComplete.axiStreamFifoIn); + let rawAxiStreamMasterIfc <- mkFifoOutToRawPcieAxiStreamMaster(reqRequest.axiStreamFifoOut); + + interface DmaCardToHostWrite c2hWrite; + interface dataFifoIn = convertFifoToFifoIn(c2hWriteDataFifo); + interface reqFifoIn = convertFifoToFifoIn(c2hWriteReqFifo); + // TODO: isDone need assertion + method Bool isDone = True; endinterface - interface c2hRead; - interface reqFifoOut = convertFifoToFifoOut(c2hReadReqFifo); - interface dataFifoIn = convertFifoToFifoIn(c2hReadDataFifo); + interface DmaCardToHostRead c2hRead; + interface reqFifoIn = convertFifoToFifoIn(c2hReadReqFifo); + interface dataFifoOut = convertFifoToFifoOut(c2hReadDataFifo); endinterface - interface rawRequesterRequest; + interface RawPcieRequesterRequest rawRequesterRequest; + interface rawAxiStreamMaster = rawAxiStreamMasterIfc; + method Action pcieProgressTrack( + Bool tagValid0, + Bool tagValid1, + PcieRqTag tag0, + PcieRqTag tag1, + Bool seqNumValid0, + Bool seqNumValid1, + PcieRqSeqNum seqNum0, + PcieRqSeqNum seqNum1 + ); + // Not support progress track now + endmethod endinterface - interface rawRequesterComplete; + interface RawPcieRequesterComplete rawRequesterComplete; + interface rawAxiStreamSlave = rawAxiStreamSlaveIfc; endinterface endmodule diff --git a/src/PcieConfigurator.bsv b/src/PcieConfigurator.bsv new file mode 100644 index 0000000..78474a9 --- /dev/null +++ b/src/PcieConfigurator.bsv @@ -0,0 +1,372 @@ + +import PcieTypes::*; +import PcieAxiStreamTypes::*; + +typedef 256 PCIE_CFG_VF_FLR_INPROC_EXTEND_WIDTH; + +interface PcieConfigurator; + interface RawPcieConfiguration rawConfiguration; + // TODO: translate raw Ifcs to bluespec style Get Ifcs + method PcieCfgLtssmState getPcieLtssmState(); +endinterface + +module mkPcieConfigurator(PcieConfigurator); + // TODO: the powerStateChangeAck must waitng for completing Done + Reg#(Bool) powerStateChangeIntrReg <- mkReg(False); + + // Here has a 2-stage pipeline for FLR, according to the Xilinx PCIe Example Design + // Reg0 means stage0, and Reg1 means stage1 + Reg#(PcieCfgFlrDone) cfgFlrDoneReg0 <- mkReg(0); + Reg#(PcieCfgFlrDone) cfgFlrDoneReg1 <- mkReg(0); + Reg#(PcieCfgVFFlrFuncNum) cfgVFFlrFuncNumReg <- mkReg(0); + Reg#(PcieCfgVFFlrFuncNum) cfgVFFlrFuncNumReg1 <- mkReg(0); + Reg#(Bool) cfgVFFlrDoneReg1 <- mkReg(False); + Reg#(Bit#(PCIE_CFG_VF_FLR_INPROC_EXTEND_WIDTH)) cfgVfFlrInprocReg0 <- mkReg(0); + + rule functionLevelRst; + cfgVFFlrFuncNumReg <= cfgVFFlrFuncNumReg + 1; + cfgFlrDoneReg1 <= cfgFlrDoneReg0; + cfgVFFlrDoneReg1 <= unpack(cfgVfFlrInprocReg0[cfgVFFlrFuncNumReg]); + cfgVFFlrFuncNumReg1 <= cfgVFFlrFuncNumReg; + endrule + + interface RawPcieConfiguration rawConfiguration; + + // not use mgmt + interface RawPcieCfgMgmt mgmt; + method PcieCfgMgmtAddr addr; + return 0; + endmethod + + method PcieCfgMgmtByteEn byteEn; + return 0; + endmethod + + method Bool debugAccess; + return False; + endmethod + + method PcieCfgMgmtFuncNum funcNum; + return 0; + endmethod + + method Bool read; + return False; + endmethod + + method PCieCfgMgmtData writeData; + return 0; + endmethod + + method Bool write; + return False; + endmethod + + method Action getResp( + PCieCfgMgmtData cfgMgmtRdData, + Bool cfgMgmtRdWrDone); + endmethod + endinterface + + // assign to 0 + interface RawPcieCfgPm pm; + method Bool aspmL1EntryReject; + return False; + endmethod + method Bool aspmL0EntryDisable; + return False; + endmethod + endinterface + + // Doesn't support msi now + interface RawPcieCfgMsi msi; + method PcieCfgMsiInt msiInt; + return 0; + endmethod + + method PcieCfgMsiFuncNum funcNum; + return 0; + endmethod + + method PcieCfgMsiPendingStatus pendingStatus; + return 0; + endmethod + + method PcieCfgMsiPendingStatusFuncNum pendingStatusFuncNum; + return 0; + endmethod + + method Bool pendingStatusDataEn; + return False; + endmethod + + method PcieCfgMsiSel sel; + return 0; + endmethod + + method PcieCfgMsiAttr attr; + return 0; + endmethod + + method Bool tphPresent; + return False; + endmethod + + method PcieCfgMsiTphType tphType; + return 0; + endmethod + + method PcieCfgMsiTphStTag tphStTag; + return 0; + endmethod + + method Action getMsiSignals( + Bool msiEn, + Bool msiSent, + Bool msiFail, + PcieCfgMsiMmEn msiMmEn, + Bool maskUpdate, + PcieCfgMsiData data); + endmethod + endinterface + + // Only for Legacy Devices + interface RawPcieCfgInterrupt interrupt; + method PcieCfgIntrInt intrInt; + return 0; + endmethod + + method PcieCfgIntrPending intrPending; + return 0; + endmethod + + method Action isIntrSent(Bool isSent); + endmethod + endinterface + + interface RawPcieCfgControl control; + method Bool hotResetOut; + return True; + endmethod + + method Action hotResetIn(Bool hotReset); + endmethod + + method Bool cfgSpaceEn; + return True; + endmethod + + method PcieCfgDsn deviceSerialNum; + return 0; + endmethod + + method PcieCfgDsBusNum downStreamBusNum; + return 0; + endmethod + + method PcieCfgDsDeviceNum downStreamDeviceNum; + return 0; + endmethod + + method PcieCfgDsFuncNum downStreamFuncNum; + return 0; + endmethod + + // TODO: the powerStateChangeAck must waitng for completing Done + method Bool powerStateChangeAck; + return powerStateChangeIntrReg; + endmethod + + method Action powerStateChangeIntr(Bool powerStateChangeIntrrupt); + powerStateChangeIntrReg <= powerStateChangeIntrrupt; + endmethod + + method PcieCfgDsPortNum downStreamPortNum; + return 0; + endmethod + + method Bool errorCorrectableOut; + return False; + endmethod + + method Action getError( + Bool errorCorrectable, + Bool errorFatal, + Bool errorNonFatal); + endmethod + + method Bool errorUncorrectable; + return False; + endmethod + + method PcieCfgFlrDone funcLevelRstDone; + PcieCfgFlrDone cfgFlrDone = 0; + cfgFlrDone[0] = (~cfgFlrDoneReg1[0]) & cfgFlrDoneReg0[0]; + cfgFlrDone[1] = (~cfgFlrDoneReg1[1]) & cfgFlrDoneReg0[1]; + return cfgFlrDone; + endmethod + + method Bool vfFuncLevelRstDone; + return cfgVFFlrDoneReg1; + endmethod + + method PcieCfgVFFlrFuncNum vfFlrFuncNum; + return cfgVFFlrFuncNumReg1; + endmethod + + method Action getInproc( + PcieCfgFlrInProc flrInProcess, + PcieCfgVFFlrInProc vfFlrInProcess + ); + cfgFlrDoneReg0 <= flrInProcess; + cfgVfFlrInprocReg0 <= zeroExtend(vfFlrInProcess); + endmethod + + method Bool reqPmTransL23Ready; + return False; + endmethod + + method Bool linkTrainEn; + return True; + endmethod + + method Action busNumber(PcieCfgBusNum busNum); + endmethod + + method PcieCfgVendId vendId; + return 0; + endmethod + + method PcieCfgVendId subsysVendId; + return 0; + endmethod + + method PcieCfgDevId devIdPf0; + return 0; + endmethod + + method PcieCfgDevId devIdPf1; + return 0; + endmethod + + method PcieCfgDevId devIdPf2; + return 0; + endmethod + + method PcieCfgDevId devIdPf3; + return 0; + endmethod + + method PcieCfgRevId revIdPf0; + return 0; + endmethod + + method PcieCfgRevId revIdPf1; + return 0; + endmethod + + method PcieCfgRevId revIdPf2; + return 0; + endmethod + + method PcieCfgRevId revIdPf3; + return 0; + endmethod + + method PcieCfgSubsysId subsysIdPf0; + return 0; + endmethod + + method PcieCfgSubsysId subsysIdPf1; + return 0; + endmethod + + method PcieCfgSubsysId subsysIdPf2; + return 0; + endmethod + + method PcieCfgSubsysId subsysIdPf3; + return 0; + endmethod + endinterface + + interface RawPcieCfgFC flowControl; + method Action flowControl( + PcieCfgFlowControlHeaderCredit postedHeaderCredit, + PcieCfgFlowControlHeaderCredit nonPostedHeaderCredit, + PcieCfgFlowControlHeaderCredit cmplHeaderCredit, + PcieCfgFlowControlDataCredit postedDataCredit, + PcieCfgFlowControlDataCredit nonPostedDataCredit, + PcieCfgFlowControlDataCredit cmplDataCredit); + endmethod + + method PcieCfgFlowControlSel flowControlSel; + return 0; + endmethod + endinterface + + // Doesn't support sending Meg + interface RawPcieCfgMsgTx msgTx; + method Bool msegTransmit; + return False; + endmethod + + method PcieCfgMsgTransType msegTransmitType; + return 0; + endmethod + + method PcieCfgMsgTransData msegTransmitData; + return 0; + endmethod + + method Action msegTransmitDone(Bool isDone); + endmethod + endinterface + + interface RawPcieCfgMsgRx msgRx; + method Action receiveMsg( + Bool isMsgReceived, + PcieCfgMsgRecvData recvData, + PcieCfgMsgRecvType recvType + ); + endmethod + endinterface + + interface RawPcieCfgStatus status; + method Action getStatus ( + PcieCfgPhyLinkDown phyLinkDown, + PcieCfgPhyLinkStatus phyLinkStatus, + PcieCfgNegotiatedWidth negotiatedWidth, + PCieCfgCurrentSpeed currentSpeed, + PcieCfgMaxPayloadSize maxPayloadSize, + PCieCfgMaxReadReqSize maxReadReqSize, + PcieCfgFunctionStatus functionStatus, + PcieCfgVirtualFuncStatus virtualFuncStatus, + PcieCfgFuncPowerState functionPowerState, + PcieCfgVFPowerState virtualFuncPowerState, + PcieCfgLinkPowerState linkPowerState, + PcieCfgLocalError localError, + Bool localErrorValid, + PcieCfgRxPmState rxPmState, + PcieCfgTxPmState txPmState, + PcieCfgLtssmState ltssmState, + PcieCfgRcbStatus rcbStatus, + PcieCfgDpaSubstageChange dpaSubstageChange, + PcieCfgObffEn obffEnable); + endmethod + endinterface + + interface RawPcieCfgTransmitFC txFlowControl; + method Action getTransCredit( + PcieCfgTfcNphAv nphAvailable, + PcieCfgTfcNpdAv npdAvailable); + endmethod + endinterface + + endinterface + + method PcieCfgLtssmState getPcieLtssmState(); + return 0; + endmethod + +endmodule diff --git a/src/PcieTypes.bsv b/src/PcieTypes.bsv index a11d8df..65726dc 100755 --- a/src/PcieTypes.bsv +++ b/src/PcieTypes.bsv @@ -143,7 +143,7 @@ typedef Bit#(PCIE_CR_NP_REQ_COUNT_WIDTH) PcieNonPostedRequstCount; // Interface to PCIe IP Completer Interface (*always_ready, always_enabled*) interface RawPcieCompleterRequest; - (* prefix = "s_axis_cq_" *) interface RawPcieAxiStreamSlave#(PCIE_COMPLETER_REQUEST_TUSER_WIDTH) rawAxiStreamSlave; + (* prefix = "s_axis_cq" *) interface RawPcieAxiStreamSlave#(PCIE_COMPLETER_REQUEST_TUSER_WIDTH) rawAxiStreamSlave; (* result = "pcie_cq_np_req" *) method PcieNonPostedRequst nonPostedReqCreditIncrement; (* prefix = "" *) method Action nonPostedReqCreditCnt( (* port = "pcie_cq_np_req_count" *) PcieNonPostedRequstCount nonPostedpReqCount ); @@ -151,7 +151,7 @@ endinterface (*always_ready, always_enabled*) interface RawPcieCompleterComplete; - (* prefix = "m_axis_cc_" *) interface RawPcieAxiStreamMaster#(PCIE_COMPLETER_COMPLETE_TUSER_WIDTH) rawAxiStreamMaster; + (* prefix = "m_axis_cc" *) interface RawPcieAxiStreamMaster#(PCIE_COMPLETER_COMPLETE_TUSER_WIDTH) rawAxiStreamMaster; endinterface typedef 8 PCIE_RQ_TAG_WIDTH; @@ -161,8 +161,8 @@ typedef PcieTlpCtlSeqNum PcieRqSeqNum; // Interface to PCIe IP Requester Interface (*always_ready, always_enabled*) interface RawPcieRequesterRequest; - (* prefix = "m_axis_rq_" *) interface RawPcieAxiStreamMaster#(PCIE_REQUESTER_REQUEST_TUSER_WIDTH) rawAxiStreamMaster; - (* prefix = "pcie_rq_" *) method Action pcieProgressTrack( + (* prefix = "m_axis_rq" *) interface RawPcieAxiStreamMaster#(PCIE_REQUESTER_REQUEST_TUSER_WIDTH) rawAxiStreamMaster; + (* prefix = "pcie_rq" *) method Action pcieProgressTrack( (* port = "tag_vld0" *) Bool tagValid0, (* port = "tag_vld1" *) Bool tagValid1, (* port = "tag0" *) PcieRqTag tag0, @@ -176,7 +176,7 @@ endinterface (*always_ready, always_enabled*) interface RawPcieRequesterComplete; - (* prefix = "s_axis_rc_" *) interface RawPcieAxiStreamSlave#(PCIE_REQUESTER_COMPLETE_TUSER_WIDTH) rawAxiStreamSlave; + (* prefix = "s_axis_rc" *) interface RawPcieAxiStreamSlave#(PCIE_REQUESTER_COMPLETE_TUSER_WIDTH) rawAxiStreamSlave; endinterface // Pcie Configuration Interfaces @@ -199,10 +199,9 @@ interface RawPcieCfgMgmt; (* result = "read" *) method Bool read; (* result = "write_data" *) method PCieCfgMgmtData writeData; (* result = "write" *) method Bool write; - (* prefix = "" *) method Action readData( - (* port = "read_data" *) PCieCfgMgmtData cfgMgmtRdData); - (* prefix = "" *) method Action rdWrDone( - (* port = "read_write_done" *) Bool cfgMgmtRdWrDone); + (* prefix = "" *) method Action getResp( + (* port = "read_data" *) PCieCfgMgmtData cfgMgmtRdData, + (* port = "read_write_done" *) Bool cfgMgmtRdWrDone); endinterface (*always_ready, always_enabled*) @@ -211,34 +210,188 @@ interface RawPcieCfgPm; (* result = "aspm_tx_l0s_entry_disable" *) method Bool aspmL0EntryDisable; endinterface +typedef 4 PCIE_CFG_MSI_ENABLE_WIDTH; +typedef 32 PCIE_CFG_MSI_INT_WIDTH; +typedef 8 PCIE_CFG_MSI_FUNC_NUM_WIDTH; +typedef 12 PCIE_CFG_MSI_MMENABLE_WIDTH; +typedef 32 PCIE_CFG_MSI_PENDING_STATUS_WIDTH; +typedef 2 PCIE_CFG_MSI_PENDING_STATUS_FUNC_NUM_WIDTH; +typedef 2 PCIE_CFG_MSI_SELECT_WIDTH; +typedef 32 PCIE_CFG_MSI_DATA; +typedef 3 PCIE_CFG_MSI_ATTR; +typedef 2 PCIE_CFG_MSI_TPH_TYPE_WIDTH; +typedef 8 PCIE_CFG_MSI_TPH_ST_TAG_WIDTH; + +typedef Bit#(PCIE_CFG_MSI_ENABLE_WIDTH) PcieCfgMsiEn; +typedef Bit#(PCIE_CFG_MSI_INT_WIDTH) PcieCfgMsiInt; +typedef Bit#(PCIE_CFG_MSI_FUNC_NUM_WIDTH) PcieCfgMsiFuncNum; +typedef Bit#(PCIE_CFG_MSI_MMENABLE_WIDTH) PcieCfgMsiMmEn; +typedef Bit#(PCIE_CFG_MSI_PENDING_STATUS_WIDTH) PcieCfgMsiPendingStatus; +typedef Bit#(PCIE_CFG_MSI_PENDING_STATUS_FUNC_NUM_WIDTH) PcieCfgMsiPendingStatusFuncNum; +typedef Bit#(PCIE_CFG_MSI_SELECT_WIDTH) PcieCfgMsiSel; +typedef Bit#(PCIE_CFG_MSI_DATA) PcieCfgMsiData; +typedef Bit#(PCIE_CFG_MSI_ATTR) PcieCfgMsiAttr; +typedef Bit#(PCIE_CFG_MSI_TPH_TYPE_WIDTH) PcieCfgMsiTphType; +typedef Bit#(PCIE_CFG_MSI_TPH_ST_TAG_WIDTH) PcieCfgMsiTphStTag; + (*always_ready, always_enabled*) interface RawPcieCfgMsi; - + (* result = "int" *) method PcieCfgMsiInt msiInt; + (* result = "function_number" *) method PcieCfgMsiFuncNum funcNum; + (* result = "pending_status" *) method PcieCfgMsiPendingStatus pendingStatus; + (* result = "pending_status_function_num" *) method PcieCfgMsiPendingStatusFuncNum pendingStatusFuncNum; + (* result = "pending_status_data_enable" *) method Bool pendingStatusDataEn; + (* result = "select" *) method PcieCfgMsiSel sel; + (* result = "attr" *) method PcieCfgMsiAttr attr; + (* result = "tph_present" *) method Bool tphPresent; + (* result = "tph_type" *) method PcieCfgMsiTphType tphType; + (* result = "tph_st_tag" *) method PcieCfgMsiTphStTag tphStTag; + (* prefix = "" *) method Action getMsiSignals( + (* port = "enable" *) Bool msiEn, + (* port = "sent" *) Bool msiSent, + (* port = "fail" *) Bool msiFail, + (* port = "mmenable" *) PcieCfgMsiMmEn msiMmEn, + (* port = "mask_update" *) Bool maskUpdate, + (* port = "data" *) PcieCfgMsiData data + ); endinterface +typedef 4 PCIE_CFG_INTR_INT_WIDTH; +typedef 4 PCIE_CFG_INTR_PENDING_WIDTH; +typedef Bit#(PCIE_CFG_INTR_INT_WIDTH) PcieCfgIntrInt; +typedef Bit#(PCIE_CFG_INTR_PENDING_WIDTH) PcieCfgIntrPending; + (*always_ready, always_enabled*) interface RawPcieCfgInterrupt; - + (* result = "int" *) method PcieCfgIntrInt intrInt; + (* result = "pending" *) method PcieCfgIntrPending intrPending; + (* prefix = "" *) method Action isIntrSent( + (* port = "sent" *) Bool isSent); endinterface +typedef 64 PCIE_CFG_DSN_WIDTH; +typedef Bit#(PCIE_CFG_DSN_WIDTH) PcieCfgDsn; + +typedef 8 PCIE_CFG_DS_BUS_NUM_WIDTH; +typedef 5 PCIE_CFG_DS_DEVICE_NUM_WIDTH; +typedef 3 PCIE_CFG_DS_FUNC_NUM_WIDTH; +typedef 8 PCIE_CFG_DS_PORT_NUM_WIDTH; +typedef Bit#(PCIE_CFG_DS_BUS_NUM_WIDTH) PcieCfgDsBusNum; +typedef Bit#(PCIE_CFG_DS_DEVICE_NUM_WIDTH) PcieCfgDsDeviceNum; +typedef Bit#(PCIE_CFG_DS_FUNC_NUM_WIDTH) PcieCfgDsFuncNum; +typedef Bit#(PCIE_CFG_DS_PORT_NUM_WIDTH) PcieCfgDsPortNum; + +typedef 4 PCIE_CFG_FLR_DONE_WIDTH; +typedef 8 PCIE_CFG_VF_FLR_FUNCNUM_WIDTH; +typedef 4 PCIE_CFG_FLR_INPROC_WIDTH; +typedef 252 PCIE_CFG_VF_FLR_INPROC_WIDTH; +typedef Bit#(PCIE_CFG_FLR_DONE_WIDTH) PcieCfgFlrDone; +typedef Bit#(PCIE_CFG_VF_FLR_FUNCNUM_WIDTH) PcieCfgVFFlrFuncNum; +typedef Bit#(PCIE_CFG_FLR_DONE_WIDTH) PcieCfgFlrInProc; +typedef Bit#(PCIE_CFG_VF_FLR_INPROC_WIDTH) PcieCfgVFFlrInProc; + +typedef 8 PCIE_CFG_BUS_NUM_WIDTH; +typedef 16 PCIE_CFG_VEND_ID_WIDTH; +typedef 16 PCIE_CFG_DEV_ID_WIDTH; +typedef 8 PCIE_CFG_REV_ID_WIDTH; +typedef 16 PCIE_CFG_SUBSYS_ID_WIDTH; +typedef Bit#(PCIE_CFG_BUS_NUM_WIDTH) PcieCfgBusNum; +typedef Bit#(PCIE_CFG_VEND_ID_WIDTH) PcieCfgVendId; +typedef Bit#(PCIE_CFG_DEV_ID_WIDTH) PcieCfgDevId; +typedef Bit#(PCIE_CFG_REV_ID_WIDTH) PcieCfgRevId; +typedef Bit#(PCIE_CFG_SUBSYS_ID_WIDTH) PcieCfgSubsysId; + (*always_ready, always_enabled*) interface RawPcieCfgControl; - + (* result = "hot_reset_out" *) method Bool hotResetOut; + (* prefix = "" *) method Action hotResetIn( + (* port = "hot_reset_in" *) Bool hotReset); + (* result = "cofig_space_enable" *) method Bool cfgSpaceEn; + (* result = "dsn" *) method PcieCfgDsn deviceSerialNum; + (* result = "ds_bus_number" *) method PcieCfgDsBusNum downStreamBusNum; + (* result = "ds_device_number" *) method PcieCfgDsDeviceNum downStreamDeviceNum; + (* result = "ds_function_number" *) method PcieCfgDsFuncNum downStreamFuncNum; + (* result = "power_state_change_ack" *) method Bool powerStateChangeAck; + (* prefix = "" *) method Action powerStateChangeIntr( + (* port = "power_state_change_interrupt" *) Bool powerStateChangeIntrrupt); + (* result = "ds_port_number" *) method PcieCfgDsPortNum downStreamPortNum; + (* result = "err_cor_in" *) method Bool errorCorrectableOut; + (* prefix = "" *) method Action getError( + (* port = "err_cor_out" *) Bool errorCorrectable, + (* port = "err_fatal_out" *) Bool errorFatal, + (* port = "err_nonfatal_out" *) Bool errorNonFatal); + (* result = "err_uncor_in" *) method Bool errorUncorrectable; + (* result = "flr_done" *) method PcieCfgFlrDone funcLevelRstDone; + (* result = "vf_flr_done" *) method Bool vfFuncLevelRstDone; + (* result = "cf_flr_func_num" *) method PcieCfgVFFlrFuncNum vfFlrFuncNum; + (* prefix = "" *) method Action getInproc( + (* port = "flr_in_process" *) PcieCfgFlrInProc flrInProcess, + (* port = "vf_flr_in_process" *) PcieCfgVFFlrInProc vfFlrInProcess); + (* result = "req_pm_transition_l23_ready" *) method Bool reqPmTransL23Ready; + (* result = "link_training_enable" *) method Bool linkTrainEn; + (* prefix = "" *) method Action busNumber( + (* port = "bus_number" *) PcieCfgBusNum busNum); + (* result = "vend_id" *) method PcieCfgVendId vendId; + (* result = "subsys_vend_id" *) method PcieCfgVendId subsysVendId; + (* result = "dev_id_pf0" *) method PcieCfgDevId devIdPf0; + (* result = "dev_id_pf1" *) method PcieCfgDevId devIdPf1; + (* result = "dev_id_pf2" *) method PcieCfgDevId devIdPf2; + (* result = "dev_id_pf3" *) method PcieCfgDevId devIdPf3; + (* result = "rev_id_pf0" *) method PcieCfgRevId revIdPf0; + (* result = "rev_id_pf1" *) method PcieCfgRevId revIdPf1; + (* result = "rev_id_pf2" *) method PcieCfgRevId revIdPf2; + (* result = "rev_id_pf3" *) method PcieCfgRevId revIdPf3; + (* result = "subsys_id_pf0" *) method PcieCfgSubsysId subsysIdPf0; + (* result = "subsys_id_pf1" *) method PcieCfgSubsysId subsysIdPf1; + (* result = "subsys_id_pf2" *) method PcieCfgSubsysId subsysIdPf2; + (* result = "subsys_id_pf3" *) method PcieCfgSubsysId subsysIdPf3; endinterface +typedef 8 PCIE_CFG_FC_HEADER_WIDTH; +typedef 12 PCIE_CFG_FC_DATA_WIDTH; +typedef 3 PCIE_CFG_FC_SEL_WIDTH; +typedef Bit#(PCIE_CFG_FC_HEADER_WIDTH) PcieCfgFlowControlHeaderCredit; +typedef Bit#(PCIE_CFG_FC_DATA_WIDTH) PcieCfgFlowControlDataCredit; +typedef Bit#(PCIE_CFG_FC_SEL_WIDTH) PcieCfgFlowControlSel; + (*always_ready, always_enabled*) interface RawPcieCfgFC; - + (* prefix = "" *) method Action flowControl( + (* port = "ph" *) PcieCfgFlowControlHeaderCredit postedHeaderCredit, + (* port = "nph" *) PcieCfgFlowControlHeaderCredit nonPostedHeaderCredit, + (* port = "cplh" *) PcieCfgFlowControlHeaderCredit cmplHeaderCredit, + (* port = "pd" *) PcieCfgFlowControlDataCredit postedDataCredit, + (* port = "npd" *) PcieCfgFlowControlDataCredit nonPostedDataCredit, + (* port = "cpld" *) PcieCfgFlowControlDataCredit cmplDataCredit + ); + (* result = "fc_sel" *) method PcieCfgFlowControlSel flowControlSel; endinterface +typedef 3 PCIE_CFG_MSG_TXTYPE_WIDTH; +typedef 32 PCIE_CFG_MSG_TXDATA_WIDTH; +typedef Bit#(PCIE_CFG_MSG_TXTYPE_WIDTH) PcieCfgMsgTransType; +typedef Bit#(PCIE_CFG_MSG_TXDATA_WIDTH) PcieCfgMsgTransData; (*always_ready, always_enabled*) interface RawPcieCfgMsgTx; - + (* result = "transmit" *) method Bool msegTransmit; + (* result = "transmit_type" *) method PcieCfgMsgTransType msegTransmitType; + (* result = "transmit_data" *) method PcieCfgMsgTransData msegTransmitData; + (* prefix = "" *) method Action msegTransmitDone( + (* port = "transmit_done" *) Bool isDone); endinterface +typedef 8 PCIE_CFG_MSG_RXDATA_WIDTH; +typedef 5 PCIE_CFG_MSG_RXTYPE_WIDTH; +typedef Bit#(PCIE_CFG_MSG_RXTYPE_WIDTH) PcieCfgMsgRecvType; +typedef Bit#(PCIE_CFG_MSG_RXDATA_WIDTH) PcieCfgMsgRecvData; + (*always_ready, always_enabled*) interface RawPcieCfgMsgRx; - + method Action receiveMsg( + (* port = "received" *) Bool isMsgReceived, + (* port = "received_data" *) PcieCfgMsgRecvData recvData, + (* port = "received_type" *) PcieCfgMsgRecvType recvType + ); endinterface typedef 1 PCIE_CFG_PHY_LINK_DOWN_WIDTH; @@ -287,7 +440,7 @@ typedef Bit#(PCIE_CFG_OBFF_ENABLE_WIDTH) PcieCfgObffEn; (*always_ready, always_enabled*) interface RawPcieCfgStatus; - method Action getStatus ( + (* prefix = "" *) method Action getStatus ( (* port = "phy_link_down" *) PcieCfgPhyLinkDown phyLinkDown, (* port = "phy_link_status" *) PcieCfgPhyLinkStatus phyLinkStatus, (* port = "negotiated_width" *) PcieCfgNegotiatedWidth negotiatedWidth, @@ -310,21 +463,37 @@ interface RawPcieCfgStatus; ); endinterface +typedef 4 PCIE_CFG_TFC_NPH_WIDTH; +typedef 4 PCIE_CFG_TFC_NPD_WIDTH; +typedef Bit#(PCIE_CFG_TFC_NPH_WIDTH) PcieCfgTfcNphAv; +typedef Bit#(PCIE_CFG_TFC_NPD_WIDTH) PcieCfgTfcNpdAv; + (*always_ready, always_enabled*) interface RawPcieCfgTransmitFC; - + (* prefix = "" *) method Action getTransCredit( + (* port = "nph_av" *) PcieCfgTfcNphAv nphAvailable, + (* port = "npd_av" *) PcieCfgTfcNpdAv npdAvailable + ); endinterface interface RawPcieConfiguration; - (* prefix = "cfg_mgmt_" *) interface RawPcieCfgMgmt mgmt; - (* prefix = "cfg_pm_" *) interface RawPcieCfgPm pm; - (* prefix = "cfg_msi_" *) interface RawPcieCfgMsi msi; - (* prefix = "cfg_interrupt_" *) interface RawPcieCfgInterrupt interrupt; - (* prefix = "cfg_" *) interface RawPcieCfgControl control; - (* prefix = "cfg_fc_" *) interface RawPcieCfgFC flowControl; - (* prefix = "cfg_msg_transmit_" *) interface RawPcieCfgMsgTx msgTx; - (* prefix = "cfg_msg_received_" *) interface RawPcieCfgMsgRx msgRx; + (* prefix = "cfg_mgmt" *) interface RawPcieCfgMgmt mgmt; + (* prefix = "cfg_pm" *) interface RawPcieCfgPm pm; + (* prefix = "cfg_msi" *) interface RawPcieCfgMsi msi; + (* prefix = "cfg_interrupt" *) interface RawPcieCfgInterrupt interrupt; + (* prefix = "cfg" *) interface RawPcieCfgControl control; + (* prefix = "cfg_fc" *) interface RawPcieCfgFC flowControl; + (* prefix = "cfg_msg" *) interface RawPcieCfgMsgTx msgTx; + (* prefix = "cfg_msg" *) interface RawPcieCfgMsgRx msgRx; (* prefix = "" *) interface RawPcieCfgStatus status; - (* prefix = "pcie_tfc_" *) interface RawPcieCfgTransmitFC txFlowControl; + (* prefix = "pcie_tfc" *) interface RawPcieCfgTransmitFC txFlowControl; endinterface +interface RawXilinxPcieIp; + // Raw PCIe interfaces, connected to the Xilinx PCIe IP + (* prefix = "" *) interface RawPcieRequesterRequest requesterRequest; + (* prefix = "" *) interface RawPcieRequesterComplete requesterComplete; + (* prefix = "" *) interface RawPcieCompleterRequest completerRequest; + (* prefix = "" *) interface RawPcieCompleterComplete completerComplete; + (* prefix = "" *) interface RawPcieConfiguration configuration; +endinterface diff --git a/test/TestDmaCompleter.bsv b/test/TestDmaCompleter.bsv index c2b36a2..626c5c5 100644 --- a/test/TestDmaCompleter.bsv +++ b/test/TestDmaCompleter.bsv @@ -94,4 +94,53 @@ module mkTestDmaCompleterRequest(Empty); $finish(); endrule -endmodule \ No newline at end of file +endmodule + +(* doc = "testcase" *) +module mkTestDmaCompleter(Empty); + DmaCompleter dut <- mkDmaCompleter; + Reg#(Bool) isInitReg <- mkReg(False); + + rule alwaysEnables; + dut.rawCompleterComplete.rawAxiStreamMaster.tReady(True); + dut.rawCompleterRequest.nonPostedReqCreditCnt(32); + endrule + + rule testInit; + let testAxiStram = genPseudoHostWriteRequest; + if (!isInitReg) begin + $display("INFO: Start Completer test"); + dut.rawCompleterRequest.rawAxiStreamSlave.tValid( + True, + testAxiStram.tData, + testAxiStram.tKeep, + testAxiStram.tLast, + testAxiStram.tUser + ); + isInitReg <= True; + end + else begin + dut.rawCompleterRequest.rawAxiStreamSlave.tValid( + False, + 0, + 0, + False, + 0 + ); + end + endrule + + rule testOutput if (isInitReg); + let wrData = dut.h2cWrite.dataFifoOut.first; + dut.h2cWrite.dataFifoOut.deq; + let wrAddr = dut.h2cWrite.reqFifoOut.first; + dut.h2cWrite.reqFifoOut.deq; + immAssert( + (wrAddr == fromInteger(valueOf(TEST_ADDR)) && wrData == fromInteger(valueOf(TEST_DATA))), + "wrReq test @ mkTestDmaCompleter", + $format("write value: %h, write addr: %h", wrData, wrAddr) + ); + $display("INFO: Pass Completer test"); + $finish(); + endrule +endmodule diff --git a/test/TestDmacVivado.bsv b/test/TestDmacVivado.bsv new file mode 100644 index 0000000..49d007c --- /dev/null +++ b/test/TestDmacVivado.bsv @@ -0,0 +1,63 @@ +import FIFOF::*; +import BRAM::*; +import GetPut::*; + +import SemiFifo::*; +import PcieTypes::*; +import DmaTypes::*; +import DmaController::*; + +typedef 16384 TEST_BRAM_SIZE; + +interface TestDmacWrRdLoop; + (* prefix = "" *) interface RawXilinxPcieIp rawPcie; +endinterface + +(* synthesize *) +module mkTestDmacCsrWrRdLoop(TestDmacWrRdLoop); + + DmaController dmac <- mkDmaController; + + BRAM2Port#(DmaCsrAddr, DmaCsrValue) ram <- mkBRAM2Server( + BRAM_Configure { + memorySize : valueOf(TEST_BRAM_SIZE), + loadFormat : None, + latency : 2, + outFIFODepth: 3, + allowWriteResponseBypass : False + } + ); + + rule testWriteReq; + dmac.h2cWrite.dataFifoOut.deq; + dmac.h2cWrite.reqFifoOut.deq; + ram.portA.request.put( + BRAMRequest { + write : True, + responseOnWrite : False, + address : dmac.h2cWrite.reqFifoOut.first, + datain : dmac.h2cWrite.dataFifoOut.first + } + ); + endrule + + rule testReadReq; + dmac.h2cRead.reqFifoOut.deq; + ram.portB.request.put( + BRAMRequest { + write : False, + responseOnWrite : False, + address : dmac.h2cRead.reqFifoOut.first, + datain : 0 + } + ); + endrule + + rule testReadResp; + let value <- ram.portB.response.get; + dmac.h2cRead.dataFifoIn.enq(value); + endrule + + interface rawPcie = dmac.rawPcie; +endmodule + From 438dda2966798151143bf4dba3c156a12151b7fd Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Tue, 23 Jul 2024 21:08:06 +0800 Subject: [PATCH 27/53] Update rawPcie interfaces --- src/DmaCompleter.bsv | 49 +++++++++++++++++++++++++++++++------ src/DmaController.bsv | 2 ++ src/PcieDescriptorTypes.bsv | 1 + src/PcieTypes.bsv | 44 ++++++++++++++++++--------------- test/TestDmacVivado.bsv | 4 +-- 5 files changed, 71 insertions(+), 29 deletions(-) diff --git a/src/DmaCompleter.bsv b/src/DmaCompleter.bsv index aa2afdf..93757b2 100644 --- a/src/DmaCompleter.bsv +++ b/src/DmaCompleter.bsv @@ -32,8 +32,8 @@ endinstance typedef DmaCsrValue CsrReadResp; typedef struct { - DmaCsrAddr rdAddr; - PcieCompleterRequestDescriptor npInfo; + DmaCsrAddr addr; + PcieCompleterRequestDescriptor cqDescriptor; } CsrReadReq deriving(Bits, Eq, Bounded, FShow); interface DmaCompleter; @@ -90,7 +90,7 @@ module mkCompleterRequest(CompleterRequest); return truncate(addr); endfunction - rule parse; + rule parseTlp; inFifo.deq; let axiStream = inFifo.first; PcieCompleterRequestSideBandFrame sideBand = unpack(axiStream.tUser); @@ -117,8 +117,8 @@ module mkCompleterRequest(CompleterRequest); fromInteger(valueOf(MEM_READ_REQ)): begin let rdReqAddr = getCsrAddrFromCqDescriptor(descriptor); let rdReq = CsrReadReq{ - rdAddr: rdReqAddr, - npInfo: descriptor + addr: rdReqAddr, + cqDescriptor: descriptor }; rdReqFifo.enq(rdReq); end @@ -137,7 +137,42 @@ module mkCompleterComplete(CompleterComplete); FIFOF#(CsrReadResp) rdRespFifo <- mkFIFOF; FIFOF#(CsrReadReq) rdReqFifo <- mkFIFOF; - // TODO: the logic of cc + // TODO: the logic of cc, not completed + rule genTlp; + let value = rdRespFifo.first; + rdRespFifo.deq; + let cqDescriptor = rdReqFifo.first.cqDescriptor; + let addr = rdReqFifo.first.addr; + rdReqFifo.deq; + let ccDescriptor = PcieCompleterCompleteDescriptor { + reserve0 : 0, + attributes : cqDescriptor.attributes, + trafficClass : cqDescriptor.trafficClass, + completerIdEn : False, + completerId : 0, + tag : cqDescriptor.tag, + requesterId : cqDescriptor.requesterId, + reserve1 : 0, + isPoisoned : False, + status : 0, + dwordCnt : 0, + reserve2 : 0, + isLockedReadCmpl: False, + byteCnt : 0, + reserve3 : 0, + addrType : 0, + lowerAddr : 0 + }; + Data data = zeroExtend(pack(ccDescriptor)); + data = data | (zeroExtend(value) << valueOf(DES_CC_DESCRIPTOR_WIDTH)); + let axiStream = CmplCmplAxiStream { + tData : data, + tKeep : 0, + tLast : True, + tUser : 0 + }; + outFifo.enq(axiStream); + endrule interface axiStreamFifoOut = convertFifoToFifoOut(outFifo); interface csrReadRespFifoIn = convertFifoToFifoIn(rdRespFifo); @@ -171,7 +206,7 @@ module mkDmaCompleter(DmaCompleter); rule genCsrReadReq; let rdReq = cmplRequest.csrReadReqFifoOut.first; cmplRequest.csrReadReqFifoOut.deq; - h2cCsrReadReqFifo.enq(rdReq.rdAddr); + h2cCsrReadReqFifo.enq(rdReq.addr); csrRdReqStoreFifo.enq(rdReq); endrule diff --git a/src/DmaController.bsv b/src/DmaController.bsv index e38bfb1..c169ab8 100755 --- a/src/DmaController.bsv +++ b/src/DmaController.bsv @@ -37,5 +37,7 @@ module mkDmaController(DmaController); interface completerRequest = completer.rawCompleterRequest; interface completerComplete = completer.rawCompleterComplete; interface configuration = pcieConfigurator.rawConfiguration; + method Action linkUp(Bool isLinkUp); + endmethod endinterface endmodule diff --git a/src/PcieDescriptorTypes.bsv b/src/PcieDescriptorTypes.bsv index bcd9b22..7a92912 100644 --- a/src/PcieDescriptorTypes.bsv +++ b/src/PcieDescriptorTypes.bsv @@ -53,6 +53,7 @@ typedef struct { AddrType addrType; } PcieCompleterRequestDescriptor deriving(Bits, Eq, Bounded, FShow); +typedef 96 DES_CC_DESCRIPTOR_WIDTH; typedef 3 DES_CMPL_STATUS_WIDTH; typedef 13 DES_CMPL_BYTE_CNT_WIDTH; typedef 7 DES_LOWER_ADDR_WIDTH; diff --git a/src/PcieTypes.bsv b/src/PcieTypes.bsv index 65726dc..d88d205 100755 --- a/src/PcieTypes.bsv +++ b/src/PcieTypes.bsv @@ -141,7 +141,7 @@ typedef Bit#(PCIE_CR_NP_REQ_WIDTH) PcieNonPostedRequst; typedef Bit#(PCIE_CR_NP_REQ_COUNT_WIDTH) PcieNonPostedRequstCount; // Interface to PCIe IP Completer Interface -(*always_ready, always_enabled*) +(* always_ready, always_enabled *) interface RawPcieCompleterRequest; (* prefix = "s_axis_cq" *) interface RawPcieAxiStreamSlave#(PCIE_COMPLETER_REQUEST_TUSER_WIDTH) rawAxiStreamSlave; (* result = "pcie_cq_np_req" *) method PcieNonPostedRequst nonPostedReqCreditIncrement; @@ -149,7 +149,7 @@ interface RawPcieCompleterRequest; (* port = "pcie_cq_np_req_count" *) PcieNonPostedRequstCount nonPostedpReqCount ); endinterface -(*always_ready, always_enabled*) +(* always_ready, always_enabled *) interface RawPcieCompleterComplete; (* prefix = "m_axis_cc" *) interface RawPcieAxiStreamMaster#(PCIE_COMPLETER_COMPLETE_TUSER_WIDTH) rawAxiStreamMaster; endinterface @@ -159,7 +159,7 @@ typedef Bit#(PCIE_RQ_TAG_WIDTH) PcieRqTag; typedef PcieTlpCtlSeqNum PcieRqSeqNum; // Interface to PCIe IP Requester Interface -(*always_ready, always_enabled*) +(* always_ready, always_enabled *) interface RawPcieRequesterRequest; (* prefix = "m_axis_rq" *) interface RawPcieAxiStreamMaster#(PCIE_REQUESTER_REQUEST_TUSER_WIDTH) rawAxiStreamMaster; (* prefix = "pcie_rq" *) method Action pcieProgressTrack( @@ -174,7 +174,7 @@ interface RawPcieRequesterRequest; ); endinterface -(*always_ready, always_enabled*) +(* always_ready, always_enabled *) interface RawPcieRequesterComplete; (* prefix = "s_axis_rc" *) interface RawPcieAxiStreamSlave#(PCIE_REQUESTER_COMPLETE_TUSER_WIDTH) rawAxiStreamSlave; endinterface @@ -190,7 +190,7 @@ typedef Bit#(PCIE_CFG_MGMT_BE_WIDTH) PcieCfgMgmtByteEn; typedef Bit#(PCIE_CFG_MGMT_FUNC_NUM_WIDTH) PcieCfgMgmtFuncNum; typedef Bit#(PCIE_CFG_MGMT_DATA_WIDTH) PCieCfgMgmtData; -(*always_ready, always_enabled*) +(* always_ready, always_enabled *) interface RawPcieCfgMgmt; (* result = "addr" *) method PcieCfgMgmtAddr addr; (* result = "byte_enable" *) method PcieCfgMgmtByteEn byteEn; @@ -204,7 +204,7 @@ interface RawPcieCfgMgmt; (* port = "read_write_done" *) Bool cfgMgmtRdWrDone); endinterface -(*always_ready, always_enabled*) +(* always_ready, always_enabled *) interface RawPcieCfgPm; (* result = "aspm_l1_entry_reject" *) method Bool aspmL1EntryReject; (* result = "aspm_tx_l0s_entry_disable" *) method Bool aspmL0EntryDisable; @@ -234,7 +234,7 @@ typedef Bit#(PCIE_CFG_MSI_ATTR) PcieCfgMsiAttr; typedef Bit#(PCIE_CFG_MSI_TPH_TYPE_WIDTH) PcieCfgMsiTphType; typedef Bit#(PCIE_CFG_MSI_TPH_ST_TAG_WIDTH) PcieCfgMsiTphStTag; -(*always_ready, always_enabled*) +(* always_ready, always_enabled *) interface RawPcieCfgMsi; (* result = "int" *) method PcieCfgMsiInt msiInt; (* result = "function_number" *) method PcieCfgMsiFuncNum funcNum; @@ -261,7 +261,7 @@ typedef 4 PCIE_CFG_INTR_PENDING_WIDTH; typedef Bit#(PCIE_CFG_INTR_INT_WIDTH) PcieCfgIntrInt; typedef Bit#(PCIE_CFG_INTR_PENDING_WIDTH) PcieCfgIntrPending; -(*always_ready, always_enabled*) +(* always_ready, always_enabled *) interface RawPcieCfgInterrupt; (* result = "int" *) method PcieCfgIntrInt intrInt; (* result = "pending" *) method PcieCfgIntrPending intrPending; @@ -301,12 +301,12 @@ typedef Bit#(PCIE_CFG_DEV_ID_WIDTH) PcieCfgDevId; typedef Bit#(PCIE_CFG_REV_ID_WIDTH) PcieCfgRevId; typedef Bit#(PCIE_CFG_SUBSYS_ID_WIDTH) PcieCfgSubsysId; -(*always_ready, always_enabled*) +(* always_ready, always_enabled *) interface RawPcieCfgControl; (* result = "hot_reset_out" *) method Bool hotResetOut; (* prefix = "" *) method Action hotResetIn( (* port = "hot_reset_in" *) Bool hotReset); - (* result = "cofig_space_enable" *) method Bool cfgSpaceEn; + (* result = "config_space_enable" *) method Bool cfgSpaceEn; (* result = "dsn" *) method PcieCfgDsn deviceSerialNum; (* result = "ds_bus_number" *) method PcieCfgDsBusNum downStreamBusNum; (* result = "ds_device_number" *) method PcieCfgDsDeviceNum downStreamDeviceNum; @@ -323,7 +323,7 @@ interface RawPcieCfgControl; (* result = "err_uncor_in" *) method Bool errorUncorrectable; (* result = "flr_done" *) method PcieCfgFlrDone funcLevelRstDone; (* result = "vf_flr_done" *) method Bool vfFuncLevelRstDone; - (* result = "cf_flr_func_num" *) method PcieCfgVFFlrFuncNum vfFlrFuncNum; + (* result = "vf_flr_func_num" *) method PcieCfgVFFlrFuncNum vfFlrFuncNum; (* prefix = "" *) method Action getInproc( (* port = "flr_in_process" *) PcieCfgFlrInProc flrInProcess, (* port = "vf_flr_in_process" *) PcieCfgVFFlrInProc vfFlrInProcess); @@ -354,7 +354,7 @@ typedef Bit#(PCIE_CFG_FC_HEADER_WIDTH) PcieCfgFlowControlHeaderCredit; typedef Bit#(PCIE_CFG_FC_DATA_WIDTH) PcieCfgFlowControlDataCredit; typedef Bit#(PCIE_CFG_FC_SEL_WIDTH) PcieCfgFlowControlSel; -(*always_ready, always_enabled*) +(* always_ready, always_enabled *) interface RawPcieCfgFC; (* prefix = "" *) method Action flowControl( (* port = "ph" *) PcieCfgFlowControlHeaderCredit postedHeaderCredit, @@ -364,14 +364,14 @@ interface RawPcieCfgFC; (* port = "npd" *) PcieCfgFlowControlDataCredit nonPostedDataCredit, (* port = "cpld" *) PcieCfgFlowControlDataCredit cmplDataCredit ); - (* result = "fc_sel" *) method PcieCfgFlowControlSel flowControlSel; + (* result = "sel" *) method PcieCfgFlowControlSel flowControlSel; endinterface typedef 3 PCIE_CFG_MSG_TXTYPE_WIDTH; typedef 32 PCIE_CFG_MSG_TXDATA_WIDTH; typedef Bit#(PCIE_CFG_MSG_TXTYPE_WIDTH) PcieCfgMsgTransType; typedef Bit#(PCIE_CFG_MSG_TXDATA_WIDTH) PcieCfgMsgTransData; -(*always_ready, always_enabled*) +(* always_ready, always_enabled *) interface RawPcieCfgMsgTx; (* result = "transmit" *) method Bool msegTransmit; (* result = "transmit_type" *) method PcieCfgMsgTransType msegTransmitType; @@ -385,9 +385,9 @@ typedef 5 PCIE_CFG_MSG_RXTYPE_WIDTH; typedef Bit#(PCIE_CFG_MSG_RXTYPE_WIDTH) PcieCfgMsgRecvType; typedef Bit#(PCIE_CFG_MSG_RXDATA_WIDTH) PcieCfgMsgRecvData; -(*always_ready, always_enabled*) +(* always_ready, always_enabled *) interface RawPcieCfgMsgRx; - method Action receiveMsg( + (* prefix = "" *) method Action receiveMsg( (* port = "received" *) Bool isMsgReceived, (* port = "received_data" *) PcieCfgMsgRecvData recvData, (* port = "received_type" *) PcieCfgMsgRecvType recvType @@ -438,7 +438,7 @@ typedef Bit#(PCIE_CFG_DPA_SUBSTAGE_CHANGE_WIDTH) PcieCfgDpaSubstageChange; typedef Bit#(PCIE_CFG_OBFF_ENABLE_WIDTH) PcieCfgObffEn; -(*always_ready, always_enabled*) +(* always_ready, always_enabled *) interface RawPcieCfgStatus; (* prefix = "" *) method Action getStatus ( (* port = "phy_link_down" *) PcieCfgPhyLinkDown phyLinkDown, @@ -468,7 +468,7 @@ typedef 4 PCIE_CFG_TFC_NPD_WIDTH; typedef Bit#(PCIE_CFG_TFC_NPH_WIDTH) PcieCfgTfcNphAv; typedef Bit#(PCIE_CFG_TFC_NPD_WIDTH) PcieCfgTfcNpdAv; -(*always_ready, always_enabled*) +(* always_ready, always_enabled *) interface RawPcieCfgTransmitFC; (* prefix = "" *) method Action getTransCredit( (* port = "nph_av" *) PcieCfgTfcNphAv nphAvailable, @@ -476,19 +476,21 @@ interface RawPcieCfgTransmitFC; ); endinterface +(* always_ready, always_enabled *) interface RawPcieConfiguration; (* prefix = "cfg_mgmt" *) interface RawPcieCfgMgmt mgmt; (* prefix = "cfg_pm" *) interface RawPcieCfgPm pm; - (* prefix = "cfg_msi" *) interface RawPcieCfgMsi msi; + (* prefix = "cfg_interrupt_msi" *) interface RawPcieCfgMsi msi; (* prefix = "cfg_interrupt" *) interface RawPcieCfgInterrupt interrupt; (* prefix = "cfg" *) interface RawPcieCfgControl control; (* prefix = "cfg_fc" *) interface RawPcieCfgFC flowControl; (* prefix = "cfg_msg" *) interface RawPcieCfgMsgTx msgTx; (* prefix = "cfg_msg" *) interface RawPcieCfgMsgRx msgRx; - (* prefix = "" *) interface RawPcieCfgStatus status; + (* prefix = "cfg" *) interface RawPcieCfgStatus status; (* prefix = "pcie_tfc" *) interface RawPcieCfgTransmitFC txFlowControl; endinterface +(* always_ready, always_enabled *) interface RawXilinxPcieIp; // Raw PCIe interfaces, connected to the Xilinx PCIe IP (* prefix = "" *) interface RawPcieRequesterRequest requesterRequest; @@ -496,4 +498,6 @@ interface RawXilinxPcieIp; (* prefix = "" *) interface RawPcieCompleterRequest completerRequest; (* prefix = "" *) interface RawPcieCompleterComplete completerComplete; (* prefix = "" *) interface RawPcieConfiguration configuration; + (* prefix = "" *) method Action linkUp( + (* port = "user_lnk_up" *) Bool isLinkUp); endinterface diff --git a/test/TestDmacVivado.bsv b/test/TestDmacVivado.bsv index 49d007c..d5b290d 100644 --- a/test/TestDmacVivado.bsv +++ b/test/TestDmacVivado.bsv @@ -13,8 +13,8 @@ interface TestDmacWrRdLoop; (* prefix = "" *) interface RawXilinxPcieIp rawPcie; endinterface -(* synthesize *) -module mkTestDmacCsrWrRdLoop(TestDmacWrRdLoop); +(* synthesize, clock_prefix = "user_clk", reset_prefix = "user_reset" *) +module mkTestDmacCsrWrRdLoop((* reset="sys_rst" *) Reset sysRst, TestDmacWrRdLoop ifc); DmaController dmac <- mkDmaController; From ec3c3f4ee48e6b359a81ce5144c20c283580c12b Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Thu, 25 Jul 2024 12:24:20 +0800 Subject: [PATCH 28/53] Finish CsrWrRd --- src/DmaCompleter.bsv | 40 +++++++++---- src/DmaController.bsv | 22 +++++++ src/DmaTypes.bsv | 7 ++- src/PcieConfigurator.bsv | 2 +- src/PcieDescriptorTypes.bsv | 4 ++ src/PcieTypes.bsv | 6 ++ src/PrimUtils.bsv | 16 +++++ test/TestDmaCompleter.bsv | 114 +++++++++++++++++++++++++----------- test/TestDmacVivado.bsv | 57 ++++++++++++++++++ 9 files changed, 220 insertions(+), 48 deletions(-) diff --git a/src/DmaCompleter.bsv b/src/DmaCompleter.bsv index 93757b2..1df2da9 100644 --- a/src/DmaCompleter.bsv +++ b/src/DmaCompleter.bsv @@ -1,4 +1,5 @@ import FIFOF::*; +import Vector::*; import SemiFifo::*; import PrimUtils::*; @@ -8,6 +9,7 @@ import PcieDescriptorTypes::*; import DmaTypes::*; typedef 1 IDEA_DWORD_CNT_OF_CSR; +typedef 4 IDEA_BYTE_CNT_OF_CSR; typedef 4 IDEA_FIRST_BE_HIGH_VALID_PTR_OF_CSR; typedef 64 CMPL_NPREQ_INFLIGHT_NUM; @@ -15,6 +17,9 @@ typedef 20 CMPL_NPREQ_WAITING_CLKS; typedef 2'b11 NP_CREDIT_INCREMENT; typedef 2'b00 NP_CREDIT_NOCHANGE; +typedef 'h1F IDEA_CQ_TKEEP_OF_CSR; +typedef 'hF IDEA_CC_TKEEP_OF_CSR; + typedef PcieAxiStream#(PCIE_COMPLETER_REQUEST_TUSER_WIDTH) CmplReqAxiStream; typedef PcieAxiStream#(PCIE_COMPLETER_COMPLETE_TUSER_WIDTH) CmplCmplAxiStream; @@ -87,7 +92,7 @@ module mkCompleterRequest(CompleterRequest); else begin addr = 0; end - return truncate(addr); + return truncate(addr << valueOf(TSub#(DMA_MEM_ADDR_WIDTH, DES_ADDR_WIDTH))); endfunction rule parseTlp; @@ -97,7 +102,6 @@ module mkCompleterRequest(CompleterRequest); isInPacket <= !axiStream.tLast; if (!isInPacket) begin let descriptor = getDescriptorFromFirstBeat(axiStream); - // TODO: parity check! case (descriptor.reqType) fromInteger(valueOf(MEM_WRITE_REQ)): begin if (descriptor.dwordCnt == fromInteger(valueOf(IDEA_DWORD_CNT_OF_CSR)) && isFirstBytesAllValid(sideBand)) begin @@ -122,7 +126,7 @@ module mkCompleterRequest(CompleterRequest); }; rdReqFifo.enq(rdReq); end - default: begin $display("INFO"); illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; end + default: illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; endcase end endrule @@ -137,7 +141,7 @@ module mkCompleterComplete(CompleterComplete); FIFOF#(CsrReadResp) rdRespFifo <- mkFIFOF; FIFOF#(CsrReadReq) rdReqFifo <- mkFIFOF; - // TODO: the logic of cc, not completed + // Only response MemRd TLP in this rule rule genTlp; let value = rdRespFifo.first; rdRespFifo.deq; @@ -154,22 +158,36 @@ module mkCompleterComplete(CompleterComplete); requesterId : cqDescriptor.requesterId, reserve1 : 0, isPoisoned : False, - status : 0, - dwordCnt : 0, + status : fromInteger(valueOf(DES_CC_STAUS_SUCCESS)), + dwordCnt : fromInteger(valueOf(IDEA_DWORD_CNT_OF_CSR)), reserve2 : 0, isLockedReadCmpl: False, - byteCnt : 0, + byteCnt : fromInteger(valueOf(IDEA_BYTE_CNT_OF_CSR)), reserve3 : 0, - addrType : 0, - lowerAddr : 0 + addrType : cqDescriptor.addrType, + lowerAddr : truncate(addr) }; Data data = zeroExtend(pack(ccDescriptor)); data = data | (zeroExtend(value) << valueOf(DES_CC_DESCRIPTOR_WIDTH)); + let isSop = PcieTlpCtlIsSopCommon { + isSopPtrs : replicate(0), // Straddle mode is disable of completer + isSop : 1 + }; + let isEop = PcieTlpCtlIsEopCommon { + isEopPtrs : replicate(0), // Straddle mode is disable of completer + isEop : 1 + }; + let sideBand = PcieCompleterCompleteSideBandFrame { + parity : 0, // Do not enable parity check in the core + discontinue : False, + isSop : isSop, + isEop : isEop + }; let axiStream = CmplCmplAxiStream { tData : data, - tKeep : 0, + tKeep : fromInteger(valueOf(IDEA_CC_TKEEP_OF_CSR)), tLast : True, - tUser : 0 + tUser : pack(sideBand) }; outFifo.enq(axiStream); endrule diff --git a/src/DmaController.bsv b/src/DmaController.bsv index c169ab8..c1d0999 100755 --- a/src/DmaController.bsv +++ b/src/DmaController.bsv @@ -41,3 +41,25 @@ module mkDmaController(DmaController); endmethod endinterface endmodule + +interface DmaControllerCompleter; + // Completer interfaces, where the Card serve as the Slave + interface DmaHostToCardWrite h2cWrite; + interface DmaHostToCardRead h2cRead; + + // Raw PCIe interfaces, connected to the Xilinx PCIe IP + interface RawXilinxPcieIpCompleter rawPcie; +endinterface + +// Only for testing in bsv, do not use for synthesize +module mkDmaControllerCompleter(DmaControllerCompleter); + DmaCompleter completer <- mkDmaCompleter; + + interface h2cWrite = completer.h2cWrite; + interface h2cRead = completer.h2cRead; + + interface RawXilinxPcieIpCompleter rawPcie; + interface completerRequest = completer.rawCompleterRequest; + interface completerComplete = completer.rawCompleterComplete; + endinterface +endmodule diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index ea1645e..10a0404 100755 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -2,8 +2,9 @@ import FShow::*; import SemiFifo::*; import PcieTypes::*; +import PcieAxiStreamTypes::*; -typedef 512 DATA_WIDTH; +typedef PCIE_AXIS_DATA_WIDTH DATA_WIDTH; typedef 64 DMA_MEM_ADDR_WIDTH; typedef 32 DMA_CSR_ADDR_WIDTH; @@ -17,6 +18,10 @@ typedef 8 BYTE_WIDTH; typedef TLog#(BYTE_WIDTH) BYTE_WIDTH_WIDTH; typedef TMul#(4, BYTE_WIDTH) DWORD_WIDTH; +typedef Bit#(BYTE_WIDTH) Byte; +typedef Bit#(DWORD_WIDTH) DWord; +typedef Bit#(1) ByteParity; + typedef 2 CONCAT_STREAM_NUM; typedef TDiv#(DATA_WIDTH, BYTE_WIDTH) BYTE_EN_WIDTH; diff --git a/src/PcieConfigurator.bsv b/src/PcieConfigurator.bsv index 78474a9..c0553c9 100644 --- a/src/PcieConfigurator.bsv +++ b/src/PcieConfigurator.bsv @@ -146,7 +146,7 @@ module mkPcieConfigurator(PcieConfigurator); interface RawPcieCfgControl control; method Bool hotResetOut; - return True; + return False; endmethod method Action hotResetIn(Bool hotReset); diff --git a/src/PcieDescriptorTypes.bsv b/src/PcieDescriptorTypes.bsv index 7a92912..17660d0 100644 --- a/src/PcieDescriptorTypes.bsv +++ b/src/PcieDescriptorTypes.bsv @@ -61,6 +61,10 @@ typedef Bit#(DES_CMPL_STATUS_WIDTH) CmplStatus; typedef Bit#(DES_CMPL_BYTE_CNT_WIDTH) CmplByteCnt; typedef Bit#(DES_LOWER_ADDR_WIDTH) LowerAddr; +typedef 0 DES_CC_STAUS_SUCCESS; +typedef 1 DES_CC_STATUS_UPSUPPORT; +typedef 4 DES_CC_STATUS_ABORT; + typedef struct { // DW + 2 ReserveBit1 reserve0; diff --git a/src/PcieTypes.bsv b/src/PcieTypes.bsv index d88d205..fabcc0d 100755 --- a/src/PcieTypes.bsv +++ b/src/PcieTypes.bsv @@ -501,3 +501,9 @@ interface RawXilinxPcieIp; (* prefix = "" *) method Action linkUp( (* port = "user_lnk_up" *) Bool isLinkUp); endinterface + +(* always_ready, always_enabled *) +interface RawXilinxPcieIpCompleter; + (* prefix = "" *) interface RawPcieCompleterRequest completerRequest; + (* prefix = "" *) interface RawPcieCompleterComplete completerComplete; +endinterface diff --git a/src/PrimUtils.bsv b/src/PrimUtils.bsv index a2f6c40..02b681d 100644 --- a/src/PrimUtils.bsv +++ b/src/PrimUtils.bsv @@ -1,4 +1,5 @@ import FIFOF::*; +import Vector::*; import PcieAxiStreamTypes::*; import DmaTypes::*; @@ -265,3 +266,18 @@ module mkCounteredFIFOF#(Integer depth)(CounteredFIFOF#(t)) provisos(Bits#(t, tS method FifoSize getCurSize = curSize; endmodule +function ByteParity calByteParity(Byte data); + return (data[0] ^ data[1] ^ data[2] ^ data[3] ^ data[4] ^ data[5] ^ data[6] ^ data[7]); +endfunction + +typedef Bit#(BYTE_EN_WIDTH) DataParity; +typedef Bit#(TDiv#(DWORD_WIDTH, BYTE_WIDTH)) DwordParity; + +function DataParity calDataParity(Data data); + Vector#(BYTE_EN_WIDTH, Byte) dataBytes = unpack(data); + Vector#(BYTE_EN_WIDTH, ByteParity) dataParities= newVector(); + for (Integer idx = 0; idx < valueOf(BYTE_EN_WIDTH); idx = idx + 1) begin + dataParities[idx] = calByteParity(dataBytes[idx]); + end + return pack(dataParities); +endfunction \ No newline at end of file diff --git a/test/TestDmaCompleter.bsv b/test/TestDmaCompleter.bsv index 626c5c5..03d03d3 100644 --- a/test/TestDmaCompleter.bsv +++ b/test/TestDmaCompleter.bsv @@ -10,12 +10,15 @@ import PcieTypes::*; import PcieDescriptorTypes::*; import DmaTypes::*; import DmaCompleter::*; +import TestDmacVivado::*; typedef 'hABCD TEST_DATA; -typedef 'h1234 TEST_ADDR; +typedef 'h1A28 TEST_ADDR; typedef 2'b10 TRANSLATED_ADDR_TYPE; +typedef 10 READ_TIMEOUT_THRESH; + function PcieTlpCtlIsEopCommon getEmptyEop(); return PcieTlpCtlIsEopCommon { isEopPtrs: replicate(0), @@ -30,7 +33,7 @@ function PcieTlpCtlIsSopCommon getEmptySop(); }; endfunction -function CmplReqAxiStream genPseudoHostWriteRequest(); +function CmplReqAxiStream genPseudoHostRequest(DmaCsrValue testValue, DmaCsrAddr testAddr, Bool isWrite); let descriptor = PcieCompleterRequestDescriptor { reserve0 : 0, attributes : 0, @@ -41,15 +44,14 @@ function CmplReqAxiStream genPseudoHostWriteRequest(); tag : 0, requesterId : fromInteger(valueOf(TEST_DATA)), reserve1 : 0, - reqType : fromInteger(valueOf(MEM_WRITE_REQ)), + reqType : isWrite ? fromInteger(valueOf(MEM_WRITE_REQ)) :fromInteger(valueOf(MEM_READ_REQ)) , dwordCnt : 1, - address : fromInteger(valueOf(TEST_ADDR)), + address : zeroExtend(testAddr >> valueOf(TSub#(DMA_MEM_ADDR_WIDTH, DES_ADDR_WIDTH))), addrType : fromInteger(valueOf(TRANSLATED_ADDR_TYPE)) }; Data data = 0; data = data | zeroExtend(pack(descriptor)); - Data value = fromInteger(valueOf(TEST_DATA)); - data = data | (value << valueOf(DES_CQ_DESCRIPTOR_WIDTH)); + data = data | zeroExtend(testValue) << valueOf(DES_CQ_DESCRIPTOR_WIDTH); let sideBand = PcieCompleterRequestSideBandFrame { parity : 0, tphSteeringTag : 0, @@ -58,13 +60,13 @@ function CmplReqAxiStream genPseudoHostWriteRequest(); discontinue : False, isEop : getEmptyEop, isSop : getEmptySop, - dataByteEn : 'hFFF, + dataByteEn : isWrite ? 'hFFF : 'hFF, lastByteEn : 'hF, firstByteEn : 'hF }; return CmplReqAxiStream { tData : data, - tKeep : 'h3FF, + tKeep : fromInteger(valueOf(IDEA_CQ_TKEEP_OF_CSR)), tLast : True, tUser : pack(sideBand) }; @@ -77,7 +79,7 @@ module mkTestDmaCompleterRequest(Empty); rule testInit if (!isInitReg); $display("INFO: Start CompleterRequest test"); - let testAxiStram = genPseudoHostWriteRequest; + let testAxiStram = genPseudoHostRequest(fromInteger(valueOf(TEST_DATA)), fromInteger(valueOf(TEST_ADDR)), True); dut.axiStreamFifoIn.enq(testAxiStram); isInitReg <= True; endrule @@ -88,7 +90,7 @@ module mkTestDmaCompleterRequest(Empty); immAssert( (wrReq.addr == fromInteger(valueOf(TEST_ADDR)) && wrReq.value == fromInteger(valueOf(TEST_DATA))), "wrReq test @ mkTestDmaCompleterRequest", - fshow(wrReq) + $format("RawReq: Addr %h, Value %h \n But", fromInteger(valueOf(TEST_ADDR)), fromInteger(valueOf(TEST_DATA)),fshow(wrReq)) ); $display("INFO: Pass CompleterRequest test"); $finish(); @@ -98,49 +100,91 @@ endmodule (* doc = "testcase" *) module mkTestDmaCompleter(Empty); - DmaCompleter dut <- mkDmaCompleter; - Reg#(Bool) isInitReg <- mkReg(False); + TestDmacCsrWrRdLoopTb dut <- mkTestDmacCsrWrRdLoopTb; + Reg#(Bool) isInitReg <- mkReg(False); + Reg#(Bool) isWriteDoneReg <- mkReg(False); + Reg#(Bool) isWriteDoneReg1 <- mkReg(False); + Reg#(Bool) isReadDoneReg <- mkReg(False); + Reg#(UInt#(32)) timeoutReg <- mkReg(0); + + function Action setEmptyRawAxiStream(); + return action + dut.rawPcie.completerRequest.rawAxiStreamSlave.tValid( + False, + 0, + 0, + False, + 0 + ); + endaction; + endfunction rule alwaysEnables; - dut.rawCompleterComplete.rawAxiStreamMaster.tReady(True); - dut.rawCompleterRequest.nonPostedReqCreditCnt(32); + dut.rawPcie.completerComplete.rawAxiStreamMaster.tReady(True); + dut.rawPcie.completerRequest.nonPostedReqCreditCnt(32); endrule rule testInit; - let testAxiStram = genPseudoHostWriteRequest; if (!isInitReg) begin + setEmptyRawAxiStream; + isInitReg <= True; $display("INFO: Start Completer test"); - dut.rawCompleterRequest.rawAxiStreamSlave.tValid( + end + else if (isInitReg && !isWriteDoneReg) begin + let testAxiStram = genPseudoHostRequest(fromInteger(valueOf(TEST_DATA)), fromInteger(valueOf(TEST_ADDR)), True); + dut.rawPcie.completerRequest.rawAxiStreamSlave.tValid( True, testAxiStram.tData, testAxiStram.tKeep, testAxiStram.tLast, testAxiStram.tUser ); - isInitReg <= True; + isWriteDoneReg <= True; end - else begin - dut.rawCompleterRequest.rawAxiStreamSlave.tValid( - False, - 0, - 0, - False, - 0 + else if (isInitReg && isWriteDoneReg1 && !isReadDoneReg) begin + let testAxiStram = genPseudoHostRequest(0, fromInteger(valueOf(TEST_ADDR)), False); + dut.rawPcie.completerRequest.rawAxiStreamSlave.tValid( + True, + testAxiStram.tData, + testAxiStram.tKeep, + testAxiStram.tLast, + testAxiStram.tUser ); + isReadDoneReg <= True; + end + else begin + setEmptyRawAxiStream; + isWriteDoneReg1 <= isWriteDoneReg; end endrule rule testOutput if (isInitReg); - let wrData = dut.h2cWrite.dataFifoOut.first; - dut.h2cWrite.dataFifoOut.deq; - let wrAddr = dut.h2cWrite.reqFifoOut.first; - dut.h2cWrite.reqFifoOut.deq; - immAssert( - (wrAddr == fromInteger(valueOf(TEST_ADDR)) && wrData == fromInteger(valueOf(TEST_DATA))), - "wrReq test @ mkTestDmaCompleter", - $format("write value: %h, write addr: %h", wrData, wrAddr) - ); - $display("INFO: Pass Completer test"); - $finish(); + if (timeoutReg > fromInteger(valueOf(READ_TIMEOUT_THRESH))) begin + $display("Error: no valid cc axiStream out until timeout!"); + $finish(); + end + else begin + if (dut.rawPcie.completerComplete.rawAxiStreamMaster.tValid) begin + let data = dut.rawPcie.completerComplete.rawAxiStreamMaster.tData; + let keep = dut.rawPcie.completerComplete.rawAxiStreamMaster.tKeep; + let isLast = dut.rawPcie.completerComplete.rawAxiStreamMaster.tLast; + immAssert( + (isLast && (keep == 'hF)), + "completer output keep&last check @ mkTestDmaCompleter", + $format("tKeep: %h, tLast: %h", keep, isLast) + ); + DmaCsrValue value = truncate(data >> valueOf(DES_CC_DESCRIPTOR_WIDTH)); + immAssert( + (value == fromInteger(valueOf(TEST_DATA))), + "complete output data check @ mkTestDmaCompleter", + $format("write value: %h, read value: %h", valueOf(TEST_DATA), value) + ); + $display("INFO: Pass Completer test"); + $finish(); + end + else begin + timeoutReg <= timeoutReg + 1; + end + end endrule endmodule diff --git a/test/TestDmacVivado.bsv b/test/TestDmacVivado.bsv index d5b290d..5e369df 100644 --- a/test/TestDmacVivado.bsv +++ b/test/TestDmacVivado.bsv @@ -31,6 +31,7 @@ module mkTestDmacCsrWrRdLoop((* reset="sys_rst" *) Reset sysRst, TestDmacWrRdLoo rule testWriteReq; dmac.h2cWrite.dataFifoOut.deq; dmac.h2cWrite.reqFifoOut.deq; + $display("SIM INFO @ mkTestDmacCsrWrRdLoop: h2cWrite req detect!"); ram.portA.request.put( BRAMRequest { write : True, @@ -43,6 +44,7 @@ module mkTestDmacCsrWrRdLoop((* reset="sys_rst" *) Reset sysRst, TestDmacWrRdLoo rule testReadReq; dmac.h2cRead.reqFifoOut.deq; + $display("SIM INFO @ mkTestDmacCsrWrRdLoop: h2cRead req detect!"); ram.portB.request.put( BRAMRequest { write : False, @@ -61,3 +63,58 @@ module mkTestDmacCsrWrRdLoop((* reset="sys_rst" *) Reset sysRst, TestDmacWrRdLoo interface rawPcie = dmac.rawPcie; endmodule +// Only use for testing in bsv, do not use for synthesize +interface TestDmacCsrWrRdLoopTb; + interface RawXilinxPcieIpCompleter rawPcie; +endinterface + +module mkTestDmacCsrWrRdLoopTb(TestDmacCsrWrRdLoopTb); + + DmaControllerCompleter dmac <- mkDmaControllerCompleter; + + BRAM2Port#(DmaCsrAddr, DmaCsrValue) ram <- mkBRAM2Server( + BRAM_Configure { + memorySize : valueOf(TEST_BRAM_SIZE), + loadFormat : None, + latency : 2, + outFIFODepth: 3, + allowWriteResponseBypass : False + } + ); + + rule testWriteReq; + dmac.h2cWrite.dataFifoOut.deq; + dmac.h2cWrite.reqFifoOut.deq; + $display("SIM INFO @ mkTestDmacCsrWrRdLoop: h2cWrite req detect!"); + $display("BRAM: PortA write addr %h data %h", dmac.h2cWrite.reqFifoOut.first, dmac.h2cWrite.dataFifoOut.first); + ram.portA.request.put( + BRAMRequest { + write : True, + responseOnWrite : False, + address : dmac.h2cWrite.reqFifoOut.first, + datain : dmac.h2cWrite.dataFifoOut.first + } + ); + endrule + + rule testReadReq; + dmac.h2cRead.reqFifoOut.deq; + $display("SIM INFO @ mkTestDmacCsrWrRdLoop: h2cRead req detect!"); + $display("BRAM: PortB read addr %h", dmac.h2cRead.reqFifoOut.first); + ram.portB.request.put( + BRAMRequest { + write : False, + responseOnWrite : False, + address : dmac.h2cRead.reqFifoOut.first, + datain : 0 + } + ); + endrule + + rule testReadResp; + let value <- ram.portB.response.get; + dmac.h2cRead.dataFifoIn.enq(value); + endrule + + interface rawPcie = dmac.rawPcie; +endmodule \ No newline at end of file From 98c2005b717ab6e199a0df9823d769693e967bad Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Thu, 25 Jul 2024 21:25:13 +0800 Subject: [PATCH 29/53] Fix streamUtils and pass all test --- src/DmaCompleter.bsv | 7 ++++ src/DmaRequester.bsv | 66 +++++++++++++++++++++++++++++++++++++ src/PcieDescriptorTypes.bsv | 55 +++++++++++++++++++++++++++++-- src/PrimUtils.bsv | 14 ++++++-- src/StreamUtils.bsv | 6 ++-- test/TestDmacVivado.bsv | 5 +-- 6 files changed, 143 insertions(+), 10 deletions(-) diff --git a/src/DmaCompleter.bsv b/src/DmaCompleter.bsv index 1df2da9..eccc3ac 100644 --- a/src/DmaCompleter.bsv +++ b/src/DmaCompleter.bsv @@ -104,10 +104,12 @@ module mkCompleterRequest(CompleterRequest); let descriptor = getDescriptorFromFirstBeat(axiStream); case (descriptor.reqType) fromInteger(valueOf(MEM_WRITE_REQ)): begin + $display("SIM INFO @ mkCompleterRequest: MemWrite Detect!"); if (descriptor.dwordCnt == fromInteger(valueOf(IDEA_DWORD_CNT_OF_CSR)) && isFirstBytesAllValid(sideBand)) begin let firstData = getDataFromFirstBeat(axiStream); DmaCsrValue wrValue = firstData[valueOf(DMA_CSR_ADDR_WIDTH)-1:0]; DmaCsrAddr wrAddr = getCsrAddrFromCqDescriptor(descriptor); + $display("SIM INFO @ mkCompleterRequest: Valid wrReq with Addr %h, data %h", wrAddr, wrValue); let wrReq = CsrWriteReq { addr : wrAddr, value : wrValue @@ -119,11 +121,13 @@ module mkCompleterRequest(CompleterRequest); end end fromInteger(valueOf(MEM_READ_REQ)): begin + $display("SIM INFO @ mkCompleterRequest: MemRead Detect!"); let rdReqAddr = getCsrAddrFromCqDescriptor(descriptor); let rdReq = CsrReadReq{ addr: rdReqAddr, cqDescriptor: descriptor }; + $display("SIM INFO @ mkCompleterRequest: Valid rdReq with Addr %h", rdReqAddr); rdReqFifo.enq(rdReq); end default: illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; @@ -148,6 +152,7 @@ module mkCompleterComplete(CompleterComplete); let cqDescriptor = rdReqFifo.first.cqDescriptor; let addr = rdReqFifo.first.addr; rdReqFifo.deq; + $display("SIM INFO @ mkCompleterComplete: Valid rdResp with Addr %h, data %h", addr, value); let ccDescriptor = PcieCompleterCompleteDescriptor { reserve0 : 0, attributes : cqDescriptor.attributes, @@ -231,6 +236,8 @@ module mkDmaCompleter(DmaCompleter); rule procCsrReadResp; let req = csrRdReqStoreFifo.first; let resp = h2cCsrReadDataFifo.first; + csrRdReqStoreFifo.deq; + h2cCsrReadDataFifo.deq; cmplComplete.csrReadRespFifoIn.enq(resp); cmplComplete.csrReadReqFifoIn.enq(req); endrule diff --git a/src/DmaRequester.bsv b/src/DmaRequester.bsv index 10e56b0..10c75a3 100644 --- a/src/DmaRequester.bsv +++ b/src/DmaRequester.bsv @@ -1,11 +1,15 @@ import FIFOF::*; +import GetPut::*; import SemiFifo::*; +import StreamUtils::*; import PcieTypes::*; import PcieAxiStreamTypes::*; import PcieDescriptorTypes::*; import DmaTypes::*; +typedef TSub#(DATA_WIDTH, DES_RQ_DESCRIPTOR_WIDTH) ONE_TLP_THRESH; + typedef PcieAxiStream#(PCIE_REQUESTER_REQUEST_TUSER_WIDTH) ReqReqAxiStream; typedef PcieAxiStream#(PCIE_REQUESTER_COMPLETE_TUSER_WIDTH) ReqCmplAxiStream; @@ -21,6 +25,9 @@ interface RequesterRequest; interface FifoIn#(DmaRequest) wrReqFifoIn; interface FifoIn#(DmaRequest) rdReqFifoIn; interface FifoOut#(ReqReqAxiStream) axiStreamFifoOut; + interface Put#(Bool) postedEn; + interface Put#(Bool) nonPostedEn; + interface Get#(Bool) isWriteDataRecvDone; endinterface interface RequesterComplete; @@ -30,17 +37,76 @@ interface RequesterComplete; endinterface module mkRequesterRequest(RequesterRequest); + StreamConcat streamConcat <- mkStreamConcat; + FIFOF#(DataStream) wrDataInFifo <- mkFIFOF; FIFOF#(DmaRequest) wrReqInFifo <- mkFIFOF; FIFOF#(DmaRequest) rdReqInFifo <- mkFIFOF; FIFOF#(ReqReqAxiStream) axiStreamOutFifo <- mkFIFOF; + Reg#(DmaMemAddr) inflightRemainBytesReg <- mkReg(0); + Reg#(Bool) isInWritingReg <- mkReg(False); + Wire#(Bool) postedEnWire <- mkDWire(False); + Wire#(Bool) nonPostedEnWire <- mkDWire(True); + + function DataStream genRQDescriptorStream(DmaRequest req, Bool isWrite); + let descriptor = PcieRequesterRequestDescriptor { + forceECRC : False, + attributes : 0, + trafficClass : 0, + requesterIdEn : False, + completerId : 0, + tag : 0, + requesterId : 0, + isPoisoned : False, + reqType : isWrite ? fromInteger(valueOf(MEM_WRITE_REQ)) : fromInteger(valueOf(MEM_READ_REQ)), + dwordCnt : truncate(req.length >> 2 + (req.length[0] | req.length[1])), + address : truncate(req.startAddr >> 2), + addrType : 2'b10 + }; + ByteEn byteEn = 1; + let stream = DataStream { + data : zeroExtend(pack(descriptor)), + byteEn : (byteEn << (valueOf(TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH)) + 1)) - 1, + isFirst : True, + isLast : False + }; + return stream; + endfunction + // TODO: RQ Logic + rule recvWriteReq if (postedEnWire); + if (!isInWritingReg) begin + let wrReq = wrReqInFifo.first; + let wrData = wrDataInFifo.first; + wrReqInFifo.deq; + wrDataInFifo.deq; + isInWritingReg <= (wrReq.length > fromInteger(valueOf(ONE_TLP_THRESH))); + end + endrule interface wrDataFifoIn = convertFifoToFifoIn(wrDataInFifo); interface wrReqFifoIn = convertFifoToFifoIn(wrReqInFifo); interface rdReqFifoIn = convertFifoToFifoIn(rdReqInFifo); interface axiStreamFifoOut = convertFifoToFifoOut(axiStreamOutFifo); + + interface Put postedEn; + method Action put(Bool postedEnable); + postedEnWire <= postedEnable; + endmethod + endinterface + + interface Put nonPostedEn; + method Action put(Bool nonPostedEnable); + nonPostedEnWire <= nonPostedEnable; + endmethod + endinterface + + interface Get isWriteDataRecvDone; + method ActionValue#(Bool) get(); + return (inflightRemainBytesReg == 0); + endmethod + endinterface endmodule module mkRequesterComplete(RequesterComplete); diff --git a/src/PcieDescriptorTypes.bsv b/src/PcieDescriptorTypes.bsv index 17660d0..2375a3b 100644 --- a/src/PcieDescriptorTypes.bsv +++ b/src/PcieDescriptorTypes.bsv @@ -56,10 +56,10 @@ typedef struct { typedef 96 DES_CC_DESCRIPTOR_WIDTH; typedef 3 DES_CMPL_STATUS_WIDTH; typedef 13 DES_CMPL_BYTE_CNT_WIDTH; -typedef 7 DES_LOWER_ADDR_WIDTH; +typedef 7 DES_CC_LOWER_ADDR_WIDTH; typedef Bit#(DES_CMPL_STATUS_WIDTH) CmplStatus; typedef Bit#(DES_CMPL_BYTE_CNT_WIDTH) CmplByteCnt; -typedef Bit#(DES_LOWER_ADDR_WIDTH) LowerAddr; +typedef Bit#(DES_CC_LOWER_ADDR_WIDTH) CCLowerAddr; typedef 0 DES_CC_STAUS_SUCCESS; typedef 1 DES_CC_STATUS_UPSUPPORT; @@ -85,9 +85,58 @@ typedef struct { CmplByteCnt byteCnt; ReserveBit6 reserve3; AddrType addrType; - LowerAddr lowerAddr; + CCLowerAddr lowerAddr; } PcieCompleterCompleteDescriptor deriving(Bits, Eq, Bounded, FShow); +typedef 128 DES_RQ_DESCRIPTOR_WIDTH; + +typedef struct { + // DW + 3 + Bool forceECRC; + Attributes attributes; + TrafficClass trafficClass; + Bool requesterIdEn; + BusDeviceFunc completerId; + Tag tag; + // DW + 2 + BusDeviceFunc requesterId; + Bool isPoisoned; + ReqType reqType; + DwordCount dwordCnt; + // DW + 1 & DW + 0 + Address address; + AddrType addrType; +} PcieRequesterRequestDescriptor deriving(Bits, Eq, Bounded, FShow); + +typedef 96 DES_RC_DESCRIPTOR_WIDTH; +typedef 4 DES_ERROR_CODE_WIDTH; +typedef 12 DES_RC_LOWER_ADDR_WIDTH; + +typedef Bit#(DES_ERROR_CODE_WIDTH) ErrorCode; +typedef Bit#(DES_RC_LOWER_ADDR_WIDTH) RCLowerAddr; + +typedef struct { + // DW + 2 + ReserveBit1 reserve0; + Attributes attributes; + TrafficClass trafficClass; + ReserveBit1 reserve1; + BusDeviceFunc completerId; + Tag tag; + // DW + 1 + BusDeviceFunc requesterId; + ReserveBit1 reserve2; + Bool isPoisoned; + CmplStatus status; + DwordCount dwordCnt; + ReserveBit1 reserve3; + Bool isRequestCompleted; + Bool isLockedReadCmpl; + CmplByteCnt byteCnt; + ErrorCode errorcode; + RCLowerAddr lowerAddr; +} PcieRequesterCompleteDescriptor deriving(Bits, Eq, Bounded, FShow); + // Pcie Tlp types of descriptor typedef 4'b0000 MEM_READ_REQ; typedef 4'b0001 MEM_WRITE_REQ; diff --git a/src/PrimUtils.bsv b/src/PrimUtils.bsv index 02b681d..66257ba 100644 --- a/src/PrimUtils.bsv +++ b/src/PrimUtils.bsv @@ -245,17 +245,27 @@ interface CounteredFIFOF#(type t); endinterface module mkCounteredFIFOF#(Integer depth)(CounteredFIFOF#(t)) provisos(Bits#(t, tSz)); + Wire#(Bool) hasDeqCall <- mkDWire(False); + Wire#(Bool) hasEnqCall <- mkDWire(False); Reg#(FifoSize) curSize <- mkReg(0); FIFOF#(t) fifo <- mkSizedFIFOF(depth); + rule updateSize; + case({pack(hasEnqCall), pack(hasDeqCall)}) + 2'b10: curSize <= curSize + 1; + 2'b01: curSize <= curSize -1; + default: curSize <= curSize; + endcase + endrule + method Action enq (t x); fifo.enq(x); - curSize <= curSize + 1; + hasEnqCall <= True; endmethod method Action deq; fifo.deq; - curSize <= curSize - 1; + hasDeqCall <= True; endmethod method t first = fifo.first; diff --git a/src/StreamUtils.bsv b/src/StreamUtils.bsv index e40addf..58e6793 100755 --- a/src/StreamUtils.bsv +++ b/src/StreamUtils.bsv @@ -99,9 +99,9 @@ function DataBytePtr convertByteEn2BytePtr (ByteEn byteEn); return ptr; endfunction -function Bool isByteEnZero(ByteEn byteEn) begin - return !unpack(remainStream.byteEn[0]); -end +function Bool isByteEnZero(ByteEn byteEn); + return !unpack(byteEn[0]); +endfunction function DataStream getEmptyStream (); return DataStream{ diff --git a/test/TestDmacVivado.bsv b/test/TestDmacVivado.bsv index 5e369df..2663ed2 100644 --- a/test/TestDmacVivado.bsv +++ b/test/TestDmacVivado.bsv @@ -22,7 +22,7 @@ module mkTestDmacCsrWrRdLoop((* reset="sys_rst" *) Reset sysRst, TestDmacWrRdLoo BRAM_Configure { memorySize : valueOf(TEST_BRAM_SIZE), loadFormat : None, - latency : 2, + latency : 1, outFIFODepth: 3, allowWriteResponseBypass : False } @@ -56,6 +56,7 @@ module mkTestDmacCsrWrRdLoop((* reset="sys_rst" *) Reset sysRst, TestDmacWrRdLoo endrule rule testReadResp; + $display("SIM INFO @ mkTestDmacCsrWrRdLoop: h2cRead resp detect!"); let value <- ram.portB.response.get; dmac.h2cRead.dataFifoIn.enq(value); endrule @@ -76,7 +77,7 @@ module mkTestDmacCsrWrRdLoopTb(TestDmacCsrWrRdLoopTb); BRAM_Configure { memorySize : valueOf(TEST_BRAM_SIZE), loadFormat : None, - latency : 2, + latency : 1, outFIFODepth: 3, allowWriteResponseBypass : False } From ac1beeccfc3fe4cb4a6d75ca3606d00567eee6f7 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Fri, 2 Aug 2024 14:23:40 +0800 Subject: [PATCH 30/53] Add dmaRequester --- img/chunkSplit.drawio.svg | 561 +++++++++++++++++++++++- img/requester.drawio.svg | 837 ++++++++++++++++++++++++++++++++++++ run_one.sh | 2 +- src/DmaCompleter.bsv | 7 +- src/DmaRequestCore.bsv | 473 +++++++++++++++++++- src/DmaRequester.bsv | 57 ++- src/DmaTypes.bsv | 18 +- src/PcieAxiStreamTypes.bsv | 6 +- src/PcieDescriptorTypes.bsv | 5 + src/PrimUtils.bsv | 442 +++++++++++++------ src/StreamUtils.bsv | 367 +++++++--------- test/TestDmaCore.bsv | 27 +- test/TestStreamUtils.bsv | 1 + 13 files changed, 2421 insertions(+), 382 deletions(-) create mode 100644 img/requester.drawio.svg diff --git a/img/chunkSplit.drawio.svg b/img/chunkSplit.drawio.svg index 44d1e49..5cc7635 100644 --- a/img/chunkSplit.drawio.svg +++ b/img/chunkSplit.drawio.svg @@ -1 +1,560 @@ -
FifoIn
FifoIn
getFirstChunkLen
getFirstChunkLen
splitFifo
splitFifo
genSplitChunks
genSplitChunks
  • dmaRequest
    • startAddr
    • length
  • firstChunkLen
dmaRequest...
FifoOut
FifoOut
  • startAddr
  • length
startAddrlength
  • startAddr
  • length
startAddrlength
  • newChunkPtrReg
  • totalLenRemainReg
  • isSplittingReg
newChunkPtrRegtotalLenRemainRe...
4KB
4KB
4KB
4KB
4KB
4KB
offset
offset
512
512...
512
512...
512
512...
firstChunk
firstChunk
512
512...
512
×
512...
512
×
512...
...
...
...
...
Text is not SVG - cannot display
\ No newline at end of file + + + + + + + + + +
+
+
+ FifoIn#(DmaRequest) +
+
+
+
+ + FifoIn#(DmaRequest) + +
+
+ + + + + + + + +
+
+
+ getFirstChunkLen +
+
+
+
+ + getFirstChunkLen + +
+
+ + + + +
+
+
+
    +
  • + startAddr +
  • +
  • + length +
  • +
+
+
+
+
+ + startAddrlength + +
+
+ + + + +
+
+
+ 4KB +
+
+
+
+ + 4KB + +
+
+ + + + +
+
+
+ 4KB +
+
+
+
+ + 4KB + +
+
+ + + + +
+
+
+ 4KB +
+
+
+
+ + 4KB + +
+
+ + + + +
+
+
+ offset +
+
+
+
+ + offset + +
+
+ + + + + +
+
+
+ 512 +
+ √ +
+
+
+
+ + 512... + +
+
+ + + + +
+
+
+ 512 +
+ √ +
+
+
+
+ + 512... + +
+
+ + + + +
+
+
+ 512 +
+ √ +
+
+
+
+ + 512... + +
+
+ + + + +
+
+
+ firstChunk +
+
+
+
+ + firstChunk + +
+
+ + + + +
+
+
+ 512 +
+ √ +
+
+
+
+ + 512... + +
+
+ + + + +
+
+
+ 512 +
+ × +
+
+
+
+ + 512... + +
+
+ + + + +
+
+
+ 512 +
+ × +
+
+
+
+ + 512... + +
+
+ + + + +
+
+
+ ... +
+
+
+
+ + ... + +
+
+ + + + +
+
+
+ ... +
+
+
+
+ + ... + +
+
+ + + + + + + +
+
+
+ FifoIn#(DataStream) +
+
+
+
+ + FifoIn#(DataStream) + +
+
+ + + + + + +
+
+
+ streamSplit +
+
+
+
+ + streamSplit + +
+
+ + + + + + + + +
+
+
+ chunkSplit +
+
+
+
+ + chunkSplit + +
+
+ + + + +
+
+
+ split the first chunk +
+
+
+
+ + split the first chunk + +
+
+ + + + +
+
+
+ split of MaxSizePayload +
+
+
+
+ + split of MaxSizePayload + +
+
+ + + + +
+
+
+ only need tag isLast +
+ MPS must be n*512 +
+
+
+
+ + only need tag isLast... + +
+
+ + + + +
+
+
+ firstChunkLen < MPS +
+
+
+
+ + firstChunkLen < MPS + +
+
+ + + + +
+
+
+ FifoOut#(DataStream) +
+
+
+
+ + FifoOut#(DataStream) + +
+
+ + + + +
+
+
+ FifoOut#(DmaRequest) +
+
+
+
+ + FifoOut#(DmaRequest) + +
+
+ + + + +
+
+
+ latency=0 +
+
+
+
+ + latency=0 + +
+
+ + + + +
+
+
+ latency=3 +
+
+
+
+ + latency=3 + +
+
+ + + + +
+
+
+ latency=0 +
+
+
+
+ + latency=0 + +
+
+ + + + +
+
+
+ latency=1 +
+
+
+
+ + latency=1 + +
+
+ + + + +
+
+
+ latency=1 +
+
+
+
+ + latency=1 + +
+
+
+ + + + + Text is not SVG - cannot display + + + +
\ No newline at end of file diff --git a/img/requester.drawio.svg b/img/requester.drawio.svg new file mode 100644 index 0000000..2100c59 --- /dev/null +++ b/img/requester.drawio.svg @@ -0,0 +1,837 @@ + + + + + + + + + +
+
+
+ DataStream +
+
+
+
+ + DataStream + +
+
+ + + + + + +
+
+
+ Request +
+
+
+
+ + Request + +
+
+ + + + + + +
+
+
+ + StreamConcat + +
+
+
+
+ + StreamConcat + +
+
+ + + + +
+
+
+ first +
+
+
+
+ + first + +
+
+ + + + +
+
+
+ second +
+
+
+
+ + second + +
+
+ + + + + + + + + + +
+
+
+ + convertToAxis + +
+
+
+
+ + convertToAxis + +
+
+ + + + +
+
+
+ RawPcieRequeser +
+
+
+
+ + RawPcieRequeser + +
+
+ + + + + +
+
+
+ req +
+
+
+
+ + req + +
+
+ + + + + +
+
+
+ data +
+
+
+
+ + data + +
+
+ + + + +
+
+
+ ChunkSplit +
+
+
+
+ + ChunkSplit + +
+
+ + + + +
+
+
+ Descriptor +
+
+
+
+ + Descriptor + +
+
+ + + + +
+
+
+ DataStream +
+
+
+
+ + DataStream + +
+
+ + + + +
+
+
+ isDone +
+
+
+
+ + isDone + +
+
+ + + + + + +
+
+
+ DataStream +
+
+
+
+ + DataStream + +
+
+ + + + + + +
+
+
+ Request +
+
+
+
+ + Request + +
+
+ + + + +
+
+
+ isDone +
+
+
+
+ + isDone + +
+
+ + + + +
+
+
+ + dsFifoInA + +
+
+
+
+ + dsFifoInA + +
+
+ + + + +
+
+
+ + dsFifoInB + +
+
+
+
+ + dsFifoInB + +
+
+ + + + +
+
+
+ + tData + +
+
+
+
+ + tData + +
+
+ + + + +
+
+
+ + tKeep + +
+
+
+
+ + tKeep + +
+
+ + + + +
+
+
+ + tUser + +
+
+
+
+ + tUser + +
+
+ + + + +
+
+
+ + tLast + +
+
+
+
+ + tLast + +
+
+ + + + +
+
+
+ straddle 1 +
+
+
+
+ + straddle 1 + +
+
+ + + + +
+
+
+ straddle 0 +
+
+
+
+ + straddle 0 + +
+
+ + + + +
+
+
+ StreamA/B last beat bytePtr > 256bit +
+
+
+
+ + StreamA/B last beat bytePtr > 256bit + +
+
+ + + + +
+
+
+ Only singe streamA/B +
+
+
+
+ + Only singe streamA/B + +
+
+ + + + +
+
+
+ StreamA +
+
+
+
+ + StreamA + +
+
+ + + + +
+
+
+ StreamB +
+
+
+
+ + StreamB + +
+
+ + + + +
+
+
+ + x + +
+
+
+
+ + x + +
+
+ + + + +
+
+
+ + x + +
+
+
+
+ + x + +
+
+ + + + +
+
+
+ StreamA/B last beat bytePtr +
+ +remainPtr <= 256bit +
+
+
+
+ + StreamA/B last beat bytePtr... + +
+
+ + + + +
+
+
+ The other stream first +
+
+
+
+ + The other stream first + +
+
+ + + + +
+
+
+ Remain Data +
+
+
+
+ + Remain Data + +
+
+ + + + +
+
+
+ StreamA/B last beat bytePtr+remainPtr > 256bit +
+
+
+
+ + StreamA/B last beat bytePt... + +
+
+ + + + +
+
+
+ Remain Data +
+
+
+
+ + Remain Data + +
+
+ + + + +
+
+
+ Remain Data +
+
+
+
+ + Remain... + +
+
+ + + + + +
+
+
+ + isSop +
+ isEop +
+
+
+
+
+
+ + isSop... + +
+
+ + + + +
+
+
+ Align +
+ Bytes +
+
+
+
+ + Align... + +
+
+ + + + + + +
+
+
+ Same as above +
+
+
+
+ + Same as above + +
+
+ + + + + + + +
+
+
+ firstBE +
+ lastBE +
+
+
+
+ + firstBE... + +
+
+ + + + +
+
+
+ AlignDescGen +
+
+
+
+ + AlignDescGen + +
+
+ + + + + + +
+
+
+ DataPipe +
+
+
+
+ + DataPipe + +
+
+ + + + +
+
+
+ + remainDs + +
+
+
+
+ + remainDs + +
+
+ + + + +
+
+
+ latency=5 +
+
+
+
+ + latency=5 + +
+
+ + + + +
+
+
+ latency=3 +
+
+
+
+ + latency=3 + +
+
+
+ + + + + Text is not SVG - cannot display + + + +
\ No newline at end of file diff --git a/run_one.sh b/run_one.sh index 7e935a8..f049c0d 100755 --- a/run_one.sh +++ b/run_one.sh @@ -13,7 +13,7 @@ TEST_LOG=run.log TEST_DIR=test cd $TEST_DIR truncate -s 0 $TEST_LOG -FILES=`ls TestDmaCompleter.bsv` +FILES=`ls TestDmaCore.bsv` ########################################################################### for FILE in $FILES; do # echo $FILE diff --git a/src/DmaCompleter.bsv b/src/DmaCompleter.bsv index eccc3ac..2b1ed04 100644 --- a/src/DmaCompleter.bsv +++ b/src/DmaCompleter.bsv @@ -8,7 +8,8 @@ import PcieTypes::*; import PcieDescriptorTypes::*; import DmaTypes::*; -typedef 1 IDEA_DWORD_CNT_OF_CSR; +typedef 1 IDEA_CQ_CSR_DWORD_CNT; +typedef 2 IDEA_CC_CSR_DWORD_CNT; typedef 4 IDEA_BYTE_CNT_OF_CSR; typedef 4 IDEA_FIRST_BE_HIGH_VALID_PTR_OF_CSR; @@ -105,7 +106,7 @@ module mkCompleterRequest(CompleterRequest); case (descriptor.reqType) fromInteger(valueOf(MEM_WRITE_REQ)): begin $display("SIM INFO @ mkCompleterRequest: MemWrite Detect!"); - if (descriptor.dwordCnt == fromInteger(valueOf(IDEA_DWORD_CNT_OF_CSR)) && isFirstBytesAllValid(sideBand)) begin + if (descriptor.dwordCnt == fromInteger(valueOf(IDEA_CQ_CSR_DWORD_CNT)) && isFirstBytesAllValid(sideBand)) begin let firstData = getDataFromFirstBeat(axiStream); DmaCsrValue wrValue = firstData[valueOf(DMA_CSR_ADDR_WIDTH)-1:0]; DmaCsrAddr wrAddr = getCsrAddrFromCqDescriptor(descriptor); @@ -164,7 +165,7 @@ module mkCompleterComplete(CompleterComplete); reserve1 : 0, isPoisoned : False, status : fromInteger(valueOf(DES_CC_STAUS_SUCCESS)), - dwordCnt : fromInteger(valueOf(IDEA_DWORD_CNT_OF_CSR)), + dwordCnt : fromInteger(valueOf(IDEA_CC_CSR_DWORD_CNT)), reserve2 : 0, isLockedReadCmpl: False, byteCnt : fromInteger(valueOf(IDEA_BYTE_CNT_OF_CSR)), diff --git a/src/DmaRequestCore.bsv b/src/DmaRequestCore.bsv index a712d09..a0baeb9 100755 --- a/src/DmaRequestCore.bsv +++ b/src/DmaRequestCore.bsv @@ -1,9 +1,13 @@ import FIFOF::*; import GetPut :: *; +import Vector::*; import SemiFifo::*; import PcieTypes::*; import DmaTypes::*; +import PcieAxiStreamTypes::*; +import StreamUtils::*; +import PcieDescriptorTypes::*; typedef 4096 BUS_BOUNDARY; @@ -18,6 +22,16 @@ typedef TAdd#(1, TLog#(DEFAULT_TLP_SIZE)) DEFAULT_TLP_SIZE_WIDTH; typedef 3 PCIE_TLP_SIZE_SETTING_WIDTH; typedef Bit#(PCIE_TLP_SIZE_SETTING_WIDTH) PcieTlpSizeSetting; +typedef TAdd#(1, TLog#(TDiv#(BUS_BOUNDARY, BYTE_EN_WIDTH))) DATA_BEATS_WIDTH; +typedef Bit#(DATA_BEATS_WIDTH) DataBeats; + +typedef PcieAxiStream#(PCIE_REQUESTER_REQUEST_TUSER_WIDTH) RqAxisStream; + +typedef Tuple2#( + DWordByteEn, + DWordByteEn +) SideBandByteEn; + typedef struct { DmaRequest dmaRequest; DmaMemAddr firstChunkLen; @@ -29,6 +43,22 @@ interface ChunkCompute; interface Put#(PcieTlpSizeSetting) setTlpMaxSize; endinterface +interface ChunkSplit; + interface FifoIn#(DataStream) dataFifoIn; + interface FifoIn#(DmaRequest) reqFifoIn; + interface FifoOut#(DataStream) chunkDataFifoOut; + interface FifoOut#(DmaRequest) chunkReqFifoOut; + interface Put#(PcieTlpSizeSetting) setTlpMaxSize; +endinterface + +interface ConvertDataStreamsToStraddleAxis; + interface FifoIn#(DataStream) dataAFifoIn; + interface FifoIn#(SideBandByteEn) byteEnAFifoIn; + interface FifoIn#(DataStream) dataBFifoIn; + interface FifoIn#(SideBandByteEn) byteEnBFifoIn; + interface FifoOut#(PcieAxiStream) axiStreamFifoOut; +endinterface + module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); FIFOF#(DmaRequest) inputFifo <- mkFIFOF; @@ -66,7 +96,7 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); }); endrule - rule execChunkSplit; + rule execChunkCompute; let splitRequest = splitFifo.first; if (isSplittingReg) begin // !isFirst if (totalLenRemainReg <= tlpMaxSize) begin @@ -118,4 +148,443 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); endmethod endinterface -endmodule \ No newline at end of file +endmodule + +// Split the single input DataStream to a list of DataStream chunks +// - Chunks cannot violate bus boundary requirement +// - Only the first and the last chunk can be shorter than MaxPayloadSize +// - Other chunks length must equal to MaxPayloadSize +// - The module may block the pipeline if one input beat is splited to two beats +module mkChunkSplit(TRXDirection direction, ChunkCompute ifc); + FIFOF#(DataStream) dataInFifo <- mkFIFOF; + FIFOF#(DmaRequest) reqInFifo <- mkFIFOF; + FIFOF#(DataStream) chunkOutFifo <- mkFIFOF; + FIFOF#(DmaRequest) reqOutFifo <- mkFIFOF; + FIFOF#(DmaRequest) firstReqPipeFifo <- mkSizedFIFOF(STREAM_SPLIT_LATENCY); + FIFOF#(DmaRequest) inputReqPipeFifo <- mkSizedFIFOF(STREAM_SPLIT_LATENCY); + + StreamSplit firstChunkSplitor <- mkStreamSplit; + + Reg#(DmaMemAddr) tlpMaxSizeReg <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE))); + Reg#(PcieTlpSizeWidth) tlpMaxSizeWidthReg <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH))); + Reg#(DataBeats) tlpMaxBeatsReg <- mkReg(fromInteger(valueOf(TDiv#(DEFAULT_TLP_SIZE, BYTE_EN_WIDTH)))); + + Reg#(Bool) isInProcReg <- mkReg(False); + Reg#(DataBeats) beatsReg <- mkReg(0); + + Reg#(DmaMemAddr) nextStartAddrReg <- mkReg(0); + Reg#(DmaMemAddr) remainLenReg <- mkReg(0); + + + function Bool hasBoundary(DmaRequest request); + let highIdx = (request.startAddr + request.length - 1) >> valueOf(BUS_BOUNDARY_WIDTH); + let lowIdx = request.startAddr >> valueOf(BUS_BOUNDARY_WIDTH); + return (highIdx > lowIdx); + endfunction + + function DmaMemAddr getOffset(DmaRequest request); + // MPS - startAddr % MPS, MPS means MRRS when the module is set to RX mode + DmaMemAddr remainderOfMps = zeroExtend(PcieTlpMaxMaxPayloadSize'(request.startAddr[tlpMaxSizeWidthReg-1:0])); + DmaMemAddr offsetOfMps = tlpMaxSizeReg - remainderOfMps; + return offsetOfMps; + endfunction + + // Pipeline stage 1, calculate the first chunkLen which may be smaller than MPS + rule getfirstChunkLen; + // If is the first beat of a new request, get firstChunkLen and pipe into the splitor + if (!isInProcReg) begin + let request = reqInFifo.first; + reqInFifo.deq; + let stream = dataInFifo.first; + dataInFifo.deq; + let offset = getOffset(request); + let firstLen = (request.length > tlpMaxSizeReg) ? tlpMaxSizeReg : request.length; + let firstChunkLen = hasBoundary(request) ? offset : firstLen; + firstChunkSplitor.splitLocationFifoIn.enq(unpack(truncate(firstChunkLen))); + let firstReq = DmaRequest { + startAddr : request.startAddr, + length : firstChunkLen + }; + firstReqPipeFifo.enq(firstReq); + firstChunkSplitor.inputStreamFifoIn.enq(stream); + inputReqPipeFifo.enq(request); + isInProcReg <= !stream.isLast; + end + // If is the remain beats of the request, continue pipe into the splitor + else begin + let stream = dataInFifo.first; + dataInFifo.deq; + firstChunkSplitor.inputStreamFifoIn.enq(stream); + isInProcReg <= !stream.isLast; + end + endrule + + // Pipeline stage 2: use StreamUtils::StreamSplit to split the input datastream to the firstChunk and the remain chunks + // In StreamUtils::StreamSplit firstChunkSplitor + + // Pipeline stage 3, set isFirst/isLast accroding to MaxPayloadSize, i.e. split the remain chunks + rule splitToMps; + let stream = firstChunkSplitor.outputStreamFifoOut.first; + firstChunkSplitor.outputStreamFifoOut.deq; + // End of a TLP, reset beatsReg and tag isLast=True + if (stream.isLast || beatsReg == tlpMaxBeatsReg) begin + stream.isLast = True; + beatsReg <= 0; + end + else begin + beatsReg <= beatsReg + 1; + end + // Start of a TLP, get Req Infos and tag isFirst=True + if (beatsReg == 0) begin + stream.isFirst = True; + // The first TLP of chunks + if (firstReqPipeFifo.notEmpty) begin + let chunkReq = firstReqPipeFifo.first; + let oriReq = inputReqPipeFifo.first; + firstReqPipeFifo.deq; + nextStartAddrReg <= oriReq.startAddr + chunkReq.length; + remainLenReg <= oriReq.length - chunkReq.length; + reqOutFifo.enq(chunkReq); + end + // The following chunks + else begin + if (remainLenReg == 0) begin + // Do nothing + end + else if (remainLenReg <= tlpMaxSizeReg) begin + nextStartAddrReg <= 0; + remainLenReg <= 0; + let chunkReq = DmaRequest { + startAddr: nextStartAddrReg, + length : remainLenReg + }; + reqOutFifo.enq(chunkReq); + end + else begin + nextStartAddrReg <= nextStartAddrReg + tlpMaxSizeReg; + remainLenReg <= remainLenReg - tlpMaxSizeReg; + let chunkReq = DmaRequest { + startAddr: nextStartAddrReg, + length : tlpMaxSizeReg + }; + reqOutFifo.enq(chunkReq); + end + end + end + chunkOutFifo.enq(stream); + endrule + + interface dataFifoIn = convertFifoToFifoIn(dataInFifo); + interface reqFifoIn = convertFifoToFifoIn(reqInFifo); + + interface chunkDataFifoOut = convertFifoToFifoOut(chunkOutFifo); + interface chunkReqFifoOut = convertFifoToFifoOut(reqOutFifo); + + interface Put setTlpMaxSize; + method Action put (PcieTlpSizeSetting tlpSizeSetting); + let setting = tlpSizeSetting; + setting[valueOf(PCIE_TLP_SIZE_SETTING_WIDTH)-1] = (direction == DMA_TX) ? 0 : setting[valueOf(PCIE_TLP_SIZE_SETTING_WIDTH)-1]; + DmaMemAddr defaultTlpMaxSize = fromInteger(valueOf(DEFAULT_TLP_SIZE)); + tlpMaxSizeReg <= DmaMemAddr'(defaultTlpMaxSize << setting); + PcieTlpSizeWidth defaultTlpMaxSizeWidth = fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH)); + tlpMaxSizeWidthReg <= PcieTlpSizeWidth'(defaultTlpMaxSizeWidth + zeroExtend(setting)); + // BeatsNum = (MaxPayloadSize + DescriptorSize) / BytesPerBeat + tlpMaxBeatsReg <= truncate(DmaMemAddr'(defaultTlpMaxSize << setting) >> valueOf(BYTE_EN_WIDTH)); + endmethod + endinterface +endmodule + +typedef 2'b00 NO_TLP_IN_THIS_BEAT; +typedef 2'b01 SINGLE_TLP_IN_THIS_BEAT; +typedef 2'b11 TWO_TLP_IN_THIS_BEAT; + +typedef 3 BYTEEN_INFIFO_DEPTH; + +// Convert 2 DataStream input to 1 PcieAxiStream output +// - The axistream is in straddle mode which means tKeep and tLast are ignored +// - The core use isSop and isEop to location Tlp and allow 2 Tlp in one beat +// - The input dataStream should be added Descriptor and aligned to DW already +module mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); + FIFOF#(DataStream) dataAInFifo <- mkFIFOF; + FIFOF#(SideBandByteEn) byteEnAFifo <- mkSizedFIFOF(BYTEEN_INFIFO_DEPTH); + FIFOF#(DataStream) dataBInFifo <- mkFIFOF; + FIFOF#(SideBandByteEn) byteEnBFifo <- mkSizedFIFOF(BYTEEN_INFIFO_DEPTH); + + FIFOF#(DataBytePtr) dataPrepareAFifo <- mkFIFOF; + FIFOF#(DataBytePtr) dataPrepareBFifo <- mkFIFOF; + + FIFOF#(PcieAxiStream) axiStreamOutFifo <- mkFIFOF; + + Reg#(StreamWithPtr) remainStreamAWpReg <- mkRegU; + Reg#(StreamWithPtr) remainStreamBWpReg <- mkRegU; + + StreamConcat streamAconcater <- mkStreamConcat; + StreamConcat streamBconcater <- mkStreamConcat; + + Reg#(Bool) isInStreamAReg <- mkReg(False); + Reg#(Bool) isInStreamBReg <- mkReg(False); + Reg#(Bool) hasStreamARemainReg <- mkReg(False); + Reg#(Bool) hasStreamBRemainReg <- mkReg(False); + Reg#(Bool) hasLastStreamARemainReg <- mkReg(False); + Reg#(Bool) hasLastStreamBRemainReg <- mkReg(False); + + function PcieRequsterRequestSideBandFrame genRQSideBand( + PcieTlpCtlIsEopCommon isEop, PcieTlpCtlIsSopCommon isSop, SideBandByteEn byteEnA, SideBandByteEn byteEnB + ); + let {firstByteEnA, lastByteEnA} = byteEnA; + let {firstByteEnB, lastByteEnB} = byteEnB; + let sideBand = PcieRequsterRequestSideBandFrame { + // Do not use parity check in the core + parity : 0, + // Do not support progress track + seqNum1 : 0, + seqNum0 : 0, + //TODO: Do not support Transaction Processing Hint now, maybe we need TPH for better performance + tphSteeringTag : 0, + tphIndirectTagEn : 0, + tphType : 0, + tphPresent : 0, + // Do not support discontinue + discontinue : False, + // Indicates end of the tlp + isEop : isEop, + // Indicates starts of a new tlp + isSop : isSop, + // Disable when use DWord-aligned Mode + addrOffset : 0, + // Indicates byte enable in the first/last DWord + lastByteEn : {pack(lastByteEnB), pack(lastByteEnA)}, + firstByteEn : {pack(firstByteEnB), pack(firstByteEnA)} + }; + return sideBand; + endfunction + + // Pipeline stage 1: get the byte pointer of each stream + rule prepareBytePtr; + if (dataInAFifo.notEmpty && dataPrepareAFifo.notFull) begin + let stream = dataInAFifo.first; + dataInAFifo.deq; + let bytePtr = convertByteEn2BytePtr(stream.byteEn); + dataPrepareAFifo.enq(StreamWithPtr { + stream : stream, + bytePtr: bytePtr + }); + end + if (dataInBFifo.notEmpty && dataPrepareBFifo.notFull) begin + let stream = dataInBFifo.first; + dataInAFifo.deq; + let bytePtr = convertByteEn2BytePtr(stream.byteEn); + dataPrepareBFifo.enq(StreamWithPtr { + stream : stream, + bytePtr: bytePtr + }); + end + endrule + + // Pipeline Stage 2: concat the stream with its remain data (if exist) + rule genStraddlePcie; + let straddleWpA = getEmptyStreamWithPtr; + let straddleWpB = getEmptyStreamWithPtr; + Data straddleData = 0; + let isSop = PcieTlpCtlIsSopCommon { + isSopPtrs : replicate(0), + isSop : 0 + }; + let isEop = PcieTlpCtlIsEopCommon { + isEopPtrs : replicate(0), + isEop : 0 + }; + // This cycle isInStreamA, only transfer StreamA or StreamA + StreamB + if (isInStreamAReg) begin + // First: get the whole streamA data to transfer to the PCIe bus in this cycle + if (hasStreamARemainReg && hasLastStreamARemainReg) begin + straddleWpA = remainStreamAWpReg; + isInStreamAReg <= False; + hasStreamARemainReg <= False; + end + else if (hasStreamARemainReg) begin + let {concatStreamWpA, remainStreamWpA} = getConcatStream(remainStreamAWpReg, dataPrepareAFifo.first); + dataPrepareAFifo.deq; + if (isByteEnZero(remainStreamWpA.stream.byteEn)) begin + isInStreamAReg <= False; + hasStreamARemainReg <= False; + end + else begin + isInStreamAReg <= True; + hasStreamARemainReg <= True; + end + straddleWpA = concatStreamWpA; + remainStreamAWpReg <= remainStreamWpA; + hasLastStreamARemainReg <= dataPrepareAFifo.first.stream.isLast; + end + else begin + straddleWpA = dataPrepareAFifo.first; + dataPrepareAFifo.deq; + end + if (dataPrepareBFifo.notEmpty) begin + straddleWpB = dataPrepareBFifo.first; + end + // Second: generate straddle data + straddleData = straddleWpA.stream.data; + if (straddleWpA.stream.isLast) begin + isEop.isEop = fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT)); + isEop.isEopPtrs[0] = convertByteEn2DwordPtr(straddleWpA.stream.byteEn); + end + // only can contains straddleA + if (straddleWpA.bytePtr > fromInteger(valueOf(STRADDLE_THRESH_WIDTH))) begin + + end + // transfer straddleA and straddleB at the same time + else begin + if (straddleWpB.bytePtr > 0) begin + + end + else begin + + end + end + end + // This cycle isInStreamB, only transfer StreamB or StreamB + StreamA + else if (isInStreamBReg) begin + // get the whole streamB data to transfer to the PCIe bus in this cycle + if (hasStreamBRemainReg && hasLastStreamBRemainReg) begin + straddleWpB = remainStreamBWpReg; + isInStreamBReg <= False; + hasStreamBRemainReg <= False; + end + else if (hasStreamBRemainReg) begin + dataPrepareBFifo.deq; + let {concatStreamWpB, remainStreamWpB} = getConcatStream(remainStreamBWpReg, dataPrepareBFifo.first); + if (isByteEnZero(remainStreamWpB.stream.byteEn)) begin + isInStreamBReg <= False; + hasStreamBRemainReg <= False; + end + else begin + isInStreamBReg <= True; + hasStreamBRemainReg <= True; + end + straddleWpB = concatStreamWpB; + remainStreamBWpReg <= remainStreamWpB; + hasLastStreamBRemainReg <= dataPrepareBFifo.first.stream.isLast; + end + else begin + straddleWpB = dataPrepareBFifo.first; + dataPrepareBFifo.deq; + end + if (dataPrepareAFifo.notEmpty) begin + straddleWpA = dataPrepareAFifo.first; + end + end + // This cycle is idle + else begin + if (dataPrepareAFifo.notEmpty) begin + straddleWpA = dataPrepareAFifo.first; + dataPrepareAFifo.deq; + end + if (dataPrepareBFifo.notEmpty) begin + straddleWpB = dataPrepareBFifo.first; + dataPrepareBFifo.deq; + end + end + + endrule + + + + interface dataAFifoIn = convertFifoToFifoIn(dataInAFifo); + interface reqAFifoIn = convertFifoToFifoIn(reqInAFifo); + interface dataBFifoIn = convertFifoToFifoIn(dataInBFifo); + interface reqBFifoIn = convertFifoToFifoIn(reqInBFifo); + +endmodule + +interface AlignedDescGen; + interface FifoIn#(DmaRequest) reqFifoIn; + interface FifoOut#(DataStream) dataFifoOut; + interface FifoOut#(SideBandByteEn) byteEnFifoOut; +endinterface + +typedef Tuple5#( + DmaRequest , + ByteModDWord, + ByteModDWord, + DataBytePtr , + DmaMemAddr + ) AlignedDescGenPipeTuple; + +// Descriptor is 4DW aligned while the input datastream may be not +// This module will add 0~3 Bytes Dummy Data in the end of DescStream to make sure concat(desc, data) is aligned +module mkAlignedRqDescGen(Bool isWrite, AlignedDescGen ifc); + FIFOF#(DmaRequest) reqInFifo <- mkFIFOF; + FIFOF#(DataStream) dataOutFifo <- mkFIFOF; + FIFOF#(SideBandByteEn) byteEnOutFifo <- mkFIFOF; + + FIFOF#(AlignedDescGenPipeTuple) pipelineFifo <- mkFIFOF; + + function DwordCount getDWordCount(DmaMemAddr startAddr, DmaMemAddr endAddr); + let endOffset = byteModDWord(endAddr); + DwordCount dwCnt = (endAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)) - (startAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)); + return (endOffset == 0) ? dwCnt : dwCnt + 1; + endfunction + + // Pipeline Stage 1: calculate endAddress, first/lastBytePtr and aligned BytePtr + rule getAlignedPtr; + let request = reqInFifo.first; + reqInFifo.deq; + immAssert( + (request.length <= fromInteger(valueOf(BUS_BOUNDARY))), + "Request Check @ mkAlignedRqDescGen", + fshow(request) + ); + DmaMemAddr endAddress = request.startAddr + length - 1; + // firstOffset values from {0, 1, 2, 3} + ByteModDWord firstOffset = byteModDWord(request.startAddr); + ByteModDWord lastOffset = byteModDWord(endAddress); + ByteModDWord alignOffset = ~firstOffset + 1; + DataBytePtr bytePtr = fromInteger(valueOf(TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH))) + zeroExtend(alignOffset); + pipelineFifo.enq(tuple5( + request, + firstOffset, + lastOffset, + bytePtr, + endAddress) + ); + endrule + + // Pipeline Stage 2: generate Descriptor and the dataStream + rule genDescriptor; + let {request, firstBytePtr, lastBytePtr, bytePtr, endAddress} = pipelineFifo.first; + pipelineFifo.deq; + let firstByteEn = convertDWordOffset2FirstByteEn(firstOffset); + let lastByteEn = convertDWordOffset2LastByteEn(lastOffset); + let dwordCnt = getDWordCount(request.startAddr, endAddress); + lastByteEn = (request.startAddr == endAddress) ? 0 : lastByteEn; + let byteEn = convertBytePtr2ByteEn(bytePtr); + let descriptor = PcieRequesterRequestDescriptor { + forceECRC : False, + attributes : 0, + trafficClass : 0, + requesterIdEn : False, + completerId : 0, + tag : 0, + requesterId : 0, + isPoisoned : False, + reqType : isWrite ? fromInteger(valueOf(MEM_WRITE_REQ)) : fromInteger(valueOf(MEM_READ_REQ)), + dwordCnt : dwordCnt, + address : truncate(request.startAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)), + addrType : fromInteger(valueOf(TRANSLATED_ADDR)) + }; + let stream = DataStream { + data : zeroExtend(pack(descriptor)), + byteEn : byteEn, + isFirst : True, + isLast : True + }; + dataOutFifo.enq(stream); + byteEnOutFifo.enq(tuple2(firstByteEn, lastByteEn)); + endrule + + interface reqFifoIn = convertFifoToFifoIn(reqInFifo); + interface dataFifoOut = convertFifoToFifoOut(dataOutFifo); + interface byteEnFifoOut = convertFifoToFifoOut(byteEnOutFifo); +endmodule diff --git a/src/DmaRequester.bsv b/src/DmaRequester.bsv index 10c75a3..c7b799d 100644 --- a/src/DmaRequester.bsv +++ b/src/DmaRequester.bsv @@ -37,8 +37,6 @@ interface RequesterComplete; endinterface module mkRequesterRequest(RequesterRequest); - StreamConcat streamConcat <- mkStreamConcat; - FIFOF#(DataStream) wrDataInFifo <- mkFIFOF; FIFOF#(DmaRequest) wrReqInFifo <- mkFIFOF; FIFOF#(DmaRequest) rdReqInFifo <- mkFIFOF; @@ -49,39 +47,32 @@ module mkRequesterRequest(RequesterRequest); Wire#(Bool) postedEnWire <- mkDWire(False); Wire#(Bool) nonPostedEnWire <- mkDWire(True); - function DataStream genRQDescriptorStream(DmaRequest req, Bool isWrite); - let descriptor = PcieRequesterRequestDescriptor { - forceECRC : False, - attributes : 0, - trafficClass : 0, - requesterIdEn : False, - completerId : 0, - tag : 0, - requesterId : 0, - isPoisoned : False, - reqType : isWrite ? fromInteger(valueOf(MEM_WRITE_REQ)) : fromInteger(valueOf(MEM_READ_REQ)), - dwordCnt : truncate(req.length >> 2 + (req.length[0] | req.length[1])), - address : truncate(req.startAddr >> 2), - addrType : 2'b10 - }; - ByteEn byteEn = 1; - let stream = DataStream { - data : zeroExtend(pack(descriptor)), - byteEn : (byteEn << (valueOf(TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH)) + 1)) - 1, - isFirst : True, - isLast : False - }; - return stream; - endfunction - - // TODO: RQ Logic - rule recvWriteReq if (postedEnWire); - if (!isInWritingReg) begin - let wrReq = wrReqInFifo.first; - let wrData = wrDataInFifo.first; + ChunkSplit chunkSplit <- mkChunkSplit; + AlignedDescGen rqDescGenarator <- mkAlignedRqDescGen; + + // Pipeline stage 1: split the whole write request to chunks, latency = 3 + rule recvWriting if (postedEnWire); + if (wrReqInFifo.notEmpty && chunkSplit.dataFifoIn.notFull) begin wrReqInFifo.deq; + chunkSplit.reqFifoIn.enq(wrReqInFifo.first); + end + if (wrDataInFifo.notEmpty && chunkSplit.reqFifoIn.notFull) begin wrDataInFifo.deq; - isInWritingReg <= (wrReq.length > fromInteger(valueOf(ONE_TLP_THRESH))); + chunkSplit.dataFifoIn.enq(wrDataInFifo.first); + end + endrule + + // Pipeline stage 2: generate the RQ descriptor, which may be with 0~3 Byte invalid data for DW alignment, latency = 2 + rule addDescriptor; + if (chunkSplit.chunkReqFifoOut.notEmpty) begin + let chunkReq = chunkSplit.chunkReqFifoOut.first; + chunkSplit.chunkReqFifoOut.deq; + rqDescGenarator.reqFifoIn.enq(chunkReq); + end + if (chunkSplit.chunkDataFifoOut.notEmpty) begin + let chunkDataStream = chunkSplit.chunkDataFifoOut.first; + chunkSplit.chunkDataFifoOut.deq; + descriptorConcat.inputStreamSecondFifoIn.enq(chunkDataStream); end endrule diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index 10a0404..2677774 100755 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -5,6 +5,8 @@ import PcieTypes::*; import PcieAxiStreamTypes::*; typedef PCIE_AXIS_DATA_WIDTH DATA_WIDTH; +typedef TDiv#(DATA_WIDTH, 2) STRADDLE_THRESH_WIDTH; + typedef 64 DMA_MEM_ADDR_WIDTH; typedef 32 DMA_CSR_ADDR_WIDTH; @@ -14,9 +16,8 @@ typedef Bit#(DMA_MEM_ADDR_WIDTH) DmaMemAddr; typedef Bit#(DMA_CSR_ADDR_WIDTH) DmaCsrAddr; typedef Bit#(DMA_CSR_DATA_WIDTH) DmaCsrValue; -typedef 8 BYTE_WIDTH; typedef TLog#(BYTE_WIDTH) BYTE_WIDTH_WIDTH; -typedef TMul#(4, BYTE_WIDTH) DWORD_WIDTH; +typedef 2 BYTE_DWORD_SHIFT_WIDTH; typedef Bit#(BYTE_WIDTH) Byte; typedef Bit#(DWORD_WIDTH) DWord; @@ -24,12 +25,19 @@ typedef Bit#(1) ByteParity; typedef 2 CONCAT_STREAM_NUM; -typedef TDiv#(DATA_WIDTH, BYTE_WIDTH) BYTE_EN_WIDTH; +typedef TDiv#(DATA_WIDTH, BYTE_WIDTH) BYTE_EN_WIDTH; +typedef TDiv#(DATA_WIDTH, DWORD_WIDTH) DWORD_EN_WIDTH; typedef Bit#(DATA_WIDTH) Data; typedef Bit#(BYTE_EN_WIDTH) ByteEn; -typedef Bit#(TAdd#(1, TLog#(DATA_WIDTH))) DataBitPtr; -typedef Bit#(TAdd#(1, TLog#(BYTE_EN_WIDTH))) DataBytePtr; +typedef Bit#(DWORD_BYTES) DWordByteEn; + +typedef Bit#(TAdd#(1, TLog#(DATA_WIDTH))) DataBitPtr; +typedef Bit#(TAdd#(1, TLog#(BYTE_EN_WIDTH))) DataBytePtr; +typedef Bit#(TAdd#(1, TLog#(DWORD_EN_WIDTH))) DataDwordPtr; + +typedef Bit#(TAdd#(1, TLog#(DWORD_BYTES))) DWordBytePtr; +typedef Bit#(BYTE_DWORD_SHIFT_WIDTH) ByteModDWord; typedef struct { DmaMemAddr startAddr; diff --git a/src/PcieAxiStreamTypes.bsv b/src/PcieAxiStreamTypes.bsv index b133c78..bc833bc 100644 --- a/src/PcieAxiStreamTypes.bsv +++ b/src/PcieAxiStreamTypes.bsv @@ -6,8 +6,10 @@ import BusConversion :: *; import SemiFifo :: *; typedef 8 BYTE_WIDTH; -typedef TMul#(2, BYTE_WIDTH) WORD_WIDTH; -typedef TMul#(4, BYTE_WIDTH) DWORD_WIDTH; +typedef 2 WORD_BYTES; +typedef 4 DWORD_BYTES; +typedef TMul#(WORD_BYTES, BYTE_WIDTH) WORD_WIDTH; +typedef TMul#(DWORD_BYTES, BYTE_WIDTH) DWORD_WIDTH; typedef 512 PCIE_AXIS_DATA_WIDTH; typedef TDiv#(PCIE_AXIS_DATA_WIDTH, DWORD_WIDTH) PCIE_AXIS_KEEP_WIDTH; diff --git a/src/PcieDescriptorTypes.bsv b/src/PcieDescriptorTypes.bsv index 2375a3b..2513002 100644 --- a/src/PcieDescriptorTypes.bsv +++ b/src/PcieDescriptorTypes.bsv @@ -149,3 +149,8 @@ typedef 4'b0111 LOCK_READ_REQ; // allowed only in legacy devices typedef 4'b1100 COMMON_MESG; typedef 4'b1101 VENDOR_DEF_MESG; typedef 4'b1110 ATS_MESG; + +// Pcie Addr Types +typedef 2'b00 UNTRANSLATED_ADDR; +typedef 2'b01 TRANSLATION_REQ; +typedef 2'b10 TRANSLATED_ADDR; \ No newline at end of file diff --git a/src/PrimUtils.bsv b/src/PrimUtils.bsv index 66257ba..30a5810 100644 --- a/src/PrimUtils.bsv +++ b/src/PrimUtils.bsv @@ -18,144 +18,319 @@ function Action immAssert(Bool condition, String assertName, Fmt assertFmtMsg); endaction endfunction -function Data getDataLowBytes(Data data, DataBytePtr ptr); - Data temp = 0; - case(ptr) - 1 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*1 -1:0])); - 2 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*2 -1:0])); - 3 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*3 -1:0])); - 4 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*4 -1:0])); - 5 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*5 -1:0])); - 6 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*6 -1:0])); - 7 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*7 -1:0])); - 8 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*8 -1:0])); - 9 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*9 -1:0])); - 10: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*10-1:0])); - 11: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*11-1:0])); - 12: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*12-1:0])); - 13: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*13-1:0])); - 14: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*14-1:0])); - 15: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*15-1:0])); - 16: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*16-1:0])); - 17: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*17-1:0])); - 18: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*18-1:0])); - 19: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*19-1:0])); - 20: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*20-1:0])); - 21: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*21-1:0])); - 22: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*22-1:0])); - 23: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*23-1:0])); - 24: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*24-1:0])); - 25: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*25-1:0])); - 26: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*26-1:0])); - 27: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*27-1:0])); - 28: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*28-1:0])); - 29: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*29-1:0])); - 30: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*30-1:0])); - 31: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*31-1:0])); - 32: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*32-1:0])); - 33: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*33-1:0])); - 34: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*34-1:0])); - 35: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*35-1:0])); - 36: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*36-1:0])); - 37: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*37-1:0])); - 38: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*38-1:0])); - 39: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*39-1:0])); - 40: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*40-1:0])); - 41: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*41-1:0])); - 42: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*42-1:0])); - 43: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*43-1:0])); - 44: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*44-1:0])); - 45: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*45-1:0])); - 46: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*46-1:0])); - 47: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*47-1:0])); - 48: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*48-1:0])); - 49: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*49-1:0])); - 50: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*50-1:0])); - 51: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*51-1:0])); - 52: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*52-1:0])); - 53: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*53-1:0])); - 54: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*54-1:0])); - 55: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*55-1:0])); - 56: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*56-1:0])); - 57: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*57-1:0])); - 58: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*58-1:0])); - 59: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*59-1:0])); - 60: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*60-1:0])); - 61: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*61-1:0])); - 62: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*62-1:0])); - 63: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*63-1:0])); - default: temp = 0; +function ByteModDWord byteModDWord(Bit#(tSz) bytes) provisos(Add#(_a, BYTE_DWORD_SHIFT_WIDTH, tSz)); + return truncate(bytes); +endfunction + +function DataBytePtr convertByteEn2BytePtr (ByteEn byteEn); + DataBytePtr ptr = 0; + case(byteEn) + 'h0000000000000001: ptr = 1; + 'h0000000000000003: ptr = 2; + 'h0000000000000007: ptr = 3; + 'h000000000000000F: ptr = 4; + 'h000000000000001F: ptr = 5; + 'h000000000000003F: ptr = 6; + 'h000000000000007F: ptr = 7; + 'h00000000000000FF: ptr = 8; + 'h00000000000001FF: ptr = 9; + 'h00000000000003FF: ptr = 10; + 'h00000000000007FF: ptr = 11; + 'h0000000000000FFF: ptr = 12; + 'h0000000000001FFF: ptr = 13; + 'h0000000000003FFF: ptr = 14; + 'h0000000000007FFF: ptr = 15; + 'h000000000000FFFF: ptr = 16; + 'h000000000001FFFF: ptr = 17; + 'h000000000003FFFF: ptr = 18; + 'h000000000007FFFF: ptr = 19; + 'h00000000000FFFFF: ptr = 20; + 'h00000000001FFFFF: ptr = 21; + 'h00000000003FFFFF: ptr = 22; + 'h00000000007FFFFF: ptr = 23; + 'h0000000000FFFFFF: ptr = 24; + 'h0000000001FFFFFF: ptr = 25; + 'h0000000003FFFFFF: ptr = 26; + 'h0000000007FFFFFF: ptr = 27; + 'h000000000FFFFFFF: ptr = 28; + 'h000000001FFFFFFF: ptr = 29; + 'h000000003FFFFFFF: ptr = 30; + 'h000000007FFFFFFF: ptr = 31; + 'h00000000FFFFFFFF: ptr = 32; + 'h00000001FFFFFFFF: ptr = 33; + 'h00000003FFFFFFFF: ptr = 34; + 'h00000007FFFFFFFF: ptr = 35; + 'h0000000FFFFFFFFF: ptr = 36; + 'h0000001FFFFFFFFF: ptr = 37; + 'h0000003FFFFFFFFF: ptr = 38; + 'h0000007FFFFFFFFF: ptr = 39; + 'h000000FFFFFFFFFF: ptr = 40; + 'h000001FFFFFFFFFF: ptr = 41; + 'h000003FFFFFFFFFF: ptr = 42; + 'h000007FFFFFFFFFF: ptr = 43; + 'h00000FFFFFFFFFFF: ptr = 44; + 'h00001FFFFFFFFFFF: ptr = 45; + 'h00003FFFFFFFFFFF: ptr = 46; + 'h00007FFFFFFFFFFF: ptr = 47; + 'h0000FFFFFFFFFFFF: ptr = 48; + 'h0001FFFFFFFFFFFF: ptr = 49; + 'h0003FFFFFFFFFFFF: ptr = 50; + 'h0007FFFFFFFFFFFF: ptr = 51; + 'h000FFFFFFFFFFFFF: ptr = 52; + 'h001FFFFFFFFFFFFF: ptr = 53; + 'h003FFFFFFFFFFFFF: ptr = 54; + 'h007FFFFFFFFFFFFF: ptr = 55; + 'h00FFFFFFFFFFFFFF: ptr = 56; + 'h01FFFFFFFFFFFFFF: ptr = 57; + 'h03FFFFFFFFFFFFFF: ptr = 58; + 'h07FFFFFFFFFFFFFF: ptr = 59; + 'h0FFFFFFFFFFFFFFF: ptr = 60; + 'h1FFFFFFFFFFFFFFF: ptr = 61; + 'h3FFFFFFFFFFFFFFF: ptr = 62; + 'h7FFFFFFFFFFFFFFF: ptr = 63; + 'hFFFFFFFFFFFFFFFF: ptr = 64; + default : ptr = 0; endcase - return temp; + return ptr; endfunction -function Data getDataHighBytes(Data data, DataBytePtr ptr); +function ByteEn convertBytePtr2ByteEn (DataBytePtr bytePtr); + ByteEn byteEn = 0; + case(bytePtr) + 1 : byteEn = 'h0000000000000001; + 2 : byteEn = 'h0000000000000003; + 3 : byteEn = 'h0000000000000007; + 4 : byteEn = 'h000000000000000F; + 5 : byteEn = 'h000000000000001F; + 6 : byteEn = 'h000000000000003F; + 7 : byteEn = 'h000000000000007F; + 8 : byteEn = 'h00000000000000FF; + 9 : byteEn = 'h00000000000001FF; + 10 : byteEn = 'h00000000000003FF; + 11 : byteEn = 'h00000000000007FF; + 12 : byteEn = 'h0000000000000FFF; + 13 : byteEn = 'h0000000000001FFF; + 14 : byteEn = 'h0000000000003FFF; + 15 : byteEn = 'h0000000000007FFF; + 16 : byteEn = 'h000000000000FFFF; + 17 : byteEn = 'h000000000001FFFF; + 18 : byteEn = 'h000000000003FFFF; + 19 : byteEn = 'h000000000007FFFF; + 20 : byteEn = 'h00000000000FFFFF; + 21 : byteEn = 'h00000000001FFFFF; + 22 : byteEn = 'h00000000003FFFFF; + 23 : byteEn = 'h00000000007FFFFF; + 24 : byteEn = 'h0000000000FFFFFF; + 25 : byteEn = 'h0000000001FFFFFF; + 26 : byteEn = 'h0000000003FFFFFF; + 27 : byteEn = 'h0000000007FFFFFF; + 28 : byteEn = 'h000000000FFFFFFF; + 29 : byteEn = 'h000000001FFFFFFF; + 30 : byteEn = 'h000000003FFFFFFF; + 31 : byteEn = 'h000000007FFFFFFF; + 32 : byteEn = 'h00000000FFFFFFFF; + 33 : byteEn = 'h00000001FFFFFFFF; + 34 : byteEn = 'h00000003FFFFFFFF; + 35 : byteEn = 'h00000007FFFFFFFF; + 36 : byteEn = 'h0000000FFFFFFFFF; + 37 : byteEn = 'h0000001FFFFFFFFF; + 38 : byteEn = 'h0000003FFFFFFFFF; + 39 : byteEn = 'h0000007FFFFFFFFF; + 40 : byteEn = 'h000000FFFFFFFFFF; + 41 : byteEn = 'h000001FFFFFFFFFF; + 42 : byteEn = 'h000003FFFFFFFFFF; + 43 : byteEn = 'h000007FFFFFFFFFF; + 44 : byteEn = 'h00000FFFFFFFFFFF; + 45 : byteEn = 'h00001FFFFFFFFFFF; + 46 : byteEn = 'h00003FFFFFFFFFFF; + 47 : byteEn = 'h00007FFFFFFFFFFF; + 48 : byteEn = 'h0000FFFFFFFFFFFF; + 49 : byteEn = 'h0001FFFFFFFFFFFF; + 50 : byteEn = 'h0003FFFFFFFFFFFF; + 51 : byteEn = 'h0007FFFFFFFFFFFF; + 52 : byteEn = 'h000FFFFFFFFFFFFF; + 53 : byteEn = 'h001FFFFFFFFFFFFF; + 54 : byteEn = 'h003FFFFFFFFFFFFF; + 55 : byteEn = 'h007FFFFFFFFFFFFF; + 56 : byteEn = 'h00FFFFFFFFFFFFFF; + 57 : byteEn = 'h01FFFFFFFFFFFFFF; + 58 : byteEn = 'h03FFFFFFFFFFFFFF; + 59 : byteEn = 'h07FFFFFFFFFFFFFF; + 60 : byteEn = 'h0FFFFFFFFFFFFFFF; + 61 : byteEn = 'h1FFFFFFFFFFFFFFF; + 62 : byteEn = 'h3FFFFFFFFFFFFFFF; + 63 : byteEn = 'h7FFFFFFFFFFFFFFF; + 64 : byteEn = 'hFFFFFFFFFFFFFFFF; + default : byteEn = 0; + endcase + return byteEn; +endfunction + +function DWordByteEn convertDWordOffset2FirstByteEn (ByteModDWord dwOffset); + DWordByteEn dwByteEn = 0; + case(dwOffset) + 0: dwByteEn = 'b1111; + 1: dwByteEn = 'b1110; + 2: dwByteEn = 'b1100; + 3: dwByteEn = 'b1000; + default: dwByteEn = 'b0000; + endcase + return dwByteEn; +endfunction + +function DWordByteEn convertDWordOffset2LastByteEn (ByteModDWord dwOffset); + DWordByteEn dwByteEn = 0; + case(dwOffset) + 0: dwByteEn = 'b1111; + 1: dwByteEn = 'b0001; + 2: dwByteEn = 'b0011; + 3: dwByteEn = 'b0111; + default: dwByteEn = 'b0000; + endcase + return dwByteEn; +endfunction + +// DWordPtr strarts from 0 not 1 to align to PcieTlpIsEop +function DataDwordPtr convertByteEn2DwordPtr (ByteEn byteEn); + DataDwordPtr ptr = 0; + case(byteEn) + 'h0000000000000001: ptr = 0; + 'h0000000000000003: ptr = 0; + 'h0000000000000007: ptr = 0; + 'h000000000000000F: ptr = 0; + 'h000000000000001F: ptr = 1; + 'h000000000000003F: ptr = 1; + 'h000000000000007F: ptr = 1; + 'h00000000000000FF: ptr = 1; + 'h00000000000001FF: ptr = 2; + 'h00000000000003FF: ptr = 2; + 'h00000000000007FF: ptr = 2; + 'h0000000000000FFF: ptr = 2; + 'h0000000000001FFF: ptr = 3; + 'h0000000000003FFF: ptr = 3; + 'h0000000000007FFF: ptr = 3; + 'h000000000000FFFF: ptr = 3; + 'h000000000001FFFF: ptr = 4; + 'h000000000003FFFF: ptr = 4; + 'h000000000007FFFF: ptr = 4; + 'h00000000000FFFFF: ptr = 4; + 'h00000000001FFFFF: ptr = 5; + 'h00000000003FFFFF: ptr = 5; + 'h00000000007FFFFF: ptr = 5; + 'h0000000000FFFFFF: ptr = 5; + 'h0000000001FFFFFF: ptr = 6; + 'h0000000003FFFFFF: ptr = 6; + 'h0000000007FFFFFF: ptr = 6; + 'h000000000FFFFFFF: ptr = 6; + 'h000000001FFFFFFF: ptr = 7; + 'h000000003FFFFFFF: ptr = 7; + 'h000000007FFFFFFF: ptr = 7; + 'h00000000FFFFFFFF: ptr = 7; + 'h00000001FFFFFFFF: ptr = 8; + 'h00000003FFFFFFFF: ptr = 8; + 'h00000007FFFFFFFF: ptr = 8; + 'h0000000FFFFFFFFF: ptr = 8; + 'h0000001FFFFFFFFF: ptr = 9; + 'h0000003FFFFFFFFF: ptr = 9; + 'h0000007FFFFFFFFF: ptr = 9; + 'h000000FFFFFFFFFF: ptr = 9; + 'h000001FFFFFFFFFF: ptr = 10; + 'h000003FFFFFFFFFF: ptr = 10; + 'h000007FFFFFFFFFF: ptr = 10; + 'h00000FFFFFFFFFFF: ptr = 10; + 'h00001FFFFFFFFFFF: ptr = 11; + 'h00003FFFFFFFFFFF: ptr = 11; + 'h00007FFFFFFFFFFF: ptr = 11; + 'h0000FFFFFFFFFFFF: ptr = 11; + 'h0001FFFFFFFFFFFF: ptr = 12; + 'h0003FFFFFFFFFFFF: ptr = 12; + 'h0007FFFFFFFFFFFF: ptr = 12; + 'h000FFFFFFFFFFFFF: ptr = 12; + 'h001FFFFFFFFFFFFF: ptr = 13; + 'h003FFFFFFFFFFFFF: ptr = 13; + 'h007FFFFFFFFFFFFF: ptr = 13; + 'h00FFFFFFFFFFFFFF: ptr = 13; + 'h01FFFFFFFFFFFFFF: ptr = 14; + 'h03FFFFFFFFFFFFFF: ptr = 14; + 'h07FFFFFFFFFFFFFF: ptr = 14; + 'h0FFFFFFFFFFFFFFF: ptr = 14; + 'h1FFFFFFFFFFFFFFF: ptr = 15; + 'h3FFFFFFFFFFFFFFF: ptr = 15; + 'h7FFFFFFFFFFFFFFF: ptr = 15; + 'hFFFFFFFFFFFFFFFF: ptr = 15; + default : ptr = 0; + endcase + return ptr; +endfunction + +function Data getDataLowBytes(Data data, DataBytePtr ptr); Data temp = 0; case(ptr) - 1 : temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*1 ])); - 2 : temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*2 ])); - 3 : temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*3 ])); - 4 : temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*4 ])); - 5 : temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*5 ])); - 6 : temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*6 ])); - 7 : temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*7 ])); - 8 : temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*8 ])); - 9 : temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*9 ])); - 10: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*10])); - 11: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*11])); - 12: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*12])); - 13: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*13])); - 14: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*14])); - 15: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*15])); - 16: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*16])); - 17: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*17])); - 18: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*18])); - 19: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*19])); - 20: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*20])); - 21: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*21])); - 22: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*22])); - 23: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*23])); - 24: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*24])); - 25: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*25])); - 26: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*26])); - 27: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*27])); - 28: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*28])); - 29: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*29])); - 30: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*30])); - 31: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*31])); - 32: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*32])); - 33: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*33])); - 34: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*34])); - 35: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*35])); - 36: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*36])); - 37: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*37])); - 38: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*38])); - 39: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*39])); - 40: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*40])); - 41: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*41])); - 42: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*42])); - 43: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*43])); - 44: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*44])); - 45: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*45])); - 46: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*46])); - 47: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*47])); - 48: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*48])); - 49: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*49])); - 50: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*50])); - 51: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*51])); - 52: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*52])); - 53: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*53])); - 54: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*54])); - 55: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*55])); - 56: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*56])); - 57: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*57])); - 58: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*58])); - 59: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*59])); - 60: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*60])); - 61: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*61])); - 62: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*62])); - 63: temp = zeroExtend(Data'(data[valueOf(DATA_WIDTH)-1:valueOf(DATA_WIDTH)-valueOf(BYTE_WIDTH)*63])); - default: temp = data; + 1 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*1 -1 : 0])); + 2 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*2 -1 : 0])); + 3 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*3 -1 : 0])); + 4 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*4 -1 : 0])); + 5 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*5 -1 : 0])); + 6 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*6 -1 : 0])); + 7 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*7 -1 : 0])); + 8 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*8 -1 : 0])); + 9 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*9 -1 : 0])); + 10: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*10-1 : 0])); + 11: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*11-1 : 0])); + 12: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*12-1 : 0])); + 13: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*13-1 : 0])); + 14: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*14-1 : 0])); + 15: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*15-1 : 0])); + 16: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*16-1 : 0])); + 17: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*17-1 : 0])); + 18: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*18-1 : 0])); + 19: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*19-1 : 0])); + 20: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*20-1 : 0])); + 21: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*21-1 : 0])); + 22: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*22-1 : 0])); + 23: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*23-1 : 0])); + 24: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*24-1 : 0])); + 25: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*25-1 : 0])); + 26: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*26-1 : 0])); + 27: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*27-1 : 0])); + 28: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*28-1 : 0])); + 29: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*29-1 : 0])); + 30: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*30-1 : 0])); + 31: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*31-1 : 0])); + 32: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*32-1 : 0])); + 33: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*33-1 : 0])); + 34: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*34-1 : 0])); + 35: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*35-1 : 0])); + 36: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*36-1 : 0])); + 37: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*37-1 : 0])); + 38: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*38-1 : 0])); + 39: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*39-1 : 0])); + 40: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*40-1 : 0])); + 41: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*41-1 : 0])); + 42: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*42-1 : 0])); + 43: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*43-1 : 0])); + 44: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*44-1 : 0])); + 45: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*45-1 : 0])); + 46: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*46-1 : 0])); + 47: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*47-1 : 0])); + 48: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*48-1 : 0])); + 49: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*49-1 : 0])); + 50: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*50-1 : 0])); + 51: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*51-1 : 0])); + 52: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*52-1 : 0])); + 53: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*53-1 : 0])); + 54: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*54-1 : 0])); + 55: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*55-1 : 0])); + 56: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*56-1 : 0])); + 57: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*57-1 : 0])); + 58: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*58-1 : 0])); + 59: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*59-1 : 0])); + 60: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*60-1 : 0])); + 61: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*61-1 : 0])); + 62: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*62-1 : 0])); + 63: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*63-1 : 0])); + 64: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*64-1 : 0])); + default: temp = 0; endcase return temp; endfunction @@ -226,6 +401,7 @@ function DmaMemAddr getAddrLowBits(DmaMemAddr addr, Bit#(TLog#(DMA_MEM_ADDR_WIDT 61: temp = zeroExtend(DmaMemAddr'(addr[61-1:0])); 62: temp = zeroExtend(DmaMemAddr'(addr[62-1:0])); 63: temp = zeroExtend(DmaMemAddr'(addr[63-1:0])); + 64: temp = zeroExtend(DmaMemAddr'(addr[64-1:0])); default: temp = 0; endcase return temp; diff --git a/src/StreamUtils.bsv b/src/StreamUtils.bsv index 58e6793..bfe2466 100755 --- a/src/StreamUtils.bsv +++ b/src/StreamUtils.bsv @@ -16,9 +16,10 @@ typedef struct { } StreamWithPtr deriving(Bits, Bounded, Eq, FShow); interface StreamConcat; - interface FifoIn#(DataStream) inputStreamFirstFifoIn; - interface FifoIn#(DataStream) inputStreamSecondFifoIn; - interface FifoOut#(DataStream) outputStreamFifoOut; + interface FifoIn#(DataStream) inputStreamFirstFifoIn; + interface FifoIn#(DataStream) inputStreamSecondFifoIn; + interface FifoOut#(DataStream) outputStreamFifoOut; + interface FifoOut#(DataBytePtr) outputBytePtrFifoOut; endinterface interface StreamSplit; @@ -27,84 +28,12 @@ interface StreamSplit; interface FifoOut#(DataStream) outputStreamFifoOut; endinterface -function DataBytePtr convertByteEn2BytePtr (ByteEn byteEn); - DataBytePtr ptr = 0; - case(byteEn) - 'h0000000000000001: ptr = 1; - 'h0000000000000003: ptr = 2; - 'h0000000000000007: ptr = 3; - 'h000000000000000F: ptr = 4; - 'h000000000000001F: ptr = 5; - 'h000000000000003F: ptr = 6; - 'h000000000000007F: ptr = 7; - 'h00000000000000FF: ptr = 8; - 'h00000000000001FF: ptr = 9; - 'h00000000000003FF: ptr = 10; - 'h00000000000007FF: ptr = 11; - 'h0000000000000FFF: ptr = 12; - 'h0000000000001FFF: ptr = 13; - 'h0000000000003FFF: ptr = 14; - 'h0000000000007FFF: ptr = 15; - 'h000000000000FFFF: ptr = 16; - 'h000000000001FFFF: ptr = 17; - 'h000000000003FFFF: ptr = 18; - 'h000000000007FFFF: ptr = 19; - 'h00000000000FFFFF: ptr = 20; - 'h00000000001FFFFF: ptr = 21; - 'h00000000003FFFFF: ptr = 22; - 'h00000000007FFFFF: ptr = 23; - 'h0000000000FFFFFF: ptr = 24; - 'h0000000001FFFFFF: ptr = 25; - 'h0000000003FFFFFF: ptr = 26; - 'h0000000007FFFFFF: ptr = 27; - 'h000000000FFFFFFF: ptr = 28; - 'h000000001FFFFFFF: ptr = 29; - 'h000000003FFFFFFF: ptr = 30; - 'h000000007FFFFFFF: ptr = 31; - 'h00000000FFFFFFFF: ptr = 32; - 'h00000001FFFFFFFF: ptr = 33; - 'h00000003FFFFFFFF: ptr = 34; - 'h00000007FFFFFFFF: ptr = 35; - 'h0000000FFFFFFFFF: ptr = 36; - 'h0000001FFFFFFFFF: ptr = 37; - 'h0000003FFFFFFFFF: ptr = 38; - 'h0000007FFFFFFFFF: ptr = 39; - 'h000000FFFFFFFFFF: ptr = 40; - 'h000001FFFFFFFFFF: ptr = 41; - 'h000003FFFFFFFFFF: ptr = 42; - 'h000007FFFFFFFFFF: ptr = 43; - 'h00000FFFFFFFFFFF: ptr = 44; - 'h00001FFFFFFFFFFF: ptr = 45; - 'h00003FFFFFFFFFFF: ptr = 46; - 'h00007FFFFFFFFFFF: ptr = 47; - 'h0000FFFFFFFFFFFF: ptr = 48; - 'h0001FFFFFFFFFFFF: ptr = 49; - 'h0003FFFFFFFFFFFF: ptr = 50; - 'h0007FFFFFFFFFFFF: ptr = 51; - 'h000FFFFFFFFFFFFF: ptr = 52; - 'h001FFFFFFFFFFFFF: ptr = 53; - 'h003FFFFFFFFFFFFF: ptr = 54; - 'h007FFFFFFFFFFFFF: ptr = 55; - 'h00FFFFFFFFFFFFFF: ptr = 56; - 'h01FFFFFFFFFFFFFF: ptr = 57; - 'h03FFFFFFFFFFFFFF: ptr = 58; - 'h07FFFFFFFFFFFFFF: ptr = 59; - 'h0FFFFFFFFFFFFFFF: ptr = 60; - 'h1FFFFFFFFFFFFFFF: ptr = 61; - 'h3FFFFFFFFFFFFFFF: ptr = 62; - 'h7FFFFFFFFFFFFFFF: ptr = 63; - 'hFFFFFFFFFFFFFFFF: ptr = 64; - default : ptr = 0; - endcase - return ptr; -endfunction - function Bool isByteEnZero(ByteEn byteEn); return !unpack(byteEn[0]); endfunction function DataStream getEmptyStream (); - return DataStream{ + return DataStream { data: 0, byteEn: 0, isFirst: False, @@ -112,6 +41,13 @@ function DataStream getEmptyStream (); }; endfunction +function StreamWithPtr getEmptyStreamWithPtr (); + return StreamWithPtr { + stream : getEmptyStream, + bytePtr: 0 + }; +endfunction + function DataBitPtr getMaxBitPtr (); return fromInteger(valueOf(DATA_WIDTH)); endfunction @@ -121,42 +57,47 @@ function DataBytePtr getMaxBytePtr (); endfunction // Concat two DataStream frames into one. StreamA.isLast must be True, otherwise the function will return a empty frame to end the stream. -function Tuple3#(DataStream, DataStream, DataBytePtr) getConcatStream (DataStream streamA, DataStream streamB, DataBytePtr bytePtrA, DataBytePtr bytePtrB); - Bool isCallLegally = (streamA.isLast && bytePtrA <= getMaxBytePtr && bytePtrA > 0); - DataBitPtr bitPtrA = zeroExtend(bytePtrA) << fromInteger(valueOf(BYTE_WIDTH_WIDTH)); +function Tuple2#(StreamWithPtr, StreamWithPtr) getConcatStream (StreamWithPtr streamA, StreamWithPtr streamB); + Bool isCallLegally = (streamA.stream.isLast && streamA.bytePtr <= getMaxBytePtr && streamA.bytePtr >= 0); + DataBitPtr bitPtrA = zeroExtend(streamA.bytePtr) << fromInteger(valueOf(BYTE_WIDTH_WIDTH)); // Fill the low PtrA bytes by streamA data - Data concatDataA = streamA.data; - ByteEn concatByteEnA = streamA.byteEn; + Data concatDataA = streamA.stream.data; + ByteEn concatByteEnA = streamA.stream.byteEn; // Fill the high bytes by streamB data - Data concatDataB = streamB.data << bitPtrA; - ByteEn concatByteEnB = streamB.byteEn << bytePtrA; - Data concatData = concatDataA | concatDataB; + Data concatDataB = streamB.stream.data << bitPtrA; + ByteEn concatByteEnB = streamB.stream.byteEn << streamA.bytePtr; + Data concatData = concatDataA | concatDataB; ByteEn concatByteEn = concatByteEnA | concatByteEnB; // Get the remain bytes of streamB data - DataBitPtr resBitPtr = getMaxBitPtr - bitPtrA; - DataBytePtr resBytePtr = getMaxBytePtr - bytePtrA; - Data remainData = streamB.data >> resBitPtr; - ByteEn remainByteEn = streamB.byteEn >> resBytePtr; + DataBitPtr resBitPtr = getMaxBitPtr - bitPtrA; + DataBytePtr resBytePtr = getMaxBytePtr - streamA.bytePtr; + Data remainData = streamB.stream.data >> resBitPtr; + ByteEn remainByteEn = streamB.stream.byteEn >> resBytePtr; - // Get if the concat frame is the last - Bool isConcatStreamLast = streamB.isLast; + // Get if the concat frame is the last, i.e. can streamB be contained by the residual empty bytes + Bool isConcatStreamLast = streamB.stream.isLast; DataBytePtr remainBytePtr = 0; - if (resBytePtr < bytePtrB ) begin + DataBytePtr concatStreamPtr = streamA.bytePtr; + if (resBytePtr < streamB.bytePtr ) begin isConcatStreamLast = False; - remainBytePtr = bytePtrB - resBytePtr; + remainBytePtr = streamB.bytePtr - resBytePtr; + concatStreamPtr = getMaxBytePtr; + end + else begin + concatStreamPtr = streamA.bytePtr + streamB.bytePtr; end - DataStream concatStream = getEmptyStream; - DataStream remainStream = getEmptyStream; // package the return concatStream and remainStream + DataStream concatStream = getEmptyStream; + DataStream remainStream = getEmptyStream; if(isCallLegally) begin concatStream = DataStream { data : concatData, byteEn : concatByteEn, - isFirst: streamA.isFirst, + isFirst: streamA.stream.isFirst, isLast : isConcatStreamLast }; remainStream = DataStream { @@ -166,7 +107,15 @@ function Tuple3#(DataStream, DataStream, DataBytePtr) getConcatStream (DataStrea isLast : True }; end - return tuple3(concatStream, remainStream, remainBytePtr); + let concatStreamWithPtr = StreamWithPtr { + stream : concatStream, + bytePtr: concatStreamPtr + }; + let remainStreamWithPtr = StreamWithPtr { + stream : remainStream, + bytePtr: remainBytePtr + }; + return tuple2(concatStreamWithPtr, remainStreamWithPtr); endfunction (* synthesize *) @@ -175,119 +124,133 @@ module mkStreamConcat (StreamConcat ifc); FIFOF#(DataStream) inputFifoA <- mkFIFOF; FIFOF#(DataStream) inputFifoB <- mkFIFOF; FIFOF#(DataStream) outputFifo <- mkFIFOF; + FIFOF#(DataBytePtr) bytePtrFifo <- mkFIFOF; FIFOF#(StreamWithPtr) prepareFifoA <- mkFIFOF; FIFOF#(StreamWithPtr) prepareFifoB <- mkFIFOF; - Reg#(DataBytePtr) bytePtrRegA <- mkReg(0); - Reg#(DataBytePtr) bytePtrRegB <- mkReg(0); - Reg#(DataBytePtr) remainBytePtrReg <- mkReg(0); - Reg#(Bool) hasRemainReg <- mkReg(False); + Reg#(Bool) hasRemainReg <- mkReg(False); Reg#(Bool) hasLastRemainReg <- mkReg(False); - Reg#(Bool) isStreamAEnd <- mkReg(False); + Reg#(Bool) isStreamAEndReg <- mkReg(False); - Reg#(DataStream) remainStreamReg <- mkRegU; + Reg#(StreamWithPtr) remainStreamWpReg <- mkRegU; - - rule prepareStreamA; - let streamA = inputFifoA.first; - inputFifoA.deq; - DataBytePtr bytePtr = convertByteEn2BytePtr(streamA.byteEn); - prepareFifoA.enq(StreamWithPtr { - stream: streamA, - bytePtr: bytePtr - }); - endrule - - rule prepareStreamB; - let streamB = inputFifoB.first; - inputFifoB.deq; - DataBytePtr bytePtr = convertByteEn2BytePtr(streamB.byteEn); - prepareFifoB.enq(StreamWithPtr { - stream: streamB, - bytePtr: bytePtr - }); + // Pipeline stage 1: get the bytePtr of each stream + rule prepareStream; + if (inputFifoA.notEmpty) begin + let streamA = inputFifoA.first; + inputFifoA.deq; + let bytePtr = convertByteEn2BytePtr(streamA.byteEn); + prepareFifoA.enq(StreamWithPtr { + stream: streamA, + bytePtr: bytePtr + }); + end + if (inputFifoB.notEmpty) begin + let streamB = inputFifoB.first; + inputFifoB.deq; + let bytePtr = convertByteEn2BytePtr(streamB.byteEn); + prepareFifoB.enq(StreamWithPtr { + stream: streamB, + bytePtr: bytePtr + }); + end endrule + // Pipeline stage 2: concat the stream frame rule concatStream; // Only the remain data if (hasRemainReg && hasLastRemainReg) begin - outputFifo.enq(remainStreamReg); - hasRemainReg <= False; - isStreamAEnd <= False; + outputFifo.enq(remainStreamWpReg.stream); + bytePtrFifo.enq(remainStreamWpReg.bytePtr); + hasRemainReg <= False; + isStreamAEndReg <= False; end // StreamB or streamB + the remain data - else if (prepareFifoB.notEmpty && isStreamAEnd) begin - let streamB = prepareFifoB.first.stream; - let bytePtrB = prepareFifoB.first.bytePtr; + else if (prepareFifoB.notEmpty && isStreamAEndReg) begin + let streamBWp = prepareFifoB.first; prepareFifoB.deq; - streamB.isFirst = False; + streamBWp.stream.isFirst = False; if (hasRemainReg) begin - let {concatStream, remainStream, remainBytePtr} = getConcatStream(remainStreamReg, streamB, remainBytePtrReg, bytePtrB); - hasRemainReg <= !isByteEnZero(remainStream.byteEn); - hasLastRemainReg <= streamB.isLast; - remainStreamReg <= remainStream; - remainBytePtrReg <= remainBytePtr; - outputFifo.enq(concatStream); + let {concatStreamWp, remainStreamWp} = getConcatStream(remainStreamWpReg, streamBWp); + hasRemainReg <= !isByteEnZero(remainStreamWp.stream.byteEn); + hasLastRemainReg <= streamBWp.stream.isLast; + remainStreamWpReg <= remainStreamWp; + outputFifo.enq(concatStreamWp.stream); + bytePtrFifo.enq(concatStreamWp.bytePtr); end else begin - outputFifo.enq(streamB); + outputFifo.enq(streamBWp.stream); + bytePtrFifo.enq(streamBWp.bytePtr); end - isStreamAEnd <= !streamB.isLast; + // reset isStreamAEnd to False when the whole concat end + isStreamAEndReg <= streamBWp.stream.isLast ? False : isStreamAEndReg; end // StreamA or StreamA + first StreamB else if (prepareFifoA.notEmpty) begin - let streamA = prepareFifoA.first.stream; - let bytePtrA = prepareFifoA.first.bytePtr; + let streamAWp = prepareFifoA.first; // Only StreamA frame - if (!streamA.isLast) begin - outputFifo.enq(streamA); + if (!streamAWp.stream.isLast) begin + outputFifo.enq(streamAWp.stream); + bytePtrFifo.enq(streamAWp.bytePtr); prepareFifoA.deq; - isStreamAEnd <= False; + isStreamAEndReg <= False; end // the last StreamA + the first StreamB - else if (streamA.isLast && prepareFifoB.notEmpty) begin - let streamB = prepareFifoB.first.stream; - let bytePtrB = prepareFifoB.first.bytePtr; - let {concatStream, remainStream, remainBytePtr} = getConcatStream(streamA, streamB, bytePtrA, bytePtrB); - hasRemainReg <= !isByteEnZero(remainStream.byteEn); - hasLastRemainReg <= streamB.isLast; - remainStreamReg <= remainStream; - remainBytePtrReg <= remainBytePtr; - isStreamAEnd <= !streamB.isLast; - outputFifo.enq(concatStream); + else if(streamAWp.stream.isLast && prepareFifoB.notEmpty) begin + let streamBWp = prepareFifoB.first; + let {concatStreamWp, remainStreamWp} = getConcatStream(streamAWp, streamBWp); + hasRemainReg <= !isByteEnZero(remainStreamWp.stream.byteEn); + hasLastRemainReg <= streamBWp.stream.isLast; + remainStreamWpReg <= remainStreamWp; + // If streamB.isLast, reset isStreamAEnd; otherwise assert isStreamAEnd + isStreamAEndReg <= streamBWp.stream.isLast ? False : True; + outputFifo.enq(concatStreamWp.stream); + bytePtrFifo.enq(concatStreamWp.bytePtr); prepareFifoA.deq; prepareFifoB.deq; end + // Do nothing + else begin + // - !prepareB.notEmpty ==> waiting StreamB for concatation + end + end + // Do nothing + else begin + // - prepareB.notEmpty && !isStreamAEnd ==> waiting streamAEnd asserts + // - !prepareB.notEmpty && !prepareA.notEmpty ==> waiting new data end endrule interface inputStreamFirstFifoIn = convertFifoToFifoIn(inputFifoA); interface inputStreamSecondFifoIn = convertFifoToFifoIn(inputFifoB); interface outputStreamFifoOut = convertFifoToFifoOut(outputFifo); + interface outputBytePtrFifoOut = convertFifoToFifoOut(bytePtrFifo); endmodule +typedef 3 STREAM_SPLIT_LATENCY; + (* synthesize *) module mkStreamSplit(StreamSplit ifc); Reg#(StreamSize) streamByteCntReg <- mkReg(0); - FIFOF#(StreamSize) splitLocationFifo <- mkFIFOF; + FIFOF#(StreamSize) splitLocationFifo <- mkSizedFIFOF(valueOf(STREAM_SPLIT_LATENCY)); FIFOF#(DataStream) inputFifo <- mkFIFOF; FIFOF#(DataStream) outputFifo <- mkFIFOF; FIFOF#(StreamWithPtr) prepareFifo <- mkFIFOF; FIFOF#(StreamWithPtr) assertFifo <- mkFIFOF; - FIFOF#(Tuple2#(DataBytePtr,DataBytePtr)) splitPtrFifo <- mkFIFOF; + FIFOF#(DataBytePtr) splitPtrFifo <- mkSizedFIFOF(valueOf(STREAM_SPLIT_LATENCY)); - Reg#(DataStream) remainStreamReg <- mkRegU; - Reg#(DataBytePtr) remainBytePtrReg <- mkReg(0); + Reg#(StreamWithPtr) remainStreamWpReg <- mkRegU; Reg#(Bool) hasRemainReg <- mkReg(False); Reg#(Bool) hasLastRemainReg <- mkReg(False); - Reg#(Bool) isSplitted <- mkReg(False); + Reg#(Bool) isSplittedReg <- mkReg(False); + // Pipeline stage 1: get the bytePtr of the input stream frame rule prepareStream; let stream = inputFifo.first; inputFifo.deq; @@ -298,77 +261,79 @@ module mkStreamSplit(StreamSplit ifc); prepareFifo.enq(streamWithPtr); endrule + // Pipeline stage 2: assert if splitLocation in this beat and calculate the offsetBytePtr rule assertSplitStream; let stream = prepareFifo.first.stream; - let bytePtr = prepareFifo.first.bytePtr; + let bytePtr = prepareFifo.first.bytePtr; + prepareFifo.deq; let splitLocation = splitLocationFifo.first; - DataBytePtr truncateBytePtr = 0; - if (!isSplitted && unpack(zeroExtend(bytePtr)) + streamByteCntReg >= splitLocation) begin - truncateBytePtr = truncate(pack(splitLocation - streamByteCntReg)); + if (stream.isLast) begin + splitLocationFifo.deq; + end + DataBytePtr offsetBytePtr = 0; + let curLocation = unpack(zeroExtend(bytePtr)) + streamByteCntReg; + if (!isSplittedReg && curLocation >= splitLocation) begin + offsetBytePtr = truncate(pack(splitLocation - streamByteCntReg)); end - DataBytePtr resBytePtr = getMaxBytePtr - truncateBytePtr; - splitPtrFifo.enq(tuple2(truncateBytePtr, resBytePtr)); - if (truncateBytePtr > 0 && !stream.isLast) begin - isSplitted <= True; + splitPtrFifo.enq(offsetBytePtr); + if (offsetBytePtr > 0 && !stream.isLast) begin + isSplittedReg <= True; end else if (stream.isLast) begin - isSplitted <= False; + isSplittedReg <= False; end streamByteCntReg <= stream.isLast ? 0 : streamByteCntReg + unpack(zeroExtend(bytePtr)); assertFifo.enq(prepareFifo.first); - prepareFifo.deq; - if (stream.isLast) begin - splitLocationFifo.deq; - end endrule - + // Pipeline stage 3: split the stream frame or output it without modify accroding to offsetBytePtr rule execSplitStream; // Only output remainStreamReg if (hasRemainReg && hasLastRemainReg) begin - outputFifo.enq(remainStreamReg); + outputFifo.enq(remainStreamWpReg.stream); hasRemainReg <= False; hasLastRemainReg <= False; end + // not the last remain stream else if (assertFifo.notEmpty && splitPtrFifo.notEmpty) begin - let stream = assertFifo.first.stream; - let frameBytePtr = assertFifo.first.bytePtr; - let {truncateBytePtr, resBytePtr} = splitPtrFifo.first; + let streamWp = assertFifo.first; + let offsetBytePtr = splitPtrFifo.first; assertFifo.deq; splitPtrFifo.deq; - // no operatation - if (!hasRemainReg && truncateBytePtr == 0) begin - outputFifo.enq(stream); + // split location not in this beat, do nothing + if (!hasRemainReg && offsetBytePtr == 0) begin + outputFifo.enq(streamWp.stream); end - // split the frame in this cycle to a last frame and a remain frame - else if (!hasRemainReg && truncateBytePtr > 0) begin - DataBitPtr truncateBitPtr = zeroExtend(truncateBytePtr) << valueOf(BYTE_WIDTH_WIDTH); - DataBitPtr resBitPtr = zeroExtend(resBytePtr) << valueOf(BYTE_WIDTH_WIDTH); - outputFifo.enq(DataStream { - data: (stream.data << resBitPtr) >> resBitPtr, - byteEn: (stream.byteEn << resBytePtr) >> resBytePtr, - isFirst: stream.isFirst, + // split the frame in this cycle to a isLast=True frame and a remain frame + else if (!hasRemainReg && offsetBytePtr > 0) begin + DataBitPtr offsetBitPtr = zeroExtend(offsetBytePtr) << valueOf(BYTE_WIDTH_WIDTH); + let splitStream = DataStream { + data: getDataLowBytes(streamWp.stream.data, offsetBytePtr), + byteEn: convertBytePtr2ByteEn(offsetBytePtr), + isFirst: streamWp.stream.isFirst, isLast: True - }); - DataStream remainStream = DataStream { - data: stream.data >> truncateBitPtr, - byteEn: stream.byteEn >> truncateBytePtr, + }; + outputFifo.enq(splitStream); + let remainStream = DataStream { + data: streamWp.stream.data >> offsetBitPtr, + byteEn: streamWp.stream.byteEn >> offsetBytePtr, isFirst: True, isLast: True }; - hasRemainReg <= !isByteEnZero(remainStream.byteEn); - hasLastRemainReg <= stream.isLast; - remainBytePtrReg <= frameBytePtr - truncateBytePtr; - remainStreamReg <= remainStream; + hasRemainReg <= True; + hasLastRemainReg <= streamWp.stream.isLast; + remainStreamWpReg <= StreamWithPtr { + stream : remainStream, + bytePtr: streamWp.bytePtr - offsetBytePtr + }; end - // concat the new frame with the remainReg - else if (hasRemainReg) begin - let {concatStream, remainStream, remainBytePtr} = getConcatStream(remainStreamReg, stream, remainBytePtrReg, frameBytePtr); - outputFifo.enq(concatStream); - hasRemainReg <= !isByteEnZero(remainStream.byteEn); - hasLastRemainReg <= stream.isLast; - remainStreamReg <= remainStream; - remainBytePtrReg <= remainBytePtr; + // concat the stream frame with the remainReg + else begin + let {concatStreamWp, remainStreamWp} = getConcatStream(remainStreamWpReg, streamWp); + outputFifo.enq(concatStreamWp.stream); + hasRemainReg <= streamWp.stream.isLast ? !isByteEnZero(remainStreamWp.stream.byteEn) : True; + hasLastRemainReg <= streamWp.stream.isLast; + remainStreamWpReg <= remainStreamWp; end end endrule diff --git a/test/TestDmaCore.bsv b/test/TestDmaCore.bsv index 14d6178..696ff80 100755 --- a/test/TestDmaCore.bsv +++ b/test/TestDmaCore.bsv @@ -13,7 +13,6 @@ typedef 2'b00 DEFAULT_TLP_SIZE_SETTING; typedef 4 CHUNK_TX_TEST_SETTING_NUM; typedef 6 CHUNK_RX_TEST_SETTING_NUM; -(* doc = "testcase" *) module mkChunkComputerTb(Empty); ChunkCompute dut <- mkChunkComputer(DMA_TX); @@ -90,4 +89,30 @@ module mkChunkComputerTb(Empty); end endrule +endmodule + +typedef 'hABCD311 SIMPLE_TEST_ADDR ; +typedef 'h1111 SIMPLE_TEST_LEN ; + +(* doc = "testcase" *) +module mkSimpleTestAlignedRqDescGen(Empty); + AlignedDescGen dut <- mkAlignedRqDescGen; + + Reg#(Bool) isInitReg <- mkReg(False); + + rule testInit if (!isInitReg); + isInitReg <= True; + dut.reqFifoIn.enq(DmaRequest { + startAddr: fromInteger(valueOf(SIMPLE_TEST_ADDR)), + length : fromInteger(valueOf(SIMPLE_TEST_LEN)) + }); + endrule + + rule testOutput if (isInitReg); + let stream = dut.dataFifoOut.first; + stream.deq; + $display(fshow(stream)); + $finish(); + endrule + endmodule \ No newline at end of file diff --git a/test/TestStreamUtils.bsv b/test/TestStreamUtils.bsv index 3241357..9682120 100755 --- a/test/TestStreamUtils.bsv +++ b/test/TestStreamUtils.bsv @@ -160,6 +160,7 @@ module mkStreamConcatTb(Empty); ); end dut.outputStreamFifoOut.deq; + dut.outputBytePtrFifoOut.deq; endrule rule testFinish; From 63777bcbac1c1044a2cc2d3fc43d1a88f82970d5 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Mon, 5 Aug 2024 11:07:07 +0800 Subject: [PATCH 31/53] Add streamShift --- img/requester.drawio.svg | 784 ++++++++++++++++++++++++++------------- run_one.sh | 2 +- src/DmaRequestCore.bsv | 151 ++------ src/DmaTypes.bsv | 2 +- src/StreamUtils.bsv | 110 ++++-- test/TestDmaCore.bsv | 5 +- test/TestStreamUtils.bsv | 146 +++++++- 7 files changed, 758 insertions(+), 442 deletions(-) diff --git a/img/requester.drawio.svg b/img/requester.drawio.svg index 2100c59..d678ef1 100644 --- a/img/requester.drawio.svg +++ b/img/requester.drawio.svg @@ -1,13 +1,13 @@ - + - - - - + + + + - -
+ +
DataStream @@ -15,18 +15,18 @@
- + DataStream - - - - + + + + - -
+ +
Request @@ -34,18 +34,18 @@
- + Request - - - - + + + + - -
+ +
@@ -55,16 +55,16 @@
- + StreamConcat - - + + - -
+ +
first @@ -72,16 +72,16 @@
- + first - - + + - -
+ +
second @@ -89,22 +89,22 @@
- + second - - - - - - - - + + + + + + + + - -
+ +
@@ -114,16 +114,16 @@
- + convertToAxis - - + + - -
+ +
RawPcieRequeser @@ -131,17 +131,17 @@
- + RawPcieRequeser - - - + + + - -
+ +
req @@ -149,17 +149,17 @@
- + req - - - + + + - -
+ +
data @@ -167,16 +167,16 @@
- + data - - + + - -
+ +
ChunkSplit @@ -184,16 +184,16 @@
- + ChunkSplit - - + + - -
+ +
Descriptor @@ -201,16 +201,16 @@
- + Descriptor - - + + - -
+ +
DataStream @@ -218,16 +218,16 @@
- + DataStream - - + + - -
+ +
isDone @@ -235,18 +235,18 @@
- + isDone - - - - + + + + - -
+ +
DataStream @@ -254,18 +254,18 @@
- + DataStream - - - - + + + + - -
+ +
Request @@ -273,16 +273,16 @@
- + Request - - + + - -
+ +
isDone @@ -290,16 +290,16 @@
- + isDone - - + + - -
+ +
@@ -309,35 +309,16 @@
- + dsFifoInA - - + + - -
-
-
- - dsFifoInB - -
-
-
-
- - dsFifoInB - -
-
- - - - -
+ +
@@ -347,16 +328,16 @@
- + tData - - + + - -
+ +
@@ -366,16 +347,16 @@
- + tKeep - - + + - -
+ +
@@ -385,16 +366,16 @@
- + tUser - - + + - -
+ +
@@ -404,16 +385,16 @@
- + tLast - - + + - -
+ +
straddle 1 @@ -421,16 +402,16 @@
- + straddle 1 - - + + - -
+ +
straddle 0 @@ -438,16 +419,16 @@
- + straddle 0 - - + + - -
+ +
StreamA/B last beat bytePtr > 256bit @@ -455,16 +436,16 @@
- + StreamA/B last beat bytePtr > 256bit - - + + - -
+ +
Only singe streamA/B @@ -472,16 +453,16 @@
- + Only singe streamA/B - - + + - -
+ +
StreamA @@ -489,16 +470,16 @@
- + StreamA - - + + - -
+ +
StreamB @@ -506,16 +487,16 @@
- + StreamB - - + + - -
+ +
@@ -525,16 +506,16 @@
- + x - - + + - -
+ +
@@ -544,16 +525,16 @@
- + x - - + + - -
+ +
StreamA/B last beat bytePtr @@ -563,16 +544,16 @@
- + StreamA/B last beat bytePtr... - - + + - -
+ +
The other stream first @@ -580,16 +561,16 @@
- + The other stream first - - + + - -
+ +
Remain Data @@ -597,16 +578,16 @@
- + Remain Data - - + + - -
+ +
StreamA/B last beat bytePtr+remainPtr > 256bit @@ -614,16 +595,16 @@
- + StreamA/B last beat bytePt... - - + + - -
+ +
Remain Data @@ -631,16 +612,16 @@
- + Remain Data - - + + - -
+ +
Remain Data @@ -648,17 +629,17 @@
- + Remain... - - - + + + - -
+ +
@@ -671,16 +652,16 @@
- + isSop... - - + + - -
+ +
Align @@ -690,18 +671,18 @@
- + Align... - - - - + + + + - -
+ +
Same as above @@ -709,19 +690,19 @@
- + Same as above - - - - - + + + + + - -
+ +
firstBE @@ -731,16 +712,16 @@
- + firstBE... - - + + - -
+ +
AlignDescGen @@ -748,18 +729,18 @@
- + AlignDescGen - - - - + + + + - -
+ +
DataPipe @@ -767,35 +748,35 @@
- + DataPipe - - + + - -
+ +
- remainDs + remainA
- - remainDs + + remainA - - + + - -
+ +
latency=5 @@ -803,16 +784,16 @@
- + latency=5 - - + + - -
+ +
latency=3 @@ -820,11 +801,278 @@
- + latency=3 + + + + +
+
+
+ + straddle 1 + +
+
+
+
+ + straddle 1 + +
+
+ + + + +
+
+
+ + straddle 0 + +
+
+
+
+ + straddle 0 + +
+
+ + + + +
+
+
+ 首先,如果存在remainStream,先拼接new coming dataStream与remainStream +
+
+ 1. 没有isSop与isEop:无数据或一整个完整数据 +
+
+ + 2. 只有一个isSop,没有isEop:第一帧且还有后续数据 + +
+
+ + 3. 只有一个isEop,没有isSop:最后一帧,另一个通道没有数据或者这个最后一帧长度大于256无法拼接 + +
+
+ + 4. 一个isSop,一个isEop:①一个短报文,长度大于256或者另一个通道没有数据;②一个通道结尾加另一个通道开头,通道一最后一帧,长度小于256,可以拼接另一个通道数据,可能会产生余数 + +
+
+ + 5. 一个isSop, 两个isEop:一个通道<256的结束,另一个通道开头,可能会产生遗留数据 + +
+
+ + 6. 两个isSop,一个isEop:一个通道有一个<256的短报文,另一个通道有 + +
+
+ + 7. 两个isSop,两个isEop + +
+
+
+
+
+ + 首先,如果存在remainStream,先拼接new coming dataStream与remainStream... + +
+
+ + + + +
+
+
+ + dsFifoInB + +
+
+
+
+ + dsFifoInB + +
+
+ + + + +
+
+
+ + remainB + +
+
+
+
+ + remainB + +
+
+ + + + + + + + + + + + + + + + + + + +
+
+
+ StreamShift +
+
+
+
+ + StreamShift + +
+
+ + + + + + + + + + + + + + + +
+
+
+ StreamShift +
+
+
+
+ + StreamShift + +
+
+ + + + + + + + + + + + + + +
+
+
+
+ + If 只有一个有数据 直接生成 + +
+
+ else if 两个都有数据 +
+
+ + + 先填充正在发送的,没有的话选一个 +
+
+
+ + + If 正在发送的byteEn[256] == 0 +
+
+
+ + + + 下半部分 + + 填另一个数据流的shift +
+
+
+ + + else 全部填充正在发送的 +
+
+
+
+
+
+ + If 只有一个有数据 直接生成... + +
+
+ + + + +
+
+
+ 有isFirst,就转换为isSop +
+ 有isLast,就转换为isEop +
+
+
+
+ + 有isFirst,就转换为isSop... + +
+
diff --git a/run_one.sh b/run_one.sh index f049c0d..ad1eac1 100755 --- a/run_one.sh +++ b/run_one.sh @@ -13,7 +13,7 @@ TEST_LOG=run.log TEST_DIR=test cd $TEST_DIR truncate -s 0 $TEST_LOG -FILES=`ls TestDmaCore.bsv` +FILES=`ls TestStreamUtils.bsv` ########################################################################### for FILE in $FILES; do # echo $FILE diff --git a/src/DmaRequestCore.bsv b/src/DmaRequestCore.bsv index a0baeb9..0beca84 100755 --- a/src/DmaRequestCore.bsv +++ b/src/DmaRequestCore.bsv @@ -6,6 +6,7 @@ import SemiFifo::*; import PcieTypes::*; import DmaTypes::*; import PcieAxiStreamTypes::*; +import PrimUtils::*; import StreamUtils::*; import PcieDescriptorTypes::*; @@ -25,7 +26,7 @@ typedef Bit#(PCIE_TLP_SIZE_SETTING_WIDTH) PcieTlpSizeSetting; typedef TAdd#(1, TLog#(TDiv#(BUS_BOUNDARY, BYTE_EN_WIDTH))) DATA_BEATS_WIDTH; typedef Bit#(DATA_BEATS_WIDTH) DataBeats; -typedef PcieAxiStream#(PCIE_REQUESTER_REQUEST_TUSER_WIDTH) RqAxisStream; +typedef PcieAxiStream#(PCIE_REQUESTER_REQUEST_TUSER_WIDTH) RqAxiStream; typedef Tuple2#( DWordByteEn, @@ -56,7 +57,7 @@ interface ConvertDataStreamsToStraddleAxis; interface FifoIn#(SideBandByteEn) byteEnAFifoIn; interface FifoIn#(DataStream) dataBFifoIn; interface FifoIn#(SideBandByteEn) byteEnBFifoIn; - interface FifoOut#(PcieAxiStream) axiStreamFifoOut; + interface FifoOut#(RqAxiStream) axiStreamFifoOut; endinterface module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); @@ -155,13 +156,13 @@ endmodule // - Only the first and the last chunk can be shorter than MaxPayloadSize // - Other chunks length must equal to MaxPayloadSize // - The module may block the pipeline if one input beat is splited to two beats -module mkChunkSplit(TRXDirection direction, ChunkCompute ifc); +module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); FIFOF#(DataStream) dataInFifo <- mkFIFOF; FIFOF#(DmaRequest) reqInFifo <- mkFIFOF; FIFOF#(DataStream) chunkOutFifo <- mkFIFOF; FIFOF#(DmaRequest) reqOutFifo <- mkFIFOF; - FIFOF#(DmaRequest) firstReqPipeFifo <- mkSizedFIFOF(STREAM_SPLIT_LATENCY); - FIFOF#(DmaRequest) inputReqPipeFifo <- mkSizedFIFOF(STREAM_SPLIT_LATENCY); + FIFOF#(DmaRequest) firstReqPipeFifo <- mkSizedFIFOF(valueOf(STREAM_SPLIT_LATENCY)); + FIFOF#(DmaRequest) inputReqPipeFifo <- mkSizedFIFOF(valueOf(STREAM_SPLIT_LATENCY)); StreamSplit firstChunkSplitor <- mkStreamSplit; @@ -305,15 +306,15 @@ typedef 3 BYTEEN_INFIFO_DEPTH; // - The core use isSop and isEop to location Tlp and allow 2 Tlp in one beat // - The input dataStream should be added Descriptor and aligned to DW already module mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); - FIFOF#(DataStream) dataAInFifo <- mkFIFOF; - FIFOF#(SideBandByteEn) byteEnAFifo <- mkSizedFIFOF(BYTEEN_INFIFO_DEPTH); - FIFOF#(DataStream) dataBInFifo <- mkFIFOF; - FIFOF#(SideBandByteEn) byteEnBFifo <- mkSizedFIFOF(BYTEEN_INFIFO_DEPTH); + FIFOF#(DataStream) dataInAFifo <- mkFIFOF; + FIFOF#(SideBandByteEn) byteEnAFifo <- mkSizedFIFOF(valueOf(BYTEEN_INFIFO_DEPTH)); + FIFOF#(DataStream) dataInBFifo <- mkFIFOF; + FIFOF#(SideBandByteEn) byteEnBFifo <- mkSizedFIFOF(valueOf(BYTEEN_INFIFO_DEPTH)); - FIFOF#(DataBytePtr) dataPrepareAFifo <- mkFIFOF; - FIFOF#(DataBytePtr) dataPrepareBFifo <- mkFIFOF; + FIFOF#(DataBytePtr) dataShiftAFifo <- mkFIFOF; + FIFOF#(DataBytePtr) dataShiftBFifo <- mkFIFOF; - FIFOF#(PcieAxiStream) axiStreamOutFifo <- mkFIFOF; + FIFOF#(RqAxiStream) axiStreamOutFifo <- mkFIFOF; Reg#(StreamWithPtr) remainStreamAWpReg <- mkRegU; Reg#(StreamWithPtr) remainStreamBWpReg <- mkRegU; @@ -361,24 +362,6 @@ module mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); // Pipeline stage 1: get the byte pointer of each stream rule prepareBytePtr; - if (dataInAFifo.notEmpty && dataPrepareAFifo.notFull) begin - let stream = dataInAFifo.first; - dataInAFifo.deq; - let bytePtr = convertByteEn2BytePtr(stream.byteEn); - dataPrepareAFifo.enq(StreamWithPtr { - stream : stream, - bytePtr: bytePtr - }); - end - if (dataInBFifo.notEmpty && dataPrepareBFifo.notFull) begin - let stream = dataInBFifo.first; - dataInAFifo.deq; - let bytePtr = convertByteEn2BytePtr(stream.byteEn); - dataPrepareBFifo.enq(StreamWithPtr { - stream : stream, - bytePtr: bytePtr - }); - end endrule // Pipeline Stage 2: concat the stream with its remain data (if exist) @@ -394,108 +377,16 @@ module mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); isEopPtrs : replicate(0), isEop : 0 }; - // This cycle isInStreamA, only transfer StreamA or StreamA + StreamB - if (isInStreamAReg) begin - // First: get the whole streamA data to transfer to the PCIe bus in this cycle - if (hasStreamARemainReg && hasLastStreamARemainReg) begin - straddleWpA = remainStreamAWpReg; - isInStreamAReg <= False; - hasStreamARemainReg <= False; - end - else if (hasStreamARemainReg) begin - let {concatStreamWpA, remainStreamWpA} = getConcatStream(remainStreamAWpReg, dataPrepareAFifo.first); - dataPrepareAFifo.deq; - if (isByteEnZero(remainStreamWpA.stream.byteEn)) begin - isInStreamAReg <= False; - hasStreamARemainReg <= False; - end - else begin - isInStreamAReg <= True; - hasStreamARemainReg <= True; - end - straddleWpA = concatStreamWpA; - remainStreamAWpReg <= remainStreamWpA; - hasLastStreamARemainReg <= dataPrepareAFifo.first.stream.isLast; - end - else begin - straddleWpA = dataPrepareAFifo.first; - dataPrepareAFifo.deq; - end - if (dataPrepareBFifo.notEmpty) begin - straddleWpB = dataPrepareBFifo.first; - end - // Second: generate straddle data - straddleData = straddleWpA.stream.data; - if (straddleWpA.stream.isLast) begin - isEop.isEop = fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT)); - isEop.isEopPtrs[0] = convertByteEn2DwordPtr(straddleWpA.stream.byteEn); - end - // only can contains straddleA - if (straddleWpA.bytePtr > fromInteger(valueOf(STRADDLE_THRESH_WIDTH))) begin - - end - // transfer straddleA and straddleB at the same time - else begin - if (straddleWpB.bytePtr > 0) begin - - end - else begin - - end - end - end - // This cycle isInStreamB, only transfer StreamB or StreamB + StreamA - else if (isInStreamBReg) begin - // get the whole streamB data to transfer to the PCIe bus in this cycle - if (hasStreamBRemainReg && hasLastStreamBRemainReg) begin - straddleWpB = remainStreamBWpReg; - isInStreamBReg <= False; - hasStreamBRemainReg <= False; - end - else if (hasStreamBRemainReg) begin - dataPrepareBFifo.deq; - let {concatStreamWpB, remainStreamWpB} = getConcatStream(remainStreamBWpReg, dataPrepareBFifo.first); - if (isByteEnZero(remainStreamWpB.stream.byteEn)) begin - isInStreamBReg <= False; - hasStreamBRemainReg <= False; - end - else begin - isInStreamBReg <= True; - hasStreamBRemainReg <= True; - end - straddleWpB = concatStreamWpB; - remainStreamBWpReg <= remainStreamWpB; - hasLastStreamBRemainReg <= dataPrepareBFifo.first.stream.isLast; - end - else begin - straddleWpB = dataPrepareBFifo.first; - dataPrepareBFifo.deq; - end - if (dataPrepareAFifo.notEmpty) begin - straddleWpA = dataPrepareAFifo.first; - end - end - // This cycle is idle - else begin - if (dataPrepareAFifo.notEmpty) begin - straddleWpA = dataPrepareAFifo.first; - dataPrepareAFifo.deq; - end - if (dataPrepareBFifo.notEmpty) begin - straddleWpB = dataPrepareBFifo.first; - dataPrepareBFifo.deq; - end - end endrule - interface dataAFifoIn = convertFifoToFifoIn(dataInAFifo); - interface reqAFifoIn = convertFifoToFifoIn(reqInAFifo); - interface dataBFifoIn = convertFifoToFifoIn(dataInBFifo); - interface reqBFifoIn = convertFifoToFifoIn(reqInBFifo); - + interface dataAFifoIn = convertFifoToFifoIn(dataInAFifo); + interface byteEnAFifoIn = convertFifoToFifoIn(byteEnAFifo); + interface dataBFifoIn = convertFifoToFifoIn(dataInBFifo); + interface byteEnBFifoIn = convertFifoToFifoIn(byteEnBFifo); + interface axiStreamFifoOut = convertFifoToFifoOut(axiStreamOutFifo); endmodule interface AlignedDescGen; @@ -523,7 +414,7 @@ module mkAlignedRqDescGen(Bool isWrite, AlignedDescGen ifc); function DwordCount getDWordCount(DmaMemAddr startAddr, DmaMemAddr endAddr); let endOffset = byteModDWord(endAddr); - DwordCount dwCnt = (endAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)) - (startAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)); + DwordCount dwCnt = truncate((endAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)) - (startAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH))); return (endOffset == 0) ? dwCnt : dwCnt + 1; endfunction @@ -536,7 +427,7 @@ module mkAlignedRqDescGen(Bool isWrite, AlignedDescGen ifc); "Request Check @ mkAlignedRqDescGen", fshow(request) ); - DmaMemAddr endAddress = request.startAddr + length - 1; + DmaMemAddr endAddress = request.startAddr + request.length - 1; // firstOffset values from {0, 1, 2, 3} ByteModDWord firstOffset = byteModDWord(request.startAddr); ByteModDWord lastOffset = byteModDWord(endAddress); @@ -553,7 +444,7 @@ module mkAlignedRqDescGen(Bool isWrite, AlignedDescGen ifc); // Pipeline Stage 2: generate Descriptor and the dataStream rule genDescriptor; - let {request, firstBytePtr, lastBytePtr, bytePtr, endAddress} = pipelineFifo.first; + let {request, firstOffset, lastOffset, bytePtr, endAddress} = pipelineFifo.first; pipelineFifo.deq; let firstByteEn = convertDWordOffset2FirstByteEn(firstOffset); let lastByteEn = convertDWordOffset2LastByteEn(lastOffset); diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index 2677774..74c467a 100755 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -67,7 +67,7 @@ instance FShow#(DataStream); return ($format(" 0 && offset < getMaxBytePtr) begin + isLast = stream.isLast && !unpack(stream.byteEn[resByte]); + end + else if (offset == 0) begin + isLast = stream.isLast; + end + else begin + isLast = False; + end + return isLast; + endfunction + + rule execShift; + if (hasLastRemainReg) begin + outFifo.enq(remainStreamReg); + hasLastRemainReg <= False; + remainStreamReg <= getEmptyStream; + end + else begin + let stream = inFifo.first; + inFifo.deq; + let shiftStream = DataStream { + data : (stream.data << offsetBits) | remainStreamReg.data, + byteEn : (stream.byteEn << offset) | remainStreamReg.byteEn, + isFirst : stream.isFirst, + isLast : isShiftStreamLast(stream) + }; + let remainStream = DataStream { + data : stream.data >> resBits, + byteEn : stream.byteEn >> resByte, + isFirst : False, + isLast : True + }; + outFifo.enq(shiftStream); + remainStreamReg <= remainStream; + hasLastRemainReg <= stream.isLast && !isByteEnZero(remainStream.byteEn); + end + endrule + + interface streamFifoIn = convertFifoToFifoIn(inFifo); + interface streamFifoOut = convertFifoToFifoOut(outFifo); endmodule \ No newline at end of file diff --git a/test/TestDmaCore.bsv b/test/TestDmaCore.bsv index 696ff80..1f13a0c 100755 --- a/test/TestDmaCore.bsv +++ b/test/TestDmaCore.bsv @@ -94,9 +94,8 @@ endmodule typedef 'hABCD311 SIMPLE_TEST_ADDR ; typedef 'h1111 SIMPLE_TEST_LEN ; -(* doc = "testcase" *) module mkSimpleTestAlignedRqDescGen(Empty); - AlignedDescGen dut <- mkAlignedRqDescGen; + AlignedDescGen dut <- mkAlignedRqDescGen(True); Reg#(Bool) isInitReg <- mkReg(False); @@ -110,7 +109,7 @@ module mkSimpleTestAlignedRqDescGen(Empty); rule testOutput if (isInitReg); let stream = dut.dataFifoOut.first; - stream.deq; + dut.dataFifoOut.deq; $display(fshow(stream)); $finish(); endrule diff --git a/test/TestStreamUtils.bsv b/test/TestStreamUtils.bsv index 9682120..ba0154e 100755 --- a/test/TestStreamUtils.bsv +++ b/test/TestStreamUtils.bsv @@ -1,13 +1,16 @@ import FIFOF::*; import SemiFifo::*; import LFSR::*; +import Vector::*; import PrimUtils::*; import DmaTypes::*; import StreamUtils::*; +typedef 0 LOG_DETAILS_EN; + typedef 'hAB PSEUDO_DATA; -typedef 8 PSEUDO_DATA_WIDTH; +typedef 8 PSEUDO_DATA_WIDTH; typedef 10 TEST_IDEAL_FIFO_DEPTH; @@ -20,7 +23,7 @@ typedef 'hABCDEF01 SEED_2; // TEST HYPER PARAMETERS CASE 2 typedef 16 MAX_STREAM_SIZE_PTR; -typedef 10000 TEST_NUM; +typedef 1000 TEST_NUM; interface RandomStreamSize; method ActionValue#(StreamSize) next(); @@ -93,13 +96,15 @@ module mkStreamConcatTb(Empty); Reg#(UInt#(32)) testRoundReg <- mkReg(0); Reg#(UInt#(32)) testFinishCntReg <- mkReg(0); + Bool logDetailEn = unpack(fromInteger(valueOf(LOG_DETAILS_EN))); + rule testInit if (!isInitReg); $display("INFO: start mkStreamConcatTb!"); isInitReg <= True; endrule rule testInput if (isInitReg && testCntReg < fromInteger(valueOf(TEST_NUM))); - if (testRoundReg == 0 && dut.inputStreamFirstFifoIn.notFull && dut.inputStreamSecondFifoIn.notFull) begin + if (testRoundReg == 0) begin StreamSize sizeA <- streamASizeRandomValue.next; StreamSize sizeB <- streamBSizeRandomValue.next; ideaConcatSizeFifo.enq(sizeA + sizeB); @@ -115,8 +120,10 @@ module mkStreamConcatTb(Empty); streamARemainSizeReg <= sizeA - firstSizeA; streamBRemainSizeReg <= sizeB - firstSizeB; testCntReg <= testCntReg + 1; - // $display("INFO: Add Input of %d Epoch", testCntReg + 1); - // $display("INFO: streamASize = %d, streamBSize = %d, ideaSize = %d", sizeA, sizeB, sizeA+sizeB); + if (logDetailEn) begin + $display("INFO: Add Input of %d Epoch", testCntReg + 1); + $display("INFO: streamASize = %d, streamBSize = %d, ideaSize = %d", sizeA, sizeB, sizeA+sizeB); + end end else if (testRoundReg > 0) begin @@ -146,8 +153,10 @@ module mkStreamConcatTb(Empty); "outStream length check @ mkStreamConcatTb::testOutput", $format("ideaSize = %d, realSize = %d \n", ideaSize, concatSize) ); - // $display("INFO: verify output ideaSize=%d, realSize=%d, ideaLastSize=%d", ideaSize, concatSize, ideaSize%getMaxFrameSize()); - ideaConcatSizeFifo.deq; + if (logDetailEn) begin + $display("INFO: verify output ideaSize=%d, realSize=%d, ideaLastSize=%d", ideaSize, concatSize, ideaSize%getMaxFrameSize); + end + ideaConcatSizeFifo.deq; testFinishCntReg <= testFinishCntReg + 1; concatSizeReg <= 0; end @@ -180,7 +189,6 @@ module mkStreamSplitTb(Empty); RandomStreamSize streamSizeRandomValue <- mkRandomStreamSize(fromInteger(valueOf(SEED_1)), fromInteger(valueOf(MAX_STREAM_SIZE_PTR))); RandomStreamSize splitLocationRandomValue <- mkRandomStreamSize(fromInteger(valueOf(SEED_2)), fromInteger(valueOf(MAX_STREAM_SIZE_PTR)-1)); - Reg#(Bool) isInitReg <- mkReg(False); Reg#(UInt#(32)) testCntReg <- mkReg(0); Reg#(UInt#(32)) testRoundReg <- mkReg(0); @@ -188,9 +196,12 @@ module mkStreamSplitTb(Empty); FIFOF#(StreamSize) ideaSplitSizeFifo <- mkSizedFIFOF(valueOf(TEST_IDEAL_FIFO_DEPTH)); Reg#(StreamSize) streamSize2PutReg <- mkReg(0); + Reg#(StreamSize) totalRecvSizeReg <- mkReg(0); - Reg#(Bool) hasRecvFirstChunkReg <- mkReg(False); - Reg#(StreamSize) totalRecvSizeReg <- mkReg(0); + Reg#(Bool) isInitReg <- mkReg(False); + Reg#(Bool) hasRecvFirstChunkReg <- mkReg(False); + + Bool logDetailEn = unpack(fromInteger(valueOf(LOG_DETAILS_EN))); rule testInit if (!isInitReg); isInitReg <= True; @@ -211,7 +222,9 @@ module mkStreamSplitTb(Empty); ideaTotalSizeFifo.enq(size); ideaSplitSizeFifo.enq(splitLocation); streamSize2PutReg <= size - firstSize; - // $display("INFO: Add input stream size %d, split at %d", size, splitLocation); + if (logDetailEn) begin + $display("INFO: Add input stream size %d, split at %d", size, splitLocation); + end end end else begin @@ -234,7 +247,9 @@ module mkStreamSplitTb(Empty); "outStream total length check @ mkStreamSplitTb", $format("Wrong total length, ideaLen=%d, realLen=%d \n", ideaTotalSizeFifo.first, totalSize) ); - // $display("INFO: receive total size", totalSize); + if (logDetailEn) begin + $display("INFO: receive total size", totalSize); + end ideaTotalSizeFifo.deq; testCntReg <= testCntReg + 1; hasRecvFirstChunkReg <= False; @@ -246,7 +261,9 @@ module mkStreamSplitTb(Empty); "outStream split location check @ mkStreamSplitTb", $format("Wrong split location, ideaLen=%d, realLen=%d \n", ideaSplitSizeFifo.first, totalSize) ); - // $display("INFO: receive first chunk at %d, total size %d", ideaSplitSizeFifo.first, ideaTotalSizeFifo.first); + if (logDetailEn) begin + $display("INFO: receive first chunk at %d, total size %d", ideaSplitSizeFifo.first, ideaTotalSizeFifo.first); + end ideaSplitSizeFifo.deq; hasRecvFirstChunkReg <= True; totalRecvSizeReg <= totalSize; @@ -264,4 +281,105 @@ module mkStreamSplitTb(Empty); end endrule -endmodule \ No newline at end of file +endmodule + +(* doc = "testcase" *) +module mkStreamShiftTb(Empty); + RandomStreamSize streamSizeRandomValue <- mkRandomStreamSize(fromInteger(valueOf(SEED_1)), fromInteger(valueOf(MAX_STREAM_SIZE_PTR))); + Vector#(TAdd#(BYTE_EN_WIDTH, 1), FIFOF#(StreamSize)) setSizeFifo <- replicateM(mkSizedFIFOF(10)); + Vector#(TAdd#(BYTE_EN_WIDTH, 1), StreamShift) duts = newVector; + for (DataBytePtr idx = 0; idx <= getMaxBytePtr; idx = idx + 1) begin + duts[idx] <- mkStreamShift(idx); + end + + Reg#(Bool) isInitReg <- mkReg(False); + Reg#(UInt#(32)) testCntReg <- mkReg(0); + Reg#(UInt#(32)) testRoundReg <- mkReg(0); + Reg#(StreamSize) remainSizeReg <- mkReg(0); + Reg#(UInt#(32)) recvNumReg <- mkReg(0); + + UInt#(32) testCnt = fromInteger(valueOf(TEST_NUM)); + Bool logDetailEn = unpack(fromInteger(valueOf(LOG_DETAILS_EN))); + + rule testInit if (!isInitReg); + isInitReg <= True; + $display("INFO: Start StreamShift test"); + endrule + + rule testInput if (isInitReg && testCntReg < testCnt); + if (testRoundReg == 0) begin + let size <- streamSizeRandomValue.next; + if (logDetailEn) begin + $display("INFO: mkStreamShiftTb input stream size ", size); + end + testRoundReg <= size / getMaxFrameSize; + Bool isLast = (size <= getMaxFrameSize); + let firstSize = isLast ? size : getMaxFrameSize; + let testStream = generatePsuedoStream(firstSize, True, isLast); + remainSizeReg <= size - firstSize; + testCntReg <= testCntReg + 1; + for (DataBytePtr idx = 0; idx <= getMaxBytePtr; idx = idx + 1) begin + setSizeFifo[idx].enq(size); + duts[idx].streamFifoIn.enq(testStream); + end + end + else begin + Bool isLast = (remainSizeReg <= getMaxFrameSize); + let size = isLast ? remainSizeReg : getMaxFrameSize; + remainSizeReg <= remainSizeReg - size; + let testStream = generatePsuedoStream(size, False, isLast); + testRoundReg <= testRoundReg - 1; + if (size > 0) begin + for (DataBytePtr idx = 0; idx <= getMaxBytePtr; idx = idx + 1) begin + duts[idx].streamFifoIn.enq(testStream); + end + end + end + endrule + + rule testFinish if (isInitReg && testCntReg == testCnt); + $display("INFO: End StreamShift test!"); + $finish(); + endrule + + for (DataBytePtr shiftOffset = 0; shiftOffset <= getMaxBytePtr; shiftOffset = shiftOffset + 1) begin + StreamShift dut = duts[shiftOffset]; + + rule testOutput if (isInitReg); + let shiftStream = dut.streamFifoOut.first; + dut.streamFifoOut.deq; + let ideaSize = setSizeFifo[shiftOffset].first; + let refStream = getEmptyStream; + if (shiftStream.isFirst) begin + let firstSize = ideaSize > getMaxFrameSize ? getMaxFrameSize : ideaSize; + refStream = generatePsuedoStream(firstSize, True, False); + refStream.byteEn = refStream.byteEn << shiftOffset; + DataBitPtr dataShiftOffset = zeroExtend(shiftOffset) << valueOf(BYTE_WIDTH_WIDTH); + refStream.data = refStream.data << dataShiftOffset; + end + else if (shiftStream.isLast) begin + let oriLastSize = ideaSize % fromInteger(valueOf(BYTE_EN_WIDTH)); + let lastSize = oriLastSize + unpack(zeroExtend(shiftOffset)); + lastSize = (lastSize > getMaxFrameSize) ? (lastSize - getMaxFrameSize) : lastSize; + lastSize = (lastSize == 0) ? getMaxFrameSize : lastSize; + refStream = generatePsuedoStream(lastSize, False, True); + end + else begin + refStream = generatePsuedoStream(getMaxFrameSize, False, False); + end + if (shiftStream.isLast) begin + setSizeFifo[shiftOffset].deq; + if (shiftOffset == getMaxBytePtr) begin + if (logDetailEn) begin + $display("INFO: StreamShift test epoch %d end!", ideaSize); + end + end + end + immAssert( + (refStream.data == shiftStream.data && refStream.byteEn == shiftStream.byteEn), + "shift stream check @ mkStreamShiftTb", + $format("streamSize:%d, shiftOffset: %d\n", ideaSize, shiftOffset, "shiftStream", fshow(shiftStream), "refStream", fshow(refStream)) + ); + endrule + end +endmodule From 06365643f50c6273aaf441085a89647ba73fe744 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Mon, 5 Aug 2024 21:18:07 +0800 Subject: [PATCH 32/53] Add streamShift --- backend/verilog.tar | Bin 0 -> 276480 bytes img/requester.drawio.svg | 255 +++++++++--------------------------- src/DmaRequestCore.bsv | 275 ++++++++++++++++++++++++--------------- src/DmaRequester.bsv | 6 +- src/DmaTypes.bsv | 5 +- src/StreamUtils.bsv | 84 ++++++++++++ 6 files changed, 328 insertions(+), 297 deletions(-) create mode 100644 backend/verilog.tar diff --git a/backend/verilog.tar b/backend/verilog.tar new file mode 100644 index 0000000000000000000000000000000000000000..6d8509e48444ce7d315173052708c34bdc5d3a44 GIT binary patch literal 276480 zcmeFadv{#NaWATW#ZR&2oNI$8v@{P49!zvj_y8Tm6iEY=w0yC=MnhmgP6W)rGXqhK z?EBg8uliZlUEO=nfL5GDShfi4{?)UqtDn2~57XuBYVr1qPyU8KJ^bvCMxWrH{{E=P z|D_`O`J}(QH|p(<`u$zt`n~<#-Y3oIZ>UpXeY;vumQC}?`+xtB@J#!qtz4t|FYxRK zSO3@FKfRuuO#gg4U9G3f&wu!ss#68;?e6k=_i=!d;b^$mH}&7&+Z~wr=LrJv(3UN+5Y_8-&cX1Tao5&q=qAIIl|ci%2&>*@GnvYtGfT`rF2 zS3H zbg}Y>(EZ`Q=e=@Po{!f{1Q@xF|0KMhJpVl}O@BW&5Bkmd)#Chn(@sxck6)yy__xO| zr>m)H*P$u-TD2L*Bpi?to`qLHi&o*m2HhWoVDzD@gHtrCAoO`r^6R=3u+N)EEDu5xiwn|9WRvB7rtA=lEs|>BRRi}m0`n0x6=>A^lRgYfO zx?mRveX@`vMZA;ie8ezhlK(^BlJm@RuFK90NeaAJH0O)!H?#RsU< z<>I=T&2Mhkt7g4u7Po8q6$JFZd*972FQ%8x zWXMRx>t6yR#&wC>h^|GR8&I^f3cy@_5tSxoM|NC$fr}8Ke1fS73mOIg@^Nis6mU5+ zJ7O!S;tM%h-ndkIb66%rgH$91BNAg1HUXBg@}Uz(dAddlEU>b5X(VzOvPL^E0}7mc z|IOJzbu-{==M|BGA?xBzhryzr6_Gf-gn9$LgfhQgLRnrfS<>i>6{W+9Xkt>*S4(>8 zQw7U)ZUBb%I42i9Cq*Y6e=>bw@6W#n`=+TU*;HoZTUpr+|FVLaFQ$hL!C0@)6- zHu?5?!lk$9Ny=)vRvwvZJ&6N#5FLMn}VvWGHE zyWGS-x@_B9C{Y4j!SzXcFMB=*>W$hcYA`=yskr9g|_(IaP@60bZOE1 zqV4M7f(jW^-zu~0k+DS|2Mt^vDtDG01cQ?on?_qE6`nHvW;DLmyLJ%n8LApAC+|=E z3b*@ZjAxI2oSo95!}V9_E~m@!`Wsqii7K&97zWKQ?!@~ky79>R3>zLcO*0`>i+7Uo zdkhGejQ%JSLA}_WL_CM9q6nrub%S@}{53`BfD!2Qy_kT6c_JI zf5gGXhwjX24D@Qbc#D;z)$8R1t4)DvGCb;0^g4M-QJ)o^xekdzIHgj|sXY}NE^%t5 zm{Xs*KuKVnLjeLVur3>>@amw=gsUkQ?Rkqky_)D=e{Hn8=SUVDI42Hqb1Q9ABPOc6ofeGGX@Z`2E{y=!|Y?%duHp zHmln=u5CF_QI||1S!`n-Fn&5de(`{2R20hW>C3NQQwle&E-iiX{3IBfmOVZCL$U1W zt5>i8NK0)Zb^FG&iH(XY;fhqu+#!yrWoLh6#ab>)Js+OMbEx| z{(StRuDpp)@8O_+HmUW9%m6_@PE3%6i4n3eF+&!1HpE#T?BLa4SbEn`fbn_|g_;$C zP)3rV(`@i`ttKp|e-1r2_95*+`FJkcOKg#4u&kj9S5a9t9Iv+;j+AC(#%ui3FrETEJxx6469b5f!o??yf85e(`31>Gs>%gs`6SDc9OuP|C@_`(6HbRIFkaV z>J5Qm%Av-HcHGGqkxAJV8Kx#ocZ9}5_(88JJE~A>#}-6q>QYk_7-f?W*I~+|Dp4XI zZbl<8inKo9y<>~N&A2wQ(A=#m~)!E+O=wW}f`=<~bqQ1K7;nux! zPtfeYlOFxi@L_*=a7R7d;#jT+O^3SaG2FkS4sIDO*MZhDI_j|Z5c@}JkIQQ$3=es9 zx`iETX0*=c)JP|KR*%3SM5i5Jru|%P;`Z$L*~>xm4?CZ~pIxlq?FFnP=Q}Phu*7)$Lc%a8 z%20SFK^To?AUu;G4DB)yo=K2tLK)0Fl|b8%?Z}5N17!5>`F-q9q5{#oX6{E)absV_ z`#A7c_P(PmIA8Pq=iK#<+>E2SF`qNXdrac@&zajjB=I`L2d8^T;&W(&&QLh=bLU(a z9iGrWl}g%){Z7HNW_}VCA zye(mgzlmGsaXgGZm*lzE0rr7j^Si4y-R;GYHo_fg$?IBBCqDNvL!r(iMta<%QiltF zdr<0bAKP4Gns7^Timy{wdjX24tBzilR|s^c?;Z9?In-a0$Sa(*IuQ1BO>hTNIV&@!{c9MiW+N5WojS#}K^#V-=vjaB zt_Mb8$eIEX1i;~tZWV=k2daFGq9y?W5G6p7`p`?-gQzkqM3m3K#3?VAK+KAqqJXEh z7BbxtC~?WELTHUHxW;pDvrcs6q8dbrX~5P(^E_E*)NGXW+)9l@Vy7f7S}I|kY;}&w zP4ym&b!qh!)loFmQ|ERHdd*BvTDQ`pszyRqX?7P4u7F%MyY$g9+pyMKV{JU+IhXbH zv(R7vnr9rWPvA|gj*@go)^uxg>PflJ^_jWaTbS&Ct~g0{P^rXuYy&B6-8Bkhc64x= zvz-t%@Gx6DTGG-%;^86-;jiiZ!fo(U?vyVz+`CtL5BPuR^Z5R^Z>UTE%SU?&y#D?- zUIN6s;duXlcYkk)|Lx)Z|51Oq`^)>^KSxM@?DxN4)ZAjk{h91$s5=m1~vu9vlE26)$dNr33 zm>mVe7Sm8*P>78x2Xn@fHZ3#-d~T_7h#@d(-wj8?GtC_+p``C@%>J8d#UB3>>E8TU}zCQ;#9clNRRnJUZ~=67o(7fir9zZ4;@LLDn#A+q`dTh%|zwe^`~jYLg9D!}y^7NJ9+vU|;#p|r{i zM{jF6N(af5t;q=p=fS|I*lGhJ(=}xg0I3ikxXM{)v<(^_BY5m<522b|fmuX)Kuw5X zcCDO(!=U|BtROH5PSXawTMK6o;+aQ9o25Tg?IpzK6N-#RL529(C=vg}0?LCX{)rVB z{-@onsrZE`ts8?XKEcJJTop@Vj^P^N?(d+C`#Wgk{+8;9#FoW{33Vvt)~fV!tnT{C zG20t_EvVdp7?rCy&_Ptz7i2@b5ZF%}53WCOOc6zF8V>7xfBwF2Zn8z!Q3q%Fi z!4>IAGSj76_XTQ`g}LI+g%OZA5#?~*PApFxY2@PIfbWdNArG&CIKyNg+Q!Z3?1NOb z$`K|KX_JTU$KjyhMoYA{A1!&!kq%Mzq-P;xlDX0WlE9?8P)=Oj_|TWCwM`rgVt1sAqU&Dg>ljg0(AU zG!Al^I8s^l3p04GU?j0Nr54BXRugRPN^N{dxczjkcT}x#bh#O}z;?Z<9bTBJ9L*31 z5SFC7UD={lealPP|8cu)A8q@G?)|OY|GSGDfJf>6Pp?1tW&h`w{U81QUomudP04g$ z*-l4_bOSR7)-Iu~P-J@%$Z6(C1{#dE8)fi&yMFcGO~p$To;$yUHs?=M5c)~biY8%7 zkfRHlL<-M6?7GP8{Zd}GYqeqf9FD);yUJbTxl2nEv^P|-G(kqSiLH^E!`+ z3f3fzUol2Ua9a|D(bQ}L>8Vayk`PA~D;zwi<35sH=SgG0u6FoMj(vYCi~>^@^A%LG zvaTZR96>CTq&V0}M-+g>ECcuN%1u;+wF9(>3n)dT5=VaR(aAj98-~4&G-3@jkzV5H z7lvd_C}_k66cu|&sa!zXr5aE-lYKBnPb+$PP`l)a)Ws+9o;tj+n-m5WxdIx)qQ4Wc z;_e*LDzf72gqwGc@O1&xD%Lae#&+1F>f&9uP0}Io_3bR*agk|C?m#wQ9@1<0j(yZDbs<@O-Ca;j#g0S4Ij77CU+Gd8~@{H~$Y-X$D`72yC zhfftWG`V08D56ZAMT}X1dB89@$?M}u*}j^|t6f58VV`?u282Jbf*3{Ghgh6|GzXv! zEcbiZM)QBuZkGF9?j0#wg;gAHUujc~Z;(m#0tU;i1k`SuK8gl?1H2C zwAGULh;TC{3YPc|D3mxS^S`;D<8yMgS1dl|hQz0-A*_TM5}!&#mUKP*>ipdl_r%ZU zZ}CkYb5WhQ;oD=}==ALP1h?BiAKy%ttErE$<@9m}7Z=O% z_}zYgKSWe0TTicVP}axy!)W*y{v4VcpTnP{Ls5Eg!2j&`_r;&Pd*aXi5&v`0+ZBHf zhvLt@f%x-KXf+%Nt%kcotKq)T3Lm=StagV&D|{18lpYGLMgyVMXe6{69SE(kQp{QH z4TV;FdqS(dL!s6FKxlDmS7>#xFSI)B39SwXLaW1((CYAjYc=Tg zxmKt#uGIivkYh>?`aQ1Ipg-hV4e)IX0Y2nf4F-L#)nKs8wHgfeg;v9!&}ujoS`GJv zR>MP~)$TxOg|Fdpt#%KDR-+!*YG26N-xFnrT;YQO*Y{w=)jc@i+8*|~vWL4|*Ta3I z>W|c72e=>5m9VLl+Piyb+VdFF-zmBP#Q*nLPN3LCib(q#@iB;G1W7G1R?9x)Z0h&0qfF z9yU9^`4}6{2KxsO@fq1aHD7+&?9!!A&Hu%m!4E4uX7}M>5BH6IhBFc-Oa0|9&1b&t z)3o+%_$MRcQ`SdY`*O-6;r|yYY#W zpBgth#j$=|WEK`1Yy=Dk`ws^_+;2wLMg-X`N6`5h7h#G^@vib`v8M3qvV)2cN+MC* z7!f6*<-*YHsEgm0Q?;u)Cp5)qS9s)Os>L~@!-vBGuE7(|iA+qbe_ZGEi#~KM6|LE} zRNdFIQAAa_chiMD8sswfMk2+%)rAb#io!w!zL5MSE;|`fE=b~T(C;Je28n9}<*;Wd zhZ^OeMu~Scj7;2om&?SPxMi6K2bL0fFsFp~i{mIJI zVrWo{AuLr{N-c&4wHU&xmZj8UXi$qGEPz=`ErtfQ7{Xeap&V*4G^oW8menk!7DJ|A zD8AugRCA59@lCQj4KsErzfJXeqTALZ?`kNw3gTHW_L$ zG_1uC78Wg~7DK~Y3}Fq@Qfe_Yti=$PD=noKL&I7OVP(@$4zw5=)?x^Ypq5gLp2}GF|=EWAzCxHlv)gpDltUM>y}cBp;0ARYB4|{LAnZgKLM?zswE)7N zg(1}92h)bKU$MJk2(|FptA!8tK@6c5J$tq2!H$U`)PiTP7ChKnF@##|>{Vh1yD)}s zPm@?0UShMx5^2(UHED-7Q1`Sz?NtI5TR>K#7N@;RoMN}g5RSAs?N#Cw8%vf@i_=~u zPO%4N3AH%wRpJ!eRhCeT(_SS`v9o0fwK(l-aq5;CKqVKVhW*`#!#;+Rn7UFxU?HlB zyWBW+wnq_dXP{T_Cm&Y-BIujNlBU>hYDl3aA0TYx1B8QkK-DZCp!(zkREu~(?v@YW zM)?5l5f5lnK7E2&Y=Xh;Ga%UiOCbp{^S#y)g@0bTT7l`7M480>`FeN zSyS=^v!Ub(W;yZ+&0dlxn3W_?FxyC;U=|^t(Ci?2f>D3+1fzNRghuJf6O6u-Cm2;H zPcT}RPiPdJJi+KTd4f@^d_tqq#;x4m)*xg;2WGCo8#Z6IiIYiX{N$b#IQg569MUma^^#xtG=|k ziWbXUc|R$SX^Mdc1<>9h0}TtH!vhA|)j>#c3^Xc$MtcmjR{$N17-(MyAw4qCK>@To zWT3+WXn(*!=vJh+!+Kz^&tV0pBdX0eT7A_N+}`hvxZiO-UqR>r_dGf=9fo|(bt^c( zkBc2Stl)ky4ufI z0&JJp0+!VZuzg+&SVPlc=z+MHH*VawSZ2M!%DS)bt(=k)dCi!IIL6)Sb^fOQZ1mP<*-sMU~P%RO0|H6Bn~Up z0{UqVE7by)h!_lOLV7J=J&40fwSaD(!%DS)l^zZ&)dH4vIIL6)Si|A4LM_nHfx`;5 zK&vwxR;UG9is7(AEzr6PhZSmp7Fie!t145Q&PdKbl3$#|kVTD?tg%J)b z)B>%7a9F7pFc{*nQY~PCgTqR-fMFAdm1+Tt7aUfq1q`ql42u(bEnwiqVWnEYG6aVW z9?5IXV{7$ z9A{t8CRfs{;gt-m0`zeu2We)t`VRlV>0nxtN+%l>h3870!zK(3oAqS%y{Ya3sj=0E zg1g={4~2hlH5Nkx$5%=rL7=YZAoN2J_H2yb=dWRUh=>1mFiEU8m%lRa zL0X3vY@XjkvAFj=+=SuY#;sq3@pD|Z@OLP~-~Vwk#c%7A>D!NY?MJYd?*G7;hVB8u zFu%wDrJ~;Mevj_|81?sdac~Re{UJKvU+(`14*VPba*w6NG^^h;`S9L%NSU+w)Z{M( zLh0?^J=gL=7@1e!n?KM|x(5v%!F_-uPWD@Z4c-u%Tuvgp(h?>N(k0DI^*o{^kRqnc9<2xI75d774#)6L+U&OI@7^(rf*0q zw<{4IUu&a^=A0FDK?jbcgP>AdO*(Wnw=YX%NqR(Qro9`S%lHjJaf%lh;eaA}%E#eJ-LbcuJQ z@XBygd1FW{Etp!O@Z6>AQ?#=*RUW3M$f$Q{mWiol3a?AB)H%mc_=aTP@Qe|RF_+Wb zYr~Mx#wX%b_X6)gktu`E!o515O~>rxVR#7+pJ%f_4o_`EL6S}mXcQUs2L+`0h&<=n z-zy-^XWluo=@*dZ!<7t)Z((sH`zYqpd=AY*6^k75{LkaHg7g*P?Y|*+0Xx5;^zP*Q zum~Tw@o4AQ^Yigq@oUPPp%vd&(29R6XvN1Bbn52@d+9DXy_M&eZ^zefuh(Nbq7htT zgA@DgA@no+n-5rCn$9P0a7BXm;cf+gF@1CUc04&hpRQIOj}I0t=BqWY`=HO|?fiT_ zTi`mR+v_)&Gr9J1P$8iZNK8O$EQ@%*oWTZPT_(KH|FqzPfMSCr6K)U>EBxoG4(y+GznrL&K@z{2vnA~ft@O>` zj#ul+`qo!%H?fj*%(d{ID%3|!1yiFTG&k_tYI?=$sFUQ$dL2S@sE3YJb$#=0e6yUc z5Eb6ULWt&c>kl_*Yu2Dn+L5r;dc2;z^<;(S6J_By8{t+=p+!f$t3uDefPc4GqXn#{ z>oHa!IRAp}XoHtCgw)OCJSLfHO+bNRe_g*VCZ$l@#G$?B1(FGf(%jNid zu|#$VuB!T!@W{fPqKdz~T8=LkSTc)07Ee>)Fbbjc_`~Hm1$)j)|K!l*PqLM`Tl9g} zo1tr`^lSD&uw{WBb@bJ+_%?S}7fh#zc-A9lkp3;H*O5DoZ5|5kjWe=9!Gzl3-7#jBv| ze~x!1bjCXqI^&%QRs7}o7`XbzZ-cO z{(QXfx4w)xEFiIoS$Nmme(TaLu>_=>WZSu37%|fI!u^3+jPVO6I?!A%?2yW=kO{XJ z5yKlv8f23^KG9(p9Qy&fndB@nA(|k~Ug4m%{b@(~7FS1le#F%xjs1#r&#wi!Lb^s$ z_qtZ3GbHhDM|xc?QcPRDmmE*8tAb>cQ9cb+q%)+SQ+V5x=`}<0>7U^#r`HV0r+#(m zeA-7y9Gl@Cu;WRS=57`dQn4tFQN~xXs)AN5tDrOM4u&aOPV_P6xizC&>$Sb+b`BmF z+$C)G-qO6n{FHDZ6NF})vz7#I%4r5kt`Ze>h$arPX+q~G+MCeWImRXwmy;R#icKi4 zuZ3o~!(I1)wB{NIImWj)h$)N}HxXx}&`=;pwu!P1h^eAL5GN;LU|iQp8L9{SqCH6l zr#kfm@GuFAXx$bfxWrQ3Rh&1~RU?8qCQpo}PC&!(*&I{Sg$fy&DF_)chSsVHVw-Ui}dOn@~F!f@(6Sy2_2}u(qCFjO}R$PcST~bbh;BVpRr<4pZ+Yt5S*Jdh#P~ zef@B?h^vm4prjHJm~JDkKG!9L6@}qgA~enOHc|YPTmaAW=2#E;y#2ZdNQT`Apd z>|i`reYDsm1_q*i;Q~MN*_et*w|CruSwKeRf=tmkY9iAMY$lDrQ@?kt|&E+@-ajI&Vny_F>YIDLU;Bsa@lqgNOFFUBr6kKG+EaW)k zr-F?cD)K@o8SOa)`nzwsx9S$5k)@htHDhbQ$PU)3z}(z2EOJvZ(F$R33szz3!oF0y zfnkn|%(%S_S1~t5MP8b~He(sL2o)@OxK~Uqz#=PaX$n&fRY8#x>`W0+_q0yS$ef{X z7R6NKB}-t^4oje_JQ!fVSEK6B#_KWDu*WRQAV!m16`eFrQzF|-%gr8CveFto2^H?$_|m^ zt!5Bg*ENK#>Keki3TnPonq;3!KsM|w`7JCR%w8umx5cY_h zR)Z2@*Tyv*TM}<;)^tR!rCAfCrCAfCrCAfCr_xF|8i@`!fitqrc7bt}KtsAhT z?M(DKlo>&@c}bjd zp05POLB4Vk+fu54#6i6Zu47(cE7$B~UWu=Q4l!Axkq7xoD8GAHcC$yA+RBDZRHK9l zn}dazBn;m}tm)&A2bM7YlwYabZ79+%RttnGiw=sEU~(g~U|P0+S>lEXYUvPRi9|a+ zSi;7g(GtR)5G)nTbA%;SJU!ToP)GX|F@XoZe5PlKfWVj1bVNAShiOL(qNqpUQPd+K zihA(Vc|}z?sHhDG6&2#3qE2SdyAZB+@1KuMBqCl@%e z{o?|HZ5S62afi47PbB~R?tis)0;r=pv zXZ}k~$u}JJ9batTIoh)k?izjA;A=)HDx^pZf< zoFsrHMH7(KQFl>q={ z1gUvlhqMZ9@wwsZTTj@)iQX4&R|gkV$e8+8nKg@yEw+7P?bu|XVLAu~CoeXQwu~b@ zWwtt_@wE-n4#GV{RfDh{m-#8m$EV?JMks?5HEKZc&61LePu*Vvb&*}F0F|eASoA;@ z)^A%JF;htg1^`-$b*8aQH zu7BsjuR?Pb3b2CTQ?%lC1=23H;`EfCcaiXU3Dhpxv-2L8nHFuyu=WWj8-TD-SAa@4 zsB@Q;!Jo<5VN_%w%X_YAXV1I^+e)2IRq=YK~ga2Z(f28c)kA z3;}iVVM9qYoqM%hyv0lUtJh1sc%DlzGd}iWAvKFci3DPz_XcbA|^lI|MH0FTBx9l_n^2_Xk zxj%2d3~ZZksH(HH(IRW(eD80^$tcUzhRDusq)D8L{RKAMUR6R&?qR?l|at zb453AYPq+Q+v(6nXSWl$quU8ucRL|6w;v2tw-dJ2?OmKv_d1CYURSF(lY>2_R}1uW z7pR3N%jv~zeLO$M2Zpgni$>!|XGuq7+9Cwn11~+e%DDb;H>+EwzCxeBMrR@%oBp1wJ1aRVUx1^Tf

oye9sU=AvcW6?u${lh#H8;;iABGhk9wJh%JgK*#okHxhEPW&+7)l>C z;o_L!T{NLQz|(P8>rKOQ#|$@=kD~z0?X8B}tgURbA8j&_Jl{?%%B zI(v)vNLLV<3}Ny*vPU@C6t?VbI-l4HNJlss@GM=@%C+!kIT=-14|Zn>!*@tE9Y)%` zFN$@7-7`Sd?L@ME!2EU34QNV2O`EOtVBK4GR zzAPnku z6Fue;Nz#4jd7Jd4W*9<*bKI;y;Ze4_>qoqu`_eLQpNG8zZbHBG4vL|F*mc`D;+zb+C8m>%uwA!7 zl{guLt}^gR)otw)BiUM+C())&Vr zimEMm>bHenrr1$(@J@^R_VR3^pN?+iU%i2nCii==jvi(4dAynh-=$A z%(z+{Me|B2BcFVuMrUg0FM3&2osQtx`<67%icz*5r+Uql?o@J>*-UIMby3Qm$moLf z2N7~?dPLXh&M&~F{>Z0)yvFnZJ)!w2VO$r{g7U8=sq9>@VP8gSLAdvKQk3kRbWRzj z%FACjEDf_fstTtNO^WYoy|WdC6-bQ}>v_{!^PCtYuQUY&aqiw<<^$lQA88<D+Y#-!40SE_(`fCEE!6;aK?)TjVzu+FI)VC)R27PdDs3vNcsIJS z7m+1Z#s;qRc0oqZ%FRT-1fZAaY7D4cXW(9|3NPMd64-lJRJGytKYWC0Mz5AO)JbCn z+r1rt$j8X*+0~Mk{!XTEX;%)=#?Pi(&@`*I<8wEhYr&77j*njiqBigmpb&t~=8oFa zRz`R2v6-=v_Wez??~glb|IM@LeXqaf7=a6#w<3+sAxhfen`r$EsS`Wj2pjKrH~%V% zi4?&CzhTmWrrrO>Nm0l~br8vXHEf_7i>2;9=nzneLEWS35>a(*j)S0jLaQG56;jpP zk|x&BCR82jNQ&uHM`i_=mfzh$w_}_-i(!`;Q-BAHx)QWBo`M)=5iKjMn#E;<1r)ag z7~TO3$tS#XABx7TBcUP01NuUbeQ|ZndH^a4Hl6kTyPzu8zMSUsonPLIP$p4A{rX*m zVg(kyH@(R>bpO?Oa&bXRAfHBv9~rX2qyM(84rJp#H>yt98OcZt@;wN_fyN0@&9oqz^+XSFy})K7mQGxij0*t*mJZuh+Yx zGY4E86IIcQvvI@5G>XXp}l#f9m-E1K%TjBhr#66oA1UGUx5vxX!F zw^0FxFoRPiFc?IRfFl={0Sj^%q9lhwj2u7GR5CWKw->g=jP0)*3`O*1t`q-mqF>+& z8480^$D;HFuds`q?)3yh=NOtA*e)URbha$ z+UPXtW~63dmJ_4a9v(GLv$Mcv8h1b^Yjq00F2Y&k6S5#rkvzj;Rq zQU!+XDZjpwHL>`;r8bDhg=K_-?F><}ok2tofs0CTsC+hmi$hFUP@ykox}E;eKe)w^ z<7Zgy_-0I|msec+;YlRS$ zS)gIO4d`S=y49|-ZN6lQP_!K1z%hwx~86M2SR!cM}0$!F_C5eHy8b& zuRP-wWW+X9_LRT~RfVG@tnR0Xb?QdD6xE_hptFOJID*Jn!DQV@Ir2+k_6Xs-h*Poz#=dzbNQVUa@gsK`(y zT{B=yOk{Xp9fgo!_;N%@kg!MuWP?3%h znk-0=OBSjU?a3mLRXtJ77NmsB7J(c0M0>WdCm^*OUXwVallA66AZ^p5DfD~4x#-n< zCWUvn;2__;@(^aHU z@wPU+r?Z)Qn-5kPjM;^Ql;L0-3k-{j38s7t8wyvgDz#oXNqPsF>6NP$q1utuEhLMp zR~2#cATa*gs=n}YNj0*il#dYN%GIl_#>p2|wzbo7p)+B(t7o<5ERB}Z#cMK9W#bl- zI)K`vxo>DgH<&i+PKs|CUiJ#sR=M^?pDk^5SFpAUAsp50rU?h`#3A^21#7Dyc4U4_ zw}FGZ*rAg!dMj&7=RuOcinWs{dj%_mZQAq1h{td4=FqA{z#wMRK3j@;6>bi#igN95 z3vn2fD7Shl!o)ip5VYwC!3#n!8gJiJtQ9pL5I6m6wLMeP`!W zeha4}5|?knpuJ6ND#!?TD27`&RUzYeR~f`@8h*L7r|cF^U7)p|uD-+Tw2(R4FmvQG zt9?TLQYU0?nytLd^cb+cg;TN90L@(&I4f!{>>T3*OJS8bJ13AbKDMWxHyOKo9ktst z6%FN^(08bNymzJCo~cNTFK9v9wk_32`#QOuvLFmY1@pSs9mLVVDY-gQ5$4}!4)yy} zv+aS9v*E+RpW^$r{xC@M>$bOf&iagOYw7r|@?WiyW%6rLMP>~=2MJ8>3DWAzPaq(#U3YvUO}qK=zjzm zbRAz!#^58!po{owGKP1N;VxvPx0@6h;hGDok8Q^=PDG(Pf(WXPJBldOMi4=@aYqq_ z$_OH;GPV_ATUn)fxMq0w77`^DYK2R>Q#*;03YEhp-Kn)iNrgJ%lD29y>?+3mT1KVy z*y~IX5SxyQafeBMEu$iU-*o~pn-2YQG38l27iF$x{3vEsmR{Mr@%rK`s#AF#@cR3w z*Au^|u#8)0JD)#!{`(#L|31A-K^Mn-escPH{9?!aH$GHf%-f0k3u<{JSN7liOM8q- z1r=Gb?Jcv{$LQ0dNF;aW>{k~Vp;jD;-5w`xJ9CfuIsGwQ7|eS zs@wQ7OlFv2BQcQ;3xqT%)1pWuqmBub1(ATocUpOAkxGb_-w;D^|sX@2mqcD+1uCqlt6O+<=c zRY+vw+@+#&ud1+CW~xt9R;f$V^56jB(Gsdvr@&4}`UY&zB^DeZL6MQK7*-B(bjpx1 zDuW^;UO5W02u#OHEJ}z+UVPu5TZv;kL~nR;*o^H?{DXLAuM--eFi)_OS)55!nB@|sT9#LiimY6JLT4wJbb^i1r$v#-biyFE_!7`Q(PskNF`!LGG2+)B+}zt5}s-j=&=ee*#m-tEv}>>C9%Pc zulyV8%yCIcRE`yx@l6cPq(mh#;~SMk9i5Y6W_+WP$ngymZwGiPO+Gp=G(6@D`+~UE z?4pev1Ho)89MvddQZ_u-yS$uo$ssBepG;_YMxB$&CILl8lu_H=Q$a;m8XMnLM5QXI z$f_jIvnwDWMXeRVPJkk#pjRyS(iG)0P-IjP=9xuckZ51bWEGc`L=wTn8-$#>19Wxa+oO;Y)znRSnxHZOo2d*cIEj zA=pu2wqjqGVXwvuTjzB!oo4Q+J6)i+k?vyWxh_ROox|Ph!J7Wx$b0b4RLnD6;}^G^ z|I}uvRAC!aqp%LfMsNpnr76un(pD1%P_^caK}^iuOlk?gL|BIyOei+AX7gZ_A=BbI z36?eVlR51dKN&(N#@#UtmJ;aHN4jmb{MB0WL{N|lVUy(#v&hYbe5Y|drrA2nGgQ6EtOg{u3-c?zV%7F4y zdc`4Qq86)+T({@9Y8a2Sq>Y^VbDi9jw2@Qau9KURHgXyP=;WrPjob`}bb4E)GN^3e zfLE=wm7SYTxG7x1Eh(%p<3sY4msZxs%8!rj1U0B-SS4lkB{;tc?HA!H%Vm(_N5al< zE8XtqGL6OTK%2R{{a-f@0Mjz4&Q{UGC|O!rH3LE$*U;S+c4#YS9QwVszFhHOD-zY5;`h2lKd%uw*}o zJJ{|}1?ko$ss?QCLXD7_otCtb>q(qeXLYmOE;mjZ+u05en%4KS;SU6A8Jn5<#v8Izq(V{Exti18Q&?oTf z9eXa1Z+o0MOe%N$^?Du7QTP6Izxls3W~Qs0-Rl=&WE}W>+5CT$oyzm@{u$0vGuMZc z+#M+UM2WE_F3`fe0~a`^`<8ANpUv^x^a49d_Db#*VJ27aCm&Y-vU9UoO)uU~8#+6L z$8Ug{<6nG#^|G<2hng?HY`%MZ`px*{==A9I_?t)1zdj-;{X{tZRky6y{Ig^9==AjX zcQ3v=dhvSv^yvAcKN@%a)4fmY^!=^q#eBESfex1yG)QM~MJwoFWH%laQ)*1)RH|&u zU_@u7HB}0;mLe-VD6!6=8g3dKqc(Lvy;@DxAfULZS7Z+>Ecc0X3#>(}R(dn*gBRL0 zqG@&qjmp*L25&7E`L8(?H4?#E(@6lg@{{1u=BcC!v}zWYW46~V^4L(jYGo#tY*(Wx zlU4;~!>zhcLbr3D)vTA3`RWSa z`WP=RFH!d0?DArI*-$6-;w5g>J$`-s==u2Z^OsM4e@exatcU@2ABrGc*t?!ht^kjn zT3dpmH)7vCqTaeK8PV{$juj0O=suQx%AAuB1pWY;R8&a_NS`(e8AdcHqK*Yp?)FnK zst^#&6SnL54*wvdqc%&`U5J$VJy5zc)kN;jQkz$Xucpg;_dd~oewZ$2SBtk_oP3K9 zGJpPq4!Yyh9)9p`@=x$je}B~D|58zJ)E|A)-`yMa_jU)Syg%sg_daQQca+) z2hF$t`sft@#h4oZ#ps&0_b8j(OqP@DDSeFX{p@1>jv9d(*<&n!bGvTN?9<;i?z1tV zAReY=Sp;RI~d@%}Mz^w`P_4X@P3{(fu! zlTSkXpT7P2#eYuw?+5zvYMTTi8vbf`W>1Y9;~M8&qIQs zK%!ze9-)1{PJr`4`_oe$$=Uzh?8VMU+qJIoEHiiaQ~;lZ{GBKf3;hbe&Sjj z9JGxq$UF4^!omqk;USsXSlW0Ji$GzGfW;HG>DSbY%y=pQ}2G>>o+@ZZf92)&15>53@*?2 z?t=%hc{y*s0_xvxubRPO(;qzS4IYjTnyJ1TJ=x>*lz!uQX#>`o^F6F|VXQxEKB|*Q+@h zVLlFM2xZMCF%V%%GRSakf?bm7#4unrrfKX|%}K#U@j0+ncF;0Zf$Sm#%K#2+wKih% zq|~tj3#?Fu8VoJ2otFUxP8u7iZc=7DFOM`6DieTV&gA|wQEFB?(Dvnb6v_dMcH+p- zM*ah!WweP08u2@tYJ3yU+T7B?d!2E(Aby0cpoFXI$hwZ}rh{YZ^(gvIL>&@;bcE`> z6khjUiY}|3BcTZDyyQQDuQ^Zyc2pyAp-@E&eA60ZEk^$p!HFUjob`sRMGcPT8EdfI z&N~POCm%emmcvv2V_a4#*GS~3&Z9aAk5Fs4`*34(xmY%?bF+e0D0iRY^-W!Ki>Vrt zYp`v7lN)kXU@kPmhcOeV2s$*XPB@LHW{b(dOhpajI))aM_i|9Lo_I8vur#$m)6C2C zvE`4$;H=E*InMc>n6Gx8U{=)7m_K_QDlx^IJX>h1nRhMi*BCG!UV~T(RhJY7sq9EqXjwEdpM%=<&E{5sEa6 z9*~X6_H_Iv3{ijLX9qigB$V;77 z$s=x;DB_lh0&bfqNUf8F%sx@DiG?f<6$^bNGWKaKilTyrG9Cp*#X=dAf=XJryuF$> ztMhl$i(7LLGx-Aax5sFd&yG(pjem}d%xFp3=AYPSR6Zh2+1+PX`m74_GJObo&>ud) z`lSDRf3zF&P0Ud^6^kD1#P@dNy~FYAf5m_e&yzGdJ{*nxH>ZcgftgkT#f*Mr7tF@v z1N)Kc{qeyda04z-?`hPJ$G%S@ebN7(9g%kEu2f zoYUlEYz6cav<2_xPl2^M+KMI47JFc@;wiBE(aU@B1Ldr`yE}QG1gJiy@I0DZFB_Fx zPWa2*Vhs*>2_RnPAeV!fbTKQpi+Z|C zQXtpS5Aa)kBA7T;Ri|jcg2a+n3M_3$YZru07)HFIjyenL%+H=JgOv2)Rv|wV<4(3zLg)F22^=={OskR z`G=j)%}Jsi`XC;*E$=^QK7V^VSzb&pc8JUU`>a9j&{fO)qGfvlGZLM=lsP`8<5oLS zteLRc>tG@VGhYtDpD66mxHrz!O=_T}{%l+9Uze&!2woLR-j4oD3!H zQm5q9&BLXf(&k|`t90>jl1(xX%T$tiSmu$;!!n5|9!@ie=HV=Ts2)~whgr%>mZ@U5 zU0lhEuTk^$rsmhb&bCVM=5(0-vBqQrqInp~wwt|S{uhQnSU8)l=)H<`6DQh}mYAl4 z@dt}l=k2q7@v3%4NOu7_-OEn;^{*Xd=QH7^zx<`42V!UHKAh*!LK{EG(U;NK1Paq1 z)fFdxHBh)AJasPEO404=3^Q&%;N_mdrocvS<+NJxHGlbwGfYs|RaHtok3h z4cY=13>LwK&XocH^RU9E7)~R&)Nnh<@+#P16zekK{4gyU=hT|zDZQ$+8J>B$WO^7S zQ4V3I`!QC5OV#u^@`=jFXR^iZwDoFvE(>j!rLisB9CnA8gPD%T!LB#$GUREq;?8}u z>qSB^FkNjhmrtq{OsrNRiv-qi`BPVo%};Cv3C}QLN_K8#9uq8V81oiwCrU{yYRypa zF&@~Uwl?aNc0OaWVsoKkt?YBPA?+i7C1ID|lpGiGewd9YU=OMAr&r35d~$ z6vQgfO$T@d(pG%22F+Y>+iW6rp@jT5t3e@(rFvd}dJER1}&x`M{~8is&tKRUi@s*>jOHxAWN$1G}w#wta?; zK9p)FVO?$hthbj;+71VVxhPhgwzfU%0Ql*yx=bv&JBO0I$2)~3<+Hpz$#nKT-q zhcg;Vf%5KEbhx*@IjYNjFO3s;ZMEZ|bfS+BEw`+g!~KtJN$i?%$gRzm0H#Re)@(l% z@J;8lWW|Nzn8y6=Lzs{$Qz%_P8u}%(Sj4)f6x?=t$#hMI2@7A=JFy_WL;vT!K?n5w z0XlQat@&r(`+B*i_f;itY@RBa4$*a#tR0ID;evy)yD@I9Zeh&0(OA{4`K8wR!4F@& zP)=F0@ZL*r+Rd7IHg1asun1_Im?T{hANIt7I1aeQ@Z$CCx$^;D#YP;59Bpi)Ph~i} z94-7gtLUjaJdHEQO!-kCi?ZVT6>1H6?J@k0+W(!^k24UpgSRf+q#IX6v>PY}119DA zR$3SmhD8qP5w{)U2}Z0UL~1%O zZ>VE<*i+6L8SiTRdjY{sEE)N3dw+n%jpm<4mo8F0>(W!+@W23y)wrfA^MM*?XAl3! zkv`+;wXvH11vi+`i$*jgIhqFx3tG~qCthiQl4*eKjUEm#s>`?;n=)=x<^FvZ-C|lL z+8-KE#+ozrUStMQn67E=Q<<4CP?=fhk(G+uJTGsmFr~x}s-X|doQ5}1AMWi%w|QR1 z62?oY(J|xurYvY<6QMijFn~t4rORoXBC`V|*iX@0gkSGS7FFW%u+S}{Kkn-O2A0rK z7{zaK?IpDYe~TsHFkcb-8MLZYFjYPe6Y9{}GqR`!tFf35VizsKR_3Mu^M4S7>GY$_ z4sl4CQYhhcG-)nn2fUaFNA2QkByn7Vw<$uKa5EyDickYnFk){e>9xh8{I}BHs)C*` z-e=mBFTJe98&aaLc?&A=yu%zaE?FlM!kbuZd}HRvYUP@jx9ss1AMQ~p!I_S!w7mPn z*3kv2cPCyTYgNh(0pP|$y!ou_B96b{miygtLuF&csYP#r8wkW%sPt7jFzfel_`g4B ze%q9LB~E5o>SSsElQ&GCp?}M5@ZER1J|M_VoJVp|FVx0NRJgCbusvhbaO5Xn&L_Ag zpP$}$jf=W_e~~F>0`KUyO7_!#DBn4T>ky8(u-Z=Y6^fxsF{n`Ns}$z7V3MN9c&1XA z7ov5FzD{9Yozy9YI)!;(QK#6~Da?zvI>j@c!n`}GQuGTHhehuW3lx3#{QZ&4Z#qQmT3~r9e`udora!QmTA1r9e`ueWK7w z28G%;rW8mDwQo!*kQ8d)m{K4q)V?vLK%&>aHKjnJ*SaHKjnJ*S?@TF>sI~7*DUfKj?@cL?XtnQ6DUfKj?@cL?XtnQ6DUc|&A51Bb zD77C+K;9bNb=f`rW8oB+GnN|NV3{z zrW8oB+GnN|NV3{zrW8oh+UKSeNYdKprsyQ2wD!3v1(LM(xhVybr1pg=1(Kxpg((G+ zr1pg=1(Kxpg((FRS^LtI0*S1BX-a`a*1j~QKq6~jnxd2JiP}$0DUgWTPfRJ0h}us~ zDUgWTPfRJ0gxbdkdM3E*N1YQmgK>i~O@J_K!EbbSG@UcK<(2B3$uGf}CDN{QCUaPH zvq@7V5sqZDGWu1_nj(pCWI`ew8A*a8^O=^MH${@98F4vs;uJ|_GvX9IIdh66vKa{y zIk1f+vKes_L_3d=NFtjNCqcC9B;99EQA}SnBrc|hcrVB)1^~6)0!=XjsPPtPiXlL) zw?I>j0g#kvNLH4dCP1)Aw2+*VT8a1J&3Et9=VcvJq`g z_nYU)pOras6z7;a*KJSB~H_hhpyMg9+karWdT-KN5R@Zm*z5I<$l%HprDj$a); zKYnraw85~@&U9KRNS*Pw$4_7XD~&SE+^B`(>7&<=Hlz6J=&RG?|M!U0G&0DLV)nMi zP+b~jT5zb~E-=(OgPldAf7_=kB>ayF1rCh1N+9CHz+4Y=R=_=d30OC4A2YFDo$TME z7oA`TI>xPPtpFCRXb_&=tObm$#o9wtatVsjjL&)xT~PeC|EK1Q|J`6#h8Jm9vlaa| zUQVu8>kn7cFMl;(jF<1n_$bi%_rGfX_ZQ}DZX17ieSBgRZc*gv%g0@T;AoeFy08S2 z=fD>)arqp+K>Xvip29%~2Ho{Y7Y(#)+ z?o8kep*!}Jnwh)NDb?I(>>Ja09aD|ib+_hUcG$w(Bk5~e20{{(4?#-Lk+rhtD5Cab zG8J9`D=nk>1O>SLv9;0>A1f2Yv_6)G2OhUDj7s_lAy(#`2q_eQGkgXWOG0ZVnvz!^ zOGjC+CJ!OjdNqHJBCqh{s#k3yBn!-sjope@IX>D9+M-txt7T_IC0o5}V!o_dNPXU` z21p}xV_ACIsCgxtg#u+1vECbzE^A3gouQR6z|FaoP^>6N+6F)q7!I}IyUOwUKB ze|+)8E6KS(vPzN`2V>@j|JIralW;IexuH{ZmZv3xDkDlHW{^7Ez)a|_` z8nuZweC=xey?e;oPp3EQcg?_jipI~xO`E5gcrr7FWyurTnMoeun%3lry-O~6XkM~Q zpRq6H+OFBXF`eLWmuWVWHeW76kCo1T-3N@Q1TmFPx$jO35bEaTrc?)#g5 zFRC#2h4rQvjr^=3_P@A#wo*!0v|0uAHBakD!$1c(Z*fGfzWF3B4GU-zr2JgoUR~is zeQyny0%m%Bv;H7JsT?rAzzqn!E}*^zkxp*#k*eixBmA=Q%Gnw!p#5w1Up7I{D&B-5 z0wgF*wLsd?XzFG?yPo~Ww86x-xmrvv=)Tpf#oO6AE?b?S<6F0DO!hDmK3|w3%zbeQ zJTlaK%=xgBKlqFonJ<2)M!`e93!1!P>hK48f&Me5?|r&HYzNnsn-aRR7=U2Cz4Wlc zmTrk~M4v@6Q>8^+-jOOhpAj8>aihVnBpzS=B2_+f?4nDZiWc?jUwh?Rbn6?Fja|%K zIGm;5d>;0l8EKV{`+Q(E0Wrg^!;{cI+y_ag4JNPd)p zvSa0R38^lLtT8@clqW@WF}{gULty^nBdV!V*@W+?yw@=AZq<|>86j##ZJKCOUqi0r zKMK?o$4V|v*%UJk=@zzHkuFSBO--XABYYk)jk~!|f%WgVt2NTrpFgf`qB6Y8f^9{w z@XVJ40bgZs45L{g#hFCa)s&!5_qg(=bg1E&Z#7Fyo2D8ms_sIVjC_bo<1fpBZ0&V; zcRCE$?Jia{tfk{&h0*DK+q)^}$?{}C!$pw{Senj8tIkUlajmUSZE!IW7enPIo11^4 zh9pw55gzmMHVoj(5JM(AxN)+t{T7{z;S`ugM&UPW&rdWIfH}Wz%r{Q{(!|Go^EevD z%HD+{GtRPuoWR+PXKW``az;h53+kw`Kt!KhdB|Atpza>3Myc&0m2@n0dUl0fiDMqI z51Z+7xmeom2eTe#l%bI=zWHj(9#A4QztPCgJJ6S+Uv|52&I$+hdvoR~i}}y^jPRRB z&%Zw69f;C*m<-KRAO>HQ9ypm1qR^V>TvIgz|F4VL>gH)klTktA(E{N?USdZarNUSixja==x5d%m! z54+#ps!%6>QfEZP|Fjx)H1|fW7tO?I*VU$w2`Vvbs;3Z{bGA16Uvo*+um4?t|9ZMw zKfRuuKUpomU7lP#Uo37u(t04B|JcKU4*b*KANBaZz22a=zdPtB=RZb+{rz9gfBhV5 zfM0w;t3$t=&ZkREa4&GzQSGDBCTMV4RV&ET-9-e? z>uGH3$#2JBylgQ4#~_z>Tl z^VxI^?ED7nE`LtAHpc!23xW>q{CW;8Dt=Ya72hi8ihmV!#m5S|yt(4hYU_Jf!Cy?@+`b)8&d;Z-Rdl_J z4+VdDJ3n8eg^lO8*Kb0&Sp0#4r}~e4{cmOd@0T+ekk*yK4+jc=gZFI1zbgM5gw6{8 zx$46~>m{uh{`>UYw2dl${p0wmw{kQXxLDt+B<90M%iEjv_<-88(SbS2kQru*{ zjxE6AhcXZ(etq+9jO#O3sAJE+--cg*xUtQpgFeI`A0b*#Xtmnni!}tzVg%4{7N0c4 zsK2Y4NTcH4E!K$L)pR|^vU|iv{)xS)%gY%;W#P z$0e}m%`K@r4$^LT?G{%kg+%?KQxkY+YOv!==0@efz{q=5V>Ba@hEbY#kp zxLf2wB0c3-K(3G|kxo-x73mDgd6$u?t`=$Lciu~mC+8RSG)OjU)8Ef@Y99hax#mVx(mVeHB-tE;D&K&w6x)Rvig2}dxJ2-z^sb|5paeeb8D0!3@#fT z2maaMQ&&kD2?wFk3^qvggWGIk5z*31L~w~!tE+r2qN_v%b4*AS8l8ZKxU)H?bPKsg z5kh0vHya97H_Furi2Yy`7Lo{EnzjVq2T4$1P;m2(Dk%T4+A9R}0!5MF{N_rPQ6#ug zmRLwZLy!eFY?v1)iUd$em7x=ep^co&SR44z!z37totD${DUOJzV%iCkEM^JPdLWg& zgk-d32}Vgx-@N-UHdC65#rtHYr>hhQ{8CH|5lfIJ7@YT9;4BRmv&VK1IVBh-DxKdh zmsr)nM1GpO$x28Gt|vc^Zzdn;m{{a(f74YW0@E?%3$eO{FfT6{jY*lz7igb3G^#Yh zmyRSskic@8GsdWtg*PPR&?sSll&WNEStrrGgh4R!9TFMl^hu($1ZgaBf@H{I3DWkk zM7>?x6%1!<_(xX>Ym}Q2T&-8Ds)W=_%kwv-nzJgU6?btn8Pl8&19`s;7H=*u z{9V5US@Skt(@Nx9SCRsyRynm)W(V9nzC z&1^oQB@-NzyP2%tt?VAQIf088@)qs}0oo;^|M7!{|1%D@yH|A7o!lKab)}E1e`@^h zr>4mhGqrqhFTmIg(=;)?h(=|42@U!65*ps?C8U~OVn*St$j{gOtTq~xl=Rh-p88b5 zvN@(g!%EAvWjfFVce9C|bGNTAR${%a6j)~~1=iO}LE>sABy}~L#_5{*>0tCJBT+RY z3RN>I&^4oyR5Mf6D6kuy%$c&l&i7a`o81*cn|4F>(54bSw5dc7Z7NBuVk)Fog$W{U zRWhx^bG9X^74scPtvn0>>}G)mA(gb}aWk=4sud7BGpLLa5{xlraTc7AqYu`$7z-$E zV=;k-*_z)_td`eZC3(qRz$#k_6&r?|&{2T8{=G_OHwdc`+7qk-*d|~rj@DjVL3YQl z3bQK;GG?UDX_{Xvr)7*~WZd)b@Y*c1X*qlBu6Wfk?alQSpDFWM8Ot>hZ;MW8yHy)EhzYvpo#TOV? z`i_#kOa)<8MOHl(@iiAkmfxXFiv0d!^4P7i(9~WpAy3k>cv33Ya-@>k%1uIB*^x|; z+D>Ewi3>kTf!kh8p31$)q%dq8V&o*^J{0hQ6IE3x7?dc6NkQ1$OJ#(DR7NOBWrTuM z2H${7#qhFJ5-&@I@v>B&Sxb{KVCQq$Gg=VK9^RnK=kafswX$cla+Np}Nc1tuL%wEJ zDKcwEwQ?n~a@J;RU!nJFQ&NAiYGpZdlSQ%8jfEvCTEmi@Ly=vLk%j5Xlq?m?P_l?E zIOPRq=!8}*OG3`o6bY+E1saxZB|qDifor5E?_bfOPXq%_@UExJ6Z;FUXnOzX$Cd z=6U+2$1L^~|>f2C*$6Kv;F{M^~_VV(ubeRJdA&b@WVQ=ai5yos&1sEqgFQ(sKt#0(cMTTnHvx2+9jkT zXC`4<-PpxQ-HXItcu}pUj6Y^EuNLSRF3@8*M)3t|!ODnx`bzE+NRSx7EYG zgQO=$hHu=w{RJ6z>CQ{4oAg9$U-eFp7Uo=J*d}}j2`#-IiE8cjg43$D)Vn&8Fn_zqm&aorLL;tfVmuiYg1HsgYgN$ApMXxZczZ%NUq-S83#YaWNT zdC-V86t~1|#*!9wkLHZ&Fk#A8eK+N8R)im!S5l&YpeDTcFfcspI6;9WfM^Z7AT zm3&}U)!FDE*kKCevbvXF$ZuS{TXKKiKcaVfPRuJk(yYRw{1!5LM#fY73Xm>DThre_ z5*?Sr|Bpp?#$6c}6a9w3hRmxiKr+8WQ-Bq2o%qzemlS<?8i-y`=l?{2*J_}>+Mu@pFgxiocc4%JF z3IUJwoH5(S*_{7tZ0s=Yi>@Idw0gF=gwyrw$y+AEO$c>6>=u$2(kZo~9U-%RwVIvI z-r_h3-4@~UYc9fWKPAak#vFOO3ZM-r1~~IeA-Oca6~)jBDDFiuTqOl`7;*C4AFac0 z%VQo&XKXwoVBY^7x)wpVd}ZIXWVNk|a0$hW=~B&1&I(2>Pi>=6YuZ*JlqjvK*HFA;Ltx$*Dr6`Jc%kS?!RG{yNS{%=VKZSGL<9a+1&H(;B(Iwx}DPdifIzH z=81P1x8QE~0Z*q(TxBN5gL|w}vAwtrZ>+W370k!CE6JRt2l3A2ouoW9uTn}uyKyj{ zzHE6XS$W6Gx&%nte#qeK-K^--pkeyNyt(#$;%)7gD#7gYDkM^2q{+h0oXYvU=9&{T^~`nVa|xJ<<_L zkiPOfO&SrIfJ(x~H0uYbHmQv7OmA!L*Ytv|w-<`BfFThW51RLHw_r~p{e1ev^y&n+ z&S1YgGUr{4-zVQfzz>(ys&o0yRHA(l7bT$8sO*;jSFRpdb(jEn) zrLVAj%(M(g*JwV))nsGIfU&GNiUmb_&aD6w2i`b_31lbVID!e|?^bdMPg*nxH`{i( z+M^y@nZQsvM~D>R8L=rW7mBYO$0Y)AE5Mk6>8Ux+jnzAU)iIw6@=}bOW-^IIjo3jt zE{CbZPSkM(vx*(9;|S)(c*d>(P!8L1BvXf-y4$2Vu77+M^DV`@$&dD6Lu{IUE@jU8 z#6x^s6=xMg&}qK9DpaFivn@N?ymklvJmJbNJ2&*LKlj$29Td|_wE1vDJm14Msc9Ma z0YBGf*L6Kf`NWt<6t~trAX}#M1?BA}tlZGCN9u3FB97TalUqO@V|K>l_G);Xr*|)! zH{>`4@r2`zvcbTWg)x<_{q z$v-?NpELEtb8@@(oIKB-XA3UgTP%&{tF|7*@|Rz9lI)|$Y{WCkvB<@|q%^J(=Qg}h z){W(>ypo9>9Li@=F70pIm$$e_#E`}g7QAGZVx-4-i_M?l*4NlWP zqWdan@`7Ob^ZW7pcav4%tbk4ReLGvfqq99N=*NLIZvV&K{ZqOHvMwN!1a17l16LBw z==eWg7066DH~YLmG-oa^>7dY1>#(u^f|_a6I;-)~i!Pe2XhFp&=F*nJZG{DuB)O@m zR<&0BPFg)4ckq`1MZHRHHLw=NET)UU9*^7oMVP2p$*Y8FRZIAb#v3d89vVhSZeV+X zPYQ&Z3G*X*N+;l7nM=x%(eufY9SDu~-C)O1oDbf88*Z|GhVND!&#B3U8MCADQ#K)m zWf3!w#_+4r0F*U!0Lva#z%m^2^${6nnL;4-WUxC3$7P`X_MzGKzq+(Q5)9B$Y;6Mzl$kR2jwst`0)Q=AdY!pT1~H#ACwVL_e1i zXge-8%Ny3&eqv)#?*`{aQ>tA>Re^DezPw$JFK4(Elx*<;8KV_QkqK`aa*^3}s+pNnDq~5}&q)O>YvO7-OE9%^$|!Pz ztEOmbotWs@cZ-!8M;TV=)w)1YsYF?`Q(64~?S1=qUAK*He~tf&o%9iT)p5>4?>3Xl zmeSF!EvJ$buUxO(6U92RG}fy+^0;oM|NH#_zCaQr_c?Of&Z@N9$l3%yfB?u%f+Tj_ zf#gX%GgR0$d88kbn0p9iX$d2h9R8%shMi?dU}wDM7$ys(j$rBMVUxMGBTnh ztWgk**U~g|IecA`U66>mpN;HhY(<#2#?mj}U3Mi^ZH~3Qli8Bck_jno2q%*=jpr0Z zrCusy*gZ7s&+v|X$QkVYL`rzh|%6rfi>B(7FNqZF$yA%RY42N&I4{4n%tPG zU}izO3az%WX5LM936}{GOB_%43c;oY1Z$vZ^+D?f1buuYDW!{+N*YzX3^7aPGF=@`Um9g- zj!Z0Uv?hQo7OvD*Pgn!o-~Mjm7{^PuSUySD7f;aHWuBZDSi~M}DN?pjg#ei*BTzx8 zV9Hr&ExMldP9@Y{q~Z6f)Q{v!K>oaPY?zcx9$l_eVs6hTkjqpd^e;9GvK9oYY8&F= z)MchCwK%3WS?K!FinRFUmc{VN*iQW*-MYwH9V5^kn7-V!J&y`}9lE+vlA+3pTpZoc zNO>ZoZm{U26y_*vfYCS-Vq+bgrmbX9re?wlqL^vcscz`(H4b?7qq5nsN}~BUpUGcU zB*Wrg`LJJE1h>~z-ABqaFV4tln@WA?y&sol!{}H#pdp8NRb2f=;%+n;IzeSUdk6l~*jpd5g z@$G`vMlxn>+cet&>EkE+eN023JAdv;i7(vur2OieCr>7RN4t;jhOPRx?<%wnLu)l^ zhSuokHgpP7^Ie5@&L*{dSL1MtCiU^%z|#7z8(X81D52s3!VcV7Z-{8CE*?=7_b?4tL4wY$GF*?)qg z0m;aXvj~_jO!qN?qEj>YK=*X(?A_t}nHyJ;t;~EQ6({d;!iByPk}xQqS21TRRwrLS z`Q~7<`&G<*vh)1$WHLEg!+jcSAf6yq*WQ!KU^v;@+n#{b0SpIgPFB3_Z<)38a7K-j zXMuSJ(S1=o(qPJOF98j;P(&^OtvD|JZ&e_C@)ca_DX`3&m!aU1O1<1;)68IEu`%2c@ssIS7 znrU`HfKWc40QIbhRta%XHCMF$%~myganq|P1}&dbQB zKD3_g(Zo(~uZl7%=cZDg*+Mw-$&cootn3a4^G(z7AL^GL0TW0C)bgfiV1 z_QXjPeWR9_Dl8yXnrKlGON-sMWm9X-As}t1cNtL-B{^|laqB(?ciSTfyKqQA0B1DG`H1;vU0<1A_2x%S#XXWIF#$O{_Knfht zKYcYl5a}R=D98e-v8Kn1u<|(oeRjx*m-u|0&Q%iJivPCA8ySmW)IrJMaL2kJMND%m zLDW}rejc(QNIKzJKos8Cr@2d%b5iI;-}x^OO$GV|MAK463c;Wyak9RkAxW`-2sjR< zWRny^go9U_0HSnJ=HDdA9TVQ}XjO^3FJFK^c~_E}QE+$XqczUHYyqxnN=U(19QV*z z%i_hj%aoBhUs4K<4i(;>WC8v-0h8x&6jD@9$dtQetxvfOWG-)gtMH4jb!lu9iUqjiedBWCiI(E&` z(POH16s3!EOzh|}RXd8(#W^N+^q8t0Md<<@O^;SgX}fy4SH=XlVx8DjBCBSN32wzY zF|I^b?Hd!^igjXTiL4qrCYXb@$OW0_g%sn3xsIfb6}Ds!p15W7JWCZRc*$Ex8po*o zR($h#iNxj{k}UxY(XFIV_8`&=>P;yqgy+XDgwF{AC5cb zK%%e;;4k3l5WctyBCqG@IFh0Wg;9)d`sWC?pi%LpRPm%%5%~kudU#&1rn9@as&XqZ ztMj_jS=Bh5T_gG8ggzC3kMJt(i^|LG!~&Glq>4a`0;W*4Ax#>M3YsA(0)>$1=|zqr zON$U8DoxvsG* z5I3`P08v;4pfe?uCQ3(erDcfM=QpTsQxz#7QF#v-KK{X%>1tqssthSE`YBMNv=>BS z@S=%yltjEA80|LJh2jZ!70lgFEGa9RT;twk5iCt{kPDVl6rn^KfE+~>76GDrk~u_F z+y*JV0XC~L^(kpqUZa`nH`VWq+#B9bT+0dS0{3?K@N z05(Kln+s(mH87#`Tq8gd48u&y0~0QgWCI8g0xiSFRqK0A^bg26(T;tY|{M1 zl4ulVC~$LWfD{-}+6Q7-g&6jM;2W(~7^dh8RhNjM>QYexCtVsUsY(HY0(UBDRjud& z)!RC-L}dvYZ4Wtul;(%Rax+2({~`e87E%Gk)!%$wkQBB6=n#4=fdHVw766?hF9J|u z3xEPlMF1*n0VvZ+(T4I&(h|{y=As&1VNC|2DXP&Gwg8H1B!HG0iKeJVSJ;xEtVTj8 ztC0hQXBTfKZxcrn1rcwPJV=Y&88;}G7h$5XU5*kJDC4wT5ZFnk=$BD}o7}<6OMJXPm$ zB0`Pm;_~LJw`XTDVF|HK9C=a<>bVD+dGJ(Pk9j(yZ$QNiqgGT1q>-%lQ?(@|sw|dW zA5w@N8I?STvplH{xD6?iOvRWQYcyx){Oaa?Oex#ryov0sZlPEbWKdl zq^4KYY5KvDURbUo7$_nrRVrN;jdW7dK9Y$W}rgE@q-v3E>o-9hSCMsvM>137DUk(2!RS)07YLQfR-;1 zP0<%zVM~IdFAzY{7yKnly6_ttAu3Y2dkS$6w}2ri>QM4dZ2?P?)WMSD>;=4pst!pG zUBIEDdS$z3kPSRuk%6EGP(cjFC{{1rE6uSAgiT=WQfL`nqb{=Wp=@_AG{?8hx> z2%_AY%VkoCih#HamjmEorGmJ|rd1WiHIV2Qnu-%q)S!~_Q3XqAig04L&Fz35zIu~G z>U2fe__kLL7nc`d+z~wOd{QW@R>#RI&Sss#SJyWXBa%^t`>4%*T&X@TR`>TUl5JCxER?mYD{c4H%Wd~0O4~zOo4e9> zU%lLRc?CgfepFnPK~gD7h>ClGD5{VkWlxY)ibA5|q9o>SMD9xFd9j+jgpgO3rM)0{ zZdJ#rProF`%5kr7?utSa_uZWcsLI|(phTKJFZ84Rt*4{zmrD=7-u-&#$?o3H_R{dv zBR*Bobq^^y^yJ0v_VfQ-8jn6b8ndDBe0%HpmiGMV&eMb4|GTp^9)Ef?Tph13-@h-? zJo8BeY=?yu<&>FFYs$6A(k?`}7(lxV2S`eJ-*EN}H?)0p; zvnOkDI^TW_nRFpNnk>d2T8}`ztsW@>GHG0Dq>mm=;$&H?ryi-|B6{T1$p@!@n;w0& z`_(gY#H(R&MK{=xH$J=?9Zj#SkMhgU`smdd-(0?)gj&*pU%x%PKAIkx0nH3V1qVDn zUZU$(>18YSUgn=&(h5Z_=e8-IgI*aBDa30Iib|kJ>L7b_qYfyNI>-&(r~`_m&Nd~d zH~YAunI>^_1IrL4a5k0nQmW(Mmy`x0fjpy?U8euh(d$RqWr((%R^wxO#ru z(MmzRtseW1Rtjr$k>F|{JuHk?TE#{5_|G1#LOVYEXeF@|uP%>PQKg=vRaB?vXcg5d zjaEu_c*O0tDayE0KLj7O`L^{5VUC+@%+1 zDR6|K`^|4;uF(P^pC<{V`MZF#=6C(Hkc2%t=l_)9di(6UU4 zTN7uv>BV2(P2Qax&U`bLK32$Q!G3%}R9?E$fN5+Co1zQu8cE6?GIdH#fT4s8@Yk@WV^v6Lj z`DP+vh$6sjXr;4#Rv}S(&zBuy5cK(9{30=GoevzW#77}RP+6lqa#aA$_dsZ>@5iD{ z=APdSsS0i|NOG&0MF)OA94cZuXHGkf3l-JWR5dL1+1Rj)ivOh>%*odH#J2piZhDp> zH+=|yQTGd$=FaHOv)ZB6oNVK6Nj^VX|MVo{>FSvAIPSf!*2-GF);z3~qi`5kET>Wr z3S$thCC%4-x}esITe$g>&^l4uPqiWJBt4jQ;m*iWfITGIfbNn9qRVFWq7)joYW%EL zTgjSXF|?8zlG~RYM}`219@j8zmJD0f{nf5<64f}V+83xu zj1el(9}N!3h#~2l_!7R2F2V8dsw`2`3nx?juv%ZqbKy|5qFz|$x@QVwpVkn<5rk99 zn@fEydQ__$pZuU9%pxcS{c%jf2NlT{^~a(#7Ag`K^v4m2EBs59R24T=Qq6&FQ&-f0 z9Z7#wN;{#poD2Hls2I)qX~qgGMfSKRgG?|H#?bU>Uj#nB>%_Qxb$B+xXp939=6Mw* zWT7XYitPoZGqiWqK6NRfCKkQeZXU?2;Awv;GRVC|V|vi0-l4HTiR03jbX>PPkj_l70KjR+cz-h*fMsKilR8%gT=G!eyK7+=;f zsUpBzeO%(t#3BoXt1ln*tV_8{6F7|}Xf70Mebg_>7lO8o= z9Z}6bIwHDQNB&6OgkipNm9gJdYJ@QzU3Ysx75?rktTsLIQgaC3pwf1X&< zAWjEPszu|Yig)QzLO(^T1pNNxcS1t&HUuKWOFKL>#{=QKY;4UV%oe85UQ+pysGQoZ zRu|_AMa{f})aw#gI$s$@nT2FHdk6|TiYH%a5dlHa!Oa2z&LDz-Y9t#;?`Tgk-lWvE zgM*6RmlpS0Snc!AyV#zIM3;8k-g{+i=@zxy_DY9TrR^Cfw`C+Yx#kdchmY8l_k&}0 zpbnzuhOIV}>WV$0O4G1UV7JiS=1#8$@mZI4OL(V%w#39C-w6(_rcPLB3~fy8{n>j} z&t~|SWQCpt+W~U+Vy)M~;gV3N>8fFerH`vB)V=Mm)veh-(3iIODqavOn`YJBTA^OJ zSuM8?yykN2P_63LlFT)M+(qB_${Z=GUUln08JgkeM9K-5K%ZLEZ}R3Nd#FUc%xwSN zulCER2KS!!)?zAWjvU67Dk+JX51lMfEe0*ladc@MA69DW# ze?(V9Xlt4NF}UCT)?q3<-5WD?voFobvQ0k1LSAjg>XXBZBkWbY?k9Sh~8LO^;qr&3cVKyMeMFaoXkrzq{`AxDyA#yDz@zet2~7?PP!FVCVVd z+pQ<~TuggW$B)&OIs-`o0I2M4=<+sy`$4Z z`k-~_Uj@&>vgy;4k&#j%LEFE;0iq&>u}Kw#6&OHFC-?!s;C5(7si6fOT___J)Oa&J z?`{rfZ+MU@R5f!KJAh&DsWO255ekWW6&-%P#6899w-<|gX54<<*tp>hg%ohvU&`?HAqPLkptey~Sqwb=Upk z-GBW}EB4P?<(}|=*FWyr9ue%n^x}^@&mVLzu=p|g1Uh;RUmc>u1^ai6(d0zlu_*R2`HJITL<6m zJ$6D$A+~}Plczh+x4O$~85d&dkV3#oR;#M{1xVghY8cC>BvFes z<0?%Sthe^}x4tX$=HkVe8)V1*9wu8nymZI-lEcHUdwO{^J?rSIr+pi^xLx<||M6f1eqAp-xPGYbuRgzLZfwS#9i7XF~4 zo5cLS3hyy`FNGhQjDp{5G&|%&F#IBeXNk98s6qd-nJQ3gTt^p~D?AHKyOu1#XG4xM zg75Q_Y4_Q%JDnkZd4_Lz9U;(t@oL)LoZu1O)mCwb-7%diO7vF~)#k?_?&x$ZEuYh7 zg{ZBf4{kdFyZ=B~qi#OvL}V-bO+w|f2qOEIfbIgD5>huuW^OJ^lNw=l6X z@GUWbYY2c`K)6XXG07;5TQP>!c1B{oOwwqQ!YvIexO;}d4-hDuT+_AIV|-wP;fkb8 zHl=Sc1k9H*K?H89jW%(2%Rk>l*^*djz3vCh<(?y@-?#jxPtBu1Al) zmul7Md6=o&h=#aJ8b}P5rnF6-AG2ke|;m zQ~povfb-P!&RMG@+?CC2Uvq6ui|tg%X|yg}4bNU3&Zd9smVUy-$IW4Ba_hu;&;^bB zU3w`u8?N;DY>IH!<<<21kUr%egLmBuy7HI!+0;zV7;*nAYN3|{Fksw4KLB9R0O1z_ z6}Mn{7WWOV?kvH7RJli9!sLEvFqP@QFxhANPii_)(m|cy{Dzi0>?f$%y}~&N9ByE% z(M*W7jb4whcwicJbIJaaLct3L!{L{h?kF9fW$S4)et{Eh7>`5KdU}$d1Y{T=LS2^^ zXYYB6xcAJg=`aJu4QlcsRV#xphh?pJ+t-|~ui43%UOHZG2GP2ub1Z;)O3PZjy5w_D zS%+ss4r+$HwGm*?TlScq{nh&cy{y>(KDa|r$6=MfJDFlpK&vOs2?oPKZGf~hK;!5& zPe>xN$e?(q78l6u{o9Nu6X0J{K;MNu8zxjja}0s|8+#>~32%NfEu$KA#B z9rwEuG$KA{Kn56A*8=Yd@^TD>nX?z;ru_B$j}d_|ox!urdIgDH8rZ1pLkw<&)6=nO z&62q?)yGfIu7y_np_aM_>BAiGG%(`7ihpH~@POtS!6tY>PD0a@&Feac8QDeqh`oc52>JAuf|=3u)&{M4;w&PXCYWlK zvG~;2FNuns`zN6W5Bq=b*-s;X@pmg$zY+p=m_g$Iz(@m3i!3O zWJiFH!#nFfPV#`z7Wkz z(J~ar;E<9NW!>@h<++)>@^;@RoiYTIc4NZ0=il<7l4BhluHApcV2lVJbYhJjce!40mcM z=!1Flk2KK=d!4w~{&vbPtcHs{;Ur*qM|0!sB@ z#90GQ?VK(5HM5iCW#+AbDW@+=7`TF>G53w$kUcgK z%@v5^s)&>nreE# z8}Dcl%>(eO_$D_CLn<$9CxGg+w6VUj-y1XpU{kwYr zxDhZMj91h1zsm!V-<|*e2fP42|D29z{&9Li8^)MI|M8p<10nz#E2OYd* zhJ{5p8jO}74u;**k8e-Ua7t@BIvgFpTD=bpf_ZjTbBINWyv0 zH0sbwAqi(g)2P?IPTXBf&euwL!=51xn7Q;w6}gpTFL#vNi`tpgNR(0}N1VZd2f@V>b&9;5mpyVWF+n#^yC@Z57m}sn)r?^qM3=@RT}gtH2I5 zoG`MfbS;h?J+Tq{gKpPS4To;~krgMUwk=JCM^jVEAa}{EF-%=@!b4hcPFtjvs zGR78^kmf6j;n0smS6ZsoaO7T?*HikiCV#u-TZ*zUX!sA)qdR$e624L=!>99Jm7iTM z8+Nbg#4C9`CTo+uas@}vQU7E>)doFFpHPx!QXWJp20SrPYXctRh5=7dYXe@8num2k ztqpiYJzO8V!8Cpe*tE1kdV{^%y}ba|5j9kdr`O6m#tK7}&eMgbAm(T?VS@uxhOsm? z`viw{=rpGNanwN&nd#86pwZg)J0Xaq6a#?kpc!S_+Z>@0xf@dB3})JQ&^{w?z$D_m zOSt&?kEa)W>>^CFC4x0d+mKfO?r)KpjeF&wW7@#r>*SU^<*A7TwFX$Tvw8nI>r^rcGLj zX_Quyv`X+vTjk%=vcvI|A*d^QJ1cZv0@*Dk9lBc&G^X-cs$lND+5;_+=8$5@s5Lbw z^H8#-f~?%ol*l9bb_$9hyYD7=jn*k8U^?9CjVw@kATb~c*|a96NSP&5f?@;9gkCJD zfvKE>i7#3Ow-B<&bb+KitrDGAVFuZ0Hq`|3bY61xg&1(s;!vVZqIj3bhTCkc&Hlwh zDhI~!qB?vPY=%z&TvX@a)X)ODMeo_ zU^*sKXv-G8Tp8MnSAvK%E@_4<=Dv;~p+QZMh!PsqM9ItHI(-!nw}Au=4R5Z27*jC| zov(f5KXkmpMivD!lyGFOm_c_q}hLI)W}A<80yS%`UDaP4QVAI$P-CmR3RW z`@@OfwolIFF~fTc81=;P<}Kuy6dU54A##GJumCcDdoQr<5HAtno#K`q+E~+46?_{$ zB6Q^4NN_Q3bQ>mrf|m&<&JP6-rMi^+!y?t$c?QuOXru25O?fuWNJy(nV7n{b*}mp!?{}AL{$pBdVp#hBj3_e zR1o#y&167Jom^$?UTL9CRkA+(9i(f1)Qvs5csDmqkkXc<$mxt43I-75Mg7M#Js3B& zFA$xk6y+;aJ(xSdlIp3~WTB_P{iSezl{6QjKvvLuO0B3}A!&21C_Tk2{}YTGDHJ|f>@9&u>vf_Py-Qx#Kr+NRvvNiQjbWq!k?8T{rC(veLzP_fp2LR_E`|eA<2a%^1iS-GU zOpJTz)GZRLDMccI19rSsk%FlSq1!|%&m61HRfwlFQ#Y$Fnn21}I^S~cgW)IDy(4MB z^k|=V)*6(Ur$271Eu3hyq?m&%mgQ`x3M}x0u26o{ew;=!I4u>;)WD#QJ8PUzrr>>FnP6l zI@`VY`ta48DNVXT(dmUjAhj;%SB5BhAstHrI!4(bLe?&PznL=hSSkGQnYYdol0+nL zo6;tKwj)as5Q(QVF)yy5C$kcLd~*$@(XCJBYA544f!EZ_B1tSz4k09=3?zIJE_{T6 zl8M>0nd}&o@ma3{Whg8#=rD>|o(be>3X?iKn52Y88&is&1N{eZPzOsT; zRS7|=rKC7@tE5=dO-hoeB`KauBi;kiRo%Q#^gRdd$rQcFvXW{7O-Va}qNJceQPYv2 zDyS(?%%!P!*HA{}KC9y}NI6A$Dn(JUtfZ(wQ&Lo*C@CsX)D$JC3W^F8ZAE31g=6{7 z3*)&;=X6ynjFhDlMcUFyB6aB`nZ62SQeiqtQ)8I&K1NjpRkjfqj{M3EUo zRa3(?BAQ4UX-Te*m?Bq5T%M~WF4vWk&}3?f%jZ#yRi=Mw#V*3QPTzE99O;#~yCnq$ zt+<*6rMTjPPF!(CC7wjrh%1g2a{U%*G-kM}di0jRiWEMN#qaIt8%}aQgNqEa>GLz} zJ(}5g^-Ej4aB}Z8I=nQQoUE;GgcXyIXi;1Mg-GeniBav^#=a4EznVx9PI=nJ>zB!+ z-RB3B@o;(a{26|YCOdzg>^y%wnfzr)aVdN{MpCVAL{hDdBB|DvBdONbBB|B~kyPvBNUHVKNUHUXNUDud zB-O@BB-O^clxl?ZJVMW-!B|Q)8mvmGMuQD0)o3`9QjLZyQmWB#T}m|?4W(40(Xx~Z zt`SK!9z;@&N0C(Hl}M`bdL-5IFp_F{Ig)C5Es|PtM{%)LocfDl?hp{NN;gr9k2d^$~@YP=)7xtb#kH-Ta z(VC1WTYKAap&1V*N8@ETWJS9UJr8uy%!&>?*c6{>ED)EQ^AXB^%Enx_O?MnLzWG4~ zc^bm9j*lsTaUM=;bM(<58J$dzEm($WK-9L?L)eIv`;O!-nbRC4=vnZlH$;gwzYAU> z6t=msZnNIj^zB90T5pd$V z%m+T>aH@OEmUya^-yjK=F4KUmm~Oc_Eas<}Z^7AopRAvND4^JBm3P`~(=- zOr*=4^3%;JkeqJoT5?n^rn)T^b}du#?y0Efj>6cn03N4Oc=@Pw-#9|CZ}vw>wVL(I z6iPFqLZgbCGD}UE;yBwx7Az4pIHvZiz(hS!R|vf%Dfr`0cUbF{cgQ30{NQTVihfj7 z6(V>+A@-a`;w?E%gP*Dn6(!O}Bw%{BA($yGfiPqGWN!KzQRpIe&AmBFbu{XJx%80l zXD<hd855H$UVf3Pvo^ z#50B9=#8XAayp$51(FU?2%QcEk`BQPoel+(jz@Hzl8Yp*aXOvYlu2saX?1d2c;94Y zz>BM3F(15#k0>@9t~PDv5p=6{onkjU%q{1EQrm$#vtA&M?MKL|1p{qtLl!NuVxW!f z$P#0|6>B?Cr?v#WX3bC`vnOh(77bPAv8mb3(~UYFwfZ6PGzcHUsCH+OEH3<)+MD*t zz!wJsWOpO=Yi4!OQQrh{);k4qU&A_Vnr6pxmn>^*%w<|zr(s*nE6mvyx8bVv)wN;C zu5oSEq~?r6-c{{$-&d^sZAfi<`t|egCT8N0oot@7X)X4{e@CGm(X9LxK2EohcQC>0DP8LbE>)G?@#Sr)Dfxp<*{M zUYUv6Vo4i3MmhRxh!-%XCOP6p`^O&rY$8KRNxK6FtNL<5I@%>t_Q*jRyU!$d` zTLJ35TAe!Ol{J$ZPs(g+AG1+&pxB_6mMp4}N^3e)m{XyYru1U%Yn0NC4wGN58frXI zima%RCl+*VvqQ5|p_lnEy4tfJ$_%MtJzKYEJl%F?R1Wf56vc8pDCdNk-FQsS=`ySF zket(HHnSsgP8oMQwtyRub7IY2emu^}o7UpE^1{6>s$H}guLP2y0sEz0e;TnPEX8hi zrJ@mMwzCTrjW)BNU8iWYnGFe7bi7pLXo+H5Xa`UYUb7{&GU|LWkAC)CwEHoyR{v8b zwPuhJgQo=2Oey+cM4B5?^umZTH>2o>5oK<~+=gMa$W0jc#Ryt8pk_aZpP7%3zwAGW zwu;@+7QFjx7_)f_ru)Lqgp}Q1%M&@)Els>6^PAR4tM8^$OEC$rc40>7=H+N@eRH(7 z@;zm8UZ&V#_a&u8Q209f<;~IR^7q}x*uvr9C9(Oa`?y$R{5?w&V}{wK(RsJo;A*7= zS4cTf5VNHT<$H(95u{6R(pW3xldR-QOt32)1gGO=GOH@`V;g?cf`i}sVuA3+g$ zzX*^8v`MZOm^)Txnu8pTlf%EB&L+sTc*8r(Hv27n07u3ecb1%22vvnUO+4r!tY3C> zB>JLV@?abtXJ-#6?f&mvH;A37KiqS2Idt8L7Wztj?i-e#lG%n_^7KKUZpl@2iEiZo&MdrO!sc<5{lj03vO!7>bOR=Fe1>m+5G#eU?D$n2Y-sh_2uQhN!J}viWk3h zn*s{kL+M)STe8$bAx}wxMDowDY!Gce^&43$`nQdtsWYsq%nI7dclw~-a7*nwY5o<} zZ)I!hAM=rTk46knahY@bo)eJ7Z4<0?FiML3>_|>Ia9ZH>DD7BDMIK9fO#q_7GC}FB zHSg%L&pJ3?9KVKs>g37UHPJZ?8tjzjQy0;DTQs3=wz9Wn{0d0dn=f9}GAkRK;}K4+7A+Gv%xf9@N?r84 zbjekMKk%|eu*95J`Lvx1Qzo9f7IST4vKgXb>Ng|cYZH*4NtLlwa(pSL+w_`Bp zE{fl!>RlbSYGPmhVRGP;^_pt7tuUp}U5D)me_;;1;e{1V9 zr-N}|PQ)y{et+u5>!gf&cp8m`EYHH z+Jo`t`ao13uYY>9K3-p6U!_}l*0^;6FbMV~9WR?VNi=&D*IqGM{;6A8A^9tN1;YVW z2k=cDWo2woSn)NKVGRXYh*=hL{;Dja3JUJ#8Wgr?O_p&DWqDvw5E!h>0v|5wWw^Rw zPzEiOl^V*%I-@Lav`|(nD7ai|P~dF!7STGeWa-h$szG6YuW1*%JY|&i6@#+cl4YZU zg7MOzw0w6|^WE{X@!jQ??~ZD|i>ug1mX`01YQBpb+yKqn(rbTgh6Tf?sCO@dhmtKSSN{RVk?3`ncr3@ZHw>3j@GtKSSN{RUZp z3`ncrAf-!g9l7Z#LPHCM?p|UhFT*Y5E^!>h9l8>n)GZjfnIpnC*#=CyTVsPRXYUT* z&;C#hAmXeTMUQM~hq_1ffZRQP0N0Hlzzw4Z4OxG14P z=qG+cVVvX%LNdt{gjbR$2$jT7DD06uK?oyxf^bCqghC6+6NCwpCs^{wPiXm`Ji$^t zd4gqi@&rrd_z5j{lP6fZCQq;oji1nxGI@gKVe$k^z2pg&ZSfOYf+bI|oJyWxX%s)9 zWlr)0OP1sbmLJIzEG6P6v@A%TV2YnS!E`-(!c*_$38vA>6HJMdCz#&GPiU%|Ji)Xx zd4efq{Dh{1$rDWNk|&s^#ZPF;l{~@pDS3jaQ1T@0)+3sv#56SHNSmqWy8Lr>sGTe=#_`99uZ@Q*-@7BHG+=Ma9g8nI-l?FC3i?6$FewjXmLMAGL>R{e z(YQdg!pYz)yX6AW8t0j`L@PQG#z4VtwLmmp6GUqTqSaMFv|b=G$?vR88#)ndPy1&AyGA-D!p!MNkeI(HeddG+?6+bL!eu!+C60M;7A!6f_tf2kja6_tpSkV6v z!@NW*SYU|owInOp02}X|W61R8pQZUsb`C0WS|2q8)IWSCW;ifH1BkD_H?AE=g9h0s^s;tYig5S|wS@3V81cGQ>-DDq)Yb6%e_CzgtU!@BNmj4|h1Mil!3q>RlVk-eP;g9=6|6wf zFG*Ig0)@8(8Ddzf6)1is$qH7W0F@*wSb-u?lB{F}%y%SN$qI-eNwSg^5G0ahB`Y8d zB*{uvz#K}Fm8^h?l_V=!0l^wUhLDVI1;4aXUcW3nDR z&p;lGtlI%`s3Gn{nO<1-OTN{0vSoh^wW6FaaM$aEyR>QU5BLQOL)ubvP^4+%!fHwO zYA6en|L*2+_J%ujp(-oUb@q$tYrV7o(l$ACdfeG9b$TND0Cxc*3sf!wV2S$CuZ~V< zS7(Rsmp(bzecJ8ref6ySLwEkho%N#6=g@tV&F<;+oDcqW+tZurtDBEM;SamxOQn7^ z`v482zP#uGym=b)EniLHgPZt((#=UniP5`XpFHU3I2``*2||k>#n(QnH?qg}e!`ml=!; + + + + + + +
+
+
+ first/lastByteEn +
+
+
+
+ + first/lastByteEn + +
+
+ + @@ -39,61 +59,6 @@
- - - - - - -
-
-
- - StreamConcat - -
-
-
-
- - StreamConcat - -
-
- - - - -
-
-
- first -
-
-
-
- - first - -
-
- - - - -
-
-
- second -
-
-
-
- - second - -
-
@@ -136,12 +101,12 @@
- - + + -

+
req @@ -149,7 +114,7 @@
- + req @@ -159,7 +124,7 @@ -
+
data @@ -189,40 +154,6 @@ - - - - -
-
-
- Descriptor -
-
-
-
- - Descriptor - -
-
- - - - -
-
-
- DataStream -
-
-
-
- - DataStream - -
-
@@ -657,25 +588,6 @@ - - - - -
-
-
- Align -
- Bytes -
-
-
-
- - Align... - -
-
@@ -695,64 +607,6 @@ - - - - - - - -
-
-
- firstBE -
- lastBE -
-
-
-
- - firstBE... - -
-
- - - - -
-
-
- AlignDescGen -
-
-
-
- - AlignDescGen - -
-
- - - - - - -
-
-
- DataPipe -
-
-
-
- - DataPipe - -
-
@@ -789,23 +643,6 @@ - - - - -
-
-
- latency=3 -
-
-
-
- - latency=3 - -
-
@@ -829,7 +666,7 @@ -
+
@@ -1073,6 +910,44 @@ + + + + + + +
+
+
+ StreamShift +
+
+
+
+ + StreamShift + +
+
+ + + + + + +
+
+
+ Descriptor +
+
+
+
+ + Descriptor + +
+
diff --git a/src/DmaRequestCore.bsv b/src/DmaRequestCore.bsv index 0beca84..bf315ff 100755 --- a/src/DmaRequestCore.bsv +++ b/src/DmaRequestCore.bsv @@ -297,7 +297,10 @@ endmodule typedef 2'b00 NO_TLP_IN_THIS_BEAT; typedef 2'b01 SINGLE_TLP_IN_THIS_BEAT; -typedef 2'b11 TWO_TLP_IN_THIS_BEAT; +typedef 2'b11 DOUBLE_TLP_IN_THIS_BEAT; + +typedef 2'b00 ISSOP_LANE_0; +typedef 2'b10 ISSOP_LANE_32; typedef 3 BYTEEN_INFIFO_DEPTH; @@ -306,28 +309,23 @@ typedef 3 BYTEEN_INFIFO_DEPTH; // - The core use isSop and isEop to location Tlp and allow 2 Tlp in one beat // - The input dataStream should be added Descriptor and aligned to DW already module mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); - FIFOF#(DataStream) dataInAFifo <- mkFIFOF; FIFOF#(SideBandByteEn) byteEnAFifo <- mkSizedFIFOF(valueOf(BYTEEN_INFIFO_DEPTH)); - FIFOF#(DataStream) dataInBFifo <- mkFIFOF; FIFOF#(SideBandByteEn) byteEnBFifo <- mkSizedFIFOF(valueOf(BYTEEN_INFIFO_DEPTH)); - FIFOF#(DataBytePtr) dataShiftAFifo <- mkFIFOF; - FIFOF#(DataBytePtr) dataShiftBFifo <- mkFIFOF; + StreamShiftComplex shiftA <- mkStreamShiftComplex(fromInteger(valueOf(STRADDLE_THRESH_BYTE_WIDTH))); + StreamShiftComplex shiftB <- mkStreamShiftComplex(fromInteger(valueOf(STRADDLE_THRESH_BYTE_WIDTH))); FIFOF#(RqAxiStream) axiStreamOutFifo <- mkFIFOF; - Reg#(StreamWithPtr) remainStreamAWpReg <- mkRegU; - Reg#(StreamWithPtr) remainStreamBWpReg <- mkRegU; - - StreamConcat streamAconcater <- mkStreamConcat; - StreamConcat streamBconcater <- mkStreamConcat; - Reg#(Bool) isInStreamAReg <- mkReg(False); Reg#(Bool) isInStreamBReg <- mkReg(False); - Reg#(Bool) hasStreamARemainReg <- mkReg(False); - Reg#(Bool) hasStreamBRemainReg <- mkReg(False); - Reg#(Bool) hasLastStreamARemainReg <- mkReg(False); - Reg#(Bool) hasLastStreamBRemainReg <- mkReg(False); + Reg#(Bool) isInShiftAReg <- mkReg(False); + Reg#(Bool) isInShiftBReg <- mkReg(False); + Reg#(Bool) roundRobinReg <- mkReg(False); + + function Bool hasStraddleSpace(DataStream sdStream); + return !unpack(sdStream.byteEn[valueOf(STRADDLE_THRESH_BYTE_WIDTH)]); + endfunction function PcieRequsterRequestSideBandFrame genRQSideBand( PcieTlpCtlIsEopCommon isEop, PcieTlpCtlIsSopCommon isSop, SideBandByteEn byteEnA, SideBandByteEn byteEnB @@ -360,15 +358,118 @@ module mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); return sideBand; endfunction - // Pipeline stage 1: get the byte pointer of each stream - rule prepareBytePtr; - endrule + // Pipeline stage 1: get the shift datastream - // Pipeline Stage 2: concat the stream with its remain data (if exist) + // Pipeline Stage 2: get the axiStream data rule genStraddlePcie; - let straddleWpA = getEmptyStreamWithPtr; - let straddleWpB = getEmptyStreamWithPtr; - Data straddleData = 0; + DataStream sendingStream = getEmptyStream; + DataStream pendingStream = getEmptyStream; + Bool isSendingA = True; + + // In streamA sending epoch, waiting streamA until isLast + if (isInStreamAReg) begin + let {oriStreamA, shiftStreamA} = shiftA.streamFifoOut.first; + sendingStream = isInShiftAReg ? shiftStreamA : oriStreamA; + shiftA.streamFifoOut.deq; + isSendindA = True; + if (shiftB.streamFifoOut.notEmpty && sendingStream.isLast && hasStraddleSpace(sendingStream)) begin + let {oriStreamB, shiftStreamB} = shiftB.streamFifoOut.first; + pendingStream = shiftStreamB; + shiftB.streamFifoOut.deq; + end + end + // In streamB sendging epoch, waiting streamB until isLast + else if (isInStreamBReg) begin + let {oriStreamB, shiftStreamB} = shiftB.streamFifoOut.first; + sendingStream = isInShiftBReg ? shiftStreamB : oriStreamB; + shiftB.streamFifoOut.deq; + isSendindA = False; + if (shiftA.streamFifoOut.notEmpty && sendingStream.isLast && hasStraddleSpace(sendingStream)) begin + let {oriStreamA, shiftStreamA} = shiftA.streamFifoOut.first; + pendingStream = shiftStreamA; + shiftA.streamFifoOut.deq; + end + end + // In Idle, choose one stream to enter new epoch + else begin + if (shiftA.streamFifoOut.notEmpty && shiftB.streamFifoOut.notEmpty) begin + roundRobinReg <= !roundRobinReg; + if (roundRobinReg) begin + let {oriStreamA, shiftStreamA} = shiftA.streamFifoOut.first; + sendingStream = oriStreamA; + shiftA.streamFifoOut.deq; + isSendindA = True; + if (sendingStream.isLast && hasStraddleSpace(sendingStream)) begin + let {oriStreamB, shiftStreamB} = shiftB.streamFifoOut.first; + pendingStream = shiftStreamB; + shiftB.streamFifoOut.deq; + end + end + else begin + let {oriStreamB, shiftStreamB} = shiftB.streamFifoOut.first; + sendingStream = oriStreamB; + shiftB.streamFifoOut.deq; + isSendindA = False; + if (sendingStream.isLast && hasStraddleSpace(sendingStream)) begin + let {oriStreamA, shiftStreamA} = shiftA.streamFifoOut.first; + pendingStream = shiftStreamA; + shiftA.streamFifoOut.deq; + end + end + end + else if (shiftAFifo.notEmpty) begin + let {oriStreamA, shiftStreamA} = shiftA.streamFifoOut.first; + sendingStream = oriStreamA; + shiftA.streamFifoOut.deq; + isSendindA = True; + roundRobinReg <= False; + end + else if (shiftBFifo.notEmpty) begin + let {oriStreamB, shiftStreamB} = shiftB.streamFifoOut.first; + sendingStream = oriStreamB; + shiftB.streamFifoOut.deq; + isSendindA = False; + roundRobinReg <= True; + end + else begin + // Do nothing + end + end + + // Change the registers and generate PcieAxiStream + let sideBandByteEnA = tuple2(0, 0); + let sideBandByteEnB = tuple2(0, 0); + if (!isByteEnZero(sendingStream.byteEn)) begin + if (isSendindA) begin + isInStreamAReg <= !sendingStream.isLast; + isInShiftAReg <= sendingStream.isLast ? False : isInShiftAReg; + if (sendingStream.isFirst) begin + sideBandByteEnA = byteEnAFifo.first; + byteEnAFifo.deq; + end + if (sendingStream.isLast && hasStraddleSpace(sendingStream) && !isByteEnZero(pendingStream)) begin + isInStreamBReg <= !pendingStream.isLast; + isInShiftBReg <= !pendingStream.isLast; + sideBandByteEnB = byteEnBFifo.first; + byteEnBFifo.deq; + end + end + else begin + isInStreamBReg <= !sendingStream.isLast; + isInShiftBReg <= sendingStream.isLast ? False : isInShiftBReg; + if (sendingStream.isFirst) begin + sideBandByteEnB = byteEnBFifo.first; + byteEnBFifo.deq; + end + if (sendingStream.isLast && hasStraddleSpace(sendingStream) && !isByteEnZero(pendingStream)) begin + isInStreamAReg <= !pendingStream.isLast; + isInShiftAReg <= !pendingStream.isLast; + sideBandByteEnA = byteEnAFifo.first; + byteEnAFifo.deq; + end + end + end + let isSop = PcieTlpCtlIsSopCommon { isSopPtrs : replicate(0), isSop : 0 @@ -377,81 +478,50 @@ module mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); isEopPtrs : replicate(0), isEop : 0 }; - + + if (sendingStream.isFirst && pendingStream.isFirst) begin + isSop.isSop = fromInteger(valueOf(DOUBLE_TLP_IN_THIS_BEAT)); + isSop.isSopPtrs[0] = fromInteger(valueOf(ISSOP_LANE_0)); + isSop.isSopPtrs[1] = fromInteger(valueOf(ISSOP_LANE_32)); + end + else if (sendingStream.isFirst || pendingStream.isFirst) begin + isSop.isSop = fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT)); + isSop.isSopPtrs[0] = fromInteger(valueOf(ISSOP_LANE_0)); + end + if (pendingStream.isLast && !isByteEnZero(pendingStream.byteEn)) begin + isEop.isEop = fromInteger(valueOf(DOUBLE_TLP_IN_THIS_BEAT)); + isEop.isEopPtrs[0] = convertByteEn2DwordPtr(sendingStream.byteEn); + isEop.isEopPtrs[1] = fromInteger(valueOf(STRADDLE_THRESH_DWORD_WIDTH)) + convertByteEn2DwordPtr(pendingStream.byteEn); + end + else if (sendingStream.isLast) begin + isEop.isEop = fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT)); + isEop.isEopPtrs[0] = convertByteEn2DwordPtr(sendingStream.byteEn); + end + + let sideBand = genRQSideBand(isEop, isSop, sideBandByteEnA, sideBandByteEnB); + let axiStream = RqAxiStream { + tData : sendingStream | pendingStream, + tKeep : -1, + tLast : False, + tUser : pack(sideBand) + }; + axiStreamOutFifo.enq(axiStream); endrule - - - interface dataAFifoIn = convertFifoToFifoIn(dataInAFifo); + interface dataAFifoIn = shiftA.streamFifoIn; interface byteEnAFifoIn = convertFifoToFifoIn(byteEnAFifo); - interface dataBFifoIn = convertFifoToFifoIn(dataInBFifo); + interface dataBFifoIn = shiftB.streamFifoIn; interface byteEnBFifoIn = convertFifoToFifoIn(byteEnBFifo); interface axiStreamFifoOut = convertFifoToFifoOut(axiStreamOutFifo); endmodule -interface AlignedDescGen; - interface FifoIn#(DmaRequest) reqFifoIn; - interface FifoOut#(DataStream) dataFifoOut; - interface FifoOut#(SideBandByteEn) byteEnFifoOut; -endinterface - -typedef Tuple5#( - DmaRequest , - ByteModDWord, - ByteModDWord, - DataBytePtr , - DmaMemAddr - ) AlignedDescGenPipeTuple; - -// Descriptor is 4DW aligned while the input datastream may be not -// This module will add 0~3 Bytes Dummy Data in the end of DescStream to make sure concat(desc, data) is aligned -module mkAlignedRqDescGen(Bool isWrite, AlignedDescGen ifc); - FIFOF#(DmaRequest) reqInFifo <- mkFIFOF; - FIFOF#(DataStream) dataOutFifo <- mkFIFOF; - FIFOF#(SideBandByteEn) byteEnOutFifo <- mkFIFOF; - - FIFOF#(AlignedDescGenPipeTuple) pipelineFifo <- mkFIFOF; - - function DwordCount getDWordCount(DmaMemAddr startAddr, DmaMemAddr endAddr); - let endOffset = byteModDWord(endAddr); - DwordCount dwCnt = truncate((endAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)) - (startAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH))); - return (endOffset == 0) ? dwCnt : dwCnt + 1; - endfunction - - // Pipeline Stage 1: calculate endAddress, first/lastBytePtr and aligned BytePtr - rule getAlignedPtr; - let request = reqInFifo.first; - reqInFifo.deq; - immAssert( - (request.length <= fromInteger(valueOf(BUS_BOUNDARY))), - "Request Check @ mkAlignedRqDescGen", - fshow(request) - ); - DmaMemAddr endAddress = request.startAddr + request.length - 1; - // firstOffset values from {0, 1, 2, 3} - ByteModDWord firstOffset = byteModDWord(request.startAddr); - ByteModDWord lastOffset = byteModDWord(endAddress); - ByteModDWord alignOffset = ~firstOffset + 1; - DataBytePtr bytePtr = fromInteger(valueOf(TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH))) + zeroExtend(alignOffset); - pipelineFifo.enq(tuple5( - request, - firstOffset, - lastOffset, - bytePtr, - endAddress) - ); - endrule - - // Pipeline Stage 2: generate Descriptor and the dataStream - rule genDescriptor; - let {request, firstOffset, lastOffset, bytePtr, endAddress} = pipelineFifo.first; - pipelineFifo.deq; - let firstByteEn = convertDWordOffset2FirstByteEn(firstOffset); - let lastByteEn = convertDWordOffset2LastByteEn(lastOffset); - let dwordCnt = getDWordCount(request.startAddr, endAddress); - lastByteEn = (request.startAddr == endAddress) ? 0 : lastByteEn; - let byteEn = convertBytePtr2ByteEn(bytePtr); - let descriptor = PcieRequesterRequestDescriptor { +function DataStream genRqDescriptorStream(DmaMemAddr startAddr, DmaMemAddr endAddr, DmaMemAddr length); + let endOffset = byteModDWord(endAddr); + DwordCount dwCnt = truncate((endAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)) - (startAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH))); + dwCnt = (endOffset == 0) ? dwCnt : dwCnt + 1; + dwCnt = (length == 0) ? 1 : dwCnt; + DataBytePtr bytePtr = fromInteger(valueOf(TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH))); + let descriptor = PcieRequesterRequestDescriptor { forceECRC : False, attributes : 0, trafficClass : 0, @@ -461,21 +531,16 @@ module mkAlignedRqDescGen(Bool isWrite, AlignedDescGen ifc); requesterId : 0, isPoisoned : False, reqType : isWrite ? fromInteger(valueOf(MEM_WRITE_REQ)) : fromInteger(valueOf(MEM_READ_REQ)), - dwordCnt : dwordCnt, - address : truncate(request.startAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)), + dwordCnt : dwCnt, + address : truncate(startAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)), addrType : fromInteger(valueOf(TRANSLATED_ADDR)) }; - let stream = DataStream { - data : zeroExtend(pack(descriptor)), - byteEn : byteEn, - isFirst : True, - isLast : True - }; - dataOutFifo.enq(stream); - byteEnOutFifo.enq(tuple2(firstByteEn, lastByteEn)); - endrule + let stream = DataStream { + data : zeroExtend(pack(descriptor)), + byteEn : convertBytePtr2ByteEn(bytePtr), + isFirst : True, + isLast : True + }; +endfunction + - interface reqFifoIn = convertFifoToFifoIn(reqInFifo); - interface dataFifoOut = convertFifoToFifoOut(dataOutFifo); - interface byteEnFifoOut = convertFifoToFifoOut(byteEnOutFifo); -endmodule diff --git a/src/DmaRequester.bsv b/src/DmaRequester.bsv index c7b799d..c28da87 100644 --- a/src/DmaRequester.bsv +++ b/src/DmaRequester.bsv @@ -48,7 +48,11 @@ module mkRequesterRequest(RequesterRequest); Wire#(Bool) nonPostedEnWire <- mkDWire(True); ChunkSplit chunkSplit <- mkChunkSplit; - AlignedDescGen rqDescGenarator <- mkAlignedRqDescGen; + + StreamShift shift0 <- mkStreamShift(valueOf(TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH))); + StreamShift shift1 <- mkStreamShift(valueOf(TAdd#(1, TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH)))); + StreamShift shift2 <- mkStreamShift(valueOf(TADD#(2, TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH)))); + StreamShift shift3 <- mkStreamShift(valueOf(TAdd#(3, TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH)))); // Pipeline stage 1: split the whole write request to chunks, latency = 3 rule recvWriting if (postedEnWire); diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index 74c467a..fdaa399 100755 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -5,7 +5,6 @@ import PcieTypes::*; import PcieAxiStreamTypes::*; typedef PCIE_AXIS_DATA_WIDTH DATA_WIDTH; -typedef TDiv#(DATA_WIDTH, 2) STRADDLE_THRESH_WIDTH; typedef 64 DMA_MEM_ADDR_WIDTH; @@ -39,6 +38,10 @@ typedef Bit#(TAdd#(1, TLog#(DWORD_EN_WIDTH))) DataDwordPtr; typedef Bit#(TAdd#(1, TLog#(DWORD_BYTES))) DWordBytePtr; typedef Bit#(BYTE_DWORD_SHIFT_WIDTH) ByteModDWord; +typedef TDiv#(DATA_WIDTH, 2) STRADDLE_THRESH_BIT_WIDTH; +typedef TDiv#(BYTE_EN_WIDTH, 2) STRADDLE_THRESH_BYTE_WIDTH; +typedef TDiv#(DWORD_EN_WIDTH, 2) STRADDLE_THRESH_DWORD_WIDTH; + typedef struct { DmaMemAddr startAddr; DmaMemAddr length; diff --git a/src/StreamUtils.bsv b/src/StreamUtils.bsv index 86c87d0..a9d3621 100755 --- a/src/StreamUtils.bsv +++ b/src/StreamUtils.bsv @@ -348,6 +348,8 @@ interface StreamShift; interface FifoOut#(DataStream) streamFifoOut; endinterface +typedef 2 STREAM_SHIFT_LATENCY; + module mkStreamShift#(DataBytePtr offset)(StreamShift); FIFOF#(DataStream) inFifo <- mkFIFOF; FIFOF#(DataStream) outFifo <- mkFIFOF; @@ -402,4 +404,86 @@ module mkStreamShift#(DataBytePtr offset)(StreamShift); interface streamFifoIn = convertFifoToFifoIn(inFifo); interface streamFifoOut = convertFifoToFifoOut(outFifo); +endmodule + +interface StreamShiftComplex; + interface FifoIn#(DataStream) streamFifoIn; + interface FifoOut#(Tuple2#(DataStream, DataStream)) streamFifoOut; +endinterface + +module mkStreamShiftComplex#(DataBytePtr offset)(StreamShiftComplex); + FIFOF#(DataStream) inFifo <- mkFIFOF; + FIFOF#(Tuple2#(DataStream, DataStream)) outFifo <- mkFIFOF; + + DataBytePtr resByte = getMaxBytePtr - offset; + DataBitPtr offsetBits = zeroExtend(offset) << valueOf(BYTE_WIDTH_WIDTH); + DataBitPtr resBits = getMaxBitPtr - offsetBits; + + Reg#(DataStream) remainStreamReg <- mkReg(getEmptyStream); + Reg#(Bool) hasLastRemainReg <- mkReg(False); + + function Bool isShiftStreamLast(DataStream stream); + Bool isLast = False; + if (offset > 0 && offset < getMaxBytePtr) begin + isLast = stream.isLast && !unpack(stream.byteEn[resByte]); + end + else if (offset == 0) begin + isLast = stream.isLast; + end + else begin + isLast = False; + end + return isLast; + endfunction + + rule execShift; + if (hasLastRemainReg) begin + outFifo.enq(tuple2(getEmptyStream, remainStreamReg)); + hasLastRemainReg <= False; + remainStreamReg <= getEmptyStream; + end + else begin + let stream = inFifo.first; + inFifo.deq; + let shiftStream = DataStream { + data : (stream.data << offsetBits) | remainStreamReg.data, + byteEn : (stream.byteEn << offset) | remainStreamReg.byteEn, + isFirst : stream.isFirst, + isLast : isShiftStreamLast(stream) + }; + let remainStream = DataStream { + data : stream.data >> resBits, + byteEn : stream.byteEn >> resByte, + isFirst : False, + isLast : True + }; + outFifo.enq(tuple2(stream, shiftStream)); + remainStreamReg <= remainStream; + hasLastRemainReg <= stream.isLast && !isByteEnZero(remainStream.byteEn); + end + endrule + + interface streamFifoIn = convertFifoToFifoIn(inFifo); + interface streamFifoOut = convertFifoToFifoOut(outFifo); +endmodule + +interface StreamAlignToDw; + interface FifoIn#(DataStream) dataFifoIn; + interface FifoIn#(DmaRequest) reqFifoIn; + interface FifoOut#(DataStream) dataFifoOut; + interface FifoOut#(SideBandByteEn) byteEnFifoOut; +endinterface + +module mkStreamAlignToDw(StreamAlignToDw); + FIFOF#(DataStream) dataInFifo <- mkFIFOF; + FIFOF#(DataStream) reqInFifo <- mkFIFOF; + FIFOF#(DataStream) dataOutFifo <- mkFIFOF; + FIFOF#(SideBandByteEn) byteEnOutFifo <- mkFIFOF; + + + + interface dataFifoIn = convertFifoToFifoIn(dataInFifo); + interface reqFifoIn = convertFifoToFifoIn(reqInFifo); + interface dataOutFifo = convertFifoToFifoOut(dataOutFifo); + interface byteEnOutFifo = convertFifoToFifoOut(byteEnOutFifo); endmodule \ No newline at end of file From 39de08992db583dbeecb5249cf49762f8cd37d08 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Tue, 6 Aug 2024 17:13:30 +0800 Subject: [PATCH 33/53] Add ReqRequestCore --- backend/verilog.tar | Bin 276480 -> 0 bytes img/requester.drawio.svg | 135 +++++--- run_one.sh | 2 +- src/DmaCompleter.bsv | 3 - src/DmaRequester.bsv | 97 +++--- src/DmaTypes.bsv | 11 + src/PcieTypes.bsv | 6 +- src/PrimUtils.bsv | 2 +- ...{DmaRequestCore.bsv => ReqRequestCore.bsv} | 304 ++++++++++++------ src/StreamUtils.bsv | 67 +++- test/TestDmaCore.bsv | 93 ++++-- 11 files changed, 490 insertions(+), 230 deletions(-) delete mode 100644 backend/verilog.tar rename src/{DmaRequestCore.bsv => ReqRequestCore.bsv} (70%) diff --git a/backend/verilog.tar b/backend/verilog.tar deleted file mode 100644 index 6d8509e48444ce7d315173052708c34bdc5d3a44..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 276480 zcmeFadv{#NaWATW#ZR&2oNI$8v@{P49!zvj_y8Tm6iEY=w0yC=MnhmgP6W)rGXqhK z?EBg8uliZlUEO=nfL5GDShfi4{?)UqtDn2~57XuBYVr1qPyU8KJ^bvCMxWrH{{E=P z|D_`O`J}(QH|p(<`u$zt`n~<#-Y3oIZ>UpXeY;vumQC}?`+xtB@J#!qtz4t|FYxRK zSO3@FKfRuuO#gg4U9G3f&wu!ss#68;?e6k=_i=!d;b^$mH}&7&+Z~wr=LrJv(3UN+5Y_8-&cX1Tao5&q=qAIIl|ci%2&>*@GnvYtGfT`rF2 zS3H zbg}Y>(EZ`Q=e=@Po{!f{1Q@xF|0KMhJpVl}O@BW&5Bkmd)#Chn(@sxck6)yy__xO| zr>m)H*P$u-TD2L*Bpi?to`qLHi&o*m2HhWoVDzD@gHtrCAoO`r^6R=3u+N)EEDu5xiwn|9WRvB7rtA=lEs|>BRRi}m0`n0x6=>A^lRgYfO zx?mRveX@`vMZA;ie8ezhlK(^BlJm@RuFK90NeaAJH0O)!H?#RsU< z<>I=T&2Mhkt7g4u7Po8q6$JFZd*972FQ%8x zWXMRx>t6yR#&wC>h^|GR8&I^f3cy@_5tSxoM|NC$fr}8Ke1fS73mOIg@^Nis6mU5+ zJ7O!S;tM%h-ndkIb66%rgH$91BNAg1HUXBg@}Uz(dAddlEU>b5X(VzOvPL^E0}7mc z|IOJzbu-{==M|BGA?xBzhryzr6_Gf-gn9$LgfhQgLRnrfS<>i>6{W+9Xkt>*S4(>8 zQw7U)ZUBb%I42i9Cq*Y6e=>bw@6W#n`=+TU*;HoZTUpr+|FVLaFQ$hL!C0@)6- zHu?5?!lk$9Ny=)vRvwvZJ&6N#5FLMn}VvWGHE zyWGS-x@_B9C{Y4j!SzXcFMB=*>W$hcYA`=yskr9g|_(IaP@60bZOE1 zqV4M7f(jW^-zu~0k+DS|2Mt^vDtDG01cQ?on?_qE6`nHvW;DLmyLJ%n8LApAC+|=E z3b*@ZjAxI2oSo95!}V9_E~m@!`Wsqii7K&97zWKQ?!@~ky79>R3>zLcO*0`>i+7Uo zdkhGejQ%JSLA}_WL_CM9q6nrub%S@}{53`BfD!2Qy_kT6c_JI zf5gGXhwjX24D@Qbc#D;z)$8R1t4)DvGCb;0^g4M-QJ)o^xekdzIHgj|sXY}NE^%t5 zm{Xs*KuKVnLjeLVur3>>@amw=gsUkQ?Rkqky_)D=e{Hn8=SUVDI42Hqb1Q9ABPOc6ofeGGX@Z`2E{y=!|Y?%duHp zHmln=u5CF_QI||1S!`n-Fn&5de(`{2R20hW>C3NQQwle&E-iiX{3IBfmOVZCL$U1W zt5>i8NK0)Zb^FG&iH(XY;fhqu+#!yrWoLh6#ab>)Js+OMbEx| z{(StRuDpp)@8O_+HmUW9%m6_@PE3%6i4n3eF+&!1HpE#T?BLa4SbEn`fbn_|g_;$C zP)3rV(`@i`ttKp|e-1r2_95*+`FJkcOKg#4u&kj9S5a9t9Iv+;j+AC(#%ui3FrETEJxx6469b5f!o??yf85e(`31>Gs>%gs`6SDc9OuP|C@_`(6HbRIFkaV z>J5Qm%Av-HcHGGqkxAJV8Kx#ocZ9}5_(88JJE~A>#}-6q>QYk_7-f?W*I~+|Dp4XI zZbl<8inKo9y<>~N&A2wQ(A=#m~)!E+O=wW}f`=<~bqQ1K7;nux! zPtfeYlOFxi@L_*=a7R7d;#jT+O^3SaG2FkS4sIDO*MZhDI_j|Z5c@}JkIQQ$3=es9 zx`iETX0*=c)JP|KR*%3SM5i5Jru|%P;`Z$L*~>xm4?CZ~pIxlq?FFnP=Q}Phu*7)$Lc%a8 z%20SFK^To?AUu;G4DB)yo=K2tLK)0Fl|b8%?Z}5N17!5>`F-q9q5{#oX6{E)absV_ z`#A7c_P(PmIA8Pq=iK#<+>E2SF`qNXdrac@&zajjB=I`L2d8^T;&W(&&QLh=bLU(a z9iGrWl}g%){Z7HNW_}VCA zye(mgzlmGsaXgGZm*lzE0rr7j^Si4y-R;GYHo_fg$?IBBCqDNvL!r(iMta<%QiltF zdr<0bAKP4Gns7^Timy{wdjX24tBzilR|s^c?;Z9?In-a0$Sa(*IuQ1BO>hTNIV&@!{c9MiW+N5WojS#}K^#V-=vjaB zt_Mb8$eIEX1i;~tZWV=k2daFGq9y?W5G6p7`p`?-gQzkqM3m3K#3?VAK+KAqqJXEh z7BbxtC~?WELTHUHxW;pDvrcs6q8dbrX~5P(^E_E*)NGXW+)9l@Vy7f7S}I|kY;}&w zP4ym&b!qh!)loFmQ|ERHdd*BvTDQ`pszyRqX?7P4u7F%MyY$g9+pyMKV{JU+IhXbH zv(R7vnr9rWPvA|gj*@go)^uxg>PflJ^_jWaTbS&Ct~g0{P^rXuYy&B6-8Bkhc64x= zvz-t%@Gx6DTGG-%;^86-;jiiZ!fo(U?vyVz+`CtL5BPuR^Z5R^Z>UTE%SU?&y#D?- zUIN6s;duXlcYkk)|Lx)Z|51Oq`^)>^KSxM@?DxN4)ZAjk{h91$s5=m1~vu9vlE26)$dNr33 zm>mVe7Sm8*P>78x2Xn@fHZ3#-d~T_7h#@d(-wj8?GtC_+p``C@%>J8d#UB3>>E8TU}zCQ;#9clNRRnJUZ~=67o(7fir9zZ4;@LLDn#A+q`dTh%|zwe^`~jYLg9D!}y^7NJ9+vU|;#p|r{i zM{jF6N(af5t;q=p=fS|I*lGhJ(=}xg0I3ikxXM{)v<(^_BY5m<522b|fmuX)Kuw5X zcCDO(!=U|BtROH5PSXawTMK6o;+aQ9o25Tg?IpzK6N-#RL529(C=vg}0?LCX{)rVB z{-@onsrZE`ts8?XKEcJJTop@Vj^P^N?(d+C`#Wgk{+8;9#FoW{33Vvt)~fV!tnT{C zG20t_EvVdp7?rCy&_Ptz7i2@b5ZF%}53WCOOc6zF8V>7xfBwF2Zn8z!Q3q%Fi z!4>IAGSj76_XTQ`g}LI+g%OZA5#?~*PApFxY2@PIfbWdNArG&CIKyNg+Q!Z3?1NOb z$`K|KX_JTU$KjyhMoYA{A1!&!kq%Mzq-P;xlDX0WlE9?8P)=Oj_|TWCwM`rgVt1sAqU&Dg>ljg0(AU zG!Al^I8s^l3p04GU?j0Nr54BXRugRPN^N{dxczjkcT}x#bh#O}z;?Z<9bTBJ9L*31 z5SFC7UD={lealPP|8cu)A8q@G?)|OY|GSGDfJf>6Pp?1tW&h`w{U81QUomudP04g$ z*-l4_bOSR7)-Iu~P-J@%$Z6(C1{#dE8)fi&yMFcGO~p$To;$yUHs?=M5c)~biY8%7 zkfRHlL<-M6?7GP8{Zd}GYqeqf9FD);yUJbTxl2nEv^P|-G(kqSiLH^E!`+ z3f3fzUol2Ua9a|D(bQ}L>8Vayk`PA~D;zwi<35sH=SgG0u6FoMj(vYCi~>^@^A%LG zvaTZR96>CTq&V0}M-+g>ECcuN%1u;+wF9(>3n)dT5=VaR(aAj98-~4&G-3@jkzV5H z7lvd_C}_k66cu|&sa!zXr5aE-lYKBnPb+$PP`l)a)Ws+9o;tj+n-m5WxdIx)qQ4Wc z;_e*LDzf72gqwGc@O1&xD%Lae#&+1F>f&9uP0}Io_3bR*agk|C?m#wQ9@1<0j(yZDbs<@O-Ca;j#g0S4Ij77CU+Gd8~@{H~$Y-X$D`72yC zhfftWG`V08D56ZAMT}X1dB89@$?M}u*}j^|t6f58VV`?u282Jbf*3{Ghgh6|GzXv! zEcbiZM)QBuZkGF9?j0#wg;gAHUujc~Z;(m#0tU;i1k`SuK8gl?1H2C zwAGULh;TC{3YPc|D3mxS^S`;D<8yMgS1dl|hQz0-A*_TM5}!&#mUKP*>ipdl_r%ZU zZ}CkYb5WhQ;oD=}==ALP1h?BiAKy%ttErE$<@9m}7Z=O% z_}zYgKSWe0TTicVP}axy!)W*y{v4VcpTnP{Ls5Eg!2j&`_r;&Pd*aXi5&v`0+ZBHf zhvLt@f%x-KXf+%Nt%kcotKq)T3Lm=StagV&D|{18lpYGLMgyVMXe6{69SE(kQp{QH z4TV;FdqS(dL!s6FKxlDmS7>#xFSI)B39SwXLaW1((CYAjYc=Tg zxmKt#uGIivkYh>?`aQ1Ipg-hV4e)IX0Y2nf4F-L#)nKs8wHgfeg;v9!&}ujoS`GJv zR>MP~)$TxOg|Fdpt#%KDR-+!*YG26N-xFnrT;YQO*Y{w=)jc@i+8*|~vWL4|*Ta3I z>W|c72e=>5m9VLl+Piyb+VdFF-zmBP#Q*nLPN3LCib(q#@iB;G1W7G1R?9x)Z0h&0qfF z9yU9^`4}6{2KxsO@fq1aHD7+&?9!!A&Hu%m!4E4uX7}M>5BH6IhBFc-Oa0|9&1b&t z)3o+%_$MRcQ`SdY`*O-6;r|yYY#W zpBgth#j$=|WEK`1Yy=Dk`ws^_+;2wLMg-X`N6`5h7h#G^@vib`v8M3qvV)2cN+MC* z7!f6*<-*YHsEgm0Q?;u)Cp5)qS9s)Os>L~@!-vBGuE7(|iA+qbe_ZGEi#~KM6|LE} zRNdFIQAAa_chiMD8sswfMk2+%)rAb#io!w!zL5MSE;|`fE=b~T(C;Je28n9}<*;Wd zhZ^OeMu~Scj7;2om&?SPxMi6K2bL0fFsFp~i{mIJI zVrWo{AuLr{N-c&4wHU&xmZj8UXi$qGEPz=`ErtfQ7{Xeap&V*4G^oW8menk!7DJ|A zD8AugRCA59@lCQj4KsErzfJXeqTALZ?`kNw3gTHW_L$ zG_1uC78Wg~7DK~Y3}Fq@Qfe_Yti=$PD=noKL&I7OVP(@$4zw5=)?x^Ypq5gLp2}GF|=EWAzCxHlv)gpDltUM>y}cBp;0ARYB4|{LAnZgKLM?zswE)7N zg(1}92h)bKU$MJk2(|FptA!8tK@6c5J$tq2!H$U`)PiTP7ChKnF@##|>{Vh1yD)}s zPm@?0UShMx5^2(UHED-7Q1`Sz?NtI5TR>K#7N@;RoMN}g5RSAs?N#Cw8%vf@i_=~u zPO%4N3AH%wRpJ!eRhCeT(_SS`v9o0fwK(l-aq5;CKqVKVhW*`#!#;+Rn7UFxU?HlB zyWBW+wnq_dXP{T_Cm&Y-BIujNlBU>hYDl3aA0TYx1B8QkK-DZCp!(zkREu~(?v@YW zM)?5l5f5lnK7E2&Y=Xh;Ga%UiOCbp{^S#y)g@0bTT7l`7M480>`FeN zSyS=^v!Ub(W;yZ+&0dlxn3W_?FxyC;U=|^t(Ci?2f>D3+1fzNRghuJf6O6u-Cm2;H zPcT}RPiPdJJi+KTd4f@^d_tqq#;x4m)*xg;2WGCo8#Z6IiIYiX{N$b#IQg569MUma^^#xtG=|k ziWbXUc|R$SX^Mdc1<>9h0}TtH!vhA|)j>#c3^Xc$MtcmjR{$N17-(MyAw4qCK>@To zWT3+WXn(*!=vJh+!+Kz^&tV0pBdX0eT7A_N+}`hvxZiO-UqR>r_dGf=9fo|(bt^c( zkBc2Stl)ky4ufI z0&JJp0+!VZuzg+&SVPlc=z+MHH*VawSZ2M!%DS)bt(=k)dCi!IIL6)Sb^fOQZ1mP<*-sMU~P%RO0|H6Bn~Up z0{UqVE7by)h!_lOLV7J=J&40fwSaD(!%DS)l^zZ&)dH4vIIL6)Si|A4LM_nHfx`;5 zK&vwxR;UG9is7(AEzr6PhZSmp7Fie!t145Q&PdKbl3$#|kVTD?tg%J)b z)B>%7a9F7pFc{*nQY~PCgTqR-fMFAdm1+Tt7aUfq1q`ql42u(bEnwiqVWnEYG6aVW z9?5IXV{7$ z9A{t8CRfs{;gt-m0`zeu2We)t`VRlV>0nxtN+%l>h3870!zK(3oAqS%y{Ya3sj=0E zg1g={4~2hlH5Nkx$5%=rL7=YZAoN2J_H2yb=dWRUh=>1mFiEU8m%lRa zL0X3vY@XjkvAFj=+=SuY#;sq3@pD|Z@OLP~-~Vwk#c%7A>D!NY?MJYd?*G7;hVB8u zFu%wDrJ~;Mevj_|81?sdac~Re{UJKvU+(`14*VPba*w6NG^^h;`S9L%NSU+w)Z{M( zLh0?^J=gL=7@1e!n?KM|x(5v%!F_-uPWD@Z4c-u%Tuvgp(h?>N(k0DI^*o{^kRqnc9<2xI75d774#)6L+U&OI@7^(rf*0q zw<{4IUu&a^=A0FDK?jbcgP>AdO*(Wnw=YX%NqR(Qro9`S%lHjJaf%lh;eaA}%E#eJ-LbcuJQ z@XBygd1FW{Etp!O@Z6>AQ?#=*RUW3M$f$Q{mWiol3a?AB)H%mc_=aTP@Qe|RF_+Wb zYr~Mx#wX%b_X6)gktu`E!o515O~>rxVR#7+pJ%f_4o_`EL6S}mXcQUs2L+`0h&<=n z-zy-^XWluo=@*dZ!<7t)Z((sH`zYqpd=AY*6^k75{LkaHg7g*P?Y|*+0Xx5;^zP*Q zum~Tw@o4AQ^Yigq@oUPPp%vd&(29R6XvN1Bbn52@d+9DXy_M&eZ^zefuh(Nbq7htT zgA@DgA@no+n-5rCn$9P0a7BXm;cf+gF@1CUc04&hpRQIOj}I0t=BqWY`=HO|?fiT_ zTi`mR+v_)&Gr9J1P$8iZNK8O$EQ@%*oWTZPT_(KH|FqzPfMSCr6K)U>EBxoG4(y+GznrL&K@z{2vnA~ft@O>` zj#ul+`qo!%H?fj*%(d{ID%3|!1yiFTG&k_tYI?=$sFUQ$dL2S@sE3YJb$#=0e6yUc z5Eb6ULWt&c>kl_*Yu2Dn+L5r;dc2;z^<;(S6J_By8{t+=p+!f$t3uDefPc4GqXn#{ z>oHa!IRAp}XoHtCgw)OCJSLfHO+bNRe_g*VCZ$l@#G$?B1(FGf(%jNid zu|#$VuB!T!@W{fPqKdz~T8=LkSTc)07Ee>)Fbbjc_`~Hm1$)j)|K!l*PqLM`Tl9g} zo1tr`^lSD&uw{WBb@bJ+_%?S}7fh#zc-A9lkp3;H*O5DoZ5|5kjWe=9!Gzl3-7#jBv| ze~x!1bjCXqI^&%QRs7}o7`XbzZ-cO z{(QXfx4w)xEFiIoS$Nmme(TaLu>_=>WZSu37%|fI!u^3+jPVO6I?!A%?2yW=kO{XJ z5yKlv8f23^KG9(p9Qy&fndB@nA(|k~Ug4m%{b@(~7FS1le#F%xjs1#r&#wi!Lb^s$ z_qtZ3GbHhDM|xc?QcPRDmmE*8tAb>cQ9cb+q%)+SQ+V5x=`}<0>7U^#r`HV0r+#(m zeA-7y9Gl@Cu;WRS=57`dQn4tFQN~xXs)AN5tDrOM4u&aOPV_P6xizC&>$Sb+b`BmF z+$C)G-qO6n{FHDZ6NF})vz7#I%4r5kt`Ze>h$arPX+q~G+MCeWImRXwmy;R#icKi4 zuZ3o~!(I1)wB{NIImWj)h$)N}HxXx}&`=;pwu!P1h^eAL5GN;LU|iQp8L9{SqCH6l zr#kfm@GuFAXx$bfxWrQ3Rh&1~RU?8qCQpo}PC&!(*&I{Sg$fy&DF_)chSsVHVw-Ui}dOn@~F!f@(6Sy2_2}u(qCFjO}R$PcST~bbh;BVpRr<4pZ+Yt5S*Jdh#P~ zef@B?h^vm4prjHJm~JDkKG!9L6@}qgA~enOHc|YPTmaAW=2#E;y#2ZdNQT`Apd z>|i`reYDsm1_q*i;Q~MN*_et*w|CruSwKeRf=tmkY9iAMY$lDrQ@?kt|&E+@-ajI&Vny_F>YIDLU;Bsa@lqgNOFFUBr6kKG+EaW)k zr-F?cD)K@o8SOa)`nzwsx9S$5k)@htHDhbQ$PU)3z}(z2EOJvZ(F$R33szz3!oF0y zfnkn|%(%S_S1~t5MP8b~He(sL2o)@OxK~Uqz#=PaX$n&fRY8#x>`W0+_q0yS$ef{X z7R6NKB}-t^4oje_JQ!fVSEK6B#_KWDu*WRQAV!m16`eFrQzF|-%gr8CveFto2^H?$_|m^ zt!5Bg*ENK#>Keki3TnPonq;3!KsM|w`7JCR%w8umx5cY_h zR)Z2@*Tyv*TM}<;)^tR!rCAfCrCAfCrCAfCr_xF|8i@`!fitqrc7bt}KtsAhT z?M(DKlo>&@c}bjd zp05POLB4Vk+fu54#6i6Zu47(cE7$B~UWu=Q4l!Axkq7xoD8GAHcC$yA+RBDZRHK9l zn}dazBn;m}tm)&A2bM7YlwYabZ79+%RttnGiw=sEU~(g~U|P0+S>lEXYUvPRi9|a+ zSi;7g(GtR)5G)nTbA%;SJU!ToP)GX|F@XoZe5PlKfWVj1bVNAShiOL(qNqpUQPd+K zihA(Vc|}z?sHhDG6&2#3qE2SdyAZB+@1KuMBqCl@%e z{o?|HZ5S62afi47PbB~R?tis)0;r=pv zXZ}k~$u}JJ9batTIoh)k?izjA;A=)HDx^pZf< zoFsrHMH7(KQFl>q={ z1gUvlhqMZ9@wwsZTTj@)iQX4&R|gkV$e8+8nKg@yEw+7P?bu|XVLAu~CoeXQwu~b@ zWwtt_@wE-n4#GV{RfDh{m-#8m$EV?JMks?5HEKZc&61LePu*Vvb&*}F0F|eASoA;@ z)^A%JF;htg1^`-$b*8aQH zu7BsjuR?Pb3b2CTQ?%lC1=23H;`EfCcaiXU3Dhpxv-2L8nHFuyu=WWj8-TD-SAa@4 zsB@Q;!Jo<5VN_%w%X_YAXV1I^+e)2IRq=YK~ga2Z(f28c)kA z3;}iVVM9qYoqM%hyv0lUtJh1sc%DlzGd}iWAvKFci3DPz_XcbA|^lI|MH0FTBx9l_n^2_Xk zxj%2d3~ZZksH(HH(IRW(eD80^$tcUzhRDusq)D8L{RKAMUR6R&?qR?l|at zb453AYPq+Q+v(6nXSWl$quU8ucRL|6w;v2tw-dJ2?OmKv_d1CYURSF(lY>2_R}1uW z7pR3N%jv~zeLO$M2Zpgni$>!|XGuq7+9Cwn11~+e%DDb;H>+EwzCxeBMrR@%oBp1wJ1aRVUx1^Tf

oye9sU=AvcW6?u${lh#H8;;iABGhk9wJh%JgK*#okHxhEPW&+7)l>C z;o_L!T{NLQz|(P8>rKOQ#|$@=kD~z0?X8B}tgURbA8j&_Jl{?%%B zI(v)vNLLV<3}Ny*vPU@C6t?VbI-l4HNJlss@GM=@%C+!kIT=-14|Zn>!*@tE9Y)%` zFN$@7-7`Sd?L@ME!2EU34QNV2O`EOtVBK4GR zzAPnku z6Fue;Nz#4jd7Jd4W*9<*bKI;y;Ze4_>qoqu`_eLQpNG8zZbHBG4vL|F*mc`D;+zb+C8m>%uwA!7 zl{guLt}^gR)otw)BiUM+C())&Vr zimEMm>bHenrr1$(@J@^R_VR3^pN?+iU%i2nCii==jvi(4dAynh-=$A z%(z+{Me|B2BcFVuMrUg0FM3&2osQtx`<67%icz*5r+Uql?o@J>*-UIMby3Qm$moLf z2N7~?dPLXh&M&~F{>Z0)yvFnZJ)!w2VO$r{g7U8=sq9>@VP8gSLAdvKQk3kRbWRzj z%FACjEDf_fstTtNO^WYoy|WdC6-bQ}>v_{!^PCtYuQUY&aqiw<<^$lQA88<D+Y#-!40SE_(`fCEE!6;aK?)TjVzu+FI)VC)R27PdDs3vNcsIJS z7m+1Z#s;qRc0oqZ%FRT-1fZAaY7D4cXW(9|3NPMd64-lJRJGytKYWC0Mz5AO)JbCn z+r1rt$j8X*+0~Mk{!XTEX;%)=#?Pi(&@`*I<8wEhYr&77j*njiqBigmpb&t~=8oFa zRz`R2v6-=v_Wez??~glb|IM@LeXqaf7=a6#w<3+sAxhfen`r$EsS`Wj2pjKrH~%V% zi4?&CzhTmWrrrO>Nm0l~br8vXHEf_7i>2;9=nzneLEWS35>a(*j)S0jLaQG56;jpP zk|x&BCR82jNQ&uHM`i_=mfzh$w_}_-i(!`;Q-BAHx)QWBo`M)=5iKjMn#E;<1r)ag z7~TO3$tS#XABx7TBcUP01NuUbeQ|ZndH^a4Hl6kTyPzu8zMSUsonPLIP$p4A{rX*m zVg(kyH@(R>bpO?Oa&bXRAfHBv9~rX2qyM(84rJp#H>yt98OcZt@;wN_fyN0@&9oqz^+XSFy})K7mQGxij0*t*mJZuh+Yx zGY4E86IIcQvvI@5G>XXp}l#f9m-E1K%TjBhr#66oA1UGUx5vxX!F zw^0FxFoRPiFc?IRfFl={0Sj^%q9lhwj2u7GR5CWKw->g=jP0)*3`O*1t`q-mqF>+& z8480^$D;HFuds`q?)3yh=NOtA*e)URbha$ z+UPXtW~63dmJ_4a9v(GLv$Mcv8h1b^Yjq00F2Y&k6S5#rkvzj;Rq zQU!+XDZjpwHL>`;r8bDhg=K_-?F><}ok2tofs0CTsC+hmi$hFUP@ykox}E;eKe)w^ z<7Zgy_-0I|msec+;YlRS$ zS)gIO4d`S=y49|-ZN6lQP_!K1z%hwx~86M2SR!cM}0$!F_C5eHy8b& zuRP-wWW+X9_LRT~RfVG@tnR0Xb?QdD6xE_hptFOJID*Jn!DQV@Ir2+k_6Xs-h*Poz#=dzbNQVUa@gsK`(y zT{B=yOk{Xp9fgo!_;N%@kg!MuWP?3%h znk-0=OBSjU?a3mLRXtJ77NmsB7J(c0M0>WdCm^*OUXwVallA66AZ^p5DfD~4x#-n< zCWUvn;2__;@(^aHU z@wPU+r?Z)Qn-5kPjM;^Ql;L0-3k-{j38s7t8wyvgDz#oXNqPsF>6NP$q1utuEhLMp zR~2#cATa*gs=n}YNj0*il#dYN%GIl_#>p2|wzbo7p)+B(t7o<5ERB}Z#cMK9W#bl- zI)K`vxo>DgH<&i+PKs|CUiJ#sR=M^?pDk^5SFpAUAsp50rU?h`#3A^21#7Dyc4U4_ zw}FGZ*rAg!dMj&7=RuOcinWs{dj%_mZQAq1h{td4=FqA{z#wMRK3j@;6>bi#igN95 z3vn2fD7Shl!o)ip5VYwC!3#n!8gJiJtQ9pL5I6m6wLMeP`!W zeha4}5|?knpuJ6ND#!?TD27`&RUzYeR~f`@8h*L7r|cF^U7)p|uD-+Tw2(R4FmvQG zt9?TLQYU0?nytLd^cb+cg;TN90L@(&I4f!{>>T3*OJS8bJ13AbKDMWxHyOKo9ktst z6%FN^(08bNymzJCo~cNTFK9v9wk_32`#QOuvLFmY1@pSs9mLVVDY-gQ5$4}!4)yy} zv+aS9v*E+RpW^$r{xC@M>$bOf&iagOYw7r|@?WiyW%6rLMP>~=2MJ8>3DWAzPaq(#U3YvUO}qK=zjzm zbRAz!#^58!po{owGKP1N;VxvPx0@6h;hGDok8Q^=PDG(Pf(WXPJBldOMi4=@aYqq_ z$_OH;GPV_ATUn)fxMq0w77`^DYK2R>Q#*;03YEhp-Kn)iNrgJ%lD29y>?+3mT1KVy z*y~IX5SxyQafeBMEu$iU-*o~pn-2YQG38l27iF$x{3vEsmR{Mr@%rK`s#AF#@cR3w z*Au^|u#8)0JD)#!{`(#L|31A-K^Mn-escPH{9?!aH$GHf%-f0k3u<{JSN7liOM8q- z1r=Gb?Jcv{$LQ0dNF;aW>{k~Vp;jD;-5w`xJ9CfuIsGwQ7|eS zs@wQ7OlFv2BQcQ;3xqT%)1pWuqmBub1(ATocUpOAkxGb_-w;D^|sX@2mqcD+1uCqlt6O+<=c zRY+vw+@+#&ud1+CW~xt9R;f$V^56jB(Gsdvr@&4}`UY&zB^DeZL6MQK7*-B(bjpx1 zDuW^;UO5W02u#OHEJ}z+UVPu5TZv;kL~nR;*o^H?{DXLAuM--eFi)_OS)55!nB@|sT9#LiimY6JLT4wJbb^i1r$v#-biyFE_!7`Q(PskNF`!LGG2+)B+}zt5}s-j=&=ee*#m-tEv}>>C9%Pc zulyV8%yCIcRE`yx@l6cPq(mh#;~SMk9i5Y6W_+WP$ngymZwGiPO+Gp=G(6@D`+~UE z?4pev1Ho)89MvddQZ_u-yS$uo$ssBepG;_YMxB$&CILl8lu_H=Q$a;m8XMnLM5QXI z$f_jIvnwDWMXeRVPJkk#pjRyS(iG)0P-IjP=9xuckZ51bWEGc`L=wTn8-$#>19Wxa+oO;Y)znRSnxHZOo2d*cIEj zA=pu2wqjqGVXwvuTjzB!oo4Q+J6)i+k?vyWxh_ROox|Ph!J7Wx$b0b4RLnD6;}^G^ z|I}uvRAC!aqp%LfMsNpnr76un(pD1%P_^caK}^iuOlk?gL|BIyOei+AX7gZ_A=BbI z36?eVlR51dKN&(N#@#UtmJ;aHN4jmb{MB0WL{N|lVUy(#v&hYbe5Y|drrA2nGgQ6EtOg{u3-c?zV%7F4y zdc`4Qq86)+T({@9Y8a2Sq>Y^VbDi9jw2@Qau9KURHgXyP=;WrPjob`}bb4E)GN^3e zfLE=wm7SYTxG7x1Eh(%p<3sY4msZxs%8!rj1U0B-SS4lkB{;tc?HA!H%Vm(_N5al< zE8XtqGL6OTK%2R{{a-f@0Mjz4&Q{UGC|O!rH3LE$*U;S+c4#YS9QwVszFhHOD-zY5;`h2lKd%uw*}o zJJ{|}1?ko$ss?QCLXD7_otCtb>q(qeXLYmOE;mjZ+u05en%4KS;SU6A8Jn5<#v8Izq(V{Exti18Q&?oTf z9eXa1Z+o0MOe%N$^?Du7QTP6Izxls3W~Qs0-Rl=&WE}W>+5CT$oyzm@{u$0vGuMZc z+#M+UM2WE_F3`fe0~a`^`<8ANpUv^x^a49d_Db#*VJ27aCm&Y-vU9UoO)uU~8#+6L z$8Ug{<6nG#^|G<2hng?HY`%MZ`px*{==A9I_?t)1zdj-;{X{tZRky6y{Ig^9==AjX zcQ3v=dhvSv^yvAcKN@%a)4fmY^!=^q#eBESfex1yG)QM~MJwoFWH%laQ)*1)RH|&u zU_@u7HB}0;mLe-VD6!6=8g3dKqc(Lvy;@DxAfULZS7Z+>Ecc0X3#>(}R(dn*gBRL0 zqG@&qjmp*L25&7E`L8(?H4?#E(@6lg@{{1u=BcC!v}zWYW46~V^4L(jYGo#tY*(Wx zlU4;~!>zhcLbr3D)vTA3`RWSa z`WP=RFH!d0?DArI*-$6-;w5g>J$`-s==u2Z^OsM4e@exatcU@2ABrGc*t?!ht^kjn zT3dpmH)7vCqTaeK8PV{$juj0O=suQx%AAuB1pWY;R8&a_NS`(e8AdcHqK*Yp?)FnK zst^#&6SnL54*wvdqc%&`U5J$VJy5zc)kN;jQkz$Xucpg;_dd~oewZ$2SBtk_oP3K9 zGJpPq4!Yyh9)9p`@=x$je}B~D|58zJ)E|A)-`yMa_jU)Syg%sg_daQQca+) z2hF$t`sft@#h4oZ#ps&0_b8j(OqP@DDSeFX{p@1>jv9d(*<&n!bGvTN?9<;i?z1tV zAReY=Sp;RI~d@%}Mz^w`P_4X@P3{(fu! zlTSkXpT7P2#eYuw?+5zvYMTTi8vbf`W>1Y9;~M8&qIQs zK%!ze9-)1{PJr`4`_oe$$=Uzh?8VMU+qJIoEHiiaQ~;lZ{GBKf3;hbe&Sjj z9JGxq$UF4^!omqk;USsXSlW0Ji$GzGfW;HG>DSbY%y=pQ}2G>>o+@ZZf92)&15>53@*?2 z?t=%hc{y*s0_xvxubRPO(;qzS4IYjTnyJ1TJ=x>*lz!uQX#>`o^F6F|VXQxEKB|*Q+@h zVLlFM2xZMCF%V%%GRSakf?bm7#4unrrfKX|%}K#U@j0+ncF;0Zf$Sm#%K#2+wKih% zq|~tj3#?Fu8VoJ2otFUxP8u7iZc=7DFOM`6DieTV&gA|wQEFB?(Dvnb6v_dMcH+p- zM*ah!WweP08u2@tYJ3yU+T7B?d!2E(Aby0cpoFXI$hwZ}rh{YZ^(gvIL>&@;bcE`> z6khjUiY}|3BcTZDyyQQDuQ^Zyc2pyAp-@E&eA60ZEk^$p!HFUjob`sRMGcPT8EdfI z&N~POCm%emmcvv2V_a4#*GS~3&Z9aAk5Fs4`*34(xmY%?bF+e0D0iRY^-W!Ki>Vrt zYp`v7lN)kXU@kPmhcOeV2s$*XPB@LHW{b(dOhpajI))aM_i|9Lo_I8vur#$m)6C2C zvE`4$;H=E*InMc>n6Gx8U{=)7m_K_QDlx^IJX>h1nRhMi*BCG!UV~T(RhJY7sq9EqXjwEdpM%=<&E{5sEa6 z9*~X6_H_Iv3{ijLX9qigB$V;77 z$s=x;DB_lh0&bfqNUf8F%sx@DiG?f<6$^bNGWKaKilTyrG9Cp*#X=dAf=XJryuF$> ztMhl$i(7LLGx-Aax5sFd&yG(pjem}d%xFp3=AYPSR6Zh2+1+PX`m74_GJObo&>ud) z`lSDRf3zF&P0Ud^6^kD1#P@dNy~FYAf5m_e&yzGdJ{*nxH>ZcgftgkT#f*Mr7tF@v z1N)Kc{qeyda04z-?`hPJ$G%S@ebN7(9g%kEu2f zoYUlEYz6cav<2_xPl2^M+KMI47JFc@;wiBE(aU@B1Ldr`yE}QG1gJiy@I0DZFB_Fx zPWa2*Vhs*>2_RnPAeV!fbTKQpi+Z|C zQXtpS5Aa)kBA7T;Ri|jcg2a+n3M_3$YZru07)HFIjyenL%+H=JgOv2)Rv|wV<4(3zLg)F22^=={OskR z`G=j)%}Jsi`XC;*E$=^QK7V^VSzb&pc8JUU`>a9j&{fO)qGfvlGZLM=lsP`8<5oLS zteLRc>tG@VGhYtDpD66mxHrz!O=_T}{%l+9Uze&!2woLR-j4oD3!H zQm5q9&BLXf(&k|`t90>jl1(xX%T$tiSmu$;!!n5|9!@ie=HV=Ts2)~whgr%>mZ@U5 zU0lhEuTk^$rsmhb&bCVM=5(0-vBqQrqInp~wwt|S{uhQnSU8)l=)H<`6DQh}mYAl4 z@dt}l=k2q7@v3%4NOu7_-OEn;^{*Xd=QH7^zx<`42V!UHKAh*!LK{EG(U;NK1Paq1 z)fFdxHBh)AJasPEO404=3^Q&%;N_mdrocvS<+NJxHGlbwGfYs|RaHtok3h z4cY=13>LwK&XocH^RU9E7)~R&)Nnh<@+#P16zekK{4gyU=hT|zDZQ$+8J>B$WO^7S zQ4V3I`!QC5OV#u^@`=jFXR^iZwDoFvE(>j!rLisB9CnA8gPD%T!LB#$GUREq;?8}u z>qSB^FkNjhmrtq{OsrNRiv-qi`BPVo%};Cv3C}QLN_K8#9uq8V81oiwCrU{yYRypa zF&@~Uwl?aNc0OaWVsoKkt?YBPA?+i7C1ID|lpGiGewd9YU=OMAr&r35d~$ z6vQgfO$T@d(pG%22F+Y>+iW6rp@jT5t3e@(rFvd}dJER1}&x`M{~8is&tKRUi@s*>jOHxAWN$1G}w#wta?; zK9p)FVO?$hthbj;+71VVxhPhgwzfU%0Ql*yx=bv&JBO0I$2)~3<+Hpz$#nKT-q zhcg;Vf%5KEbhx*@IjYNjFO3s;ZMEZ|bfS+BEw`+g!~KtJN$i?%$gRzm0H#Re)@(l% z@J;8lWW|Nzn8y6=Lzs{$Qz%_P8u}%(Sj4)f6x?=t$#hMI2@7A=JFy_WL;vT!K?n5w z0XlQat@&r(`+B*i_f;itY@RBa4$*a#tR0ID;evy)yD@I9Zeh&0(OA{4`K8wR!4F@& zP)=F0@ZL*r+Rd7IHg1asun1_Im?T{hANIt7I1aeQ@Z$CCx$^;D#YP;59Bpi)Ph~i} z94-7gtLUjaJdHEQO!-kCi?ZVT6>1H6?J@k0+W(!^k24UpgSRf+q#IX6v>PY}119DA zR$3SmhD8qP5w{)U2}Z0UL~1%O zZ>VE<*i+6L8SiTRdjY{sEE)N3dw+n%jpm<4mo8F0>(W!+@W23y)wrfA^MM*?XAl3! zkv`+;wXvH11vi+`i$*jgIhqFx3tG~qCthiQl4*eKjUEm#s>`?;n=)=x<^FvZ-C|lL z+8-KE#+ozrUStMQn67E=Q<<4CP?=fhk(G+uJTGsmFr~x}s-X|doQ5}1AMWi%w|QR1 z62?oY(J|xurYvY<6QMijFn~t4rORoXBC`V|*iX@0gkSGS7FFW%u+S}{Kkn-O2A0rK z7{zaK?IpDYe~TsHFkcb-8MLZYFjYPe6Y9{}GqR`!tFf35VizsKR_3Mu^M4S7>GY$_ z4sl4CQYhhcG-)nn2fUaFNA2QkByn7Vw<$uKa5EyDickYnFk){e>9xh8{I}BHs)C*` z-e=mBFTJe98&aaLc?&A=yu%zaE?FlM!kbuZd}HRvYUP@jx9ss1AMQ~p!I_S!w7mPn z*3kv2cPCyTYgNh(0pP|$y!ou_B96b{miygtLuF&csYP#r8wkW%sPt7jFzfel_`g4B ze%q9LB~E5o>SSsElQ&GCp?}M5@ZER1J|M_VoJVp|FVx0NRJgCbusvhbaO5Xn&L_Ag zpP$}$jf=W_e~~F>0`KUyO7_!#DBn4T>ky8(u-Z=Y6^fxsF{n`Ns}$z7V3MN9c&1XA z7ov5FzD{9Yozy9YI)!;(QK#6~Da?zvI>j@c!n`}GQuGTHhehuW3lx3#{QZ&4Z#qQmT3~r9e`udora!QmTA1r9e`ueWK7w z28G%;rW8mDwQo!*kQ8d)m{K4q)V?vLK%&>aHKjnJ*SaHKjnJ*S?@TF>sI~7*DUfKj?@cL?XtnQ6DUfKj?@cL?XtnQ6DUc|&A51Bb zD77C+K;9bNb=f`rW8oB+GnN|NV3{z zrW8oB+GnN|NV3{zrW8oh+UKSeNYdKprsyQ2wD!3v1(LM(xhVybr1pg=1(Kxpg((G+ zr1pg=1(Kxpg((FRS^LtI0*S1BX-a`a*1j~QKq6~jnxd2JiP}$0DUgWTPfRJ0h}us~ zDUgWTPfRJ0gxbdkdM3E*N1YQmgK>i~O@J_K!EbbSG@UcK<(2B3$uGf}CDN{QCUaPH zvq@7V5sqZDGWu1_nj(pCWI`ew8A*a8^O=^MH${@98F4vs;uJ|_GvX9IIdh66vKa{y zIk1f+vKes_L_3d=NFtjNCqcC9B;99EQA}SnBrc|hcrVB)1^~6)0!=XjsPPtPiXlL) zw?I>j0g#kvNLH4dCP1)Aw2+*VT8a1J&3Et9=VcvJq`g z_nYU)pOras6z7;a*KJSB~H_hhpyMg9+karWdT-KN5R@Zm*z5I<$l%HprDj$a); zKYnraw85~@&U9KRNS*Pw$4_7XD~&SE+^B`(>7&<=Hlz6J=&RG?|M!U0G&0DLV)nMi zP+b~jT5zb~E-=(OgPldAf7_=kB>ayF1rCh1N+9CHz+4Y=R=_=d30OC4A2YFDo$TME z7oA`TI>xPPtpFCRXb_&=tObm$#o9wtatVsjjL&)xT~PeC|EK1Q|J`6#h8Jm9vlaa| zUQVu8>kn7cFMl;(jF<1n_$bi%_rGfX_ZQ}DZX17ieSBgRZc*gv%g0@T;AoeFy08S2 z=fD>)arqp+K>Xvip29%~2Ho{Y7Y(#)+ z?o8kep*!}Jnwh)NDb?I(>>Ja09aD|ib+_hUcG$w(Bk5~e20{{(4?#-Lk+rhtD5Cab zG8J9`D=nk>1O>SLv9;0>A1f2Yv_6)G2OhUDj7s_lAy(#`2q_eQGkgXWOG0ZVnvz!^ zOGjC+CJ!OjdNqHJBCqh{s#k3yBn!-sjope@IX>D9+M-txt7T_IC0o5}V!o_dNPXU` z21p}xV_ACIsCgxtg#u+1vECbzE^A3gouQR6z|FaoP^>6N+6F)q7!I}IyUOwUKB ze|+)8E6KS(vPzN`2V>@j|JIralW;IexuH{ZmZv3xDkDlHW{^7Ez)a|_` z8nuZweC=xey?e;oPp3EQcg?_jipI~xO`E5gcrr7FWyurTnMoeun%3lry-O~6XkM~Q zpRq6H+OFBXF`eLWmuWVWHeW76kCo1T-3N@Q1TmFPx$jO35bEaTrc?)#g5 zFRC#2h4rQvjr^=3_P@A#wo*!0v|0uAHBakD!$1c(Z*fGfzWF3B4GU-zr2JgoUR~is zeQyny0%m%Bv;H7JsT?rAzzqn!E}*^zkxp*#k*eixBmA=Q%Gnw!p#5w1Up7I{D&B-5 z0wgF*wLsd?XzFG?yPo~Ww86x-xmrvv=)Tpf#oO6AE?b?S<6F0DO!hDmK3|w3%zbeQ zJTlaK%=xgBKlqFonJ<2)M!`e93!1!P>hK48f&Me5?|r&HYzNnsn-aRR7=U2Cz4Wlc zmTrk~M4v@6Q>8^+-jOOhpAj8>aihVnBpzS=B2_+f?4nDZiWc?jUwh?Rbn6?Fja|%K zIGm;5d>;0l8EKV{`+Q(E0Wrg^!;{cI+y_ag4JNPd)p zvSa0R38^lLtT8@clqW@WF}{gULty^nBdV!V*@W+?yw@=AZq<|>86j##ZJKCOUqi0r zKMK?o$4V|v*%UJk=@zzHkuFSBO--XABYYk)jk~!|f%WgVt2NTrpFgf`qB6Y8f^9{w z@XVJ40bgZs45L{g#hFCa)s&!5_qg(=bg1E&Z#7Fyo2D8ms_sIVjC_bo<1fpBZ0&V; zcRCE$?Jia{tfk{&h0*DK+q)^}$?{}C!$pw{Senj8tIkUlajmUSZE!IW7enPIo11^4 zh9pw55gzmMHVoj(5JM(AxN)+t{T7{z;S`ugM&UPW&rdWIfH}Wz%r{Q{(!|Go^EevD z%HD+{GtRPuoWR+PXKW``az;h53+kw`Kt!KhdB|Atpza>3Myc&0m2@n0dUl0fiDMqI z51Z+7xmeom2eTe#l%bI=zWHj(9#A4QztPCgJJ6S+Uv|52&I$+hdvoR~i}}y^jPRRB z&%Zw69f;C*m<-KRAO>HQ9ypm1qR^V>TvIgz|F4VL>gH)klTktA(E{N?USdZarNUSixja==x5d%m! z54+#ps!%6>QfEZP|Fjx)H1|fW7tO?I*VU$w2`Vvbs;3Z{bGA16Uvo*+um4?t|9ZMw zKfRuuKUpomU7lP#Uo37u(t04B|JcKU4*b*KANBaZz22a=zdPtB=RZb+{rz9gfBhV5 zfM0w;t3$t=&ZkREa4&GzQSGDBCTMV4RV&ET-9-e? z>uGH3$#2JBylgQ4#~_z>Tl z^VxI^?ED7nE`LtAHpc!23xW>q{CW;8Dt=Ya72hi8ihmV!#m5S|yt(4hYU_Jf!Cy?@+`b)8&d;Z-Rdl_J z4+VdDJ3n8eg^lO8*Kb0&Sp0#4r}~e4{cmOd@0T+ekk*yK4+jc=gZFI1zbgM5gw6{8 zx$46~>m{uh{`>UYw2dl${p0wmw{kQXxLDt+B<90M%iEjv_<-88(SbS2kQru*{ zjxE6AhcXZ(etq+9jO#O3sAJE+--cg*xUtQpgFeI`A0b*#Xtmnni!}tzVg%4{7N0c4 zsK2Y4NTcH4E!K$L)pR|^vU|iv{)xS)%gY%;W#P z$0e}m%`K@r4$^LT?G{%kg+%?KQxkY+YOv!==0@efz{q=5V>Ba@hEbY#kp zxLf2wB0c3-K(3G|kxo-x73mDgd6$u?t`=$Lciu~mC+8RSG)OjU)8Ef@Y99hax#mVx(mVeHB-tE;D&K&w6x)Rvig2}dxJ2-z^sb|5paeeb8D0!3@#fT z2maaMQ&&kD2?wFk3^qvggWGIk5z*31L~w~!tE+r2qN_v%b4*AS8l8ZKxU)H?bPKsg z5kh0vHya97H_Furi2Yy`7Lo{EnzjVq2T4$1P;m2(Dk%T4+A9R}0!5MF{N_rPQ6#ug zmRLwZLy!eFY?v1)iUd$em7x=ep^co&SR44z!z37totD${DUOJzV%iCkEM^JPdLWg& zgk-d32}Vgx-@N-UHdC65#rtHYr>hhQ{8CH|5lfIJ7@YT9;4BRmv&VK1IVBh-DxKdh zmsr)nM1GpO$x28Gt|vc^Zzdn;m{{a(f74YW0@E?%3$eO{FfT6{jY*lz7igb3G^#Yh zmyRSskic@8GsdWtg*PPR&?sSll&WNEStrrGgh4R!9TFMl^hu($1ZgaBf@H{I3DWkk zM7>?x6%1!<_(xX>Ym}Q2T&-8Ds)W=_%kwv-nzJgU6?btn8Pl8&19`s;7H=*u z{9V5US@Skt(@Nx9SCRsyRynm)W(V9nzC z&1^oQB@-NzyP2%tt?VAQIf088@)qs}0oo;^|M7!{|1%D@yH|A7o!lKab)}E1e`@^h zr>4mhGqrqhFTmIg(=;)?h(=|42@U!65*ps?C8U~OVn*St$j{gOtTq~xl=Rh-p88b5 zvN@(g!%EAvWjfFVce9C|bGNTAR${%a6j)~~1=iO}LE>sABy}~L#_5{*>0tCJBT+RY z3RN>I&^4oyR5Mf6D6kuy%$c&l&i7a`o81*cn|4F>(54bSw5dc7Z7NBuVk)Fog$W{U zRWhx^bG9X^74scPtvn0>>}G)mA(gb}aWk=4sud7BGpLLa5{xlraTc7AqYu`$7z-$E zV=;k-*_z)_td`eZC3(qRz$#k_6&r?|&{2T8{=G_OHwdc`+7qk-*d|~rj@DjVL3YQl z3bQK;GG?UDX_{Xvr)7*~WZd)b@Y*c1X*qlBu6Wfk?alQSpDFWM8Ot>hZ;MW8yHy)EhzYvpo#TOV? z`i_#kOa)<8MOHl(@iiAkmfxXFiv0d!^4P7i(9~WpAy3k>cv33Ya-@>k%1uIB*^x|; z+D>Ewi3>kTf!kh8p31$)q%dq8V&o*^J{0hQ6IE3x7?dc6NkQ1$OJ#(DR7NOBWrTuM z2H${7#qhFJ5-&@I@v>B&Sxb{KVCQq$Gg=VK9^RnK=kafswX$cla+Np}Nc1tuL%wEJ zDKcwEwQ?n~a@J;RU!nJFQ&NAiYGpZdlSQ%8jfEvCTEmi@Ly=vLk%j5Xlq?m?P_l?E zIOPRq=!8}*OG3`o6bY+E1saxZB|qDifor5E?_bfOPXq%_@UExJ6Z;FUXnOzX$Cd z=6U+2$1L^~|>f2C*$6Kv;F{M^~_VV(ubeRJdA&b@WVQ=ai5yos&1sEqgFQ(sKt#0(cMTTnHvx2+9jkT zXC`4<-PpxQ-HXItcu}pUj6Y^EuNLSRF3@8*M)3t|!ODnx`bzE+NRSx7EYG zgQO=$hHu=w{RJ6z>CQ{4oAg9$U-eFp7Uo=J*d}}j2`#-IiE8cjg43$D)Vn&8Fn_zqm&aorLL;tfVmuiYg1HsgYgN$ApMXxZczZ%NUq-S83#YaWNT zdC-V86t~1|#*!9wkLHZ&Fk#A8eK+N8R)im!S5l&YpeDTcFfcspI6;9WfM^Z7AT zm3&}U)!FDE*kKCevbvXF$ZuS{TXKKiKcaVfPRuJk(yYRw{1!5LM#fY73Xm>DThre_ z5*?Sr|Bpp?#$6c}6a9w3hRmxiKr+8WQ-Bq2o%qzemlS<?8i-y`=l?{2*J_}>+Mu@pFgxiocc4%JF z3IUJwoH5(S*_{7tZ0s=Yi>@Idw0gF=gwyrw$y+AEO$c>6>=u$2(kZo~9U-%RwVIvI z-r_h3-4@~UYc9fWKPAak#vFOO3ZM-r1~~IeA-Oca6~)jBDDFiuTqOl`7;*C4AFac0 z%VQo&XKXwoVBY^7x)wpVd}ZIXWVNk|a0$hW=~B&1&I(2>Pi>=6YuZ*JlqjvK*HFA;Ltx$*Dr6`Jc%kS?!RG{yNS{%=VKZSGL<9a+1&H(;B(Iwx}DPdifIzH z=81P1x8QE~0Z*q(TxBN5gL|w}vAwtrZ>+W370k!CE6JRt2l3A2ouoW9uTn}uyKyj{ zzHE6XS$W6Gx&%nte#qeK-K^--pkeyNyt(#$;%)7gD#7gYDkM^2q{+h0oXYvU=9&{T^~`nVa|xJ<<_L zkiPOfO&SrIfJ(x~H0uYbHmQv7OmA!L*Ytv|w-<`BfFThW51RLHw_r~p{e1ev^y&n+ z&S1YgGUr{4-zVQfzz>(ys&o0yRHA(l7bT$8sO*;jSFRpdb(jEn) zrLVAj%(M(g*JwV))nsGIfU&GNiUmb_&aD6w2i`b_31lbVID!e|?^bdMPg*nxH`{i( z+M^y@nZQsvM~D>R8L=rW7mBYO$0Y)AE5Mk6>8Ux+jnzAU)iIw6@=}bOW-^IIjo3jt zE{CbZPSkM(vx*(9;|S)(c*d>(P!8L1BvXf-y4$2Vu77+M^DV`@$&dD6Lu{IUE@jU8 z#6x^s6=xMg&}qK9DpaFivn@N?ymklvJmJbNJ2&*LKlj$29Td|_wE1vDJm14Msc9Ma z0YBGf*L6Kf`NWt<6t~trAX}#M1?BA}tlZGCN9u3FB97TalUqO@V|K>l_G);Xr*|)! zH{>`4@r2`zvcbTWg)x<_{q z$v-?NpELEtb8@@(oIKB-XA3UgTP%&{tF|7*@|Rz9lI)|$Y{WCkvB<@|q%^J(=Qg}h z){W(>ypo9>9Li@=F70pIm$$e_#E`}g7QAGZVx-4-i_M?l*4NlWP zqWdan@`7Ob^ZW7pcav4%tbk4ReLGvfqq99N=*NLIZvV&K{ZqOHvMwN!1a17l16LBw z==eWg7066DH~YLmG-oa^>7dY1>#(u^f|_a6I;-)~i!Pe2XhFp&=F*nJZG{DuB)O@m zR<&0BPFg)4ckq`1MZHRHHLw=NET)UU9*^7oMVP2p$*Y8FRZIAb#v3d89vVhSZeV+X zPYQ&Z3G*X*N+;l7nM=x%(eufY9SDu~-C)O1oDbf88*Z|GhVND!&#B3U8MCADQ#K)m zWf3!w#_+4r0F*U!0Lva#z%m^2^${6nnL;4-WUxC3$7P`X_MzGKzq+(Q5)9B$Y;6Mzl$kR2jwst`0)Q=AdY!pT1~H#ACwVL_e1i zXge-8%Ny3&eqv)#?*`{aQ>tA>Re^DezPw$JFK4(Elx*<;8KV_QkqK`aa*^3}s+pNnDq~5}&q)O>YvO7-OE9%^$|!Pz ztEOmbotWs@cZ-!8M;TV=)w)1YsYF?`Q(64~?S1=qUAK*He~tf&o%9iT)p5>4?>3Xl zmeSF!EvJ$buUxO(6U92RG}fy+^0;oM|NH#_zCaQr_c?Of&Z@N9$l3%yfB?u%f+Tj_ zf#gX%GgR0$d88kbn0p9iX$d2h9R8%shMi?dU}wDM7$ys(j$rBMVUxMGBTnh ztWgk**U~g|IecA`U66>mpN;HhY(<#2#?mj}U3Mi^ZH~3Qli8Bck_jno2q%*=jpr0Z zrCusy*gZ7s&+v|X$QkVYL`rzh|%6rfi>B(7FNqZF$yA%RY42N&I4{4n%tPG zU}izO3az%WX5LM936}{GOB_%43c;oY1Z$vZ^+D?f1buuYDW!{+N*YzX3^7aPGF=@`Um9g- zj!Z0Uv?hQo7OvD*Pgn!o-~Mjm7{^PuSUySD7f;aHWuBZDSi~M}DN?pjg#ei*BTzx8 zV9Hr&ExMldP9@Y{q~Z6f)Q{v!K>oaPY?zcx9$l_eVs6hTkjqpd^e;9GvK9oYY8&F= z)MchCwK%3WS?K!FinRFUmc{VN*iQW*-MYwH9V5^kn7-V!J&y`}9lE+vlA+3pTpZoc zNO>ZoZm{U26y_*vfYCS-Vq+bgrmbX9re?wlqL^vcscz`(H4b?7qq5nsN}~BUpUGcU zB*Wrg`LJJE1h>~z-ABqaFV4tln@WA?y&sol!{}H#pdp8NRb2f=;%+n;IzeSUdk6l~*jpd5g z@$G`vMlxn>+cet&>EkE+eN023JAdv;i7(vur2OieCr>7RN4t;jhOPRx?<%wnLu)l^ zhSuokHgpP7^Ie5@&L*{dSL1MtCiU^%z|#7z8(X81D52s3!VcV7Z-{8CE*?=7_b?4tL4wY$GF*?)qg z0m;aXvj~_jO!qN?qEj>YK=*X(?A_t}nHyJ;t;~EQ6({d;!iByPk}xQqS21TRRwrLS z`Q~7<`&G<*vh)1$WHLEg!+jcSAf6yq*WQ!KU^v;@+n#{b0SpIgPFB3_Z<)38a7K-j zXMuSJ(S1=o(qPJOF98j;P(&^OtvD|JZ&e_C@)ca_DX`3&m!aU1O1<1;)68IEu`%2c@ssIS7 znrU`HfKWc40QIbhRta%XHCMF$%~myganq|P1}&dbQB zKD3_g(Zo(~uZl7%=cZDg*+Mw-$&cootn3a4^G(z7AL^GL0TW0C)bgfiV1 z_QXjPeWR9_Dl8yXnrKlGON-sMWm9X-As}t1cNtL-B{^|laqB(?ciSTfyKqQA0B1DG`H1;vU0<1A_2x%S#XXWIF#$O{_Knfht zKYcYl5a}R=D98e-v8Kn1u<|(oeRjx*m-u|0&Q%iJivPCA8ySmW)IrJMaL2kJMND%m zLDW}rejc(QNIKzJKos8Cr@2d%b5iI;-}x^OO$GV|MAK463c;Wyak9RkAxW`-2sjR< zWRny^go9U_0HSnJ=HDdA9TVQ}XjO^3FJFK^c~_E}QE+$XqczUHYyqxnN=U(19QV*z z%i_hj%aoBhUs4K<4i(;>WC8v-0h8x&6jD@9$dtQetxvfOWG-)gtMH4jb!lu9iUqjiedBWCiI(E&` z(POH16s3!EOzh|}RXd8(#W^N+^q8t0Md<<@O^;SgX}fy4SH=XlVx8DjBCBSN32wzY zF|I^b?Hd!^igjXTiL4qrCYXb@$OW0_g%sn3xsIfb6}Ds!p15W7JWCZRc*$Ex8po*o zR($h#iNxj{k}UxY(XFIV_8`&=>P;yqgy+XDgwF{AC5cb zK%%e;;4k3l5WctyBCqG@IFh0Wg;9)d`sWC?pi%LpRPm%%5%~kudU#&1rn9@as&XqZ ztMj_jS=Bh5T_gG8ggzC3kMJt(i^|LG!~&Glq>4a`0;W*4Ax#>M3YsA(0)>$1=|zqr zON$U8DoxvsG* z5I3`P08v;4pfe?uCQ3(erDcfM=QpTsQxz#7QF#v-KK{X%>1tqssthSE`YBMNv=>BS z@S=%yltjEA80|LJh2jZ!70lgFEGa9RT;twk5iCt{kPDVl6rn^KfE+~>76GDrk~u_F z+y*JV0XC~L^(kpqUZa`nH`VWq+#B9bT+0dS0{3?K@N z05(Kln+s(mH87#`Tq8gd48u&y0~0QgWCI8g0xiSFRqK0A^bg26(T;tY|{M1 zl4ulVC~$LWfD{-}+6Q7-g&6jM;2W(~7^dh8RhNjM>QYexCtVsUsY(HY0(UBDRjud& z)!RC-L}dvYZ4Wtul;(%Rax+2({~`e87E%Gk)!%$wkQBB6=n#4=fdHVw766?hF9J|u z3xEPlMF1*n0VvZ+(T4I&(h|{y=As&1VNC|2DXP&Gwg8H1B!HG0iKeJVSJ;xEtVTj8 ztC0hQXBTfKZxcrn1rcwPJV=Y&88;}G7h$5XU5*kJDC4wT5ZFnk=$BD}o7}<6OMJXPm$ zB0`Pm;_~LJw`XTDVF|HK9C=a<>bVD+dGJ(Pk9j(yZ$QNiqgGT1q>-%lQ?(@|sw|dW zA5w@N8I?STvplH{xD6?iOvRWQYcyx){Oaa?Oex#ryov0sZlPEbWKdl zq^4KYY5KvDURbUo7$_nrRVrN;jdW7dK9Y$W}rgE@q-v3E>o-9hSCMsvM>137DUk(2!RS)07YLQfR-;1 zP0<%zVM~IdFAzY{7yKnly6_ttAu3Y2dkS$6w}2ri>QM4dZ2?P?)WMSD>;=4pst!pG zUBIEDdS$z3kPSRuk%6EGP(cjFC{{1rE6uSAgiT=WQfL`nqb{=Wp=@_AG{?8hx> z2%_AY%VkoCih#HamjmEorGmJ|rd1WiHIV2Qnu-%q)S!~_Q3XqAig04L&Fz35zIu~G z>U2fe__kLL7nc`d+z~wOd{QW@R>#RI&Sss#SJyWXBa%^t`>4%*T&X@TR`>TUl5JCxER?mYD{c4H%Wd~0O4~zOo4e9> zU%lLRc?CgfepFnPK~gD7h>ClGD5{VkWlxY)ibA5|q9o>SMD9xFd9j+jgpgO3rM)0{ zZdJ#rProF`%5kr7?utSa_uZWcsLI|(phTKJFZ84Rt*4{zmrD=7-u-&#$?o3H_R{dv zBR*Bobq^^y^yJ0v_VfQ-8jn6b8ndDBe0%HpmiGMV&eMb4|GTp^9)Ef?Tph13-@h-? zJo8BeY=?yu<&>FFYs$6A(k?`}7(lxV2S`eJ-*EN}H?)0p; zvnOkDI^TW_nRFpNnk>d2T8}`ztsW@>GHG0Dq>mm=;$&H?ryi-|B6{T1$p@!@n;w0& z`_(gY#H(R&MK{=xH$J=?9Zj#SkMhgU`smdd-(0?)gj&*pU%x%PKAIkx0nH3V1qVDn zUZU$(>18YSUgn=&(h5Z_=e8-IgI*aBDa30Iib|kJ>L7b_qYfyNI>-&(r~`_m&Nd~d zH~YAunI>^_1IrL4a5k0nQmW(Mmy`x0fjpy?U8euh(d$RqWr((%R^wxO#ru z(MmzRtseW1Rtjr$k>F|{JuHk?TE#{5_|G1#LOVYEXeF@|uP%>PQKg=vRaB?vXcg5d zjaEu_c*O0tDayE0KLj7O`L^{5VUC+@%+1 zDR6|K`^|4;uF(P^pC<{V`MZF#=6C(Hkc2%t=l_)9di(6UU4 zTN7uv>BV2(P2Qax&U`bLK32$Q!G3%}R9?E$fN5+Co1zQu8cE6?GIdH#fT4s8@Yk@WV^v6Lj z`DP+vh$6sjXr;4#Rv}S(&zBuy5cK(9{30=GoevzW#77}RP+6lqa#aA$_dsZ>@5iD{ z=APdSsS0i|NOG&0MF)OA94cZuXHGkf3l-JWR5dL1+1Rj)ivOh>%*odH#J2piZhDp> zH+=|yQTGd$=FaHOv)ZB6oNVK6Nj^VX|MVo{>FSvAIPSf!*2-GF);z3~qi`5kET>Wr z3S$thCC%4-x}esITe$g>&^l4uPqiWJBt4jQ;m*iWfITGIfbNn9qRVFWq7)joYW%EL zTgjSXF|?8zlG~RYM}`219@j8zmJD0f{nf5<64f}V+83xu zj1el(9}N!3h#~2l_!7R2F2V8dsw`2`3nx?juv%ZqbKy|5qFz|$x@QVwpVkn<5rk99 zn@fEydQ__$pZuU9%pxcS{c%jf2NlT{^~a(#7Ag`K^v4m2EBs59R24T=Qq6&FQ&-f0 z9Z7#wN;{#poD2Hls2I)qX~qgGMfSKRgG?|H#?bU>Uj#nB>%_Qxb$B+xXp939=6Mw* zWT7XYitPoZGqiWqK6NRfCKkQeZXU?2;Awv;GRVC|V|vi0-l4HTiR03jbX>PPkj_l70KjR+cz-h*fMsKilR8%gT=G!eyK7+=;f zsUpBzeO%(t#3BoXt1ln*tV_8{6F7|}Xf70Mebg_>7lO8o= z9Z}6bIwHDQNB&6OgkipNm9gJdYJ@QzU3Ysx75?rktTsLIQgaC3pwf1X&< zAWjEPszu|Yig)QzLO(^T1pNNxcS1t&HUuKWOFKL>#{=QKY;4UV%oe85UQ+pysGQoZ zRu|_AMa{f})aw#gI$s$@nT2FHdk6|TiYH%a5dlHa!Oa2z&LDz-Y9t#;?`Tgk-lWvE zgM*6RmlpS0Snc!AyV#zIM3;8k-g{+i=@zxy_DY9TrR^Cfw`C+Yx#kdchmY8l_k&}0 zpbnzuhOIV}>WV$0O4G1UV7JiS=1#8$@mZI4OL(V%w#39C-w6(_rcPLB3~fy8{n>j} z&t~|SWQCpt+W~U+Vy)M~;gV3N>8fFerH`vB)V=Mm)veh-(3iIODqavOn`YJBTA^OJ zSuM8?yykN2P_63LlFT)M+(qB_${Z=GUUln08JgkeM9K-5K%ZLEZ}R3Nd#FUc%xwSN zulCER2KS!!)?zAWjvU67Dk+JX51lMfEe0*ladc@MA69DW# ze?(V9Xlt4NF}UCT)?q3<-5WD?voFobvQ0k1LSAjg>XXBZBkWbY?k9Sh~8LO^;qr&3cVKyMeMFaoXkrzq{`AxDyA#yDz@zet2~7?PP!FVCVVd z+pQ<~TuggW$B)&OIs-`o0I2M4=<+sy`$4Z z`k-~_Uj@&>vgy;4k&#j%LEFE;0iq&>u}Kw#6&OHFC-?!s;C5(7si6fOT___J)Oa&J z?`{rfZ+MU@R5f!KJAh&DsWO255ekWW6&-%P#6899w-<|gX54<<*tp>hg%ohvU&`?HAqPLkptey~Sqwb=Upk z-GBW}EB4P?<(}|=*FWyr9ue%n^x}^@&mVLzu=p|g1Uh;RUmc>u1^ai6(d0zlu_*R2`HJITL<6m zJ$6D$A+~}Plczh+x4O$~85d&dkV3#oR;#M{1xVghY8cC>BvFes z<0?%Sthe^}x4tX$=HkVe8)V1*9wu8nymZI-lEcHUdwO{^J?rSIr+pi^xLx<||M6f1eqAp-xPGYbuRgzLZfwS#9i7XF~4 zo5cLS3hyy`FNGhQjDp{5G&|%&F#IBeXNk98s6qd-nJQ3gTt^p~D?AHKyOu1#XG4xM zg75Q_Y4_Q%JDnkZd4_Lz9U;(t@oL)LoZu1O)mCwb-7%diO7vF~)#k?_?&x$ZEuYh7 zg{ZBf4{kdFyZ=B~qi#OvL}V-bO+w|f2qOEIfbIgD5>huuW^OJ^lNw=l6X z@GUWbYY2c`K)6XXG07;5TQP>!c1B{oOwwqQ!YvIexO;}d4-hDuT+_AIV|-wP;fkb8 zHl=Sc1k9H*K?H89jW%(2%Rk>l*^*djz3vCh<(?y@-?#jxPtBu1Al) zmul7Md6=o&h=#aJ8b}P5rnF6-AG2ke|;m zQ~povfb-P!&RMG@+?CC2Uvq6ui|tg%X|yg}4bNU3&Zd9smVUy-$IW4Ba_hu;&;^bB zU3w`u8?N;DY>IH!<<<21kUr%egLmBuy7HI!+0;zV7;*nAYN3|{Fksw4KLB9R0O1z_ z6}Mn{7WWOV?kvH7RJli9!sLEvFqP@QFxhANPii_)(m|cy{Dzi0>?f$%y}~&N9ByE% z(M*W7jb4whcwicJbIJaaLct3L!{L{h?kF9fW$S4)et{Eh7>`5KdU}$d1Y{T=LS2^^ zXYYB6xcAJg=`aJu4QlcsRV#xphh?pJ+t-|~ui43%UOHZG2GP2ub1Z;)O3PZjy5w_D zS%+ss4r+$HwGm*?TlScq{nh&cy{y>(KDa|r$6=MfJDFlpK&vOs2?oPKZGf~hK;!5& zPe>xN$e?(q78l6u{o9Nu6X0J{K;MNu8zxjja}0s|8+#>~32%NfEu$KA#B z9rwEuG$KA{Kn56A*8=Yd@^TD>nX?z;ru_B$j}d_|ox!urdIgDH8rZ1pLkw<&)6=nO z&62q?)yGfIu7y_np_aM_>BAiGG%(`7ihpH~@POtS!6tY>PD0a@&Feac8QDeqh`oc52>JAuf|=3u)&{M4;w&PXCYWlK zvG~;2FNuns`zN6W5Bq=b*-s;X@pmg$zY+p=m_g$Iz(@m3i!3O zWJiFH!#nFfPV#`z7Wkz z(J~ar;E<9NW!>@h<++)>@^;@RoiYTIc4NZ0=il<7l4BhluHApcV2lVJbYhJjce!40mcM z=!1Flk2KK=d!4w~{&vbPtcHs{;Ur*qM|0!sB@ z#90GQ?VK(5HM5iCW#+AbDW@+=7`TF>G53w$kUcgK z%@v5^s)&>nreE# z8}Dcl%>(eO_$D_CLn<$9CxGg+w6VUj-y1XpU{kwYr zxDhZMj91h1zsm!V-<|*e2fP42|D29z{&9Li8^)MI|M8p<10nz#E2OYd* zhJ{5p8jO}74u;**k8e-Ua7t@BIvgFpTD=bpf_ZjTbBINWyv0 zH0sbwAqi(g)2P?IPTXBf&euwL!=51xn7Q;w6}gpTFL#vNi`tpgNR(0}N1VZd2f@V>b&9;5mpyVWF+n#^yC@Z57m}sn)r?^qM3=@RT}gtH2I5 zoG`MfbS;h?J+Tq{gKpPS4To;~krgMUwk=JCM^jVEAa}{EF-%=@!b4hcPFtjvs zGR78^kmf6j;n0smS6ZsoaO7T?*HikiCV#u-TZ*zUX!sA)qdR$e624L=!>99Jm7iTM z8+Nbg#4C9`CTo+uas@}vQU7E>)doFFpHPx!QXWJp20SrPYXctRh5=7dYXe@8num2k ztqpiYJzO8V!8Cpe*tE1kdV{^%y}ba|5j9kdr`O6m#tK7}&eMgbAm(T?VS@uxhOsm? z`viw{=rpGNanwN&nd#86pwZg)J0Xaq6a#?kpc!S_+Z>@0xf@dB3})JQ&^{w?z$D_m zOSt&?kEa)W>>^CFC4x0d+mKfO?r)KpjeF&wW7@#r>*SU^<*A7TwFX$Tvw8nI>r^rcGLj zX_Quyv`X+vTjk%=vcvI|A*d^QJ1cZv0@*Dk9lBc&G^X-cs$lND+5;_+=8$5@s5Lbw z^H8#-f~?%ol*l9bb_$9hyYD7=jn*k8U^?9CjVw@kATb~c*|a96NSP&5f?@;9gkCJD zfvKE>i7#3Ow-B<&bb+KitrDGAVFuZ0Hq`|3bY61xg&1(s;!vVZqIj3bhTCkc&Hlwh zDhI~!qB?vPY=%z&TvX@a)X)ODMeo_ zU^*sKXv-G8Tp8MnSAvK%E@_4<=Dv;~p+QZMh!PsqM9ItHI(-!nw}Au=4R5Z27*jC| zov(f5KXkmpMivD!lyGFOm_c_q}hLI)W}A<80yS%`UDaP4QVAI$P-CmR3RW z`@@OfwolIFF~fTc81=;P<}Kuy6dU54A##GJumCcDdoQr<5HAtno#K`q+E~+46?_{$ zB6Q^4NN_Q3bQ>mrf|m&<&JP6-rMi^+!y?t$c?QuOXru25O?fuWNJy(nV7n{b*}mp!?{}AL{$pBdVp#hBj3_e zR1o#y&167Jom^$?UTL9CRkA+(9i(f1)Qvs5csDmqkkXc<$mxt43I-75Mg7M#Js3B& zFA$xk6y+;aJ(xSdlIp3~WTB_P{iSezl{6QjKvvLuO0B3}A!&21C_Tk2{}YTGDHJ|f>@9&u>vf_Py-Qx#Kr+NRvvNiQjbWq!k?8T{rC(veLzP_fp2LR_E`|eA<2a%^1iS-GU zOpJTz)GZRLDMccI19rSsk%FlSq1!|%&m61HRfwlFQ#Y$Fnn21}I^S~cgW)IDy(4MB z^k|=V)*6(Ur$271Eu3hyq?m&%mgQ`x3M}x0u26o{ew;=!I4u>;)WD#QJ8PUzrr>>FnP6l zI@`VY`ta48DNVXT(dmUjAhj;%SB5BhAstHrI!4(bLe?&PznL=hSSkGQnYYdol0+nL zo6;tKwj)as5Q(QVF)yy5C$kcLd~*$@(XCJBYA544f!EZ_B1tSz4k09=3?zIJE_{T6 zl8M>0nd}&o@ma3{Whg8#=rD>|o(be>3X?iKn52Y88&is&1N{eZPzOsT; zRS7|=rKC7@tE5=dO-hoeB`KauBi;kiRo%Q#^gRdd$rQcFvXW{7O-Va}qNJceQPYv2 zDyS(?%%!P!*HA{}KC9y}NI6A$Dn(JUtfZ(wQ&Lo*C@CsX)D$JC3W^F8ZAE31g=6{7 z3*)&;=X6ynjFhDlMcUFyB6aB`nZ62SQeiqtQ)8I&K1NjpRkjfqj{M3EUo zRa3(?BAQ4UX-Te*m?Bq5T%M~WF4vWk&}3?f%jZ#yRi=Mw#V*3QPTzE99O;#~yCnq$ zt+<*6rMTjPPF!(CC7wjrh%1g2a{U%*G-kM}di0jRiWEMN#qaIt8%}aQgNqEa>GLz} zJ(}5g^-Ej4aB}Z8I=nQQoUE;GgcXyIXi;1Mg-GeniBav^#=a4EznVx9PI=nJ>zB!+ z-RB3B@o;(a{26|YCOdzg>^y%wnfzr)aVdN{MpCVAL{hDdBB|DvBdONbBB|B~kyPvBNUHVKNUHUXNUDud zB-O@BB-O^clxl?ZJVMW-!B|Q)8mvmGMuQD0)o3`9QjLZyQmWB#T}m|?4W(40(Xx~Z zt`SK!9z;@&N0C(Hl}M`bdL-5IFp_F{Ig)C5Es|PtM{%)LocfDl?hp{NN;gr9k2d^$~@YP=)7xtb#kH-Ta z(VC1WTYKAap&1V*N8@ETWJS9UJr8uy%!&>?*c6{>ED)EQ^AXB^%Enx_O?MnLzWG4~ zc^bm9j*lsTaUM=;bM(<58J$dzEm($WK-9L?L)eIv`;O!-nbRC4=vnZlH$;gwzYAU> z6t=msZnNIj^zB90T5pd$V z%m+T>aH@OEmUya^-yjK=F4KUmm~Oc_Eas<}Z^7AopRAvND4^JBm3P`~(=- zOr*=4^3%;JkeqJoT5?n^rn)T^b}du#?y0Efj>6cn03N4Oc=@Pw-#9|CZ}vw>wVL(I z6iPFqLZgbCGD}UE;yBwx7Az4pIHvZiz(hS!R|vf%Dfr`0cUbF{cgQ30{NQTVihfj7 z6(V>+A@-a`;w?E%gP*Dn6(!O}Bw%{BA($yGfiPqGWN!KzQRpIe&AmBFbu{XJx%80l zXD<hd855H$UVf3Pvo^ z#50B9=#8XAayp$51(FU?2%QcEk`BQPoel+(jz@Hzl8Yp*aXOvYlu2saX?1d2c;94Y zz>BM3F(15#k0>@9t~PDv5p=6{onkjU%q{1EQrm$#vtA&M?MKL|1p{qtLl!NuVxW!f z$P#0|6>B?Cr?v#WX3bC`vnOh(77bPAv8mb3(~UYFwfZ6PGzcHUsCH+OEH3<)+MD*t zz!wJsWOpO=Yi4!OQQrh{);k4qU&A_Vnr6pxmn>^*%w<|zr(s*nE6mvyx8bVv)wN;C zu5oSEq~?r6-c{{$-&d^sZAfi<`t|egCT8N0oot@7X)X4{e@CGm(X9LxK2EohcQC>0DP8LbE>)G?@#Sr)Dfxp<*{M zUYUv6Vo4i3MmhRxh!-%XCOP6p`^O&rY$8KRNxK6FtNL<5I@%>t_Q*jRyU!$d` zTLJ35TAe!Ol{J$ZPs(g+AG1+&pxB_6mMp4}N^3e)m{XyYru1U%Yn0NC4wGN58frXI zima%RCl+*VvqQ5|p_lnEy4tfJ$_%MtJzKYEJl%F?R1Wf56vc8pDCdNk-FQsS=`ySF zket(HHnSsgP8oMQwtyRub7IY2emu^}o7UpE^1{6>s$H}guLP2y0sEz0e;TnPEX8hi zrJ@mMwzCTrjW)BNU8iWYnGFe7bi7pLXo+H5Xa`UYUb7{&GU|LWkAC)CwEHoyR{v8b zwPuhJgQo=2Oey+cM4B5?^umZTH>2o>5oK<~+=gMa$W0jc#Ryt8pk_aZpP7%3zwAGW zwu;@+7QFjx7_)f_ru)Lqgp}Q1%M&@)Els>6^PAR4tM8^$OEC$rc40>7=H+N@eRH(7 z@;zm8UZ&V#_a&u8Q209f<;~IR^7q}x*uvr9C9(Oa`?y$R{5?w&V}{wK(RsJo;A*7= zS4cTf5VNHT<$H(95u{6R(pW3xldR-QOt32)1gGO=GOH@`V;g?cf`i}sVuA3+g$ zzX*^8v`MZOm^)Txnu8pTlf%EB&L+sTc*8r(Hv27n07u3ecb1%22vvnUO+4r!tY3C> zB>JLV@?abtXJ-#6?f&mvH;A37KiqS2Idt8L7Wztj?i-e#lG%n_^7KKUZpl@2iEiZo&MdrO!sc<5{lj03vO!7>bOR=Fe1>m+5G#eU?D$n2Y-sh_2uQhN!J}viWk3h zn*s{kL+M)STe8$bAx}wxMDowDY!Gce^&43$`nQdtsWYsq%nI7dclw~-a7*nwY5o<} zZ)I!hAM=rTk46knahY@bo)eJ7Z4<0?FiML3>_|>Ia9ZH>DD7BDMIK9fO#q_7GC}FB zHSg%L&pJ3?9KVKs>g37UHPJZ?8tjzjQy0;DTQs3=wz9Wn{0d0dn=f9}GAkRK;}K4+7A+Gv%xf9@N?r84 zbjekMKk%|eu*95J`Lvx1Qzo9f7IST4vKgXb>Ng|cYZH*4NtLlwa(pSL+w_`Bp zE{fl!>RlbSYGPmhVRGP;^_pt7tuUp}U5D)me_;;1;e{1V9 zr-N}|PQ)y{et+u5>!gf&cp8m`EYHH z+Jo`t`ao13uYY>9K3-p6U!_}l*0^;6FbMV~9WR?VNi=&D*IqGM{;6A8A^9tN1;YVW z2k=cDWo2woSn)NKVGRXYh*=hL{;Dja3JUJ#8Wgr?O_p&DWqDvw5E!h>0v|5wWw^Rw zPzEiOl^V*%I-@Lav`|(nD7ai|P~dF!7STGeWa-h$szG6YuW1*%JY|&i6@#+cl4YZU zg7MOzw0w6|^WE{X@!jQ??~ZD|i>ug1mX`01YQBpb+yKqn(rbTgh6Tf?sCO@dhmtKSSN{RVk?3`ncr3@ZHw>3j@GtKSSN{RUZp z3`ncrAf-!g9l7Z#LPHCM?p|UhFT*Y5E^!>h9l8>n)GZjfnIpnC*#=CyTVsPRXYUT* z&;C#hAmXeTMUQM~hq_1ffZRQP0N0Hlzzw4Z4OxG14P z=qG+cVVvX%LNdt{gjbR$2$jT7DD06uK?oyxf^bCqghC6+6NCwpCs^{wPiXm`Ji$^t zd4gqi@&rrd_z5j{lP6fZCQq;oji1nxGI@gKVe$k^z2pg&ZSfOYf+bI|oJyWxX%s)9 zWlr)0OP1sbmLJIzEG6P6v@A%TV2YnS!E`-(!c*_$38vA>6HJMdCz#&GPiU%|Ji)Xx zd4efq{Dh{1$rDWNk|&s^#ZPF;l{~@pDS3jaQ1T@0)+3sv#56SHNSmqWy8Lr>sGTe=#_`99uZ@Q*-@7BHG+=Ma9g8nI-l?FC3i?6$FewjXmLMAGL>R{e z(YQdg!pYz)yX6AW8t0j`L@PQG#z4VtwLmmp6GUqTqSaMFv|b=G$?vR88#)ndPy1&AyGA-D!p!MNkeI(HeddG+?6+bL!eu!+C60M;7A!6f_tf2kja6_tpSkV6v z!@NW*SYU|owInOp02}X|W61R8pQZUsb`C0WS|2q8)IWSCW;ifH1BkD_H?AE=g9h0s^s;tYig5S|wS@3V81cGQ>-DDq)Yb6%e_CzgtU!@BNmj4|h1Mil!3q>RlVk-eP;g9=6|6wf zFG*Ig0)@8(8Ddzf6)1is$qH7W0F@*wSb-u?lB{F}%y%SN$qI-eNwSg^5G0ahB`Y8d zB*{uvz#K}Fm8^h?l_V=!0l^wUhLDVI1;4aXUcW3nDR z&p;lGtlI%`s3Gn{nO<1-OTN{0vSoh^wW6FaaM$aEyR>QU5BLQOL)ubvP^4+%!fHwO zYA6en|L*2+_J%ujp(-oUb@q$tYrV7o(l$ACdfeG9b$TND0Cxc*3sf!wV2S$CuZ~V< zS7(Rsmp(bzecJ8ref6ySLwEkho%N#6=g@tV&F<;+oDcqW+tZurtDBEM;SamxOQn7^ z`v482zP#uGym=b)EniLHgPZt((#=UniP5`XpFHU3I2``*2||k>#n(QnH?qg}e!`ml=!; + - - - - - -
-
-
- first/lastByteEn -
-
-
-
- - first/lastByteEn - -
-
- - + + @@ -101,39 +83,39 @@
- - + + -
+
- req + data
- - req + + data - - + + -
+
-
- data +
+ req
- - data + + req @@ -666,7 +648,7 @@ -
+
@@ -910,41 +892,96 @@ - - - + + + -
+
- StreamShift + StreamShiftToAlignDw
- - StreamShift + + StreamShiftToAlignDw + + + + + + + + + + +
+
+
+ first/lastByteEn +
+
+
+
+ + first/lastByteEn + +
+
+ + + + +
+
+
+ RqDescriptorGenerator +
+
+
+
+ + RqDescriptorGenerator + +
+
+ + + + + +
+
+
+ + DataStream + +
+
+
+
+ + DataStream
- - - + -
+
- Descriptor + bitOr
- - Descriptor + + bitOr diff --git a/run_one.sh b/run_one.sh index ad1eac1..f049c0d 100755 --- a/run_one.sh +++ b/run_one.sh @@ -13,7 +13,7 @@ TEST_LOG=run.log TEST_DIR=test cd $TEST_DIR truncate -s 0 $TEST_LOG -FILES=`ls TestStreamUtils.bsv` +FILES=`ls TestDmaCore.bsv` ########################################################################### for FILE in $FILES; do # echo $FILE diff --git a/src/DmaCompleter.bsv b/src/DmaCompleter.bsv index 2b1ed04..c4936dc 100644 --- a/src/DmaCompleter.bsv +++ b/src/DmaCompleter.bsv @@ -21,9 +21,6 @@ typedef 2'b00 NP_CREDIT_NOCHANGE; typedef 'h1F IDEA_CQ_TKEEP_OF_CSR; typedef 'hF IDEA_CC_TKEEP_OF_CSR; -typedef PcieAxiStream#(PCIE_COMPLETER_REQUEST_TUSER_WIDTH) CmplReqAxiStream; -typedef PcieAxiStream#(PCIE_COMPLETER_COMPLETE_TUSER_WIDTH) CmplCmplAxiStream; - typedef struct { DmaCsrAddr addr; DmaCsrValue value; diff --git a/src/DmaRequester.bsv b/src/DmaRequester.bsv index c28da87..68e7304 100644 --- a/src/DmaRequester.bsv +++ b/src/DmaRequester.bsv @@ -6,13 +6,11 @@ import StreamUtils::*; import PcieTypes::*; import PcieAxiStreamTypes::*; import PcieDescriptorTypes::*; +import ReqRequestCore::*; import DmaTypes::*; typedef TSub#(DATA_WIDTH, DES_RQ_DESCRIPTOR_WIDTH) ONE_TLP_THRESH; -typedef PcieAxiStream#(PCIE_REQUESTER_REQUEST_TUSER_WIDTH) ReqReqAxiStream; -typedef PcieAxiStream#(PCIE_REQUESTER_COMPLETE_TUSER_WIDTH) ReqCmplAxiStream; - interface DmaRequester; interface DmaCardToHostWrite c2hWrite; interface DmaCardToHostRead c2hRead; @@ -20,10 +18,17 @@ interface DmaRequester; (* prefix = "" *) interface RawPcieRequesterComplete rawRequesterComplete; endinterface -interface RequesterRequest; +typedef 2 STRADDLE_NUM; + +interface DmaRequesterRequestFifoIn; interface FifoIn#(DataStream) wrDataFifoIn; interface FifoIn#(DmaRequest) wrReqFifoIn; interface FifoIn#(DmaRequest) rdReqFifoIn; +endinterface + +interface RequesterRequest; + interface DmaRequesterRequestFifoIn reqA; + interface DmaRequesterRequestFifoIn reqB; interface FifoOut#(ReqReqAxiStream) axiStreamFifoOut; interface Put#(Bool) postedEn; interface Put#(Bool) nonPostedEn; @@ -37,53 +42,63 @@ interface RequesterComplete; endinterface module mkRequesterRequest(RequesterRequest); - FIFOF#(DataStream) wrDataInFifo <- mkFIFOF; - FIFOF#(DmaRequest) wrReqInFifo <- mkFIFOF; - FIFOF#(DmaRequest) rdReqInFifo <- mkFIFOF; + RequesterRequestCore rqACore <- mkRequesterRequestCore; + RequesterRequestCore rqBCore <- mkRequesterRequestCore; FIFOF#(ReqReqAxiStream) axiStreamOutFifo <- mkFIFOF; - + Reg#(DmaMemAddr) inflightRemainBytesReg <- mkReg(0); Reg#(Bool) isInWritingReg <- mkReg(False); - Wire#(Bool) postedEnWire <- mkDWire(False); + Wire#(Bool) postedEnWire <- mkDWire(True); Wire#(Bool) nonPostedEnWire <- mkDWire(True); - ChunkSplit chunkSplit <- mkChunkSplit; - - StreamShift shift0 <- mkStreamShift(valueOf(TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH))); - StreamShift shift1 <- mkStreamShift(valueOf(TAdd#(1, TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH)))); - StreamShift shift2 <- mkStreamShift(valueOf(TADD#(2, TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH)))); - StreamShift shift3 <- mkStreamShift(valueOf(TAdd#(3, TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH)))); - - // Pipeline stage 1: split the whole write request to chunks, latency = 3 - rule recvWriting if (postedEnWire); - if (wrReqInFifo.notEmpty && chunkSplit.dataFifoIn.notFull) begin - wrReqInFifo.deq; - chunkSplit.reqFifoIn.enq(wrReqInFifo.first); + ConvertDataStreamsToStraddleAxis straddleAxis <- mkConvertDataStreamsToStraddleAxis; + + // Pipeline stage 1: split to chunks, align to DWord and add descriptor at the first + // See RequesterRequestCore + + // Pipeline stage 2: put 2 core output datastream to straddleAxis and generate ReqReqAxiStream + rule coreToStraddle; + if (rqACore.dataFifoOut.notEmpty) begin + let stream = rqACore.dataFifoOut.first; + if (stream.isFirst && rqACore.byteEnFifoOut.notEmpty) begin + let sideBandByteEn = rqACore.byteEnFifoOut.first; + straddleAxis.byteEnAFifoIn.enq(sideBandByteEn); + rqACore.dataFifoOut.deq; + straddleAxis.dataAFifoIn.enq(stream); + end + else begin + rqACore.dataFifoOut.deq; + straddleAxis.dataAFifoIn.enq(stream); + end end - if (wrDataInFifo.notEmpty && chunkSplit.reqFifoIn.notFull) begin - wrDataInFifo.deq; - chunkSplit.dataFifoIn.enq(wrDataInFifo.first); + if (rqBCore.dataFifoOut.notEmpty) begin + let stream = rqBCore.dataFifoOut.first; + if (stream.isFirst && rqBCore.byteEnFifoOut.notEmpty) begin + let sideBandByteEn = rqBCore.byteEnFifoOut.first; + straddleAxis.byteEnBFifoIn.enq(sideBandByteEn); + rqBCore.dataFifoOut.deq; + straddleAxis.dataBFifoIn.enq(stream); + end + else begin + rqBCore.dataFifoOut.deq; + straddleAxis.dataBFifoIn.enq(stream); + end end endrule - // Pipeline stage 2: generate the RQ descriptor, which may be with 0~3 Byte invalid data for DW alignment, latency = 2 - rule addDescriptor; - if (chunkSplit.chunkReqFifoOut.notEmpty) begin - let chunkReq = chunkSplit.chunkReqFifoOut.first; - chunkSplit.chunkReqFifoOut.deq; - rqDescGenarator.reqFifoIn.enq(chunkReq); - end - if (chunkSplit.chunkDataFifoOut.notEmpty) begin - let chunkDataStream = chunkSplit.chunkDataFifoOut.first; - chunkSplit.chunkDataFifoOut.deq; - descriptorConcat.inputStreamSecondFifoIn.enq(chunkDataStream); - end - endrule + interface DmaRequesterRequestFifoIn reqA; + interface wrDataFifoIn = rqACore.dataFifoIn; + interface wrReqFifoIn = rqACore.wrReqFifoIn; + interface rdReqFifoIn = rqACore.rdReqFifoIn; + endinterface + + interface DmaRequesterRequestFifoIn reqB; + interface wrDataFifoIn = rqBCore.dataFifoIn; + interface wrReqFifoIn = rqBCore.wrReqFifoIn; + interface rdReqFifoIn = rqBCore.rdReqFifoIn; + endinterface - interface wrDataFifoIn = convertFifoToFifoIn(wrDataInFifo); - interface wrReqFifoIn = convertFifoToFifoIn(wrReqInFifo); - interface rdReqFifoIn = convertFifoToFifoIn(rdReqInFifo); - interface axiStreamFifoOut = convertFifoToFifoOut(axiStreamOutFifo); + interface axiStreamFifoOut = straddleAxis.axiStreamFifoOut; interface Put postedEn; method Action put(Bool postedEnable); diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index fdaa399..7e3f574 100755 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -47,6 +47,12 @@ typedef struct { DmaMemAddr length; } DmaRequest deriving(Bits, Bounded, Eq); +typedef struct { + DmaMemAddr startAddr; + DmaMemAddr endAddr; + DmaMemAddr length; +} DmaExtendRequest deriving(Bits, Bounded, Eq); + typedef enum { DMA_RX, DMA_TX @@ -59,6 +65,11 @@ typedef struct { Bool isLast; } DataStream deriving(Bits, Bounded, Eq); +typedef Tuple2#( + DWordByteEn, + DWordByteEn +) SideBandByteEn; + instance FShow#(DmaRequest); function Fmt fshow(DmaRequest request); return ($format("> valueOf(BYTE_DWORD_SHIFT_WIDTH)) - (startAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH))); - dwCnt = (endOffset == 0) ? dwCnt : dwCnt + 1; - dwCnt = (length == 0) ? 1 : dwCnt; - DataBytePtr bytePtr = fromInteger(valueOf(TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH))); - let descriptor = PcieRequesterRequestDescriptor { - forceECRC : False, - attributes : 0, - trafficClass : 0, - requesterIdEn : False, - completerId : 0, - tag : 0, - requesterId : 0, - isPoisoned : False, - reqType : isWrite ? fromInteger(valueOf(MEM_WRITE_REQ)) : fromInteger(valueOf(MEM_READ_REQ)), - dwordCnt : dwCnt, - address : truncate(startAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)), - addrType : fromInteger(valueOf(TRANSLATED_ADDR)) +// Generate RequesterRequest descriptor +interface RqDescriptorGenerator; + interface FifoIn#(DmaExtendRequest) exReqFifoIn; + interface FifoOut#(DataStream) descFifoOut; +endinterface + +module mkRqDescriptorGenerator#(Bool isWrite)(RqDescriptorGenerator); + FIFOF#(DmaExtendRequest) exReqInFifo <- mkFIFOF; + FIFOF#(DataStream) descOutFifo <- mkFIFOF; + + rule genRqDesc; + let exReq = exReqInFifo.first; + exReqInFifo.deq; + let endOffset = byteModDWord(exReq.endAddr); + DwordCount dwCnt = truncate((exReq.endAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)) - (exReq.startAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH))); + dwCnt = (endOffset == 0) ? dwCnt : dwCnt + 1; + dwCnt = (exReq.length == 0) ? 1 : dwCnt; + DataBytePtr bytePtr = fromInteger(valueOf(TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH))); + let descriptor = PcieRequesterRequestDescriptor { + forceECRC : False, + attributes : 0, + trafficClass : 0, + requesterIdEn : False, + completerId : 0, + tag : 0, + requesterId : 0, + isPoisoned : False, + reqType : isWrite ? fromInteger(valueOf(MEM_WRITE_REQ)) : fromInteger(valueOf(MEM_READ_REQ)), + dwordCnt : dwCnt, + address : truncate(exReq.startAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)), + addrType : fromInteger(valueOf(TRANSLATED_ADDR)) + }; + let stream = DataStream { + data : zeroExtend(pack(descriptor)), + byteEn : convertBytePtr2ByteEn(bytePtr), + isFirst : True, + isLast : True }; - let stream = DataStream { - data : zeroExtend(pack(descriptor)), - byteEn : convertBytePtr2ByteEn(bytePtr), - isFirst : True, - isLast : True - }; -endfunction + descOutFifo.enq(stream); + endrule + + interface exReqFifoIn = convertFifoToFifoIn(exReqInFifo); + interface descFifoOut = convertFifoToFifoOut(descOutFifo); +endmodule + +// Core path of a single stream, from (DataStream, DmaRequest) ==> (DataStream, SideBandByteEn) +// split to chunks, align to DWord and add descriptor at the first +interface RequesterRequestCore; + interface FifoIn#(DataStream) dataFifoIn; + interface FifoIn#(DmaRequest) wrReqFifoIn; + interface FifoIn#(DmaRequest) rdReqFifoIn; + interface FifoOut#(DataStream) dataFifoOut; + interface FifoOut#(SideBandByteEn) byteEnFifoOut; + interface Get#(Bool) isWriteDone; +endinterface + +module mkRequesterRequestCore(RequesterRequestCore); + FIFOF#(DataStream) dataInFifo <- mkFIFOF; + FIFOF#(DmaRequest) wrReqInFifo <- mkFIFOF; + FIFOF#(DmaRequest) rdReqInFifo <- mkFIFOF; + FIFOF#(DataStream) dataOutFifo <- mkFIFOF; + FIFOF#(SideBandByteEn) byteEnOutFifo <- mkFIFOF; + + ChunkSplit chunkSplit <- mkChunkSplit(DMA_TX); + StreamShiftAlignToDw streamAlign <- mkStreamShiftAlignToDw(fromInteger(valueOf(TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH)))); + RqDescriptorGenerator rqDescGenerator <- mkRqDescriptorGenerator(True); + + // Pipeline stage 1: split the whole write request to chunks, latency = 3 + rule splitToChunks; + let wrStream = dataInFifo.first; + if (wrStream.isFirst && wrReqInFifo.notEmpty) begin + wrReqInFifo.deq; + chunkSplit.reqFifoIn.enq(wrReqInFifo.first); + dataInFifo.deq; + chunkSplit.dataFifoIn.enq(wrStream); + end + else if (!wrStream.isFirst) begin + dataInFifo.deq; + chunkSplit.dataFifoIn.enq(wrStream); + end + endrule + + // Pipeline stage 2: shift the datastream for descriptor adding and dw alignment + rule shiftToAlignment; + if (chunkSplit.chunkReqFifoOut.notEmpty) begin + let chunkReq = chunkSplit.chunkReqFifoOut.first; + chunkSplit.chunkReqFifoOut.deq; + let endAddr = chunkReq.startAddr + chunkReq.length; + let exReq = DmaExtendRequest { + startAddr: chunkReq.startAddr, + endAddr : endAddr, + length : chunkReq.length + }; + streamAlign.reqFifoIn.enq(exReq); + rqDescGenerator.exReqFifoIn.enq(exReq); + end + if (chunkSplit.chunkDataFifoOut.notEmpty) begin + let chunkDataStream = chunkSplit.chunkDataFifoOut.first; + chunkSplit.chunkDataFifoOut.deq; + streamAlign.dataFifoIn.enq(chunkDataStream); + end + endrule + // Pipeline stage 3: Add descriptor and add to the axis convert module + rule addDescriptorToAxis; + if (streamAlign.byteEnFifoOut.notEmpty) begin + let sideBandByteEn = streamAlign.byteEnFifoOut.first; + streamAlign.byteEnFifoOut.deq; + byteEnOutFifo.enq(sideBandByteEn); + end + if (streamAlign.dataFifoOut.notEmpty) begin + let stream = streamAlign.dataFifoOut.first; + streamAlign.dataFifoOut.deq; + if (stream.isFirst) begin + let descStream = rqDescGenerator.descFifoOut.first; + rqDescGenerator.descFifoOut.deq; + stream.data = stream.data | descStream.data; + stream.byteEn = stream.byteEn | descStream.byteEn; + end + dataOutFifo.enq(stream); + end + endrule + interface dataFifoIn = convertFifoToFifoIn(dataInFifo); + interface wrReqFifoIn = convertFifoToFifoIn(wrReqInFifo); + interface rdReqFifoIn = convertFifoToFifoIn(rdReqInFifo); + interface dataFifoOut = convertFifoToFifoOut(dataOutFifo); + interface byteEnFifoOut = convertFifoToFifoOut(byteEnOutFifo); + + // TODO: how to give the + interface Get isWriteDone; + method ActionValue#(Bool) get(); + return True; + endmethod + endinterface +endmodule diff --git a/src/StreamUtils.bsv b/src/StreamUtils.bsv index a9d3621..f08896a 100755 --- a/src/StreamUtils.bsv +++ b/src/StreamUtils.bsv @@ -5,6 +5,7 @@ import SemiFifo::*; import PrimUtils::*; import DmaTypes::*; +import PcieAxiStreamTypes::*; typedef 32 STREAM_SIZE_WIDTH; typedef UInt#(STREAM_SIZE_WIDTH) StreamSize; @@ -272,7 +273,7 @@ module mkStreamSplit(StreamSplit ifc); DataBytePtr offsetBytePtr = 0; let curLocation = unpack(zeroExtend(bytePtr)) + streamByteCntReg; if (!isSplittedReg && curLocation >= splitLocation) begin - offsetBytePtr = truncate(pack(splitLocation - streamByteCntReg)); + offsetBytePtr = truncate(pack(splitLocation - curLocation)); end splitPtrFifo.enq(offsetBytePtr); if (offsetBytePtr > 0 && !stream.isLast) begin @@ -302,6 +303,8 @@ module mkStreamSplit(StreamSplit ifc); // split location not in this beat, do nothing if (!hasRemainReg && offsetBytePtr == 0) begin outputFifo.enq(streamWp.stream); + hasRemainReg <= False; + hasLastRemainReg <= False; end // split the frame in this cycle to a isLast=True frame and a remain frame else if (!hasRemainReg && offsetBytePtr > 0) begin @@ -467,23 +470,61 @@ module mkStreamShiftComplex#(DataBytePtr offset)(StreamShiftComplex); interface streamFifoOut = convertFifoToFifoOut(outFifo); endmodule -interface StreamAlignToDw; - interface FifoIn#(DataStream) dataFifoIn; - interface FifoIn#(DmaRequest) reqFifoIn; - interface FifoOut#(DataStream) dataFifoOut; - interface FifoOut#(SideBandByteEn) byteEnFifoOut; +interface StreamShiftAlignToDw; + interface FifoIn#(DataStream) dataFifoIn; + interface FifoIn#(DmaExtendRequest) reqFifoIn; + interface FifoOut#(DataStream) dataFifoOut; + interface FifoOut#(SideBandByteEn) byteEnFifoOut; endinterface -module mkStreamAlignToDw(StreamAlignToDw); - FIFOF#(DataStream) dataInFifo <- mkFIFOF; - FIFOF#(DataStream) reqInFifo <- mkFIFOF; - FIFOF#(DataStream) dataOutFifo <- mkFIFOF; - FIFOF#(SideBandByteEn) byteEnOutFifo <- mkFIFOF; +typedef 2 STREAM_ALIGN_DW_LATENCY; +module mkStreamShiftAlignToDw#(DataBytePtr offset)(StreamShiftAlignToDw); + FIFOF#(DataStream) dataInFifo <- mkFIFOF; + FIFOF#(DmaExtendRequest) reqInFifo <- mkFIFOF; + FIFOF#(DataStream) dataOutFifo <- mkFIFOF; + FIFOF#(SideBandByteEn) byteEnOutFifo <- mkFIFOF; + FIFOF#(DataBytePtr) shiftSetFifo <- mkSizedFIFOF(valueOf(TMul#(2, STREAM_SHIFT_LATENCY))); + + Vector#(DWORD_BYTES, StreamShift) shifts = newVector; + for (DataBytePtr idx = 0; idx < fromInteger(valueOf(DWORD_BYTES)); idx = idx + 1 ) begin + shifts[idx] <- mkStreamShift(offset + idx); + end + + rule getOffset; + let exReq = reqInFifo.first; + reqInFifo.deq; + ByteModDWord startAddrOffset = byteModDWord(exReq.startAddr); + shiftSetFifo.enq(zeroExtend(startAddrOffset)); + ByteModDWord endAddrOffset = byteModDWord(exReq.endAddr); + let firstByteEn = convertDWordOffset2FirstByteEn(startAddrOffset); + let lastByteEn = convertDWordOffset2LastByteEn(endAddrOffset); + byteEnOutFifo.enq(tuple2(firstByteEn, lastByteEn)); + let stream = dataInFifo.first; + dataInFifo.deq; + for (DataBytePtr idx = 0; idx < fromInteger(valueOf(DWORD_BYTES)); idx = idx + 1 ) begin + shifts[idx].streamFifoIn.enq(stream); + end + endrule + + rule getShiftData; + DataStream stream = getEmptyStream; + let offset = shiftSetFifo.first; + for (DataBytePtr idx = 0; idx < fromInteger(valueOf(DWORD_BYTES)); idx = idx + 1 ) begin + shifts[idx].streamFifoOut.deq; + if (idx == offset) begin + stream = shifts[idx].streamFifoOut.first; + end + end + if (stream.isLast) begin + shiftSetFifo.deq; + end + dataOutFifo.enq(stream); + endrule interface dataFifoIn = convertFifoToFifoIn(dataInFifo); interface reqFifoIn = convertFifoToFifoIn(reqInFifo); - interface dataOutFifo = convertFifoToFifoOut(dataOutFifo); - interface byteEnOutFifo = convertFifoToFifoOut(byteEnOutFifo); + interface dataFifoOut = convertFifoToFifoOut(dataOutFifo); + interface byteEnFifoOut = convertFifoToFifoOut(byteEnOutFifo); endmodule \ No newline at end of file diff --git a/test/TestDmaCore.bsv b/test/TestDmaCore.bsv index 1f13a0c..d70fc32 100755 --- a/test/TestDmaCore.bsv +++ b/test/TestDmaCore.bsv @@ -2,9 +2,15 @@ import GetPut::*; import Randomizable::*; import SemiFifo::*; -import PrimUtils::*; +import PcieAxiStreamTypes::*; import DmaTypes::*; -import DmaRequestCore::*; +import PrimUtils::*; +import PcieTypes::*; +import PcieDescriptorTypes::*; +import StreamUtils::*; +import ReqRequestCore::*; +import DmaRequester::*; +import TestStreamUtils::*; typedef 100000 CHUNK_PER_EPOCH_TEST_NUM; typedef 64'hFFFFFFFFFFFFFFFF MAX_ADDRESS; @@ -91,27 +97,76 @@ module mkChunkComputerTb(Empty); endmodule -typedef 'hABCD311 SIMPLE_TEST_ADDR ; -typedef 'h1111 SIMPLE_TEST_LEN ; - -module mkSimpleTestAlignedRqDescGen(Empty); - AlignedDescGen dut <- mkAlignedRqDescGen(True); - - Reg#(Bool) isInitReg <- mkReg(False); - - rule testInit if (!isInitReg); - isInitReg <= True; - dut.reqFifoIn.enq(DmaRequest { - startAddr: fromInteger(valueOf(SIMPLE_TEST_ADDR)), - length : fromInteger(valueOf(SIMPLE_TEST_LEN)) - }); +typedef 60 SIMPLE_TEST_BYTELEN; +typedef 'hABCDEF SIMPLE_TEST_ADDR; + +module mkSimpleRequesterRequestCoreTb(Empty); + RequesterRequestCore dut <- mkRequesterRequestCore; + Reg#(UInt#(32)) testCntReg <- mkReg(0); + + rule testInput if (testCntReg < 1); + let req = DmaRequest { + startAddr : fromInteger(valueOf(SIMPLE_TEST_ADDR)), + length : fromInteger(valueOf(SIMPLE_TEST_BYTELEN)) + }; + dut.wrReqFifoIn.enq(req); + let stream = generatePsuedoStream(fromInteger(valueOf(SIMPLE_TEST_BYTELEN)), True, True); + dut.dataFifoIn.enq(stream); + testCntReg <= testCntReg + 1; endrule - rule testOutput if (isInitReg); + rule testOutput; let stream = dut.dataFifoOut.first; dut.dataFifoOut.deq; $display(fshow(stream)); - $finish(); + if (stream.isFirst) begin + let {firstByteEn, lastByteEn} = dut.byteEnFifoOut.first; + dut.byteEnFifoOut.deq; + $display("firstByteEn:%b, lastByteEn:%b", firstByteEn, lastByteEn); + PcieRequesterRequestDescriptor desc = unpack(truncate(stream.data)); + $display("Descriptor Elements: dwordCnt:%d, address:%h", desc.dwordCnt, desc.address << 2); + end + if (stream.isLast) begin + $finish(); + end + endrule +endmodule + +module mkSimpleRequesterRequestTb(Empty); + RequesterRequest dut <- mkRequesterRequest; + Reg#(UInt#(32)) testCntReg <- mkReg(0); + Reg#(UInt#(32)) tlpNumReg <- mkReg(2); + + rule testInput if (testCntReg < 1); + let req = DmaRequest { + startAddr : fromInteger(valueOf(SIMPLE_TEST_ADDR)), + length : fromInteger(valueOf(SIMPLE_TEST_BYTELEN)) + }; + dut.reqA.wrReqFifoIn.enq(req); + dut.reqB.wrReqFifoIn.enq(req); + let stream = generatePsuedoStream(fromInteger(valueOf(SIMPLE_TEST_BYTELEN)), True, True); + dut.reqA.wrDataFifoIn.enq(stream); + dut.reqB.wrDataFifoIn.enq(stream); + testCntReg <= testCntReg + 1; endrule -endmodule \ No newline at end of file + rule testOutput; + let stream = dut.axiStreamFifoOut.first; + dut.axiStreamFifoOut.deq; + $display("data: %h", stream.tData); + PcieRequesterRequestSideBandFrame sideBand = unpack(stream.tUser); + $display("isSop : ", sideBand.isSop.isSop, ", isEop : ", sideBand.isEop.isEop); + let tlpNum = tlpNumReg; + if (sideBand.isEop.isEop == fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT))) begin + tlpNum = tlpNum - 1; + end + else if (sideBand.isEop.isEop == fromInteger(valueOf(DOUBLE_TLP_IN_THIS_BEAT))) begin + tlpNum = tlpNum - 2; + end + if (tlpNum == 0) begin + $finish(); + end + tlpNumReg <= tlpNum; + endrule + +endmodule From 7cd136bc5cc2d2bdf28e8664bdd84c29c7f2f579 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Wed, 14 Aug 2024 18:43:12 +0800 Subject: [PATCH 34/53] Reorganize DmaC2HPipe and PCIe adapter interfaces Reorganize DmaC2HPipe and PCIe adapter interfaces --- backend/Makefile | 4 +- img/dmac.drawio.svg | 1130 ++++++++++++++ img/requesterCmpl.drawio.svg | 2744 ++++++++++++++++++++++++++++++++++ run_one.sh | 2 +- src/CompletionFifo.bsv | 118 ++ src/DmaC2HPipe.bsv | 70 + src/DmaCompleter.bsv | 279 ---- src/DmaController.bsv | 65 - src/DmaH2CPipe.bsv | 165 ++ src/DmaRequester.bsv | 179 --- src/DmaTypes.bsv | 68 +- src/DmaWrapper.bsv | 89 ++ src/PcieAdapter.bsv | 486 ++++++ src/PcieDescriptorTypes.bsv | 9 +- src/PcieTypes.bsv | 8 + src/PrimUtils.bsv | 12 + src/ReqCompleterCore.bsv | 184 +++ src/ReqRequestCore.bsv | 241 --- src/StreamUtils.bsv | 256 ++-- test/TestCompletionFifo.bsv | 162 ++ test/TestDmaCore.bsv | 81 + test/TestStreamUtils.bsv | 106 +- 22 files changed, 5440 insertions(+), 1018 deletions(-) create mode 100644 img/dmac.drawio.svg create mode 100644 img/requesterCmpl.drawio.svg create mode 100644 src/CompletionFifo.bsv create mode 100644 src/DmaC2HPipe.bsv delete mode 100644 src/DmaCompleter.bsv delete mode 100755 src/DmaController.bsv create mode 100644 src/DmaH2CPipe.bsv delete mode 100644 src/DmaRequester.bsv create mode 100755 src/DmaWrapper.bsv create mode 100644 src/PcieAdapter.bsv create mode 100644 src/ReqCompleterCore.bsv create mode 100644 test/TestCompletionFifo.bsv diff --git a/backend/Makefile b/backend/Makefile index b629e29..0c3c559 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -8,8 +8,8 @@ OUTPUTDIR ?= output LOGFILE ?= run.log RUNTOPHASE ?= place # synth place route all PARTNAME = xcvu13p-fhgb2104-2-i -TARGETFILE ?= ../test/TestDmacVivado.bsv -TOPMODULE ?= mkTestDmacCsrWrRdLoop +TARGETFILE ?= ../test/TestCompletionFifo.bsv +TOPMODULE ?= mkCompletionFifoInst export TOP = $(TOPMODULE) export RTL = $(VLOGDIR) diff --git a/img/dmac.drawio.svg b/img/dmac.drawio.svg new file mode 100644 index 0000000..4b7cd50 --- /dev/null +++ b/img/dmac.drawio.svg @@ -0,0 +1,1130 @@ + + + + + + + +
+
+
+ wrData +
+
+
+
+ + wrData + +
+
+ + + + +
+
+
+ wrReq +
+
+
+
+ + wrReq + +
+
+ + + + +
+
+
+ + ReqRequstCore + +
+
+
+
+ + ReqRequstCore + +
+
+ + + + +
+
+
+ DataStream +
+
+
+
+ + DataStream + +
+
+ + + + +
+
+
+ rdData +
+
+
+
+ + rdData + +
+
+ + + + +
+
+
+ rdReq +
+
+
+
+ + rdReq + +
+
+ + + + +
+
+
+ + ReqCompleteCore + +
+
+
+
+ + ReqCompleteCore + +
+
+ + + + +
+
+
+ StraddleStream +
+
+
+
+ + StraddleStream + +
+
+ + + + +
+
+
+ DataStream +
+
+
+
+ + DataStream + +
+
+ + + + +
+
+
+ + TX + +
+ convertDataStreamToAxis +
+
+
+
+ + TX... + +
+
+ + + + +
+
+
+ DataStream +
+
+
+
+ + DataStream + +
+
+ + + + +
+
+
+ DataStream +
+
+
+
+ + DataStream + +
+
+ + + + +
+
+
+ + RX + +
+ convertAxisToDataStream +
+
+
+
+ + RX... + +
+
+ + + + + + +
+
+
+ StraddleStream +
+
+
+
+ + StraddleStream + +
+
+ + + + + + +
+
+
+ StraddleStream +
+
+
+
+ + StraddleStream + +
+
+ + + + +
+
+
+ wrData +
+
+
+
+ + wrData + +
+
+ + + + +
+
+
+ wrReq +
+
+
+
+ + wrReq + +
+
+ + + + +
+
+
+ + ReqRequstCore + +
+
+
+
+ + ReqRequstCore + +
+
+ + + + +
+
+
+ DataStream +
+
+
+
+ + DataStream + +
+
+ + + + +
+
+
+ rdData +
+
+
+
+ + rdData + +
+
+ + + + +
+
+
+ rdReq +
+
+
+
+ + rdReq + +
+
+ + + + +
+
+
+ + ReqCompleteCore + +
+
+
+
+ + ReqCompleteCore + +
+
+ + + + +
+
+
+ StraddleStream +
+
+
+
+ + StraddleStream + +
+
+ + + + +
+
+
+ DataStream +
+
+
+
+ + DataStream + +
+
+ + + + +
+
+
+ PATH0 +
+
+
+
+ + PATH0 + +
+
+ + + + +
+
+
+ PATH1 +
+
+
+
+ + PATH1 + +
+
+ + + + + + + + + + +
+
+
+ AxiStreamMaster +
+
+
+
+ + AxiStreamMaster + +
+
+ + + + +
+
+
+ AxiStreamSlave +
+
+
+
+ + AxiStreamSlave + +
+
+ + + + +
+
+
+ RqDescriptor +
+
+
+
+ + RqDescriptor + +
+
+ + + + +
+
+
+ Payload +
+
+
+
+ + Payload + +
+
+ + + + +
+
+
+ RcDescriptor +
+
+
+
+ + RcDescriptor + +
+
+ + + + +
+
+
+ Payload +
+
+
+
+ + Payload + +
+
+ + + + +
+
+
+ User Interface +
+ All are pipelined +
+
+
+
+ + User Interface... + +
+
+ + + + +
+
+
+ Raw Xilinx Pcie IP Interface +
+
+
+
+ + Raw Xilinx Pcie... + +
+
+ + + + +
+
+
+ wrData +
+
+
+
+ + wrData + +
+
+ + + + +
+
+
+ wrReq +
+
+
+
+ + wrReq + +
+
+ + + + +
+
+
+ + ChunkSplit + +
+
+
+
+ + ChunkSplit + +
+
+ + + + +
+
+
+ chunkData +
+
+
+
+ + chunkData + +
+
+ + + + +
+
+
+ chunkReq +
+
+
+
+ + chunkReq + +
+
+ + + + +
+
+
+ + Add Descriptor +
+ Align To DWord +
+
+
+
+
+
+ + Add Descriptor... + +
+
+ + + + +
+
+
+ Data +
+
+
+
+ + Data + +
+
+ + + + +
+
+
+ sideBand +
+
+
+
+ + sideBand + +
+
+ + + + + + + +
+
+
+ rdReq +
+
+
+
+ + rdReq + +
+
+ + + + +
+
+
+ + ChunkSplit + +
+
+
+
+ + ChunkSplit + +
+
+ + + + +
+
+
+ chunkReq +
+
+
+
+ + chunkReq + +
+
+ + + + +
+
+
+ + Add Descriptor +
+
+
+
+
+
+ + Add Descriptor + +
+
+ + + + +
+
+
+ Data +
+
+
+
+ + Data + +
+
+ + + + + +
+
+
+ Straddle +
+
+
+
+ + Straddle + +
+
+ + + + +
+
+
+ + Reserve Tag +
+
+
+
+
+
+ + Reserve Tag + +
+
+ + + + +
+
+
+ + remove header +
+
+
+
+
+
+ + remove header + +
+
+ + + + +
+
+
+ + Reorder +
+
+
+
+
+
+ + Reorder + +
+
+ + + + +
+
+
+ chunkData +
+
+
+
+ + chunkData + +
+
+ + + + +
+
+
+ + ChunkReshape + +
+
+
+
+ + ChunkReshape + +
+
+ + + + +
+
+
+ rdData +
+
+
+
+ + rdData + +
+
+ + + + + + +
+
+
+ reqFifo +
+
+
+
+ + reqFifo + +
+
+ + + + +
+
+
+ + MPS + +
+
+
+
+ + MPS + +
+
+ + + + +
+
+
+ + MRRS + +
+
+
+
+ + MRRS + +
+
+ + + + +
+
+
+ + RCB + +
+
+
+
+ + RCB + +
+
+ + + + +
+
+
+ + + TX + + +
+ DataPath0 +
+
+
+
+ + TX... + +
+
+ + + + +
+
+
+ + + RX + + +
+ DataPath0 +
+
+
+
+ + RX... + +
+
+ + + + + + +
+
+
+ PathNum = 2 +
+ StraddleNum = 2 +
+
+
+
+ + PathNum = 2... + +
+
+ +
+ + + + + Text is not SVG - cannot display + + + +
\ No newline at end of file diff --git a/img/requesterCmpl.drawio.svg b/img/requesterCmpl.drawio.svg new file mode 100644 index 0000000..2b1c844 --- /dev/null +++ b/img/requesterCmpl.drawio.svg @@ -0,0 +1,2744 @@ + + + + + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + +
+
+
+ A3 +
+
+
+
+ + A3 + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + + + +
+
+
+ request +
+
+
+
+ + request + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ ideal response +
+
+
+
+ + ideal response + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ A3(2) +
+
+
+
+ + A3(2) + +
+
+ + + + +
+
+
+ real RC AXIS +
+
+
+
+ + real RC AXIS + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + +
+
+
+ A3(2) +
+
+
+
+ + A3(2) + +
+
+ + + + +
+
+
+ LSB +
+
+
+
+ + LSB + +
+
+ + + + + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + + + +
+
+
+ A3(2) +
+
+
+
+ + A3(2) + +
+
+ + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + + + + + + + + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ PathA +
+
+
+
+ + PathA + +
+
+ + + + +
+
+
+ PathB +
+
+
+
+ + PathB + +
+
+ + + + +
+
+
+ PathA +
+
+
+
+ + PathA + +
+
+ + + + +
+
+
+ PathB +
+
+
+
+ + PathB + +
+
+ + + + +
+
+
+ firstBeat +
+
+
+
+ + firstBeat + +
+
+ + + + +
+
+
+ firstBeat +
+
+
+
+ + firstBeat + +
+
+ + + + +
+
+
+ PathA +
+
+
+
+ + PathA + +
+
+ + + + +
+
+
+ PathB +
+
+
+
+ + PathB + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + + + +
+
+
+ A3 +
+
+
+
+ + A3 + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + + + + +
+
+
+ FIFO0 +
+
+
+
+ + FIFO0 + +
+
+ + + + +
+
+
+ FIFO1 +
+
+
+
+ + FIFO1 + +
+
+ + + + +
+
+
+ FIFO0 +
+
+
+
+ + FIFO0 + +
+
+ + + + +
+
+
+ FIFO1 +
+
+
+
+ + FIFO1 + +
+
+ + + + +
+
+
+ Completion Buffer +
+
+
+
+ + Completion Buffer + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + +
+
+
+ A3 +
+
+
+
+ + A3 + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + + + +
+
+
+ request +
+
+
+
+ + request + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ ideal response +
+
+
+
+ + ideal response + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ A3(2) +
+
+
+
+ + A3(2) + +
+
+ + + + +
+
+
+ real RC AXIS +
+
+
+
+ + real RC AXIS + +
+
+ + + + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + +
+
+
+ A3(2) +
+
+
+
+ + A3(2) + +
+
+ + + + +
+
+
+ LSB +
+
+
+
+ + LSB + +
+
+ + + + + + +
+
+
+ PathA +
+
+
+
+ + PathA + +
+
+ + + + +
+
+
+ PathB +
+
+
+
+ + PathB + +
+
+ + + + +
+
+
+ PathA +
+
+
+
+ + PathA + +
+
+ + + + +
+
+
+ PathB +
+
+
+
+ + PathB + +
+
+ + + + +
+
+
+ firstBeat +
+
+
+
+ + firstBeat + +
+
+ + + + +
+
+
+ firstBeat +
+
+
+
+ + firstBeat + +
+
+ + + + +
+
+
+ + PathA + +
+
+
+
+ + PathA + +
+
+ + + + +
+
+
+ PathB +
+
+
+
+ + PathB + +
+
+ + + + +
+
+
+ FIFO0 +
+
+
+
+ + FIFO0 + +
+
+ + + + +
+
+
+ FIFO1 +
+
+
+
+ + FIFO1 + +
+
+ + + + + + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + +
+
+
+ FIFO0 +
+
+
+
+ + FIFO0 + +
+
+ + + + + + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ FIFO1 +
+
+
+
+ + FIFO1 + +
+
+ + + + + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + +
+
+
+ FIFO... +
+
+
+
+ + FIFO... + +
+
+ + + + + + + + + +
+
+
+ FIFO31 +
+
+
+
+ + FIFO31 + +
+
+ + + + + + +
+
+
+ Reg#(SlotNum) nextSlotPtrReg; +
+
+
+
+ + Reg#(SlotNum) nextSlotPtrReg; + +
+
+ + + + +
+
+
+ CompletionBuffer.put(tuple2(tag, nextSlotPtrReg) +
+
+
+
+ + CompletionBuffer.put(tuple2(tag, nextSlotPtrR... + +
+
+ + + + +
+
+
+ slotNum = CompletionBuffer.drain +
+ Fifos[slotNum].deq; +
+
+
+
+ + slotNum = CompletionBuffer.drain... + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + +
+
+
+ A3 +
+
+
+
+ + A3 + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + + + +
+
+
+ request +
+
+
+
+ + request + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ ideal response +
+
+
+
+ + ideal response + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ A3(2) +
+
+
+
+ + A3(2) + +
+
+ + + + +
+
+
+ real RC AXIS +
+
+
+
+ + real RC AXIS + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + +
+
+
+ A3(2) +
+
+
+
+ + A3(2) + +
+
+ + + + +
+
+
+ LSB +
+
+
+
+ + LSB + +
+
+ + + + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + + + +
+
+
+ A3(2) +
+
+
+
+ + A3(2) + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + + + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + + + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ PathA +
+
+
+
+ + PathA + +
+
+ + + + +
+
+
+ PathB +
+
+
+
+ + PathB + +
+
+ + + + +
+
+
+ PathA +
+
+
+
+ + PathA + +
+
+ + + + +
+
+
+ PathB +
+
+
+
+ + PathB + +
+
+ + + + +
+
+
+ firstBeat +
+
+
+
+ + firstBeat + +
+
+ + + + +
+
+
+ firstBeat +
+
+
+
+ + firstBeat + +
+
+ + + + +
+
+
+ PathA +
+
+
+
+ + PathA + +
+
+ + + + +
+
+
+ PathB +
+
+
+
+ + PathB + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + + + +
+
+
+ A3 +
+
+
+
+ + A3 + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + + +
+
+
+ request complete +
+
+
+
+ + request complete + +
+
+ + + + + +
+
+
+ Stream +
+
+
+
+ + Stream + +
+
+ + + + +
+
+
+ SLOTNUM : MRRS BRAM +
+
+
+
+ + SLOTNUM : MRRS BRAM + +
+
+ + + + +
+
+
+ 2: Data +
+
+
+
+ + 2: Data + +
+
+ + + + +
+
+
+ 3: +
+
+
+
+ + 3: + +
+
+ + + + +
+
+
+ 4: +
+
+
+
+ + 4: + +
+
+ + + + +
+
+
+ Completion Buffer +
+
+
+
+ + Completion Buffer + +
+
+ + + + + + +
+
+
+ PcieAxiStream +
+
+
+
+ + PcieAxiStream + +
+
+ + + + +
+
+
+ StreamShiftRight +
+
+
+
+ + StreamShiftRight + +
+
+ + + + + + + + +
+
+
+ Extract data according to isSop and isEop +
+ Dispatch to path A/B +
+
+
+
+ + Extract data according... + +
+
+ + + + +
+
+
+ StreamShiftRight +
+
+
+
+ + StreamShiftRight + +
+
+ + + + + + +
+
+
+ StraddleStreamFifo +
+
+
+
+ + StraddleStreamFifo + +
+
+ + + + + + +
+
+
+ StraddleStreamFifo +
+
+
+
+ + StraddleStreamFifo + +
+
+ + + + + +
+
+
+ reserve +
+
+
+
+ + reserve + +
+
+ + + + + + +
+
+
+ ChunkCompute +
+
+
+
+ + ChunkCompute + +
+
+ + + + + + +
+
+
+ DescriptorGen +
+
+
+
+ + DescriptorGen + +
+
+ + + + + +
+
+
+ Tag +
+
+
+
+ + Tag + +
+
+ + + + + +
+
+
+ drain +
+
+
+
+ + drain + +
+
+ + + + +
+
+
+ CBuffer +
+
+
+
+ + CBuffer + +
+
+ + + + + + +
+
+
+ RdReq +
+
+
+
+ + RdReq + +
+
+ + + + +
+
+
+ AxiStreamTx +
+
+
+
+ + AxiStreamTx + +
+
+ + + + + + +
+
+
+ AxiStreamRx +
+
+
+
+ + AxiStreamRx + +
+
+ + + + + +
+
+
+ Tag +
+
+
+
+ + Tag + +
+
+ + + + + +
+
+
+ TLp Payload +
+
+
+
+ + TLp Payload + +
+
+ + + + +
+
+
+ removeDesc +
+
+
+
+ + removeDesc + +
+
+ + + + + + +
+
+
+ SLOTNUM +
+
+
+
+ + SLOTNUM + +
+
+ + + + +
+
+
+ token -> tag +
+ data -> slotNum +
+
+
+
+ + token -> tag... + +
+
+
+ + + + + Text is not SVG - cannot display + + + +
\ No newline at end of file diff --git a/run_one.sh b/run_one.sh index f049c0d..a1389d3 100755 --- a/run_one.sh +++ b/run_one.sh @@ -13,7 +13,7 @@ TEST_LOG=run.log TEST_DIR=test cd $TEST_DIR truncate -s 0 $TEST_LOG -FILES=`ls TestDmaCore.bsv` +FILES=`ls TestCompletionFifo.bsv` ########################################################################### for FILE in $FILES; do # echo $FILE diff --git a/src/CompletionFifo.bsv b/src/CompletionFifo.bsv new file mode 100644 index 0000000..442d339 --- /dev/null +++ b/src/CompletionFifo.bsv @@ -0,0 +1,118 @@ +import GetPut::*; +import Counter::*; +import FIFOF::*; +import BRAMFIFO::*; +import Vector::*; +import DReg::*; + +import SemiFifo::*; + +// CompletionFifo +// +// A CompletionFifo is like a CompletionBuffer +// but uses Fifos instead of RegFile. +// CompletionFifo can reorder interlaced chunks belong to different streams. + +// Example +// reserve a token : slot = CRam.reserve.get; +// receive a chunk : CRam.append.enq(tuple2(slot, chunk)); +// all chunks received: CRam.complete.put(slot); +// get chunks in order: CRam.drain.first; CRam.drain.deq; + +// Parameters: +// nSlot : slot numbers +// nChunk: chunk numbers per slot +// tChunk: chunk data types +interface CompletionFifo#(numeric type nSlot, type tChunk); + interface Get#(SlotNum#(nSlot)) reserve; + method Bool available; + interface FifoIn#(Tuple2#(SlotNum#(nSlot), tChunk)) append; + interface Put#(SlotNum#(nSlot)) complete; + interface FifoOut#(tChunk) drain; +endinterface + +typedef Bit#(TLog#(nSlot)) SlotNum#(numeric type nSlot); + +function Bool isPowerOf2(Integer n); + return (n == (2 ** (log2(n)))); +endfunction + +module mkCompletionFifo#(Integer nChunk)(CompletionFifo#(nSlot, tChunk)) + provisos (Bits#(tChunk, szChunk), Log#(nSlot, ln), Add#(1, ln, ln1), Add#(1, _a, szChunk)); + + let maxSlotIdx = fromInteger(valueOf(nSlot) - 1); + function Action incrSlotIdx(Reg#(Bit#(ln)) idxReg); + action + if (isPowerOf2(valueOf(nSlot))) + idxReg <= idxReg + 1; // counter wraps automagically + else + idxReg <= ((idxReg == maxSlotIdx) ? 0 : idxReg + 1); + endaction + endfunction + + FIFOF#(Tuple2#(SlotNum#(nSlot), tChunk)) appendFifo <- mkFIFOF; + FIFOF#(tChunk) drainFifo <- mkFIFOF; + Vector#(nSlot, FIFOF#(tChunk)) bufferFifos <- replicateM(mkSizedBRAMFIFOF(nChunk)); + + Reg#(SlotNum#(nSlot)) inIdxReg <- mkReg(0); // input index, return this value when `reserve` is called + Reg#(SlotNum#(nSlot)) outIdxReg <- mkReg(0); // output index, pipeout Fifos[outIdxReg] + Counter#(ln1) counter <- mkCounter(0); // number of filled slots + Reg#(Vector#(nSlot, Bool)) flagsReg <- mkReg(replicate(False)); + Reg#(Maybe#(SlotNum#(nSlot))) cmplSlotReg <- mkDReg(tagged Invalid); + RWire#(SlotNum#(nSlot)) rstSlot <- mkRWire; + + + rule writeBuffer; + let {slot, data} = appendFifo.first; + appendFifo.deq; + bufferFifos[slot].enq(data); + endrule + + rule readBuffer; + if (!bufferFifos[outIdxReg].notEmpty && flagsReg[outIdxReg]) begin // complete assert and the buffer is empty + incrSlotIdx(outIdxReg); + rstSlot.wset(outIdxReg); + counter.down; + end + else begin + let data = bufferFifos[outIdxReg].first; + bufferFifos[outIdxReg].deq; + drainFifo.enq(data); + end + endrule + + rule setFlags; + let cmplMaybe = cmplSlotReg; + let rstMaybe = rstSlot.wget; + let flags = flagsReg; + if (isValid(cmplMaybe)) begin + flags[fromMaybe(?, cmplMaybe)] = True; + end + if (isValid(rstMaybe)) begin + flags[fromMaybe(?, rstMaybe)] = False; + end + flagsReg <= flags; + endrule + + interface Get reserve; + method ActionValue#(SlotNum#(nSlot)) get() if (counter.value <= maxSlotIdx); + incrSlotIdx(inIdxReg); + counter.up; + return inIdxReg; + endmethod + endinterface + + method Bool available(); + return (counter.value <= maxSlotIdx); + endmethod + + interface Put complete; + method Action put(SlotNum#(nSlot) slot); + cmplSlotReg <= tagged Valid slot; + endmethod + endinterface + + interface append = convertFifoToFifoIn(appendFifo); + interface drain = convertFifoToFifoOut(drainFifo); + +endmodule diff --git a/src/DmaC2HPipe.bsv b/src/DmaC2HPipe.bsv new file mode 100644 index 0000000..b501018 --- /dev/null +++ b/src/DmaC2HPipe.bsv @@ -0,0 +1,70 @@ +import FIFOF::*; +import GetPut::*; + +import SemiFifo::*; +import StreamUtils::*; +import PcieTypes::*; +import PcieAxiStreamTypes::*; +import PcieDescriptorTypes::*; +import ReqRequestCore::*; +import DmaTypes::*; + +// TODO : change the PCIe Adapter Ifc to TlpData and TlpHeader, +// move the module which convert TlpHeader to IP descriptor from dma to adapter +interface DmaC2HPipe; + // User Logic Ifc + interface FifoIn#(DataStream) wrDataFifoIn; + interface FifoIn#(DmaRequest) reqFifoIn; + interface FifoOut#(DataStream) rdDataFifoOut; + // Pcie Adapter Ifc + interface FifoOut#(DataStream) tlpDataFifoOut; + interface FifoOut#(SideBandByteEn) tlpSideBandFifoOut; + interface FifoIn#(StraddleStream) tlpDataFifoIn; + // TODO: Cfg Ifc + // interface Put#(DmaConfig) configuration; + // interface Client#(DmaCsrValue, DmaCsrValue) statusReg; +endinterface + +// Single Path module +module mkDmaC2HPipe(DmaC2HPipe); + RequesterRequestCore requestCore <- mkRequesterRequestCore; + RequesterCompleteCore completeCore <- mkRequesterCompleteCore; + + FIFOF#(DataStream) dataInFifo <- mkFIFOF; + FIFOF#(DmaRequest) reqInFifo <- mkFIFOF; + FIFOF#(DataStream) tlpOutFifo <- mkFIFOF; + FIFOF#(SideBandByteEn) tlpSideBandFifo <- mkFIFOF; + + rule reqDeMux; + let req = reqInFifo.first; + reqInFifo.deq; + if (req.isWrite) begin + requestCore.wrReqFifoIn.enq(req); + end + else begin + completeCore.rdReqFifoIn.enq(req); + end + endrule + + rule dataPipe; + let stream = dataInFifo.firts; + dataInFifo.deq; + requestCore.dataFifoIn.enq(stream); + endrule + + rule tlpOutMux; + + endrule + + // User Logic Ifc + interface wrDataFifoIn = convertFifoToFifoIn(dataInFifo); + interface reqFifoIn = convertFifoToFifoIn(reqInFifo); + interface rdDataFifoOut = completeCore.dataFifoOut; + // Pcie Adapter Ifc + interface tlpDataFifoOut = requestCore.dataFifoOut; + interface tlpSideBandFifoOut = requestCore.byteEnFifoOut; + interface tlpDataFifoIn = completeCore.dataFifoIn; + // TODO: Cfg Ifc + +endmodule + diff --git a/src/DmaCompleter.bsv b/src/DmaCompleter.bsv deleted file mode 100644 index c4936dc..0000000 --- a/src/DmaCompleter.bsv +++ /dev/null @@ -1,279 +0,0 @@ -import FIFOF::*; -import Vector::*; - -import SemiFifo::*; -import PrimUtils::*; -import PcieAxiStreamTypes::*; -import PcieTypes::*; -import PcieDescriptorTypes::*; -import DmaTypes::*; - -typedef 1 IDEA_CQ_CSR_DWORD_CNT; -typedef 2 IDEA_CC_CSR_DWORD_CNT; -typedef 4 IDEA_BYTE_CNT_OF_CSR; -typedef 4 IDEA_FIRST_BE_HIGH_VALID_PTR_OF_CSR; - -typedef 64 CMPL_NPREQ_INFLIGHT_NUM; -typedef 20 CMPL_NPREQ_WAITING_CLKS; -typedef 2'b11 NP_CREDIT_INCREMENT; -typedef 2'b00 NP_CREDIT_NOCHANGE; - -typedef 'h1F IDEA_CQ_TKEEP_OF_CSR; -typedef 'hF IDEA_CC_TKEEP_OF_CSR; - -typedef struct { - DmaCsrAddr addr; - DmaCsrValue value; -} CsrWriteReq deriving(Bits, Eq, Bounded); - -instance FShow#(CsrWriteReq); - function Fmt fshow(CsrWriteReq wrReq); - return ($format("32bit requests -module mkCompleterRequest(CompleterRequest); - FIFOF#(CmplReqAxiStream) inFifo <- mkFIFOF; - FIFOF#(CsrWriteReq) wrReqFifo <- mkFIFOF; - FIFOF#(CsrReadReq) rdReqFifo <- mkFIFOF; - - Reg#(Bool) isInPacket <- mkReg(False); - Reg#(UInt#(32)) illegalPcieReqCntReg <- mkReg(0); - - function PcieCompleterRequestDescriptor getDescriptorFromFirstBeat(CmplReqAxiStream axiStream); - return unpack(axiStream.tData[valueOf(DES_CQ_DESCRIPTOR_WIDTH)-1:0]); - endfunction - - function Data getDataFromFirstBeat(CmplReqAxiStream axiStream); - return axiStream.tData >> valueOf(DES_CQ_DESCRIPTOR_WIDTH); - endfunction - - function Bool isFirstBytesAllValid(PcieCompleterRequestSideBandFrame sideBand); - return (sideBand.firstByteEn[valueOf(IDEA_FIRST_BE_HIGH_VALID_PTR_OF_CSR)-1] == 1); - endfunction - - function DmaCsrAddr getCsrAddrFromCqDescriptor(PcieCompleterRequestDescriptor descriptor); - let addr = getAddrLowBits(zeroExtend(descriptor.address), descriptor.barAperture); - // Only support one BAR now, no operation - if (descriptor.barId == 0) begin - addr = addr; - end - else begin - addr = 0; - end - return truncate(addr << valueOf(TSub#(DMA_MEM_ADDR_WIDTH, DES_ADDR_WIDTH))); - endfunction - - rule parseTlp; - inFifo.deq; - let axiStream = inFifo.first; - PcieCompleterRequestSideBandFrame sideBand = unpack(axiStream.tUser); - isInPacket <= !axiStream.tLast; - if (!isInPacket) begin - let descriptor = getDescriptorFromFirstBeat(axiStream); - case (descriptor.reqType) - fromInteger(valueOf(MEM_WRITE_REQ)): begin - $display("SIM INFO @ mkCompleterRequest: MemWrite Detect!"); - if (descriptor.dwordCnt == fromInteger(valueOf(IDEA_CQ_CSR_DWORD_CNT)) && isFirstBytesAllValid(sideBand)) begin - let firstData = getDataFromFirstBeat(axiStream); - DmaCsrValue wrValue = firstData[valueOf(DMA_CSR_ADDR_WIDTH)-1:0]; - DmaCsrAddr wrAddr = getCsrAddrFromCqDescriptor(descriptor); - $display("SIM INFO @ mkCompleterRequest: Valid wrReq with Addr %h, data %h", wrAddr, wrValue); - let wrReq = CsrWriteReq { - addr : wrAddr, - value : wrValue - }; - wrReqFifo.enq(wrReq); - end - else begin - illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; - end - end - fromInteger(valueOf(MEM_READ_REQ)): begin - $display("SIM INFO @ mkCompleterRequest: MemRead Detect!"); - let rdReqAddr = getCsrAddrFromCqDescriptor(descriptor); - let rdReq = CsrReadReq{ - addr: rdReqAddr, - cqDescriptor: descriptor - }; - $display("SIM INFO @ mkCompleterRequest: Valid rdReq with Addr %h", rdReqAddr); - rdReqFifo.enq(rdReq); - end - default: illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; - endcase - end - endrule - - interface axiStreamFifoIn = convertFifoToFifoIn(inFifo); - interface csrWriteReqFifoOut = convertFifoToFifoOut(wrReqFifo); - interface csrReadReqFifoOut = convertFifoToFifoOut(rdReqFifo); -endmodule - -module mkCompleterComplete(CompleterComplete); - FIFOF#(CmplCmplAxiStream) outFifo <- mkFIFOF; - FIFOF#(CsrReadResp) rdRespFifo <- mkFIFOF; - FIFOF#(CsrReadReq) rdReqFifo <- mkFIFOF; - - // Only response MemRd TLP in this rule - rule genTlp; - let value = rdRespFifo.first; - rdRespFifo.deq; - let cqDescriptor = rdReqFifo.first.cqDescriptor; - let addr = rdReqFifo.first.addr; - rdReqFifo.deq; - $display("SIM INFO @ mkCompleterComplete: Valid rdResp with Addr %h, data %h", addr, value); - let ccDescriptor = PcieCompleterCompleteDescriptor { - reserve0 : 0, - attributes : cqDescriptor.attributes, - trafficClass : cqDescriptor.trafficClass, - completerIdEn : False, - completerId : 0, - tag : cqDescriptor.tag, - requesterId : cqDescriptor.requesterId, - reserve1 : 0, - isPoisoned : False, - status : fromInteger(valueOf(DES_CC_STAUS_SUCCESS)), - dwordCnt : fromInteger(valueOf(IDEA_CC_CSR_DWORD_CNT)), - reserve2 : 0, - isLockedReadCmpl: False, - byteCnt : fromInteger(valueOf(IDEA_BYTE_CNT_OF_CSR)), - reserve3 : 0, - addrType : cqDescriptor.addrType, - lowerAddr : truncate(addr) - }; - Data data = zeroExtend(pack(ccDescriptor)); - data = data | (zeroExtend(value) << valueOf(DES_CC_DESCRIPTOR_WIDTH)); - let isSop = PcieTlpCtlIsSopCommon { - isSopPtrs : replicate(0), // Straddle mode is disable of completer - isSop : 1 - }; - let isEop = PcieTlpCtlIsEopCommon { - isEopPtrs : replicate(0), // Straddle mode is disable of completer - isEop : 1 - }; - let sideBand = PcieCompleterCompleteSideBandFrame { - parity : 0, // Do not enable parity check in the core - discontinue : False, - isSop : isSop, - isEop : isEop - }; - let axiStream = CmplCmplAxiStream { - tData : data, - tKeep : fromInteger(valueOf(IDEA_CC_TKEEP_OF_CSR)), - tLast : True, - tUser : pack(sideBand) - }; - outFifo.enq(axiStream); - endrule - - interface axiStreamFifoOut = convertFifoToFifoOut(outFifo); - interface csrReadRespFifoIn = convertFifoToFifoIn(rdRespFifo); - interface csrReadReqFifoIn = convertFifoToFifoIn(rdReqFifo); -endmodule - -(* synthesize *) -module mkDmaCompleter(DmaCompleter); - CompleterRequest cmplRequest <- mkCompleterRequest; - CompleterComplete cmplComplete <- mkCompleterComplete; - - FIFOF#(DmaCsrValue) h2cCsrWriteDataFifo <- mkFIFOF; - FIFOF#(DmaCsrAddr) h2cCsrWriteReqFifo <- mkFIFOF; - FIFOF#(DmaCsrAddr) h2cCsrReadReqFifo <- mkFIFOF; - FIFOF#(DmaCsrValue) h2cCsrReadDataFifo <- mkFIFOF; - CounteredFIFOF#(CsrReadReq) csrRdReqStoreFifo <- mkCounteredFIFOF(valueOf(CMPL_NPREQ_INFLIGHT_NUM)); - - Reg#(PcieNonPostedRequst) npReqCreditCtrlReg <- mkReg(fromInteger(valueOf(NP_CREDIT_INCREMENT))); - Reg#(PcieNonPostedRequstCount) npReqCreditCntReg <- mkReg(0); - - let rawAxiStreamSlaveIfc <- mkFifoInToRawPcieAxiStreamSlave(cmplRequest.axiStreamFifoIn); - let rawAxiStreamMasterIfc <- mkFifoOutToRawPcieAxiStreamMaster(cmplComplete.axiStreamFifoOut); - - rule genCsrWriteReq; - let wrReq = cmplRequest.csrWriteReqFifoOut.first; - cmplRequest.csrWriteReqFifoOut.deq; - h2cCsrWriteDataFifo.enq(wrReq.value); - h2cCsrWriteReqFifo.enq(wrReq.addr); - endrule - - rule genCsrReadReq; - let rdReq = cmplRequest.csrReadReqFifoOut.first; - cmplRequest.csrReadReqFifoOut.deq; - h2cCsrReadReqFifo.enq(rdReq.addr); - csrRdReqStoreFifo.enq(rdReq); - endrule - - rule procCsrReadResp; - let req = csrRdReqStoreFifo.first; - let resp = h2cCsrReadDataFifo.first; - csrRdReqStoreFifo.deq; - h2cCsrReadDataFifo.deq; - cmplComplete.csrReadRespFifoIn.enq(resp); - cmplComplete.csrReadReqFifoIn.enq(req); - endrule - - rule npBackPressure; - if (csrRdReqStoreFifo.getCurSize == fromInteger(valueOf(TDiv#(CMPL_NPREQ_INFLIGHT_NUM,2)))) begin - npReqCreditCtrlReg <= fromInteger(valueOf(NP_CREDIT_NOCHANGE)); - end - else begin - npReqCreditCtrlReg <= fromInteger(valueOf(NP_CREDIT_INCREMENT)); - end - endrule - - interface RawPcieCompleterRequest rawCompleterRequest; - interface rawAxiStreamSlave = rawAxiStreamSlaveIfc; - method PcieNonPostedRequst nonPostedReqCreditIncrement = npReqCreditCtrlReg; - method Action nonPostedReqCreditCnt(PcieNonPostedRequstCount nonPostedpReqCount); - npReqCreditCntReg <= nonPostedpReqCount; - endmethod - endinterface - - interface RawPcieCompleterComplete rawCompleterComplete; - interface rawAxiStreamMaster = rawAxiStreamMasterIfc; - endinterface - - interface DmaHostToCardWrite h2cWrite; - interface dataFifoOut = convertFifoToFifoOut(h2cCsrWriteDataFifo); - interface reqFifoOut = convertFifoToFifoOut(h2cCsrWriteReqFifo); - endinterface - - interface DmaHostToCardRead h2cRead; - interface reqFifoOut = convertFifoToFifoOut(h2cCsrReadReqFifo); - interface dataFifoIn = convertFifoToFifoIn(h2cCsrReadDataFifo); - endinterface - - // TODO: get internal registers value - method DmaCsrValue getRegisterValue(DmaCsrAddr addr); - return 0; - endmethod - -endmodule diff --git a/src/DmaController.bsv b/src/DmaController.bsv deleted file mode 100755 index c1d0999..0000000 --- a/src/DmaController.bsv +++ /dev/null @@ -1,65 +0,0 @@ -import FIFOF::*; - -import PcieTypes::*; -import PcieConfigurator::*; -import DmaTypes::*; -import DmaCompleter::*; -import DmaRequester::*; - -interface DmaController; - // Requester interfaces, where the Card serve as the Master - interface DmaCardToHostWrite c2hWrite; - interface DmaCardToHostRead c2hRead; - - // Completer interfaces, where the Card serve as the Slave - interface DmaHostToCardWrite h2cWrite; - interface DmaHostToCardRead h2cRead; - - // Raw PCIe interfaces, connected to the Xilinx PCIe IP - interface RawXilinxPcieIp rawPcie; -endinterface - -(* synthesize *) -module mkDmaController(DmaController); - DmaCompleter completer <- mkDmaCompleter; - DmaRequester requester <- mkDmaRequester; - PcieConfigurator pcieConfigurator <- mkPcieConfigurator; - - interface c2hWrite = requester.c2hWrite; - interface c2hRead = requester.c2hRead; - - interface h2cWrite = completer.h2cWrite; - interface h2cRead = completer.h2cRead; - - interface RawXilinxPcieIp rawPcie; - interface requesterRequest = requester.rawRequesterRequest; - interface requesterComplete = requester.rawRequesterComplete; - interface completerRequest = completer.rawCompleterRequest; - interface completerComplete = completer.rawCompleterComplete; - interface configuration = pcieConfigurator.rawConfiguration; - method Action linkUp(Bool isLinkUp); - endmethod - endinterface -endmodule - -interface DmaControllerCompleter; - // Completer interfaces, where the Card serve as the Slave - interface DmaHostToCardWrite h2cWrite; - interface DmaHostToCardRead h2cRead; - - // Raw PCIe interfaces, connected to the Xilinx PCIe IP - interface RawXilinxPcieIpCompleter rawPcie; -endinterface - -// Only for testing in bsv, do not use for synthesize -module mkDmaControllerCompleter(DmaControllerCompleter); - DmaCompleter completer <- mkDmaCompleter; - - interface h2cWrite = completer.h2cWrite; - interface h2cRead = completer.h2cRead; - - interface RawXilinxPcieIpCompleter rawPcie; - interface completerRequest = completer.rawCompleterRequest; - interface completerComplete = completer.rawCompleterComplete; - endinterface -endmodule diff --git a/src/DmaH2CPipe.bsv b/src/DmaH2CPipe.bsv new file mode 100644 index 0000000..0671074 --- /dev/null +++ b/src/DmaH2CPipe.bsv @@ -0,0 +1,165 @@ +import FIFOF::*; +import Vector::*; + +import SemiFifo::*; +import PrimUtils::*; +import PcieAxiStreamTypes::*; +import PcieTypes::*; +import PcieDescriptorTypes::*; +import DmaTypes::*; + +typedef 1 IDEA_CQ_CSR_DWORD_CNT; +typedef 2 IDEA_CC_CSR_DWORD_CNT; +typedef 4 IDEA_BYTE_CNT_OF_CSR; +typedef 4 IDEA_FIRST_BE_HIGH_VALID_PTR_OF_CSR; + +typedef 64 CMPL_NPREQ_INFLIGHT_NUM; +typedef 20 CMPL_NPREQ_WAITING_CLKS; +typedef 2'b11 NP_CREDIT_INCREMENT; +typedef 2'b00 NP_CREDIT_NOCHANGE; + +typedef 'h1F IDEA_CQ_TKEEP_OF_CSR; +typedef 'hF IDEA_CC_TKEEP_OF_CSR; + +typedef struct { + DmaCsrAddr addr; + DmaCsrValue value; +} CsrWriteReq deriving(Bits, Eq, Bounded); + +instance FShow#(CsrWriteReq); + function Fmt fshow(CsrWriteReq wrReq); + return ($format("> valueOf(DES_CQ_DESCRIPTOR_WIDTH); + endfunction + + Reg#(Bool) isInPacket <- mkReg(False); + Reg#(UInt#(32)) illegalPcieReqCntReg <- mkReg(0); + + BytePtr csrBytes = fromInteger(valueOf(TDiv#(DMA_CSR_DATA_WIDTH, BYTE_WIDTH))); + + rule parseTlp; + tlpInFifo.deq; + let stream = tlpInFifo.first; + isInPacket <= !stream.isLast; + if (!isInPacket) begin + let descriptor = getDescriptorFromFirstBeat(stream); + case (descriptor.reqType) + fromInteger(valueOf(MEM_WRITE_REQ)): begin + $display("SIM INFO @ mkCompleterRequest: MemWrite Detect!"); + if (descriptor.dwordCnt == fromInteger(valueOf(IDEA_CQ_CSR_DWORD_CNT))) begin + let firstData = getDataFromFirstBeat(stream); + DmaCsrValue wrValue = firstData[valueOf(DMA_CSR_ADDR_WIDTH)-1:0]; + DmaCsrAddr wrAddr = getCsrAddrFromCqDescriptor(descriptor); + $display("SIM INFO @ mkCompleterRequest: Valid wrReq with Addr %h, data %h", wrAddr, wrValue); + let req = DmaRequest { + startAddr : wrAddr, + length : zeroExtend(csrBytes), + isWrite : True + }; + reqOutFifo.enq(req); + dataOutFifo.enq(wrValue); + end + else begin + illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; + end + end + fromInteger(valueOf(MEM_READ_REQ)): begin + $display("SIM INFO @ mkCompleterRequest: MemRead Detect!"); + let rdAddr = getCsrAddrFromCqDescriptor(descriptor); + let req = CsrReadReq{ + startAddr : rdAddr, + length : zeroExtend(csrBytes), + isWrite : False + }; + $display("SIM INFO @ mkCompleterRequest: Valid rdReq with Addr %h", rdAddr); + rdReqFifo.enq(req); + pendingFifo.enq(tuple2(req, descriptor)); + end + default: illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; + endcase + end + endrule + + rule genTlp; + let value = dataInFifo.first; + dataInFifo.deq; + let {req, cqDescriptor} = pendingFifo.first; + pendingFifo.deq; + let addr = req.startAddr; + $display("SIM INFO @ mkCompleterComplete: Valid rdResp with Addr %h, data %h", addr, value); + let ccDescriptor = PcieCompleterCompleteDescriptor { + reserve0 : 0, + attributes : cqDescriptor.attributes, + trafficClass : cqDescriptor.trafficClass, + completerIdEn : False, + completerId : 0, + tag : cqDescriptor.tag, + requesterId : cqDescriptor.requesterId, + reserve1 : 0, + isPoisoned : False, + status : fromInteger(valueOf(DES_CC_STAUS_SUCCESS)), + dwordCnt : fromInteger(valueOf(IDEA_CC_CSR_DWORD_CNT)), + reserve2 : 0, + isLockedReadCmpl: False, + byteCnt : fromInteger(valueOf(IDEA_BYTE_CNT_OF_CSR)), + reserve3 : 0, + addrType : cqDescriptor.addrType, + lowerAddr : truncate(addr) + }; + Data data = zeroExtend(pack(ccDescriptor)); + data = data | (zeroExtend(value) << valueOf(DES_CC_DESCRIPTOR_WIDTH)); + let stream = DataStream { + data : data, + byteEn : convertBytePtr2ByteEn(csrBytes), + isFirst : True, + isLast : True + }; + tlpOutFifo.enq(stream); + endrule + + // User Logic Ifc + interface reqFifoOut = convertFifoToFifoOut(reqOutFifo); + interface rdDataFifoIn = convertFifoToFifoIn(dataInFifo); + interface wrDataFifoOut = convertFifoToFifoOut(dataOutFifo); + // Pcie Adapter Ifc + interface tlpDataFifoIn = convertFifoToFifoIn(tlpInFifo); + interface tlpDataFifoOut = convertFifoToFifoOut(tlpOutFifo); +endmodule diff --git a/src/DmaRequester.bsv b/src/DmaRequester.bsv deleted file mode 100644 index 68e7304..0000000 --- a/src/DmaRequester.bsv +++ /dev/null @@ -1,179 +0,0 @@ -import FIFOF::*; -import GetPut::*; - -import SemiFifo::*; -import StreamUtils::*; -import PcieTypes::*; -import PcieAxiStreamTypes::*; -import PcieDescriptorTypes::*; -import ReqRequestCore::*; -import DmaTypes::*; - -typedef TSub#(DATA_WIDTH, DES_RQ_DESCRIPTOR_WIDTH) ONE_TLP_THRESH; - -interface DmaRequester; - interface DmaCardToHostWrite c2hWrite; - interface DmaCardToHostRead c2hRead; - (* prefix = "" *) interface RawPcieRequesterRequest rawRequesterRequest; - (* prefix = "" *) interface RawPcieRequesterComplete rawRequesterComplete; -endinterface - -typedef 2 STRADDLE_NUM; - -interface DmaRequesterRequestFifoIn; - interface FifoIn#(DataStream) wrDataFifoIn; - interface FifoIn#(DmaRequest) wrReqFifoIn; - interface FifoIn#(DmaRequest) rdReqFifoIn; -endinterface - -interface RequesterRequest; - interface DmaRequesterRequestFifoIn reqA; - interface DmaRequesterRequestFifoIn reqB; - interface FifoOut#(ReqReqAxiStream) axiStreamFifoOut; - interface Put#(Bool) postedEn; - interface Put#(Bool) nonPostedEn; - interface Get#(Bool) isWriteDataRecvDone; -endinterface - -interface RequesterComplete; - interface FifoIn#(DmaRequest) rdReqFifoIn; - interface FifoOut#(DataStream) rdDataFifoOut; - interface FifoIn#(ReqCmplAxiStream) axiStreamFifoIn; -endinterface - -module mkRequesterRequest(RequesterRequest); - RequesterRequestCore rqACore <- mkRequesterRequestCore; - RequesterRequestCore rqBCore <- mkRequesterRequestCore; - FIFOF#(ReqReqAxiStream) axiStreamOutFifo <- mkFIFOF; - - Reg#(DmaMemAddr) inflightRemainBytesReg <- mkReg(0); - Reg#(Bool) isInWritingReg <- mkReg(False); - Wire#(Bool) postedEnWire <- mkDWire(True); - Wire#(Bool) nonPostedEnWire <- mkDWire(True); - - ConvertDataStreamsToStraddleAxis straddleAxis <- mkConvertDataStreamsToStraddleAxis; - - // Pipeline stage 1: split to chunks, align to DWord and add descriptor at the first - // See RequesterRequestCore - - // Pipeline stage 2: put 2 core output datastream to straddleAxis and generate ReqReqAxiStream - rule coreToStraddle; - if (rqACore.dataFifoOut.notEmpty) begin - let stream = rqACore.dataFifoOut.first; - if (stream.isFirst && rqACore.byteEnFifoOut.notEmpty) begin - let sideBandByteEn = rqACore.byteEnFifoOut.first; - straddleAxis.byteEnAFifoIn.enq(sideBandByteEn); - rqACore.dataFifoOut.deq; - straddleAxis.dataAFifoIn.enq(stream); - end - else begin - rqACore.dataFifoOut.deq; - straddleAxis.dataAFifoIn.enq(stream); - end - end - if (rqBCore.dataFifoOut.notEmpty) begin - let stream = rqBCore.dataFifoOut.first; - if (stream.isFirst && rqBCore.byteEnFifoOut.notEmpty) begin - let sideBandByteEn = rqBCore.byteEnFifoOut.first; - straddleAxis.byteEnBFifoIn.enq(sideBandByteEn); - rqBCore.dataFifoOut.deq; - straddleAxis.dataBFifoIn.enq(stream); - end - else begin - rqBCore.dataFifoOut.deq; - straddleAxis.dataBFifoIn.enq(stream); - end - end - endrule - - interface DmaRequesterRequestFifoIn reqA; - interface wrDataFifoIn = rqACore.dataFifoIn; - interface wrReqFifoIn = rqACore.wrReqFifoIn; - interface rdReqFifoIn = rqACore.rdReqFifoIn; - endinterface - - interface DmaRequesterRequestFifoIn reqB; - interface wrDataFifoIn = rqBCore.dataFifoIn; - interface wrReqFifoIn = rqBCore.wrReqFifoIn; - interface rdReqFifoIn = rqBCore.rdReqFifoIn; - endinterface - - interface axiStreamFifoOut = straddleAxis.axiStreamFifoOut; - - interface Put postedEn; - method Action put(Bool postedEnable); - postedEnWire <= postedEnable; - endmethod - endinterface - - interface Put nonPostedEn; - method Action put(Bool nonPostedEnable); - nonPostedEnWire <= nonPostedEnable; - endmethod - endinterface - - interface Get isWriteDataRecvDone; - method ActionValue#(Bool) get(); - return (inflightRemainBytesReg == 0); - endmethod - endinterface -endmodule - -module mkRequesterComplete(RequesterComplete); - FIFOF#(DataStream) rdDataOutFifo <- mkFIFOF; - FIFOF#(DmaRequest) rdReqInFifo <- mkFIFOF; - FIFOF#(ReqCmplAxiStream) axiStreamInFifo <- mkFIFOF; - - // TODO: RC Logic - - interface rdReqFifoIn = convertFifoToFifoIn(rdReqInFifo); - interface rdDataFifoOut = convertFifoToFifoOut(rdDataOutFifo); - interface axiStreamFifoIn = convertFifoToFifoIn(axiStreamInFifo); -endmodule - -(* synthesize *) -module mkDmaRequester(DmaRequester); - RequesterRequest reqRequest <- mkRequesterRequest; - RequesterComplete reqComplete <- mkRequesterComplete; - - FIFOF#(DataStream) c2hWriteDataFifo <- mkFIFOF; - FIFOF#(DmaRequest) c2hWriteReqFifo <- mkFIFOF; - FIFOF#(DataStream) c2hReadDataFifo <- mkFIFOF; - FIFOF#(DmaRequest) c2hReadReqFifo <- mkFIFOF; - - let rawAxiStreamSlaveIfc <- mkFifoInToRawPcieAxiStreamSlave(reqComplete.axiStreamFifoIn); - let rawAxiStreamMasterIfc <- mkFifoOutToRawPcieAxiStreamMaster(reqRequest.axiStreamFifoOut); - - interface DmaCardToHostWrite c2hWrite; - interface dataFifoIn = convertFifoToFifoIn(c2hWriteDataFifo); - interface reqFifoIn = convertFifoToFifoIn(c2hWriteReqFifo); - // TODO: isDone need assertion - method Bool isDone = True; - endinterface - - interface DmaCardToHostRead c2hRead; - interface reqFifoIn = convertFifoToFifoIn(c2hReadReqFifo); - interface dataFifoOut = convertFifoToFifoOut(c2hReadDataFifo); - endinterface - - interface RawPcieRequesterRequest rawRequesterRequest; - interface rawAxiStreamMaster = rawAxiStreamMasterIfc; - method Action pcieProgressTrack( - Bool tagValid0, - Bool tagValid1, - PcieRqTag tag0, - PcieRqTag tag1, - Bool seqNumValid0, - Bool seqNumValid1, - PcieRqSeqNum seqNum0, - PcieRqSeqNum seqNum1 - ); - // Not support progress track now - endmethod - endinterface - - interface RawPcieRequesterComplete rawRequesterComplete; - interface rawAxiStreamSlave = rawAxiStreamSlaveIfc; - endinterface - -endmodule diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index 7e3f574..2f00ffe 100755 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -3,6 +3,7 @@ import FShow::*; import SemiFifo::*; import PcieTypes::*; import PcieAxiStreamTypes::*; +import PcieDescriptorTypes::*; typedef PCIE_AXIS_DATA_WIDTH DATA_WIDTH; @@ -22,6 +23,9 @@ typedef Bit#(BYTE_WIDTH) Byte; typedef Bit#(DWORD_WIDTH) DWord; typedef Bit#(1) ByteParity; +typedef 4096 BUS_BOUNDARY; +typedef TAdd#(1, TLog#(BUS_BOUNDARY)) BUS_BOUNDARY_WIDTH; + typedef 2 CONCAT_STREAM_NUM; typedef TDiv#(DATA_WIDTH, BYTE_WIDTH) BYTE_EN_WIDTH; @@ -45,6 +49,7 @@ typedef TDiv#(DWORD_EN_WIDTH, 2) STRADDLE_THRESH_DWORD_WIDTH; typedef struct { DmaMemAddr startAddr; DmaMemAddr length; + Bool isWrite; } DmaRequest deriving(Bits, Bounded, Eq); typedef struct { @@ -72,7 +77,7 @@ typedef Tuple2#( instance FShow#(DmaRequest); function Fmt fshow(DmaRequest request); - return ($format("> valueOf(STRADDLE_THRESH_BIT_WIDTH))); + end +endfunction + +function Data getStraddleData(PcieTlpCtlIsSopPtr isSopPtr, Data data); + if (isSopPtr == fromInteger(valueOf(ISSOP_LANE_0))) begin + let sData = zeroExtend(Data'(data[valueOf(STRADDLE_THRESH_BIT_WIDTH)-1:0])); + end + else begin + let sData = data >> valueOf(STRADDLE_THRESH_BIT_WIDTH); + end + return sData; +endfunction + +function ByteEn getStraddleByteEn(PcieTlpCtlIsSopPtr isSopPtr, ByteEn byteEn); + if (isSopPtr == fromInteger(valueOf(ISSOP_LANE_0))) begin + let sByteEn = zeroExtend(ByteEn'(byteEn[valueOf(STRADDLE_THRESH_BYTE_WIDTH)-1:0])); + end + else begin + let sByteEn = byteEn >> valueOf(STRADDLE_THRESH_BYTE_WIDTH); + end + return sByteEn; +endfunction + +function Bool isMyValidTlp(DmaPathNo path, PcieRequesterCompleteDescriptor desc); + Bool valid = (desc.status == fromInteger(valueOf(SUCCESSFUL_CMPL))) && (!desc.isPoisoned); + Bool pathMatch = (truncate(path) == desc.tag[valueOf(DES_NONEXTENDED_TAG_WIDTH) - 1]); + return valid && pathMatch; +endfunction + +interface RequesterCompleteCore; + interface FifoIn#(StraddleStream) tlpFifoIn; + interface FifoOut#(DataStream) tlpFifoOut; + interface FifoOut#(DataStream) dataFifoOut; + interface FifoIn#(DmaRequest) rdReqFifoIn; +endinterface + +module mkRequesterCompleteCore(RequesterCompleteCore); + FIFOF#(StraddleStream) tlpInFifo <- mkFIFOF; + FIFOF#(DmaRequest) reqInFifo <- mkFIFOF; + FIFOF#(DataStream) tlpOutFifo <- mkFIFOF; + + FIFOF#(SlotToken) tagFifo <- mkSizedFIFOF(4); + FIFOF#(Bool) completedFifo <- mkSizedFIFOF(4); + + StreamPipe descRemove <- mkStreamHeaderRemove(fromInteger(valueOf(DES_RC_DESCRIPTOR_WIDTH))); + StreamPipe streamReshape <- mkStreamReshape; + ChunkCompute chunkSplitor <- mkChunkComputer; + CompletionFifo#(SLOT_PER_PATH, DataStream) cBuffer <- mkCompletionFifo(valueOf(MAX_STREAM_NUM_PER_COMPLETION)); + + Reg#(Bool) hasReadOnce <- mkReg(False); + + // Pipeline stage 1: convert StraddleStream to DataStream, may cost 2 cycle for one StraddleStream + rule convertStraddleToDataStream; + let sdStream = tlpInFifo.first; + if (sdStream.isDoubleFrame) begin + PcieTlpCtlIsSopPtr isSopPtr = 0; + if (hasReadOnce) begin + tlpInFifo.deq; + hasReadOnce <= False; + isSopPtr = 1; + end + else begin + hasReadOnce <= True; + end + let stream = DataStream { + data : getStraddleData(isSopPtr, sdStream.data), + byteEn : getStraddleByteEn(isSopPtr, sideBand.dataByteEn);, + isFirst : sdStream.isFirst[isSopPtr], + isLast : sdStream.isLast[isSopPtr] + } + let tag = sdStream.tag[isSopPtr]; + tagFifo.enq(tag); + end + else begin + tlpInFifo.deq; + hasReadOnce <= False; + let stream = DataStream { + data : sdStream.data, + byteEn : sdStream.byteEn, + isFirst : sdStream.isFirst[0], + isLast : sdStream.isLast[0] + }; + let tag = sdStream.tag[0]; + end + descRemove.streamFifoIn.enq(stream); + endrule + + // Pipeline stage 2: remove the descriptor in the head of each TLP + + // Pipeline stage 3: Buffer the received DataStreams and reorder the, + rule reorderStream; + let stream = descRemove.streamFifoOut.first; + let isCompleted = completedFifo.first; + let tag = tagFifo.first; + descRemove.streamFifoOut.deq; + completedFifo.deq; + tagFifo.deq; + stream.isLast = isCompleted && stream.isLast; + cBuffer.append.enq(tuple2(tag, stream)); + if (stream.isLast) begin + cBuffer.complete.put(tag); + end + endrule + + // Pipeline stage 4: there may be bubbles in the first and last DataStream of a TLP because of RCB + // Reshape the DataStream and make sure it is continuous + rule reshapeStream; + let stream = cBuffer.drain.first; + cBuffer.drain.deq; + streamReshape.streamFifoIn.enq(stream); + endrule + + // Pipeline stage 1: split to req to MRRS chunks + rule reqSplit; + let req = reqInFifo.first; + reqInFifo.deq; + chunkSplitor.dmaRequestFifoIn.enq(req); + endrule + + // Pipeline stage 2: generate read descriptor + rule cqDescGen; + let req = chunkSplitor.chunkRequestFifoOut.first; + chunkSplitor.chunkRequestFifoOut.deq; + let tag <- completedFifo.reserve.get; + let descriptor = PcieRequesterRequestDescriptor { + forceECRC : False, + attributes : 0, + trafficClass : 0, + requesterIdEn : False, + completerId : 0, + tag : tag, + requesterId : 0, + isPoisoned : False, + reqType : fromInteger(valueOf(MEM_READ_REQ)), + dwordCnt : truncate(req.length >> valueOf(BYTE_DWORD_SHIFT_WIDTH)) + zeroExtend(DwordCount'(rq.length[1:0])), + address : truncate(req.startAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)), + addrType : fromInteger(valueOf(TRANSLATED_ADDR)) + }; + let stream = DataStream { + data : zeroExtend(pack(descriptor)), + byteEn : convertBytePtr2ByteEn(fromInteger(valueOf(DES_RQ_DESCRIPTOR_WIDTH))), + isFirst : True, + isLast : True + }; + tlpOutFifo.enq(stream); + endrule + + interface tlpFifoIn = convertFifoToFifoIn(tlpInFifo); + interface tlpFifoOut = convertFifoToFifoOut(tlpOutFifo); + interface rdReqFifoIn = convertFifoToFifoOut(reqInFifo); + interface dataFifoOut = streamReshape.streamFifoOut; +endmodule diff --git a/src/ReqRequestCore.bsv b/src/ReqRequestCore.bsv index a644671..bf5a60c 100755 --- a/src/ReqRequestCore.bsv +++ b/src/ReqRequestCore.bsv @@ -10,10 +10,6 @@ import PrimUtils::*; import StreamUtils::*; import PcieDescriptorTypes::*; - -typedef 4096 BUS_BOUNDARY; -typedef TAdd#(1, TLog#(BUS_BOUNDARY)) BUS_BOUNDARY_WIDTH; - typedef Bit#(BUS_BOUNDARY_WIDTH) PcieTlpMaxMaxPayloadSize; typedef Bit#(TLog#(BUS_BOUNDARY_WIDTH)) PcieTlpSizeWidth; @@ -280,234 +276,8 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); endinterface endmodule -typedef 2'b00 NO_TLP_IN_THIS_BEAT; -typedef 2'b01 SINGLE_TLP_IN_THIS_BEAT; -typedef 2'b11 DOUBLE_TLP_IN_THIS_BEAT; - -typedef 2'b00 ISSOP_LANE_0; -typedef 2'b10 ISSOP_LANE_32; - typedef 3 BYTEEN_INFIFO_DEPTH; -// Convert 2 DataStream input to 1 PcieAxiStream output -// - The axistream is in straddle mode which means tKeep and tLast are ignored -// - The core use isSop and isEop to location Tlp and allow 2 Tlp in one beat -// - The input dataStream should be added Descriptor and aligned to DW already -interface ConvertDataStreamsToStraddleAxis; - interface FifoIn#(DataStream) dataAFifoIn; - interface FifoIn#(SideBandByteEn) byteEnAFifoIn; - interface FifoIn#(DataStream) dataBFifoIn; - interface FifoIn#(SideBandByteEn) byteEnBFifoIn; - interface FifoOut#(ReqReqAxiStream) axiStreamFifoOut; -endinterface - -module mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); - FIFOF#(SideBandByteEn) byteEnAFifo <- mkSizedFIFOF(valueOf(BYTEEN_INFIFO_DEPTH)); - FIFOF#(SideBandByteEn) byteEnBFifo <- mkSizedFIFOF(valueOf(BYTEEN_INFIFO_DEPTH)); - - StreamShiftComplex shiftA <- mkStreamShiftComplex(fromInteger(valueOf(STRADDLE_THRESH_BYTE_WIDTH))); - StreamShiftComplex shiftB <- mkStreamShiftComplex(fromInteger(valueOf(STRADDLE_THRESH_BYTE_WIDTH))); - - FIFOF#(ReqReqAxiStream) axiStreamOutFifo <- mkFIFOF; - - Reg#(Bool) isInStreamAReg <- mkReg(False); - Reg#(Bool) isInStreamBReg <- mkReg(False); - Reg#(Bool) isInShiftAReg <- mkReg(False); - Reg#(Bool) isInShiftBReg <- mkReg(False); - Reg#(Bool) roundRobinReg <- mkReg(False); - - function Bool hasStraddleSpace(DataStream sdStream); - return !unpack(sdStream.byteEn[valueOf(STRADDLE_THRESH_BYTE_WIDTH)]); - endfunction - - function PcieRequesterRequestSideBandFrame genRQSideBand( - PcieTlpCtlIsEopCommon isEop, PcieTlpCtlIsSopCommon isSop, SideBandByteEn byteEnA, SideBandByteEn byteEnB - ); - let {firstByteEnA, lastByteEnA} = byteEnA; - let {firstByteEnB, lastByteEnB} = byteEnB; - let sideBand = PcieRequesterRequestSideBandFrame { - // Do not use parity check in the core - parity : 0, - // Do not support progress track - seqNum1 : 0, - seqNum0 : 0, - //TODO: Do not support Transaction Processing Hint now, maybe we need TPH for better performance - tphSteeringTag : 0, - tphIndirectTagEn : 0, - tphType : 0, - tphPresent : 0, - // Do not support discontinue - discontinue : False, - // Indicates end of the tlp - isEop : isEop, - // Indicates starts of a new tlp - isSop : isSop, - // Disable when use DWord-aligned Mode - addrOffset : 0, - // Indicates byte enable in the first/last DWord - lastByteEn : {pack(lastByteEnB), pack(lastByteEnA)}, - firstByteEn : {pack(firstByteEnB), pack(firstByteEnA)} - }; - return sideBand; - endfunction - - // Pipeline stage 1: get the shift datastream - - // Pipeline Stage 2: get the axiStream data - rule genStraddlePcie; - DataStream sendingStream = getEmptyStream; - DataStream pendingStream = getEmptyStream; - Bool isSendingA = True; - - // In streamA sending epoch, waiting streamA until isLast - if (isInStreamAReg) begin - let {oriStreamA, shiftStreamA} = shiftA.streamFifoOut.first; - sendingStream = isInShiftAReg ? shiftStreamA : oriStreamA; - shiftA.streamFifoOut.deq; - isSendingA = True; - if (shiftB.streamFifoOut.notEmpty && sendingStream.isLast && hasStraddleSpace(sendingStream)) begin - let {oriStreamB, shiftStreamB} = shiftB.streamFifoOut.first; - pendingStream = shiftStreamB; - shiftB.streamFifoOut.deq; - end - end - // In streamB sendging epoch, waiting streamB until isLast - else if (isInStreamBReg) begin - let {oriStreamB, shiftStreamB} = shiftB.streamFifoOut.first; - sendingStream = isInShiftBReg ? shiftStreamB : oriStreamB; - shiftB.streamFifoOut.deq; - isSendingA = False; - if (shiftA.streamFifoOut.notEmpty && sendingStream.isLast && hasStraddleSpace(sendingStream)) begin - let {oriStreamA, shiftStreamA} = shiftA.streamFifoOut.first; - pendingStream = shiftStreamA; - shiftA.streamFifoOut.deq; - end - end - // In Idle, choose one stream to enter new epoch - else begin - if (shiftA.streamFifoOut.notEmpty && shiftB.streamFifoOut.notEmpty) begin - roundRobinReg <= !roundRobinReg; - if (roundRobinReg) begin - let {oriStreamA, shiftStreamA} = shiftA.streamFifoOut.first; - sendingStream = oriStreamA; - shiftA.streamFifoOut.deq; - isSendingA = True; - if (sendingStream.isLast && hasStraddleSpace(sendingStream)) begin - let {oriStreamB, shiftStreamB} = shiftB.streamFifoOut.first; - pendingStream = shiftStreamB; - shiftB.streamFifoOut.deq; - end - end - else begin - let {oriStreamB, shiftStreamB} = shiftB.streamFifoOut.first; - sendingStream = oriStreamB; - shiftB.streamFifoOut.deq; - isSendingA = False; - if (sendingStream.isLast && hasStraddleSpace(sendingStream)) begin - let {oriStreamA, shiftStreamA} = shiftA.streamFifoOut.first; - pendingStream = shiftStreamA; - shiftA.streamFifoOut.deq; - end - end - end - else if (shiftA.streamFifoOut.notEmpty) begin - let {oriStreamA, shiftStreamA} = shiftA.streamFifoOut.first; - sendingStream = oriStreamA; - shiftA.streamFifoOut.deq; - isSendingA = True; - roundRobinReg <= False; - end - else if (shiftB.streamFifoOut.notEmpty) begin - let {oriStreamB, shiftStreamB} = shiftB.streamFifoOut.first; - sendingStream = oriStreamB; - shiftB.streamFifoOut.deq; - isSendingA = False; - roundRobinReg <= True; - end - else begin - // Do nothing - end - end - - if (!isByteEnZero(sendingStream.byteEn)) begin - // Change the registers and generate PcieAxiStream - let sideBandByteEnA = tuple2(0, 0); - let sideBandByteEnB = tuple2(0, 0); - if (isSendingA) begin - isInStreamAReg <= !sendingStream.isLast; - isInShiftAReg <= sendingStream.isLast ? False : isInShiftAReg; - if (sendingStream.isFirst) begin - sideBandByteEnA = byteEnAFifo.first; - byteEnAFifo.deq; - end - if (sendingStream.isLast && hasStraddleSpace(sendingStream) && !isByteEnZero(pendingStream.byteEn)) begin - isInStreamBReg <= !pendingStream.isLast; - isInShiftBReg <= !pendingStream.isLast; - sideBandByteEnB = byteEnBFifo.first; - byteEnBFifo.deq; - end - end - else begin - isInStreamBReg <= !sendingStream.isLast; - isInShiftBReg <= sendingStream.isLast ? False : isInShiftBReg; - if (sendingStream.isFirst) begin - sideBandByteEnB = byteEnBFifo.first; - byteEnBFifo.deq; - end - if (sendingStream.isLast && hasStraddleSpace(sendingStream) && !isByteEnZero(pendingStream.byteEn)) begin - isInStreamAReg <= !pendingStream.isLast; - isInShiftAReg <= !pendingStream.isLast; - sideBandByteEnA = byteEnAFifo.first; - byteEnAFifo.deq; - end - end - - let isSop = PcieTlpCtlIsSopCommon { - isSopPtrs : replicate(0), - isSop : 0 - }; - let isEop = PcieTlpCtlIsEopCommon { - isEopPtrs : replicate(0), - isEop : 0 - }; - - if (sendingStream.isFirst && pendingStream.isFirst) begin - isSop.isSop = fromInteger(valueOf(DOUBLE_TLP_IN_THIS_BEAT)); - isSop.isSopPtrs[0] = fromInteger(valueOf(ISSOP_LANE_0)); - isSop.isSopPtrs[1] = fromInteger(valueOf(ISSOP_LANE_32)); - end - else if (sendingStream.isFirst || pendingStream.isFirst) begin - isSop.isSop = fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT)); - isSop.isSopPtrs[0] = fromInteger(valueOf(ISSOP_LANE_0)); - end - if (pendingStream.isLast && !isByteEnZero(pendingStream.byteEn)) begin - isEop.isEop = fromInteger(valueOf(DOUBLE_TLP_IN_THIS_BEAT)); - isEop.isEopPtrs[0] = truncate(convertByteEn2DwordPtr(sendingStream.byteEn)); - isEop.isEopPtrs[1] = fromInteger(valueOf(STRADDLE_THRESH_DWORD_WIDTH)) + truncate(convertByteEn2DwordPtr(pendingStream.byteEn)); - end - else if (sendingStream.isLast) begin - isEop.isEop = fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT)); - isEop.isEopPtrs[0] = truncate(convertByteEn2DwordPtr(sendingStream.byteEn)); - end - - let sideBand = genRQSideBand(isEop, isSop, sideBandByteEnA, sideBandByteEnB); - let axiStream = ReqReqAxiStream { - tData : sendingStream.data | pendingStream.data, - tKeep : -1, - tLast : False, - tUser : pack(sideBand) - }; - axiStreamOutFifo.enq(axiStream); - end - endrule - - interface dataAFifoIn = shiftA.streamFifoIn; - interface byteEnAFifoIn = convertFifoToFifoIn(byteEnAFifo); - interface dataBFifoIn = shiftB.streamFifoIn; - interface byteEnBFifoIn = convertFifoToFifoIn(byteEnBFifo); - interface axiStreamFifoOut = convertFifoToFifoOut(axiStreamOutFifo); -endmodule - // Generate RequesterRequest descriptor interface RqDescriptorGenerator; interface FifoIn#(DmaExtendRequest) exReqFifoIn; @@ -558,16 +328,13 @@ endmodule interface RequesterRequestCore; interface FifoIn#(DataStream) dataFifoIn; interface FifoIn#(DmaRequest) wrReqFifoIn; - interface FifoIn#(DmaRequest) rdReqFifoIn; interface FifoOut#(DataStream) dataFifoOut; interface FifoOut#(SideBandByteEn) byteEnFifoOut; - interface Get#(Bool) isWriteDone; endinterface module mkRequesterRequestCore(RequesterRequestCore); FIFOF#(DataStream) dataInFifo <- mkFIFOF; FIFOF#(DmaRequest) wrReqInFifo <- mkFIFOF; - FIFOF#(DmaRequest) rdReqInFifo <- mkFIFOF; FIFOF#(DataStream) dataOutFifo <- mkFIFOF; FIFOF#(SideBandByteEn) byteEnOutFifo <- mkFIFOF; @@ -633,14 +400,6 @@ module mkRequesterRequestCore(RequesterRequestCore); interface dataFifoIn = convertFifoToFifoIn(dataInFifo); interface wrReqFifoIn = convertFifoToFifoIn(wrReqInFifo); - interface rdReqFifoIn = convertFifoToFifoIn(rdReqInFifo); interface dataFifoOut = convertFifoToFifoOut(dataOutFifo); interface byteEnFifoOut = convertFifoToFifoOut(byteEnOutFifo); - - // TODO: how to give the - interface Get isWriteDone; - method ActionValue#(Bool) get(); - return True; - endmethod - endinterface endmodule diff --git a/src/StreamUtils.bsv b/src/StreamUtils.bsv index f08896a..be8f49f 100755 --- a/src/StreamUtils.bsv +++ b/src/StreamUtils.bsv @@ -16,11 +16,9 @@ typedef struct { DataBytePtr bytePtr; } StreamWithPtr deriving(Bits, Bounded, Eq, FShow); -interface StreamConcat; - interface FifoIn#(DataStream) inputStreamFirstFifoIn; - interface FifoIn#(DataStream) inputStreamSecondFifoIn; - interface FifoOut#(DataStream) outputStreamFifoOut; - interface FifoOut#(DataBytePtr) outputBytePtrFifoOut; +interface StreamPipe; + interface FifoIn#(DataStream) streamFifoIn; + interface FifoOut#(DataStream) streamFifoOut; endinterface interface StreamSplit; @@ -33,6 +31,10 @@ function Bool isByteEnZero(ByteEn byteEn); return !unpack(byteEn[0]); endfunction +function Bool isByteEnFull(ByteEn byteEn); + return unpack(byteEn[valueOf(BYTE_EN_WIDTH)-1]); +endfunction + function DataStream getEmptyStream (); return DataStream { data: 0, @@ -119,120 +121,8 @@ function Tuple2#(StreamWithPtr, StreamWithPtr) getConcatStream (StreamWithPtr st return tuple2(concatStreamWithPtr, remainStreamWithPtr); endfunction -(* synthesize *) -module mkStreamConcat (StreamConcat); - - FIFOF#(DataStream) inputFifoA <- mkFIFOF; - FIFOF#(DataStream) inputFifoB <- mkFIFOF; - FIFOF#(DataStream) outputFifo <- mkFIFOF; - FIFOF#(DataBytePtr) bytePtrFifo <- mkFIFOF; - - FIFOF#(StreamWithPtr) prepareFifoA <- mkFIFOF; - FIFOF#(StreamWithPtr) prepareFifoB <- mkFIFOF; - - - Reg#(Bool) hasRemainReg <- mkReg(False); - Reg#(Bool) hasLastRemainReg <- mkReg(False); - Reg#(Bool) isStreamAEndReg <- mkReg(False); - - Reg#(StreamWithPtr) remainStreamWpReg <- mkRegU; - - // Pipeline stage 1: get the bytePtr of each stream - rule prepareStreamA; - let streamA = inputFifoA.first; - inputFifoA.deq; - let bytePtr = convertByteEn2BytePtr(streamA.byteEn); - prepareFifoA.enq(StreamWithPtr { - stream: streamA, - bytePtr: bytePtr - }); - endrule - - rule prepareStreamB; - let streamB = inputFifoB.first; - inputFifoB.deq; - let bytePtr = convertByteEn2BytePtr(streamB.byteEn); - prepareFifoB.enq(StreamWithPtr { - stream: streamB, - bytePtr: bytePtr - }); - endrule - - // Pipeline stage 2: concat the stream frame - rule concatStream; - // Only the remain data - if (hasRemainReg && hasLastRemainReg) begin - outputFifo.enq(remainStreamWpReg.stream); - bytePtrFifo.enq(remainStreamWpReg.bytePtr); - hasRemainReg <= False; - isStreamAEndReg <= False; - end - // StreamB or streamB + the remain data - else if (prepareFifoB.notEmpty && isStreamAEndReg) begin - let streamBWp = prepareFifoB.first; - prepareFifoB.deq; - streamBWp.stream.isFirst = False; - if (hasRemainReg) begin - let {concatStreamWp, remainStreamWp} = getConcatStream(remainStreamWpReg, streamBWp); - hasRemainReg <= !isByteEnZero(remainStreamWp.stream.byteEn); - hasLastRemainReg <= streamBWp.stream.isLast; - remainStreamWpReg <= remainStreamWp; - outputFifo.enq(concatStreamWp.stream); - bytePtrFifo.enq(concatStreamWp.bytePtr); - end - else begin - outputFifo.enq(streamBWp.stream); - bytePtrFifo.enq(streamBWp.bytePtr); - end - // reset isStreamAEnd to False when the whole concat end - isStreamAEndReg <= streamBWp.stream.isLast ? False : isStreamAEndReg; - end - // StreamA or StreamA + first StreamB - else if (prepareFifoA.notEmpty) begin - let streamAWp = prepareFifoA.first; - // Only StreamA frame - if (!streamAWp.stream.isLast) begin - outputFifo.enq(streamAWp.stream); - bytePtrFifo.enq(streamAWp.bytePtr); - prepareFifoA.deq; - isStreamAEndReg <= False; - end - // the last StreamA + the first StreamB - else if(streamAWp.stream.isLast && prepareFifoB.notEmpty) begin - let streamBWp = prepareFifoB.first; - let {concatStreamWp, remainStreamWp} = getConcatStream(streamAWp, streamBWp); - hasRemainReg <= !isByteEnZero(remainStreamWp.stream.byteEn); - hasLastRemainReg <= streamBWp.stream.isLast; - remainStreamWpReg <= remainStreamWp; - // If streamB.isLast, reset isStreamAEnd; otherwise assert isStreamAEnd - isStreamAEndReg <= streamBWp.stream.isLast ? False : True; - outputFifo.enq(concatStreamWp.stream); - bytePtrFifo.enq(concatStreamWp.bytePtr); - prepareFifoA.deq; - prepareFifoB.deq; - end - // Do nothing - else begin - // - !prepareB.notEmpty ==> waiting StreamB for concatation - end - end - // Do nothing - else begin - // - prepareB.notEmpty && !isStreamAEnd ==> waiting streamAEnd asserts - // - !prepareB.notEmpty && !prepareA.notEmpty ==> waiting new data - end - endrule - - interface inputStreamFirstFifoIn = convertFifoToFifoIn(inputFifoA); - interface inputStreamSecondFifoIn = convertFifoToFifoIn(inputFifoB); - interface outputStreamFifoOut = convertFifoToFifoOut(outputFifo); - interface outputBytePtrFifoOut = convertFifoToFifoOut(bytePtrFifo); - -endmodule - typedef 3 STREAM_SPLIT_LATENCY; -(* synthesize *) module mkStreamSplit(StreamSplit ifc); Reg#(StreamSize) streamByteCntReg <- mkReg(0); @@ -346,14 +236,9 @@ module mkStreamSplit(StreamSplit ifc); endmodule -interface StreamShift; - interface FifoIn#(DataStream) streamFifoIn; - interface FifoOut#(DataStream) streamFifoOut; -endinterface - typedef 2 STREAM_SHIFT_LATENCY; -module mkStreamShift#(DataBytePtr offset)(StreamShift); +module mkStreamShift#(DataBytePtr offset)(StreamPipe); FIFOF#(DataStream) inFifo <- mkFIFOF; FIFOF#(DataStream) outFifo <- mkFIFOF; @@ -527,4 +412,127 @@ module mkStreamShiftAlignToDw#(DataBytePtr offset)(StreamShiftAlignToDw); interface reqFifoIn = convertFifoToFifoIn(reqInFifo); interface dataFifoOut = convertFifoToFifoOut(dataOutFifo); interface byteEnFifoOut = convertFifoToFifoOut(byteEnOutFifo); -endmodule \ No newline at end of file +endmodule + +// Remove the first N Bytes of a stream +module mkStreamHeaderRemove#(DataBytePtr headerLen)(StreamPipe); + FIFOF#(DataStream) inFifo <- mkFIFOF; + FIFOF#(DataStream) outFifo <- mkFIFOF; + + Reg#(DataStream) remainStreamReg <- mkReg(getEmptyStream); + DataBitPtr headerBitLen = zeroExtend(headerLen) >> valueOf(BYTE_WIDTH_WIDTH); + + rule removeHeader; + if (hasLastRemainReg) begin + outFifo.enq(remainStreamReg); + hasLastRemainReg <= False; + remainStreamReg <= getEmptyStream; + end + else begin + let stream = inFifo.first; + inFifo.deq; + let resStream = DataStream { + data : stream.data >> headerBitLen, + byteEn : stream.byteEn >> headerLen, + isFirst : stream.isFirst, + isLast : stream.isLast + }; + let removeStream = DataStream { + data : zeroExtend(Data'(stream.data[headerBitLen-1:0])), + byteEn : zeroExtend(ByteEn'(stream.byteEn[headerLen-1:0])), + isFirst : False, + isLast : False + }; + let newStream = DataStream { + data : remainStreamReg.data | stream.data << headerBitLen, + byteEn : remainStreamReg.byteEn | stream.data << headerLen, + isFirst : stream.isFirst, + isLast : stream.isLast + }; + if (stream.isLast && stream.isFirst) begin + outFifo.enq(resStream); + end + else if (stream.isFirst) begin + remainStreamReg <= resStream; + end + else begin + outFifo.enq(newStream); + if (stream.isLast) begin + if(isByteEnZero(resStream)) begin + remainStreamReg <= getEmptyStream; + hasLastRemainReg <= False; + end + else begin + remainStreamReg <= resStream; + hasLastRemainReg <= True; + end + end + end + end + endrule + + interface streamFifoIn = convertFifoToFifoIn(inFifo); + interface streamFifoOut = convertFifoToFifoOut(outFifo); +endmodule + +// Only support one not full dataStream between streams +module mkStreamReshape(StreamPipe); + FIFOF#(DataStream) inFifo <- mkFIFOF; + FIFOF#(DataStream) outFifo <- mkFIFOF; + + //During Stream Varibles + Reg#(DataBytePtr) rmBytePtrReg <- mkReg(0); + Reg#(DataBitPtr) rmBitPtrReg <- mkReg(0); + Reg#(DataBytePtr) rsBytePtrReg <- mkReg(0); + Reg#(DataBitPtr) rsBitPtrReg <- mkReg(0); + Reg#(Bool) isDetectedReg <- mkReg(False); + Reg#(DataStream) remainStreamReg <- mkReg(getEmptyStream); + + rule shape; + if (hasLastRemainReg) begin + outFifo.enq(hasLastRemainReg); + isDetectedReg <= False; + end + else begin + let stream = inFifo.first; + inFifo.deq; + Bool isDetect = !stream.isLast && !isByteEnFull(stream.byteEn) && (!isDetectedReg); + if (isDetect) begin + let bytePtr = convertByteEn2BytePtr(stream.byteEn); + DataBitPtr bitPtr = zeroExtend(bytePtr) >> valueOf(BYTE_WIDTH_WIDTH); + rmBytePtrReg <= bytePtr; + rmBitPtrReg <= bitPtr; + rsBytePtrReg <= getMaxBytePtr - bytePtr; + rsBitPtrReg <= getMaxBitPtr - bitPtr; + remainStreamReg <= stream; + isDetectedReg <= True; + end + else begin + if (isDetectedReg) begin + let remainStream = DataStream { + data : stream.data >> rsBitPtrReg, + byteEn : stream.byteEn >> rsBytePtrReg, + isFirst : stream.isFirst, + isLast : True + }; + remainStreamReg <= remainStream; + isLast = isByteEnZero(remainStream.byteEn); + let outStream = DataStream { + data : (stream.data << rmBitPtrReg) | remainStreamReg.data, + byteEn : (stream.byteEn << rmBytePtrReg) | remainStreamReg.byteEn, + isFirst : remainStreamReg.isFirst, + isLast : isLast + }; + outFifo.enq(outStream); + isDetectedReg <= isLast ? False : isDetectedReg; + end + else begin + ourFifo.enq(stream); + end + end + end + endrule + + interface streamFifoIn = convertFifoToFifoIn(inFifo); + interface streamFifoOut = convertFifoToFifoOut(outFifo); +endmodule diff --git a/test/TestCompletionFifo.bsv b/test/TestCompletionFifo.bsv new file mode 100644 index 0000000..fbb01d4 --- /dev/null +++ b/test/TestCompletionFifo.bsv @@ -0,0 +1,162 @@ +import GetPut::*; +import Counter::*; +import FIFOF::*; +import Randomizable::*; +import LFSR::*; +import Vector::*; + +import SemiFifo::*; +import CompletionFifo::*; +import PrimUtils::*; +import PcieAxiStreamTypes::*; +import DmaTypes::*; + +typedef 6 TEST_CHUNK_NUM; +typedef 16 TEST_SLOT_NUM; + +typedef Bit#(32) TestData; +typedef Bit#(TLog#(TEST_SLOT_NUM)) TestTag; +typedef Bit#(TLog#(TEST_CHUNK_NUM)) TestReq; +typedef Bit#(8) TimeInterval; + +(* doc = "testcase" *) +module mkCompletionFifoTb(Empty); + + CompletionFifo#(TEST_SLOT_NUM, TestData) dut <- mkCompletionFifo(valueOf(TEST_CHUNK_NUM)); + Randomize#(TestReq) reqGen <- mkConstrainedRandomizer(1, fromInteger(valueOf(TEST_CHUNK_NUM)-1)); + + FIFOF#(TestTag) tagFifo <- mkSizedFIFOF(valueOf(TEST_SLOT_NUM)); + FIFOF#(Tuple2#(TestTag, TestReq)) reqFifo <- mkSizedFIFOF(valueOf(TEST_SLOT_NUM)); + + Vector#(TEST_SLOT_NUM, Reg#(TestReq)) reqs <- replicateM(mkReg(0)); + Vector#(TEST_SLOT_NUM, Reg#(TestReq)) reqDones <- replicateM(mkReg(0)); + Vector#(TEST_SLOT_NUM, Reg#(Bool)) doneFlags <- replicateM(mkReg(True)); + + Reg#(Bool) initReg <- mkReg(False); + Reg#(TestTag) outPtrReg <- mkReg(0); + Reg#(TestData) dataReg <- mkReg(0); + + Reg#(UInt#(32)) sentChunksReg <- mkReg(0); + Reg#(UInt#(32)) recvChunksReg <- mkReg(0); + + rule init if (!initReg); + reqGen.cntrl.init; + initReg <= True; + endrule + + rule genRequest if (initReg); + if (dut.available) begin + let tag <- dut.reserve.get; + tagFifo.enq(tag); + let reqLen <- reqGen.next; + reqFifo.enq(tuple2(tag, reqLen)); + sentChunksReg <= sentChunksReg + unpack(zeroExtend(reqLen)); + $display("INFO: Gen Tag %h request %h", tag, reqLen); + end + endrule + + rule getResponse if (initReg); + outPtrReg <= outPtrReg == fromInteger(valueOf(TEST_SLOT_NUM)-1) ? 0 : outPtrReg + 1; + if (!doneFlags[outPtrReg]) begin + if (reqDones[outPtrReg] <= reqs[outPtrReg]) begin + reqDones[outPtrReg] <= reqDones[outPtrReg] + 1; + dut.append.enq(tuple2(outPtrReg, zeroExtend(outPtrReg) << valueOf(TLog#(TEST_SLOT_NUM)) | zeroExtend(reqDones[outPtrReg]))); + end + else begin + $display("Debug: set tag %h done, dones %d, req %d", outPtrReg, reqDones[outPtrReg]-1, reqs[outPtrReg]); + dut.complete.put(outPtrReg); + doneFlags[outPtrReg] <= True; + end + end + else begin + if (reqFifo.notEmpty) begin + let {tag, reqLen} = reqFifo.first; + if (outPtrReg == tag) begin + reqDones[outPtrReg] <= 0; + reqs[outPtrReg] <= reqLen; + doneFlags[outPtrReg] <= False; + reqFifo.deq; + end + end + end + endrule + + rule readCompletionFifo if (initReg); + let data = dut.drain.first; + dataReg <= data; + immAssert( + (data > dataReg || dataReg == 0), + "order check @ mkCompletionFifoTb", + $format(data, dataReg) + ); + dut.drain.deq; + recvChunksReg <= recvChunksReg + 1; + $display("Debug: drain from CFifo %h", data); + endrule + + rule testFinish if (initReg); + if (recvChunksReg == sentChunksReg && recvChunksReg > 0) begin + $display("test CompletionFifo end!"); + $finish(); + end + endrule + +endmodule + +module mkSimpleCompletionFifoTb(Empty); + + CompletionFifo#(TEST_SLOT_NUM, TestData) dut <- mkCompletionFifo(valueOf(TEST_CHUNK_NUM)); + FIFOF#(TestTag) tagFifo <- mkSizedFIFOF(valueOf(TEST_SLOT_NUM)); + Reg#(Bool) initReg <- mkReg(False); + Reg#(UInt#(10)) testCntReg <- mkReg(0); + Reg#(UInt#(10)) testOutReg <- mkReg(0); + let testNum = 20; + + + rule init if (!initReg); + initReg <= True; + endrule + + rule genRequest if (initReg && testCntReg <= testNum); + if (dut.available) begin + let tag <- dut.reserve.get; + tagFifo.enq(tag); + $display("INFO: Gen Tag %d", tag); + testCntReg <= testCntReg + 1; + end + endrule + + rule getResponse if (initReg); + let tag = tagFifo.first; + tagFifo.deq; + dut.append.enq(tuple2(tag, zeroExtend(tag)*10)); + dut.complete.put(tag); + endrule + + rule getOrder if (initReg); + let data = dut.drain.first; + dut.drain.deq; + $display("INFO: %d drain %d", testOutReg, data); + testOutReg <= testOutReg + 1; + if (testOutReg == fromInteger(testNum)) begin + $finish(); + end + endrule + +endmodule + +interface CFifoInstTb; + interface Get#(TestTag) reserve; + interface FifoIn#(Tuple2#(TestTag, DataStream)) append; + interface Put#(TestTag) complete; + interface FifoOut#(DataStream) drain; +endinterface + +(* synthesize *) +module mkCompletionFifoInst(CFifoInstTb); + CompletionFifo#(TEST_SLOT_NUM, DataStream) cFifo <- mkCompletionFifo(valueOf(MAX_STREAM_NUM_PER_COMPLETION)); + interface reserve = cFifo.reserve; + interface append = cFifo.append; + interface complete = cFifo.complete; + interface drain = cFifo.drain; +endmodule diff --git a/test/TestDmaCore.bsv b/test/TestDmaCore.bsv index d70fc32..c5ea0c1 100755 --- a/test/TestDmaCore.bsv +++ b/test/TestDmaCore.bsv @@ -1,5 +1,6 @@ import GetPut::*; import Randomizable::*; +import Vector::*; import SemiFifo::*; import PcieAxiStreamTypes::*; @@ -11,6 +12,7 @@ import StreamUtils::*; import ReqRequestCore::*; import DmaRequester::*; import TestStreamUtils::*; +import ReqCompleterCore::*; typedef 100000 CHUNK_PER_EPOCH_TEST_NUM; typedef 64'hFFFFFFFFFFFFFFFF MAX_ADDRESS; @@ -170,3 +172,82 @@ module mkSimpleRequesterRequestTb(Empty); endrule endmodule + +module mkSimpleConvertStraddleAxisToDataStreamTb(Empty); + ConvertStraddleAxisToDataStream dut <- mkConvertStraddleToDataStream; + Reg#(UInt#(32)) testCntReg <- mkReg(0); + Reg#(UInt#(32)) tlpNumReg <- mkReg(2); + + CmplByteCnt testLength = 20; + DmaMemAddr startAddr = fromInteger(valueOf(SIMPLE_TEST_ADDR)); + + rule testInput if (testCntReg < 1); + let desc0 = PcieRequesterCompleteDescriptor { + reserve0 : 0, + attributes : 0, + trafficClass : 0, + reserve1 : 0, + completerId : 123, + tag : 'b01100, + requesterId : 0, + reserve2 : 0, + isPoisoned : False, + status : fromInteger(valueOf(SUCCESSFUL_CMPL)), + dwordCnt : 1, + reserve3 : 0, + isRequestCompleted : True, + isLockedReadCmpl : False, + byteCnt : testLength, + errorcode : 0, + lowerAddr : truncate(startAddr) + }; + let desc1 = desc0; + desc1.lowerAddr = desc0.lowerAddr + truncate(testLength); + desc1.tag = 'b10001; + let stream = generatePsuedoStream(unpack(zeroExtend(testLength)), True, True); + let isSop = PcieTlpCtlIsSopReqCpl { + isSop : fromInteger(valueOf(DOUBLE_TLP_IN_THIS_BEAT)), + isSopPtrs : replicate(0) + }; + isSop.isSopPtrs[0] = fromInteger(valueOf(ISSOP_LANE_0)); + isSop.isSopPtrs[1] = fromInteger(valueOf(ISSOP_LANE_32)); + let isEop = PcieTlpCtlIsEopReqCpl { + isEop : fromInteger(valueOf(DOUBLE_TLP_IN_THIS_BEAT)), + isEopPtrs : replicate(0) + }; + let data0 = stream.data << valueOf(DES_RC_DESCRIPTOR_WIDTH) | zeroExtend(pack(desc0)); + let data1 = stream.data << valueOf(DES_RC_DESCRIPTOR_WIDTH) | zeroExtend(pack(desc1)); + let byteEn = stream.byteEn << valueOf(TDiv#(DES_RC_DESCRIPTOR_WIDTH, BYTE_WIDTH)); + let sideBand = PcieRequesterCompleteSideBandFrame { + parity : 0, + discontinue : False, + isEop : isEop, + isSop : isSop, + dataByteEn : byteEn | byteEn << valueOf(STRADDLE_THRESH_BYTE_WIDTH) + }; + + let axiStream = ReqCmplAxiStream { + tData : data0 | data1 << valueOf(STRADDLE_THRESH_BIT_WIDTH), + tKeep : -1, + tLast : True, + tUser : pack(sideBand) + }; + dut.axiStreamFifoIn.enq(axiStream); + testCntReg <= testCntReg + 1; + endrule + + rule testOutput; + for (Integer pathIdx = 0; pathIdx < valueOf(DMA_PATH_NUM); pathIdx = pathIdx + 1) begin + if (dut.dataFifoOut[pathIdx].notEmpty) begin + let stream = dut.dataFifoOut[pathIdx].first; + dut.dataFifoOut[pathIdx].deq; + $display(fshow(stream)); + end + end + endrule + +endmodule + + + + diff --git a/test/TestStreamUtils.bsv b/test/TestStreamUtils.bsv index ba0154e..a8e38ff 100755 --- a/test/TestStreamUtils.bsv +++ b/test/TestStreamUtils.bsv @@ -77,110 +77,6 @@ module mkRandomStreamSize(StreamSize seed, StreamSizeBitPtr maxSizeBitPtr, Rando endmethod endmodule -(* doc = "testcase" *) -module mkStreamConcatTb(Empty); - - StreamConcat dut <- mkStreamConcat; - - RandomStreamSize streamASizeRandomValue <- mkRandomStreamSize(fromInteger(valueOf(SEED_1)), fromInteger(valueOf(MAX_STREAM_SIZE_PTR))); - RandomStreamSize streamBSizeRandomValue <- mkRandomStreamSize(fromInteger(valueOf(SEED_2)), fromInteger(valueOf(MAX_STREAM_SIZE_PTR))); - - Reg#(StreamSize) streamARemainSizeReg <- mkReg(0); - Reg#(StreamSize) streamBRemainSizeReg <- mkReg(0); - Reg#(StreamSize) concatSizeReg <- mkReg(0); - - FIFOF#(StreamSize) ideaConcatSizeFifo <- mkSizedFIFOF(valueOf(TEST_IDEAL_FIFO_DEPTH)); - - Reg#(Bool) isInitReg <- mkReg(False); - Reg#(UInt#(32)) testCntReg <- mkReg(0); - Reg#(UInt#(32)) testRoundReg <- mkReg(0); - Reg#(UInt#(32)) testFinishCntReg <- mkReg(0); - - Bool logDetailEn = unpack(fromInteger(valueOf(LOG_DETAILS_EN))); - - rule testInit if (!isInitReg); - $display("INFO: start mkStreamConcatTb!"); - isInitReg <= True; - endrule - - rule testInput if (isInitReg && testCntReg < fromInteger(valueOf(TEST_NUM))); - if (testRoundReg == 0) begin - StreamSize sizeA <- streamASizeRandomValue.next; - StreamSize sizeB <- streamBSizeRandomValue.next; - ideaConcatSizeFifo.enq(sizeA + sizeB); - testRoundReg <= (sizeA + sizeB) / getMaxFrameSize(); - - let isLastA = (sizeA <= getMaxFrameSize()); - let isLastB = (sizeB <= getMaxFrameSize()); - let firstSizeA = isLastA ? sizeA : getMaxFrameSize(); - let firstSizeB = isLastB ? sizeB : getMaxFrameSize(); - - dut.inputStreamFirstFifoIn.enq(generatePsuedoStream(firstSizeA, True, isLastA)); - dut.inputStreamSecondFifoIn.enq(generatePsuedoStream(firstSizeB, True, isLastB)); - streamARemainSizeReg <= sizeA - firstSizeA; - streamBRemainSizeReg <= sizeB - firstSizeB; - testCntReg <= testCntReg + 1; - if (logDetailEn) begin - $display("INFO: Add Input of %d Epoch", testCntReg + 1); - $display("INFO: streamASize = %d, streamBSize = %d, ideaSize = %d", sizeA, sizeB, sizeA+sizeB); - end - end - - else if (testRoundReg > 0) begin - if (streamARemainSizeReg > 0 && dut.inputStreamFirstFifoIn.notFull) begin - Bool isLast = streamARemainSizeReg <= getMaxFrameSize(); - StreamSize size = isLast ? streamARemainSizeReg : getMaxFrameSize(); - dut.inputStreamFirstFifoIn.enq(generatePsuedoStream(size, False, isLast)); - streamARemainSizeReg <= streamARemainSizeReg - size; - end - if (streamBRemainSizeReg > 0 && dut.inputStreamSecondFifoIn.notFull) begin - Bool isLast = streamBRemainSizeReg <= getMaxFrameSize(); - StreamSize size = isLast ? streamBRemainSizeReg : getMaxFrameSize(); - dut.inputStreamSecondFifoIn.enq(generatePsuedoStream(size, False, isLast)); - streamBRemainSizeReg <= streamBRemainSizeReg - size; - end - testRoundReg <= testRoundReg - 1; - end - endrule - - rule testOutput; - let outStream = dut.outputStreamFifoOut.first; - StreamSize concatSize = concatSizeReg + unpack(zeroExtend(convertByteEn2BytePtr(outStream.byteEn))); - if (outStream.isLast) begin - let ideaSize = ideaConcatSizeFifo.first; - immAssert( - (concatSize == ideaSize), - "outStream length check @ mkStreamConcatTb::testOutput", - $format("ideaSize = %d, realSize = %d \n", ideaSize, concatSize) - ); - if (logDetailEn) begin - $display("INFO: verify output ideaSize=%d, realSize=%d, ideaLastSize=%d", ideaSize, concatSize, ideaSize%getMaxFrameSize); - end - ideaConcatSizeFifo.deq; - testFinishCntReg <= testFinishCntReg + 1; - concatSizeReg <= 0; - end - else begin - concatSizeReg <= concatSize; - immAssert( - (outStream.data == getPseudoData()), - "outStream Data Check @ mkStreamConcatTb::testOutput", - $format(outStream) - ); - end - dut.outputStreamFifoOut.deq; - dut.outputBytePtrFifoOut.deq; - endrule - - rule testFinish; - if (testFinishCntReg == fromInteger(valueOf(TEST_NUM)-1)) begin - $display("INFO: end mkStreamConcatTb"); - $finish(); - end - endrule - -endmodule - (* doc = "testcase" *) module mkStreamSplitTb(Empty); @@ -287,7 +183,7 @@ endmodule module mkStreamShiftTb(Empty); RandomStreamSize streamSizeRandomValue <- mkRandomStreamSize(fromInteger(valueOf(SEED_1)), fromInteger(valueOf(MAX_STREAM_SIZE_PTR))); Vector#(TAdd#(BYTE_EN_WIDTH, 1), FIFOF#(StreamSize)) setSizeFifo <- replicateM(mkSizedFIFOF(10)); - Vector#(TAdd#(BYTE_EN_WIDTH, 1), StreamShift) duts = newVector; + Vector#(TAdd#(BYTE_EN_WIDTH, 1), StreamPipe) duts = newVector; for (DataBytePtr idx = 0; idx <= getMaxBytePtr; idx = idx + 1) begin duts[idx] <- mkStreamShift(idx); end From a84cd316da44fee066811a66f64fae12237b3a71 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Fri, 16 Aug 2024 20:09:40 +0800 Subject: [PATCH 35/53] Reorganize DmaC2HPipe&DmaH2CPipe --- img/dmac.drawio.svg | 22 +- run_one.sh | 2 +- src/DmaC2HPipe.bsv | 266 +++++++++++++++++++++-- src/DmaH2CPipe.bsv | 41 +--- src/DmaTypes.bsv | 47 +++- src/{ReqRequestCore.bsv => DmaUtils.bsv} | 108 ++------- src/DmaWrapper.bsv | 5 +- src/PcieAdapter.bsv | 103 ++++++++- src/PcieTypes.bsv | 2 + src/ReqCompleterCore.bsv | 184 ---------------- src/StreamUtils.bsv | 35 +-- test/TestDmaCore.bsv | 70 ++---- test/TestStreamUtils.bsv | 2 +- 13 files changed, 471 insertions(+), 416 deletions(-) rename src/{ReqRequestCore.bsv => DmaUtils.bsv} (77%) mode change 100755 => 100644 delete mode 100644 src/ReqCompleterCore.bsv diff --git a/img/dmac.drawio.svg b/img/dmac.drawio.svg index 4b7cd50..0a7cc01 100644 --- a/img/dmac.drawio.svg +++ b/img/dmac.drawio.svg @@ -1,4 +1,4 @@ - + @@ -43,14 +43,14 @@
- ReqRequstCore + DmaC2HWrite
- ReqRequstCore + DmaC2HWrite
@@ -113,14 +113,14 @@
- ReqCompleteCore + DmaC2HRead
- ReqCompleteCore + DmaC2HRead
@@ -169,7 +169,7 @@ TX
- convertDataStreamToAxis + RequsterPcieAdapter
@@ -224,7 +224,7 @@ RX
- convertAxisToDataStream + CompleterPcieAdapter
@@ -314,14 +314,14 @@
- ReqRequstCore + DmaC2HWrite
- ReqRequstCore + DmaC2HWrite @@ -384,14 +384,14 @@
- ReqCompleteCore + DmaC2HRead
- ReqCompleteCore + DmaC2HRead diff --git a/run_one.sh b/run_one.sh index a1389d3..f049c0d 100755 --- a/run_one.sh +++ b/run_one.sh @@ -13,7 +13,7 @@ TEST_LOG=run.log TEST_DIR=test cd $TEST_DIR truncate -s 0 $TEST_LOG -FILES=`ls TestCompletionFifo.bsv` +FILES=`ls TestDmaCore.bsv` ########################################################################### for FILE in $FILES; do # echo $FILE diff --git a/src/DmaC2HPipe.bsv b/src/DmaC2HPipe.bsv index b501018..60b5ebb 100644 --- a/src/DmaC2HPipe.bsv +++ b/src/DmaC2HPipe.bsv @@ -2,12 +2,14 @@ import FIFOF::*; import GetPut::*; import SemiFifo::*; +import PrimUtils::*; import StreamUtils::*; import PcieTypes::*; +import DmaTypes::*; import PcieAxiStreamTypes::*; import PcieDescriptorTypes::*; -import ReqRequestCore::*; -import DmaTypes::*; +import DmaUtils::*; +import CompletionFifo::*; // TODO : change the PCIe Adapter Ifc to TlpData and TlpHeader, // move the module which convert TlpHeader to IP descriptor from dma to adapter @@ -26,9 +28,9 @@ interface DmaC2HPipe; endinterface // Single Path module -module mkDmaC2HPipe(DmaC2HPipe); - RequesterRequestCore requestCore <- mkRequesterRequestCore; - RequesterCompleteCore completeCore <- mkRequesterCompleteCore; +module mkDmaC2HPipe#(DmaPathNo pathIdx)(DmaC2HPipe); + C2HReadCore readCore <- mkC2HReadCore(pathIdx); + C2HWriteCore writeCore <- mkC2HWriteCore; FIFOF#(DataStream) dataInFifo <- mkFIFOF; FIFOF#(DmaRequest) reqInFifo <- mkFIFOF; @@ -39,32 +41,268 @@ module mkDmaC2HPipe(DmaC2HPipe); let req = reqInFifo.first; reqInFifo.deq; if (req.isWrite) begin - requestCore.wrReqFifoIn.enq(req); + writeCore.wrReqFifoIn.enq(req); end else begin - completeCore.rdReqFifoIn.enq(req); + readCore.rdReqFifoIn.enq(req); end + $display("SIM INFO @ mkDmaC2HPipe%d: New Request isWrite:%b startAddr:%h length:%h", + pathIdx, pack(req.isWrite), req.startAddr, req.length); endrule rule dataPipe; - let stream = dataInFifo.firts; + let stream = dataInFifo.first; dataInFifo.deq; - requestCore.dataFifoIn.enq(stream); + writeCore.dataFifoIn.enq(stream); endrule rule tlpOutMux; - + if (readCore.tlpFifoOut.notEmpty) begin + tlpOutFifo.enq(readCore.tlpFifoOut.first); + tlpSideBandFifo.enq(readCore.tlpSideBandFifoOut.first); + readCore.tlpSideBandFifoOut.deq; + readCore.tlpFifoOut.deq; + end + else begin + if (writeCore.tlpSideBandFifoOut.notEmpty) begin + tlpSideBandFifo.enq(writeCore.tlpSideBandFifoOut.first); + writeCore.tlpSideBandFifoOut.deq; + end + tlpOutFifo.enq(writeCore.tlpFifoOut.first); + writeCore.tlpFifoOut.deq; + end endrule // User Logic Ifc interface wrDataFifoIn = convertFifoToFifoIn(dataInFifo); interface reqFifoIn = convertFifoToFifoIn(reqInFifo); - interface rdDataFifoOut = completeCore.dataFifoOut; + interface rdDataFifoOut = readCore.dataFifoOut; // Pcie Adapter Ifc - interface tlpDataFifoOut = requestCore.dataFifoOut; - interface tlpSideBandFifoOut = requestCore.byteEnFifoOut; - interface tlpDataFifoIn = completeCore.dataFifoIn; + interface tlpDataFifoOut = convertFifoToFifoOut(tlpOutFifo); + interface tlpSideBandFifoOut = convertFifoToFifoOut(tlpSideBandFifo); + interface tlpDataFifoIn = readCore.tlpFifoIn; // TODO: Cfg Ifc endmodule +interface C2HReadCore; + // User Logic Ifc + interface FifoOut#(DataStream) dataFifoOut; + interface FifoIn#(DmaRequest) rdReqFifoIn; + // PCIe IP Ifc, connect to Requester Adapter + interface FifoIn#(StraddleStream) tlpFifoIn; + interface FifoOut#(DataStream) tlpFifoOut; + interface FifoOut#(SideBandByteEn) tlpSideBandFifoOut; +endinterface + +module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); + FIFOF#(StraddleStream) tlpInFifo <- mkFIFOF; + FIFOF#(DmaRequest) reqInFifo <- mkFIFOF; + FIFOF#(DataStream) tlpOutFifo <- mkFIFOF; + FIFOF#(SideBandByteEn) tlpByteEnFifo <- mkFIFOF; + + FIFOF#(SlotToken) tagFifo <- mkSizedFIFOF(valueOf(TAdd#(1, STREAM_HEADER_REMOVE_LATENCY))); + FIFOF#(Bool) completedFifo <- mkSizedFIFOF(valueOf(TAdd#(1, STREAM_HEADER_REMOVE_LATENCY))); + + StreamPipe descRemove <- mkStreamHeaderRemove(fromInteger(valueOf(DES_RC_DESCRIPTOR_WIDTH))); + StreamPipe streamReshape <- mkStreamReshape; + ChunkCompute chunkSplitor <- mkChunkComputer(DMA_RX); + CompletionFifo#(SLOT_PER_PATH, DataStream) cBuffer <- mkCompletionFifo(valueOf(MAX_STREAM_NUM_PER_COMPLETION)); + + Reg#(Bool) hasReadOnce <- mkReg(False); + + // Pipeline stage 1: convert StraddleStream to DataStream, may cost 2 cycle for one StraddleStream + rule convertStraddleToDataStream; + let sdStream = tlpInFifo.first; + let stream = getEmptyStream; + if (sdStream.isDoubleFrame) begin + PcieTlpCtlIsSopPtr isSopPtr = 0; + if (hasReadOnce) begin + tlpInFifo.deq; + hasReadOnce <= False; + isSopPtr = 1; + end + else begin + hasReadOnce <= True; + end + stream = DataStream { + data : getStraddleData(isSopPtr, sdStream.data), + byteEn : getStraddleByteEn(isSopPtr, sdStream.byteEn), + isFirst : sdStream.isFirst[isSopPtr], + isLast : sdStream.isLast[isSopPtr] + }; + let tag = sdStream.tag[isSopPtr]; + tagFifo.enq(tag); + end + else begin + tlpInFifo.deq; + hasReadOnce <= False; + stream = DataStream { + data : sdStream.data, + byteEn : sdStream.byteEn, + isFirst : sdStream.isFirst[0], + isLast : sdStream.isLast[0] + }; + let tag = sdStream.tag[0]; + tagFifo.enq(tag); + end + descRemove.streamFifoIn.enq(stream); + endrule + + // Pipeline stage 2: remove the descriptor in the head of each TLP + + // Pipeline stage 3: Buffer the received DataStreams and reorder the, + rule reorderStream; + let stream = descRemove.streamFifoOut.first; + let isCompleted = completedFifo.first; + let tag = tagFifo.first; + descRemove.streamFifoOut.deq; + completedFifo.deq; + tagFifo.deq; + stream.isLast = isCompleted && stream.isLast; + cBuffer.append.enq(tuple2(tag, stream)); + if (stream.isLast) begin + cBuffer.complete.put(tag); + end + endrule + + // Pipeline stage 4: there may be bubbles in the first and last DataStream of a TLP because of RCB + // Reshape the DataStream and make sure it is continuous + rule reshapeStream; + let stream = cBuffer.drain.first; + cBuffer.drain.deq; + streamReshape.streamFifoIn.enq(stream); + endrule + + // Pipeline stage 1: split to req to MRRS chunks + rule reqSplit; + let req = reqInFifo.first; + reqInFifo.deq; + chunkSplitor.dmaRequestFifoIn.enq(req); + endrule + + // Pipeline stage 2: generate read descriptor + rule cqDescGen; + let req = chunkSplitor.chunkRequestFifoOut.first; + chunkSplitor.chunkRequestFifoOut.deq; + let token <- cBuffer.reserve.get; + let descriptor = PcieRequesterRequestDescriptor { + forceECRC : False, + attributes : 0, + trafficClass : 0, + requesterIdEn : False, + completerId : 0, + tag : zeroExtend(token) | (zeroExtend(pathIdx) << (valueOf(DES_NONEXTENDED_TAG_WIDTH)-1)), + requesterId : 0, + isPoisoned : False, + reqType : fromInteger(valueOf(MEM_READ_REQ)), + dwordCnt : truncate(req.length >> valueOf(BYTE_DWORD_SHIFT_WIDTH)) + zeroExtend(req.length[0]|req.length[1]), + address : truncate(req.startAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)), + addrType : fromInteger(valueOf(TRANSLATED_ADDR)) + }; + let stream = DataStream { + data : zeroExtend(pack(descriptor)), + byteEn : convertBytePtr2ByteEn(fromInteger(valueOf(DES_RQ_DESCRIPTOR_WIDTH))), + isFirst : True, + isLast : True + }; + tlpOutFifo.enq(stream); + let endAddr = req.startAddr + req.length; + ByteModDWord startAddrOffset = byteModDWord(req.startAddr); + ByteModDWord endAddrOffset = byteModDWord(endAddr); + let firstByteEn = convertDWordOffset2FirstByteEn(startAddrOffset); + let lastByteEn = convertDWordOffset2LastByteEn(endAddrOffset); + tlpByteEnFifo.enq(tuple2(firstByteEn, lastByteEn)); + endrule + + // User Logic Ifc + interface rdReqFifoIn = convertFifoToFifoIn(reqInFifo); + interface dataFifoOut = streamReshape.streamFifoOut; + // PCIe IP Ifc + interface tlpFifoIn = convertFifoToFifoIn(tlpInFifo); + interface tlpFifoOut = convertFifoToFifoOut(tlpOutFifo); + interface tlpSideBandFifoOut = convertFifoToFifoOut(tlpByteEnFifo); +endmodule + +// Core path of a single stream, from (DataStream, DmaRequest) ==> (DataStream, SideBandByteEn) +// split to chunks, align to DWord and add descriptor at the first +interface C2HWriteCore; + // User Logic Ifc + interface FifoIn#(DataStream) dataFifoIn; + interface FifoIn#(DmaRequest) wrReqFifoIn; + // PCIe IP Ifc + interface FifoOut#(DataStream) tlpFifoOut; + interface FifoOut#(SideBandByteEn) tlpSideBandFifoOut; +endinterface + +module mkC2HWriteCore(C2HWriteCore); + FIFOF#(DataStream) dataInFifo <- mkFIFOF; + FIFOF#(DmaRequest) wrReqInFifo <- mkFIFOF; + FIFOF#(DataStream) dataOutFifo <- mkFIFOF; + FIFOF#(SideBandByteEn) byteEnOutFifo <- mkFIFOF; + + ChunkSplit chunkSplit <- mkChunkSplit(DMA_TX); + StreamShiftAlignToDw streamAlign <- mkStreamShiftAlignToDw(fromInteger(valueOf(TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH)))); + RqDescriptorGenerator rqDescGenerator <- mkRqDescriptorGenerator(True); + + // Pipeline stage 1: split the whole write request to chunks, latency = 3 + rule splitToChunks; + let wrStream = dataInFifo.first; + if (wrStream.isFirst && wrReqInFifo.notEmpty) begin + wrReqInFifo.deq; + chunkSplit.reqFifoIn.enq(wrReqInFifo.first); + dataInFifo.deq; + chunkSplit.dataFifoIn.enq(wrStream); + end + else if (!wrStream.isFirst) begin + dataInFifo.deq; + chunkSplit.dataFifoIn.enq(wrStream); + end + endrule + + // Pipeline stage 2: shift the datastream for descriptor adding and dw alignment + rule shiftToAlignment; + if (chunkSplit.chunkReqFifoOut.notEmpty) begin + let chunkReq = chunkSplit.chunkReqFifoOut.first; + chunkSplit.chunkReqFifoOut.deq; + let endAddr = chunkReq.startAddr + chunkReq.length; + let exReq = DmaExtendRequest { + startAddr: chunkReq.startAddr, + endAddr : endAddr, + length : chunkReq.length + }; + streamAlign.reqFifoIn.enq(exReq); + rqDescGenerator.exReqFifoIn.enq(exReq); + end + if (chunkSplit.chunkDataFifoOut.notEmpty) begin + let chunkDataStream = chunkSplit.chunkDataFifoOut.first; + chunkSplit.chunkDataFifoOut.deq; + streamAlign.dataFifoIn.enq(chunkDataStream); + end + endrule + + // Pipeline stage 3: Add descriptor and add to the axis convert module + rule addDescriptorToAxis; + if (streamAlign.byteEnFifoOut.notEmpty) begin + let sideBandByteEn = streamAlign.byteEnFifoOut.first; + streamAlign.byteEnFifoOut.deq; + byteEnOutFifo.enq(sideBandByteEn); + end + if (streamAlign.dataFifoOut.notEmpty) begin + let stream = streamAlign.dataFifoOut.first; + streamAlign.dataFifoOut.deq; + if (stream.isFirst) begin + let descStream = rqDescGenerator.descFifoOut.first; + rqDescGenerator.descFifoOut.deq; + stream.data = stream.data | descStream.data; + stream.byteEn = stream.byteEn | descStream.byteEn; + end + dataOutFifo.enq(stream); + end + endrule + + interface dataFifoIn = convertFifoToFifoIn(dataInFifo); + interface wrReqFifoIn = convertFifoToFifoIn(wrReqInFifo); + interface tlpFifoOut = convertFifoToFifoOut(dataOutFifo); + interface tlpSideBandFifoOut = convertFifoToFifoOut(byteEnOutFifo); +endmodule diff --git a/src/DmaH2CPipe.bsv b/src/DmaH2CPipe.bsv index 0671074..5d451c8 100644 --- a/src/DmaH2CPipe.bsv +++ b/src/DmaH2CPipe.bsv @@ -13,32 +13,6 @@ typedef 2 IDEA_CC_CSR_DWORD_CNT; typedef 4 IDEA_BYTE_CNT_OF_CSR; typedef 4 IDEA_FIRST_BE_HIGH_VALID_PTR_OF_CSR; -typedef 64 CMPL_NPREQ_INFLIGHT_NUM; -typedef 20 CMPL_NPREQ_WAITING_CLKS; -typedef 2'b11 NP_CREDIT_INCREMENT; -typedef 2'b00 NP_CREDIT_NOCHANGE; - -typedef 'h1F IDEA_CQ_TKEEP_OF_CSR; -typedef 'hF IDEA_CC_TKEEP_OF_CSR; - -typedef struct { - DmaCsrAddr addr; - DmaCsrValue value; -} CsrWriteReq deriving(Bits, Eq, Bounded); - -instance FShow#(CsrWriteReq); - function Fmt fshow(CsrWriteReq wrReq); - return ($format("> valueOf(STRADDLE_THRESH_BIT_WIDTH); + end + return sData; +endfunction + +function ByteEn getStraddleByteEn(PcieTlpCtlIsSopPtr isSopPtr, ByteEn byteEn); + ByteEn sByteEn = 0; + if (isSopPtr == fromInteger(valueOf(ISSOP_LANE_0))) begin + sByteEn = zeroExtend(ByteEn'(byteEn[valueOf(STRADDLE_THRESH_BYTE_WIDTH)-1:0])); + end + else begin + sByteEn = byteEn >> valueOf(STRADDLE_THRESH_BYTE_WIDTH); + end + return sByteEn; +endfunction + typedef 2 DMA_PATH_NUM; -typedef 2 PCIE_STRADDLE_NUM; // set straddle of RC and RQ same in the Xilinx IP GUI typedef TAdd#(1, TLog#(DMA_PATH_NUM)) DMA_PATH_WIDTH; typedef Bit#(DMA_PATH_WIDTH) DmaPathNo; diff --git a/src/ReqRequestCore.bsv b/src/DmaUtils.bsv old mode 100755 new mode 100644 similarity index 77% rename from src/ReqRequestCore.bsv rename to src/DmaUtils.bsv index bf5a60c..bb420d5 --- a/src/ReqRequestCore.bsv +++ b/src/DmaUtils.bsv @@ -10,6 +10,7 @@ import PrimUtils::*; import StreamUtils::*; import PcieDescriptorTypes::*; + typedef Bit#(BUS_BOUNDARY_WIDTH) PcieTlpMaxMaxPayloadSize; typedef Bit#(TLog#(BUS_BOUNDARY_WIDTH)) PcieTlpSizeWidth; @@ -27,6 +28,7 @@ typedef struct { DmaMemAddr firstChunkLen; } ChunkRequestFrame deriving(Bits, Eq); +// Split the input DmaRequest Info MRRS aligned chunkReqs interface ChunkCompute; interface FifoIn#(DmaRequest) dmaRequestFifoIn; interface FifoOut#(DmaRequest) chunkRequestFifoOut; @@ -76,8 +78,9 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); if (totalLenRemainReg <= tlpMaxSize) begin isSplittingReg <= False; outputFifo.enq(DmaRequest { - startAddr: newChunkPtrReg, - length: totalLenRemainReg + startAddr : newChunkPtrReg, + length : totalLenRemainReg, + isWrite : False }); splitFifo.deq; totalLenRemainReg <= 0; @@ -85,8 +88,9 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); else begin isSplittingReg <= True; outputFifo.enq(DmaRequest { - startAddr: newChunkPtrReg, - length: tlpMaxSize + startAddr : newChunkPtrReg, + length : tlpMaxSize, + isWrite : False }); newChunkPtrReg <= newChunkPtrReg + tlpMaxSize; totalLenRemainReg <= totalLenRemainReg - tlpMaxSize; @@ -97,8 +101,9 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); Bool isSplittingNextCycle = (remainderLength > 0); isSplittingReg <= isSplittingNextCycle; outputFifo.enq(DmaRequest { - startAddr: splitRequest.dmaRequest.startAddr, - length: splitRequest.firstChunkLen + startAddr : splitRequest.dmaRequest.startAddr, + length : splitRequest.firstChunkLen, + isWrite : False }); if (!isSplittingNextCycle) begin splitFifo.deq; @@ -185,7 +190,8 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); firstChunkSplitor.splitLocationFifoIn.enq(unpack(truncate(firstChunkLen))); let firstReq = DmaRequest { startAddr : request.startAddr, - length : firstChunkLen + length : firstChunkLen, + isWrite : request.isWrite }; firstReqPipeFifo.enq(firstReq); firstChunkSplitor.inputStreamFifoIn.enq(stream); @@ -238,7 +244,8 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); remainLenReg <= 0; let chunkReq = DmaRequest { startAddr: nextStartAddrReg, - length : remainLenReg + length : remainLenReg, + isWrite : True }; reqOutFifo.enq(chunkReq); end @@ -247,7 +254,8 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); remainLenReg <= remainLenReg - tlpMaxSizeReg; let chunkReq = DmaRequest { startAddr: nextStartAddrReg, - length : tlpMaxSizeReg + length : tlpMaxSizeReg, + isWrite : True }; reqOutFifo.enq(chunkReq); end @@ -276,8 +284,6 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); endinterface endmodule -typedef 3 BYTEEN_INFIFO_DEPTH; - // Generate RequesterRequest descriptor interface RqDescriptorGenerator; interface FifoIn#(DmaExtendRequest) exReqFifoIn; @@ -323,83 +329,3 @@ module mkRqDescriptorGenerator#(Bool isWrite)(RqDescriptorGenerator); interface descFifoOut = convertFifoToFifoOut(descOutFifo); endmodule -// Core path of a single stream, from (DataStream, DmaRequest) ==> (DataStream, SideBandByteEn) -// split to chunks, align to DWord and add descriptor at the first -interface RequesterRequestCore; - interface FifoIn#(DataStream) dataFifoIn; - interface FifoIn#(DmaRequest) wrReqFifoIn; - interface FifoOut#(DataStream) dataFifoOut; - interface FifoOut#(SideBandByteEn) byteEnFifoOut; -endinterface - -module mkRequesterRequestCore(RequesterRequestCore); - FIFOF#(DataStream) dataInFifo <- mkFIFOF; - FIFOF#(DmaRequest) wrReqInFifo <- mkFIFOF; - FIFOF#(DataStream) dataOutFifo <- mkFIFOF; - FIFOF#(SideBandByteEn) byteEnOutFifo <- mkFIFOF; - - ChunkSplit chunkSplit <- mkChunkSplit(DMA_TX); - StreamShiftAlignToDw streamAlign <- mkStreamShiftAlignToDw(fromInteger(valueOf(TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH)))); - RqDescriptorGenerator rqDescGenerator <- mkRqDescriptorGenerator(True); - - // Pipeline stage 1: split the whole write request to chunks, latency = 3 - rule splitToChunks; - let wrStream = dataInFifo.first; - if (wrStream.isFirst && wrReqInFifo.notEmpty) begin - wrReqInFifo.deq; - chunkSplit.reqFifoIn.enq(wrReqInFifo.first); - dataInFifo.deq; - chunkSplit.dataFifoIn.enq(wrStream); - end - else if (!wrStream.isFirst) begin - dataInFifo.deq; - chunkSplit.dataFifoIn.enq(wrStream); - end - endrule - - // Pipeline stage 2: shift the datastream for descriptor adding and dw alignment - rule shiftToAlignment; - if (chunkSplit.chunkReqFifoOut.notEmpty) begin - let chunkReq = chunkSplit.chunkReqFifoOut.first; - chunkSplit.chunkReqFifoOut.deq; - let endAddr = chunkReq.startAddr + chunkReq.length; - let exReq = DmaExtendRequest { - startAddr: chunkReq.startAddr, - endAddr : endAddr, - length : chunkReq.length - }; - streamAlign.reqFifoIn.enq(exReq); - rqDescGenerator.exReqFifoIn.enq(exReq); - end - if (chunkSplit.chunkDataFifoOut.notEmpty) begin - let chunkDataStream = chunkSplit.chunkDataFifoOut.first; - chunkSplit.chunkDataFifoOut.deq; - streamAlign.dataFifoIn.enq(chunkDataStream); - end - endrule - - // Pipeline stage 3: Add descriptor and add to the axis convert module - rule addDescriptorToAxis; - if (streamAlign.byteEnFifoOut.notEmpty) begin - let sideBandByteEn = streamAlign.byteEnFifoOut.first; - streamAlign.byteEnFifoOut.deq; - byteEnOutFifo.enq(sideBandByteEn); - end - if (streamAlign.dataFifoOut.notEmpty) begin - let stream = streamAlign.dataFifoOut.first; - streamAlign.dataFifoOut.deq; - if (stream.isFirst) begin - let descStream = rqDescGenerator.descFifoOut.first; - rqDescGenerator.descFifoOut.deq; - stream.data = stream.data | descStream.data; - stream.byteEn = stream.byteEn | descStream.byteEn; - end - dataOutFifo.enq(stream); - end - endrule - - interface dataFifoIn = convertFifoToFifoIn(dataInFifo); - interface wrReqFifoIn = convertFifoToFifoIn(wrReqInFifo); - interface dataFifoOut = convertFifoToFifoOut(dataOutFifo); - interface byteEnFifoOut = convertFifoToFifoOut(byteEnOutFifo); -endmodule diff --git a/src/DmaWrapper.bsv b/src/DmaWrapper.bsv index c106352..7f4ea90 100755 --- a/src/DmaWrapper.bsv +++ b/src/DmaWrapper.bsv @@ -23,7 +23,10 @@ endinterface // TODO : connect Configurator to other modules (* synthesize *) module mkDmaController(DmaController); - Vector#(DMA_PATH_NUM, DmaC2HPipe) c2hPipes <- replicateM(mkDmaC2HPipe); + Vector#(DMA_PATH_NUM, DmaC2HPipe) c2hPipes = newVector; + for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1) begin + c2hPipes[pathIdx] <- mkDmaC2HPipe(pathIdx); + end DmaH2CPipe h2cPipe <- mkDmaH2cPipe; RequesterAxiStreamAdapter reqAdapter <- mkRequesterAxiStreamAdapter; diff --git a/src/PcieAdapter.bsv b/src/PcieAdapter.bsv index cd5aee1..a0b08c5 100644 --- a/src/PcieAdapter.bsv +++ b/src/PcieAdapter.bsv @@ -11,11 +11,21 @@ import StreamUtils::*; import PcieDescriptorTypes::*; import CompletionFifo::*; +typedef 64 CMPL_NPREQ_INFLIGHT_NUM; +typedef 20 CMPL_NPREQ_WAITING_CLKS; +typedef 2'b11 NP_CREDIT_INCREMENT; +typedef 2'b00 NP_CREDIT_NOCHANGE; + +typedef 3 BYTEEN_INFIFO_DEPTH; + +typedef 'h1F IDEA_CQ_TKEEP_OF_CSR; +typedef 'hF IDEA_CC_TKEEP_OF_CSR; + // Support Straddle in RQ/RC interface RequesterAxiStreamAdapter; // Dma To Adapter DataStreams interface Vector#(DMA_PATH_NUM, FifoIn#(DataStream)) dmaDataFifoIn; - interface Vector#(DMA_PATH_NUM, FIfoIn#(SideBandByteEn)) dmaSideBandFifoIn; + interface Vector#(DMA_PATH_NUM, FifoIn#(SideBandByteEn)) dmaSideBandFifoIn; // Adapter To Dma StraddleStreams, which may contains 2 TLP interface Vector#(DMA_PATH_NUM, FifoOut#(StraddleStream)) dmaDataFifoOut; // C2H RQ AxiStream Master @@ -78,7 +88,67 @@ interface CompleterAxiStreamAdapter; (* prefix = "" *) interface RawPcieCompleterComplete rawCompleterComplete; endinterface +// Completer Only Receives and Transmits One Beat TLP, in which isFirst = isLast = True module mkCompleterAxiStreamAdapter(CompleterAxiStreamAdapter); + FIFOF#(DataStream) inFifo <- mkFIFOF; + FIFOF#(DataStream) outFifo <- mkFIFOF; + FIFOF#(CmplReqAxiStream) reqInFifo <- mkFIFOF; + FIFOF#(CmplCmplAxiStream) cmplOutFifo <- mkFIFOF; + + Reg#(Bool) isInPacketReg <- mkReg(False); + + let rawAxiStreamSlaveIfc <- mkFifoInToRawPcieAxiStreamSlave(convertFifoToFifoIn(reqInFifo)); + let rawAxiStreamMasterIfc <- mkFifoOutToRawPcieAxiStreamMaster(convertFifoToFifoOut(cmplOutFifo)); + + rule genAxis; + // Straddle mode is disable of completer + let stream = inFifo.first; + inFifo.deq; + if (stream.isFirst && stream.isLast) begin + let isSop = PcieTlpCtlIsSopCommon { + isSopPtrs : replicate(0), + isSop : 1 + }; + let isEop = PcieTlpCtlIsEopCommon { + isEopPtrs : replicate(0), + isEop : 1 + }; + // Do not enable parity check in the core + let sideBand = PcieCompleterCompleteSideBandFrame { + parity : 0, + discontinue : False, + isSop : isSop, + isEop : isEop + }; + let axiStream = CmplCmplAxiStream { + tData : stream.data, + tKeep : fromInteger(valueOf(IDEA_CC_TKEEP_OF_CSR)), + tLast : True, + tUser : pack(sideBand) + }; + cmplOutFifo.enq(axiStream); + end + endrule + + rule parseAxis; + let axiStream = reqInFifo.first; + reqInFifo.deq; + isInPacketReg <= !axiStream.tLast; + // First Beat + if (!isInPacketReg && axiStream.tLast) begin + PcieCompleterRequestSideBandFrame sideBand = unpack(axiStream.tUser); + let stream = DataStream { + data : axiStream.tData, + byteEn : sideBand.dataByteEn, + isFirst : True, + isLast : True + }; + outFifo.enq(stream); + end + endrule + + interface dmaDataFifoOut = convertFifoToFifoOut(outFifo); + interface dmaDataFifoIn = convertFifoToFifoIn(inFifo); interface RawPcieCompleterRequest rawCompleterRequest; interface rawAxiStreamSlave = rawAxiStreamSlaveIfc; @@ -307,8 +377,8 @@ module mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); Vector#(DMA_PATH_NUM, FifoIn#(SideBandByteEn)) byteEnFifoInIfc = newVector; dataFifoInIfc[0] = shiftA.streamFifoIn; dataFifoInIfc[1] = shiftB.streamFifoIn; - byteEnFifoInIfc[0] = onvertFifoToFifoIn(byteEnAFifo); - byteEnFifoInIfc[1] = onvertFifoToFifoIn(byteEnBFifo); + byteEnFifoInIfc[0] = convertFifoToFifoIn(byteEnAFifo); + byteEnFifoInIfc[1] = convertFifoToFifoIn(byteEnBFifo); interface dataFifoIn = dataFifoInIfc; interface byteEnFifoIn = byteEnFifoInIfc; interface axiStreamFifoOut = convertFifoToFifoOut(axiStreamOutFifo); @@ -326,8 +396,23 @@ module mkConvertStraddleAxisToDataStream(ConvertStraddleAxisToDataStream); // During TLP varibles Vector#(DMA_PATH_NUM, Reg#(Bool)) isInTlpRegs <- replicateM(mkReg(False)); Vector#(DMA_PATH_NUM, Reg#(Bool)) isCompleted <- replicateM(mkReg(False)); - Vector#(DMA_PATH_NUM, Reg#(SlotToken)) tagReg <- mkReg(0); + Vector#(DMA_PATH_NUM, Reg#(SlotToken)) tagReg <- replicateM(mkReg(0)); + function PcieRequesterCompleteDescriptor getDescriptorFromData(PcieTlpCtlIsSopPtr isSopPtr, Data data); + if (isSopPtr == fromInteger(valueOf(ISSOP_LANE_0))) begin + return unpack(truncate(data)); + end + else begin + return unpack(truncate(data >> valueOf(STRADDLE_THRESH_BIT_WIDTH))); + end + endfunction + + function Bool isMyValidTlp(DmaPathNo path, PcieRequesterCompleteDescriptor desc); + Bool valid = (desc.status == fromInteger(valueOf(SUCCESSFUL_CMPL))) && (!desc.isPoisoned); + Bool pathMatch = (truncate(path) == desc.tag[valueOf(DES_NONEXTENDED_TAG_WIDTH) - 1]); + return valid && pathMatch; + endfunction + rule parseAxiStream; let axiStream = axiStreamInFifo.first; axiStreamInFifo.deq; @@ -360,7 +445,7 @@ module mkConvertStraddleAxisToDataStream(ConvertStraddleAxisToDataStream); // 1 belongs to this path else if (isMyValidTlp(pathIdx, desc1)) begin let isSopPtr = isSop.isSopPtrs[1]; - sdStream.data = getStraddleData(isSopPtr, axiStream.tData) + sdStream.data = getStraddleData(isSopPtr, axiStream.tData); sdStream.byteEn = getStraddleByteEn(isSopPtr, sideBand.dataByteEn); sdStream.isDoubleFrame = False; sdStream.isFirst[0] = True; @@ -375,7 +460,7 @@ module mkConvertStraddleAxisToDataStream(ConvertStraddleAxisToDataStream); // 0 belongs to this path else if (isMyValidTlp(pathIdx, desc0)) begin let isSopPtr = isSop.isSopPtrs[0]; - sdStream.data = getStraddleData(isSopPtr, axiStream.tData) + sdStream.data = getStraddleData(isSopPtr, axiStream.tData); sdStream.byteEn = getStraddleByteEn(isSopPtr, sideBand.dataByteEn); sdStream.isDoubleFrame = False; sdStream.isFirst[0] = True; @@ -428,7 +513,7 @@ module mkConvertStraddleAxisToDataStream(ConvertStraddleAxisToDataStream); isCompleted[pathIdx] <= desc.isRequestCompleted; end else if (isMyValidTlp(pathIdx, desc)) begin - sdStream.data = getStraddleData(isSopPtr, axiStream.tData) + sdStream.data = getStraddleData(isSopPtr, axiStream.tData); sdStream.byteEn = getStraddleByteEn(isSopPtr, sideBand.dataByteEn); sdStream.isDoubleFrame = False; sdStream.isFirst[0] = True; @@ -441,7 +526,7 @@ module mkConvertStraddleAxisToDataStream(ConvertStraddleAxisToDataStream); isCompleted[pathIdx] <= desc.isRequestCompleted; end else if (isInTlpRegs[pathIdx]) begin - sdStream.data = getStraddleData(0, axiStream.tData) + sdStream.data = getStraddleData(0, axiStream.tData); sdStream.byteEn = getStraddleByteEn(0, sideBand.dataByteEn); sdStream.isDoubleFrame = False; sdStream.isFirst[0] = False; @@ -464,7 +549,7 @@ module mkConvertStraddleAxisToDataStream(ConvertStraddleAxisToDataStream); sdStream.isFirst[0] = False; sdStream.isLast[0] = unpack(isEop.isEop[0]); sdStream.tag[0] = tagReg[pathIdx]; - sdStream.isCompleted = isCompleted[pathIdx]; + sdStream.isCompleted[0] = isCompleted[pathIdx]; outFifos[pathIdx].enq(sdStream); tagReg[pathIdx] <= sdStream.tag[0]; isInTlpRegs[pathIdx] <= !sdStream.isLast[0]; diff --git a/src/PcieTypes.bsv b/src/PcieTypes.bsv index b48f872..17399da 100755 --- a/src/PcieTypes.bsv +++ b/src/PcieTypes.bsv @@ -5,6 +5,8 @@ import PcieAxiStreamTypes::*; typedef 512 PCIE_TLP_BYTES; typedef TLog#(PCIE_TLP_BYTES) PCIE_TLP_BYTES_WIDTH; +typedef 2 PCIE_STRADDLE_NUM; // set straddle of RC and RQ same in the Xilinx IP GUI + typedef 512 PCIE_TDATA_WIDTH; typedef 64 PCIE_TDATA_BYTES; typedef 16 PCIE_TDATA_DWORDS; diff --git a/src/ReqCompleterCore.bsv b/src/ReqCompleterCore.bsv deleted file mode 100644 index 20302d8..0000000 --- a/src/ReqCompleterCore.bsv +++ /dev/null @@ -1,184 +0,0 @@ -import FIFOF::*; -import GetPut :: *; -import Vector::*; - -import SemiFifo::*; -import PcieTypes::*; -import DmaTypes::*; -import PcieAxiStreamTypes::*; -import PrimUtils::*; -import StreamUtils::*; -import PcieDescriptorTypes::*; -import CompletionFifo::*; - -function StraddleStream getEmptyStraddleStream(); - let sdStream = StraddleStream { - data : 0, - byteEn : 0, - isDoubleFrame : False, - isFirst : replicate(False), - isLast : replicate(False), - tag : replicate(0), - isCompleted : replicate(0) - }; - return sdStream; -endfunction - -function PcieRequesterCompleteDescriptor getDescriptorFromData(PcieTlpCtlIsSopPtr isSopPtr, Data data); - if (isSopPtr == fromInteger(valueOf(ISSOP_LANE_0))) begin - return unpack(truncate(data)); - end - else begin - return unpack(truncate(data >> valueOf(STRADDLE_THRESH_BIT_WIDTH))); - end -endfunction - -function Data getStraddleData(PcieTlpCtlIsSopPtr isSopPtr, Data data); - if (isSopPtr == fromInteger(valueOf(ISSOP_LANE_0))) begin - let sData = zeroExtend(Data'(data[valueOf(STRADDLE_THRESH_BIT_WIDTH)-1:0])); - end - else begin - let sData = data >> valueOf(STRADDLE_THRESH_BIT_WIDTH); - end - return sData; -endfunction - -function ByteEn getStraddleByteEn(PcieTlpCtlIsSopPtr isSopPtr, ByteEn byteEn); - if (isSopPtr == fromInteger(valueOf(ISSOP_LANE_0))) begin - let sByteEn = zeroExtend(ByteEn'(byteEn[valueOf(STRADDLE_THRESH_BYTE_WIDTH)-1:0])); - end - else begin - let sByteEn = byteEn >> valueOf(STRADDLE_THRESH_BYTE_WIDTH); - end - return sByteEn; -endfunction - -function Bool isMyValidTlp(DmaPathNo path, PcieRequesterCompleteDescriptor desc); - Bool valid = (desc.status == fromInteger(valueOf(SUCCESSFUL_CMPL))) && (!desc.isPoisoned); - Bool pathMatch = (truncate(path) == desc.tag[valueOf(DES_NONEXTENDED_TAG_WIDTH) - 1]); - return valid && pathMatch; -endfunction - -interface RequesterCompleteCore; - interface FifoIn#(StraddleStream) tlpFifoIn; - interface FifoOut#(DataStream) tlpFifoOut; - interface FifoOut#(DataStream) dataFifoOut; - interface FifoIn#(DmaRequest) rdReqFifoIn; -endinterface - -module mkRequesterCompleteCore(RequesterCompleteCore); - FIFOF#(StraddleStream) tlpInFifo <- mkFIFOF; - FIFOF#(DmaRequest) reqInFifo <- mkFIFOF; - FIFOF#(DataStream) tlpOutFifo <- mkFIFOF; - - FIFOF#(SlotToken) tagFifo <- mkSizedFIFOF(4); - FIFOF#(Bool) completedFifo <- mkSizedFIFOF(4); - - StreamPipe descRemove <- mkStreamHeaderRemove(fromInteger(valueOf(DES_RC_DESCRIPTOR_WIDTH))); - StreamPipe streamReshape <- mkStreamReshape; - ChunkCompute chunkSplitor <- mkChunkComputer; - CompletionFifo#(SLOT_PER_PATH, DataStream) cBuffer <- mkCompletionFifo(valueOf(MAX_STREAM_NUM_PER_COMPLETION)); - - Reg#(Bool) hasReadOnce <- mkReg(False); - - // Pipeline stage 1: convert StraddleStream to DataStream, may cost 2 cycle for one StraddleStream - rule convertStraddleToDataStream; - let sdStream = tlpInFifo.first; - if (sdStream.isDoubleFrame) begin - PcieTlpCtlIsSopPtr isSopPtr = 0; - if (hasReadOnce) begin - tlpInFifo.deq; - hasReadOnce <= False; - isSopPtr = 1; - end - else begin - hasReadOnce <= True; - end - let stream = DataStream { - data : getStraddleData(isSopPtr, sdStream.data), - byteEn : getStraddleByteEn(isSopPtr, sideBand.dataByteEn);, - isFirst : sdStream.isFirst[isSopPtr], - isLast : sdStream.isLast[isSopPtr] - } - let tag = sdStream.tag[isSopPtr]; - tagFifo.enq(tag); - end - else begin - tlpInFifo.deq; - hasReadOnce <= False; - let stream = DataStream { - data : sdStream.data, - byteEn : sdStream.byteEn, - isFirst : sdStream.isFirst[0], - isLast : sdStream.isLast[0] - }; - let tag = sdStream.tag[0]; - end - descRemove.streamFifoIn.enq(stream); - endrule - - // Pipeline stage 2: remove the descriptor in the head of each TLP - - // Pipeline stage 3: Buffer the received DataStreams and reorder the, - rule reorderStream; - let stream = descRemove.streamFifoOut.first; - let isCompleted = completedFifo.first; - let tag = tagFifo.first; - descRemove.streamFifoOut.deq; - completedFifo.deq; - tagFifo.deq; - stream.isLast = isCompleted && stream.isLast; - cBuffer.append.enq(tuple2(tag, stream)); - if (stream.isLast) begin - cBuffer.complete.put(tag); - end - endrule - - // Pipeline stage 4: there may be bubbles in the first and last DataStream of a TLP because of RCB - // Reshape the DataStream and make sure it is continuous - rule reshapeStream; - let stream = cBuffer.drain.first; - cBuffer.drain.deq; - streamReshape.streamFifoIn.enq(stream); - endrule - - // Pipeline stage 1: split to req to MRRS chunks - rule reqSplit; - let req = reqInFifo.first; - reqInFifo.deq; - chunkSplitor.dmaRequestFifoIn.enq(req); - endrule - - // Pipeline stage 2: generate read descriptor - rule cqDescGen; - let req = chunkSplitor.chunkRequestFifoOut.first; - chunkSplitor.chunkRequestFifoOut.deq; - let tag <- completedFifo.reserve.get; - let descriptor = PcieRequesterRequestDescriptor { - forceECRC : False, - attributes : 0, - trafficClass : 0, - requesterIdEn : False, - completerId : 0, - tag : tag, - requesterId : 0, - isPoisoned : False, - reqType : fromInteger(valueOf(MEM_READ_REQ)), - dwordCnt : truncate(req.length >> valueOf(BYTE_DWORD_SHIFT_WIDTH)) + zeroExtend(DwordCount'(rq.length[1:0])), - address : truncate(req.startAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)), - addrType : fromInteger(valueOf(TRANSLATED_ADDR)) - }; - let stream = DataStream { - data : zeroExtend(pack(descriptor)), - byteEn : convertBytePtr2ByteEn(fromInteger(valueOf(DES_RQ_DESCRIPTOR_WIDTH))), - isFirst : True, - isLast : True - }; - tlpOutFifo.enq(stream); - endrule - - interface tlpFifoIn = convertFifoToFifoIn(tlpInFifo); - interface tlpFifoOut = convertFifoToFifoOut(tlpOutFifo); - interface rdReqFifoIn = convertFifoToFifoOut(reqInFifo); - interface dataFifoOut = streamReshape.streamFifoOut; -endmodule diff --git a/src/StreamUtils.bsv b/src/StreamUtils.bsv index be8f49f..d314686 100755 --- a/src/StreamUtils.bsv +++ b/src/StreamUtils.bsv @@ -372,7 +372,7 @@ module mkStreamShiftAlignToDw#(DataBytePtr offset)(StreamShiftAlignToDw); FIFOF#(DataBytePtr) shiftSetFifo <- mkSizedFIFOF(valueOf(TMul#(2, STREAM_SHIFT_LATENCY))); - Vector#(DWORD_BYTES, StreamShift) shifts = newVector; + Vector#(DWORD_BYTES, StreamPipe) shifts = newVector; for (DataBytePtr idx = 0; idx < fromInteger(valueOf(DWORD_BYTES)); idx = idx + 1 ) begin shifts[idx] <- mkStreamShift(offset + idx); end @@ -414,12 +414,16 @@ module mkStreamShiftAlignToDw#(DataBytePtr offset)(StreamShiftAlignToDw); interface byteEnFifoOut = convertFifoToFifoOut(byteEnOutFifo); endmodule +typedef 3 STREAM_HEADER_REMOVE_LATENCY; + // Remove the first N Bytes of a stream module mkStreamHeaderRemove#(DataBytePtr headerLen)(StreamPipe); - FIFOF#(DataStream) inFifo <- mkFIFOF; + FIFOF#(DataStream) inFifo <- mkFIFOF; FIFOF#(DataStream) outFifo <- mkFIFOF; - Reg#(DataStream) remainStreamReg <- mkReg(getEmptyStream); + Reg#(DataStream) remainStreamReg <- mkReg(getEmptyStream); + Reg#(Bool) hasLastRemainReg <- mkReg(False); + DataBitPtr headerBitLen = zeroExtend(headerLen) >> valueOf(BYTE_WIDTH_WIDTH); rule removeHeader; @@ -445,7 +449,7 @@ module mkStreamHeaderRemove#(DataBytePtr headerLen)(StreamPipe); }; let newStream = DataStream { data : remainStreamReg.data | stream.data << headerBitLen, - byteEn : remainStreamReg.byteEn | stream.data << headerLen, + byteEn : remainStreamReg.byteEn | stream.byteEn << headerLen, isFirst : stream.isFirst, isLast : stream.isLast }; @@ -458,7 +462,7 @@ module mkStreamHeaderRemove#(DataBytePtr headerLen)(StreamPipe); else begin outFifo.enq(newStream); if (stream.isLast) begin - if(isByteEnZero(resStream)) begin + if(isByteEnZero(resStream.byteEn)) begin remainStreamReg <= getEmptyStream; hasLastRemainReg <= False; end @@ -481,17 +485,19 @@ module mkStreamReshape(StreamPipe); FIFOF#(DataStream) outFifo <- mkFIFOF; //During Stream Varibles - Reg#(DataBytePtr) rmBytePtrReg <- mkReg(0); - Reg#(DataBitPtr) rmBitPtrReg <- mkReg(0); - Reg#(DataBytePtr) rsBytePtrReg <- mkReg(0); - Reg#(DataBitPtr) rsBitPtrReg <- mkReg(0); - Reg#(Bool) isDetectedReg <- mkReg(False); - Reg#(DataStream) remainStreamReg <- mkReg(getEmptyStream); + Reg#(DataBytePtr) rmBytePtrReg <- mkReg(0); + Reg#(DataBitPtr) rmBitPtrReg <- mkReg(0); + Reg#(DataBytePtr) rsBytePtrReg <- mkReg(0); + Reg#(DataBitPtr) rsBitPtrReg <- mkReg(0); + Reg#(Bool) isDetectedReg <- mkReg(False); + Reg#(DataStream) remainStreamReg <- mkReg(getEmptyStream); + Reg#(Bool) hasLastRemainReg <- mkReg(False); rule shape; if (hasLastRemainReg) begin - outFifo.enq(hasLastRemainReg); + outFifo.enq(remainStreamReg); isDetectedReg <= False; + hasLastRemainReg <= False; end else begin let stream = inFifo.first; @@ -516,7 +522,7 @@ module mkStreamReshape(StreamPipe); isLast : True }; remainStreamReg <= remainStream; - isLast = isByteEnZero(remainStream.byteEn); + let isLast = isByteEnZero(remainStream.byteEn); let outStream = DataStream { data : (stream.data << rmBitPtrReg) | remainStreamReg.data, byteEn : (stream.byteEn << rmBytePtrReg) | remainStreamReg.byteEn, @@ -524,10 +530,11 @@ module mkStreamReshape(StreamPipe); isLast : isLast }; outFifo.enq(outStream); + hasLastRemainReg <= !isLast; isDetectedReg <= isLast ? False : isDetectedReg; end else begin - ourFifo.enq(stream); + outFifo.enq(stream); end end end diff --git a/test/TestDmaCore.bsv b/test/TestDmaCore.bsv index c5ea0c1..1c99962 100755 --- a/test/TestDmaCore.bsv +++ b/test/TestDmaCore.bsv @@ -9,10 +9,11 @@ import PrimUtils::*; import PcieTypes::*; import PcieDescriptorTypes::*; import StreamUtils::*; -import ReqRequestCore::*; -import DmaRequester::*; +import PcieAdapter::*; import TestStreamUtils::*; -import ReqCompleterCore::*; +import DmaUtils::*; +import DmaC2HPipe::*; + typedef 100000 CHUNK_PER_EPOCH_TEST_NUM; typedef 64'hFFFFFFFFFFFFFFFF MAX_ADDRESS; @@ -21,6 +22,7 @@ typedef 2'b00 DEFAULT_TLP_SIZE_SETTING; typedef 4 CHUNK_TX_TEST_SETTING_NUM; typedef 6 CHUNK_RX_TEST_SETTING_NUM; +(* doc = "testcase" *) module mkChunkComputerTb(Empty); ChunkCompute dut <- mkChunkComputer(DMA_TX); @@ -55,8 +57,9 @@ module mkChunkComputerTb(Empty); let testEnd = testAddr + testLength - 1; if (testEnd > testAddr && testEnd <= fromInteger(valueOf(MAX_ADDRESS))) begin let request = DmaRequest{ - startAddr: testAddr, - length: testLength + startAddr : testAddr, + length : testLength, + isWrite : False }; lenRemainReg <= testLength; dut.dmaRequestFifoIn.enq(request); @@ -102,14 +105,16 @@ endmodule typedef 60 SIMPLE_TEST_BYTELEN; typedef 'hABCDEF SIMPLE_TEST_ADDR; -module mkSimpleRequesterRequestCoreTb(Empty); - RequesterRequestCore dut <- mkRequesterRequestCore; +(* doc = "testcase" *) +module mkSimpleC2HWriteCoreTb(Empty); + C2HWriteCore dut <- mkC2HWriteCore; Reg#(UInt#(32)) testCntReg <- mkReg(0); rule testInput if (testCntReg < 1); let req = DmaRequest { startAddr : fromInteger(valueOf(SIMPLE_TEST_ADDR)), - length : fromInteger(valueOf(SIMPLE_TEST_BYTELEN)) + length : fromInteger(valueOf(SIMPLE_TEST_BYTELEN)), + isWrite : True }; dut.wrReqFifoIn.enq(req); let stream = generatePsuedoStream(fromInteger(valueOf(SIMPLE_TEST_BYTELEN)), True, True); @@ -118,12 +123,12 @@ module mkSimpleRequesterRequestCoreTb(Empty); endrule rule testOutput; - let stream = dut.dataFifoOut.first; - dut.dataFifoOut.deq; + let stream = dut.tlpFifoOut.first; + dut.tlpFifoOut.deq; $display(fshow(stream)); if (stream.isFirst) begin - let {firstByteEn, lastByteEn} = dut.byteEnFifoOut.first; - dut.byteEnFifoOut.deq; + let {firstByteEn, lastByteEn} = dut.tlpSideBandFifoOut.first; + dut.tlpSideBandFifoOut.deq; $display("firstByteEn:%b, lastByteEn:%b", firstByteEn, lastByteEn); PcieRequesterRequestDescriptor desc = unpack(truncate(stream.data)); $display("Descriptor Elements: dwordCnt:%d, address:%h", desc.dwordCnt, desc.address << 2); @@ -134,47 +139,8 @@ module mkSimpleRequesterRequestCoreTb(Empty); endrule endmodule -module mkSimpleRequesterRequestTb(Empty); - RequesterRequest dut <- mkRequesterRequest; - Reg#(UInt#(32)) testCntReg <- mkReg(0); - Reg#(UInt#(32)) tlpNumReg <- mkReg(2); - - rule testInput if (testCntReg < 1); - let req = DmaRequest { - startAddr : fromInteger(valueOf(SIMPLE_TEST_ADDR)), - length : fromInteger(valueOf(SIMPLE_TEST_BYTELEN)) - }; - dut.reqA.wrReqFifoIn.enq(req); - dut.reqB.wrReqFifoIn.enq(req); - let stream = generatePsuedoStream(fromInteger(valueOf(SIMPLE_TEST_BYTELEN)), True, True); - dut.reqA.wrDataFifoIn.enq(stream); - dut.reqB.wrDataFifoIn.enq(stream); - testCntReg <= testCntReg + 1; - endrule - - rule testOutput; - let stream = dut.axiStreamFifoOut.first; - dut.axiStreamFifoOut.deq; - $display("data: %h", stream.tData); - PcieRequesterRequestSideBandFrame sideBand = unpack(stream.tUser); - $display("isSop : ", sideBand.isSop.isSop, ", isEop : ", sideBand.isEop.isEop); - let tlpNum = tlpNumReg; - if (sideBand.isEop.isEop == fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT))) begin - tlpNum = tlpNum - 1; - end - else if (sideBand.isEop.isEop == fromInteger(valueOf(DOUBLE_TLP_IN_THIS_BEAT))) begin - tlpNum = tlpNum - 2; - end - if (tlpNum == 0) begin - $finish(); - end - tlpNumReg <= tlpNum; - endrule - -endmodule - module mkSimpleConvertStraddleAxisToDataStreamTb(Empty); - ConvertStraddleAxisToDataStream dut <- mkConvertStraddleToDataStream; + ConvertStraddleAxisToDataStream dut <- mkConvertStraddleAxisToDataStream; Reg#(UInt#(32)) testCntReg <- mkReg(0); Reg#(UInt#(32)) tlpNumReg <- mkReg(2); diff --git a/test/TestStreamUtils.bsv b/test/TestStreamUtils.bsv index a8e38ff..bae36fb 100755 --- a/test/TestStreamUtils.bsv +++ b/test/TestStreamUtils.bsv @@ -239,7 +239,7 @@ module mkStreamShiftTb(Empty); endrule for (DataBytePtr shiftOffset = 0; shiftOffset <= getMaxBytePtr; shiftOffset = shiftOffset + 1) begin - StreamShift dut = duts[shiftOffset]; + StreamPipe dut = duts[shiftOffset]; rule testOutput if (isInitReg); let shiftStream = dut.streamFifoOut.first; From bc8960417ee15c9ceb7bbc6d66a032c0cef72592 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Wed, 21 Aug 2024 21:34:10 +0800 Subject: [PATCH 36/53] Add RawDmaController Wrapper and a simple cocotb --- backend/Makefile | 4 +- cocotb/dma_wr_rd_tb.py | 170 +++++++++++++++++++++++++++++++++ img/streamConcat.drawio.svg | 1 - src/DmaC2HPipe.bsv | 3 +- src/DmaH2CPipe.bsv | 33 ++++--- src/DmaWrapper.bsv | 183 +++++++++++++++++++++++++++++++++--- src/PcieAdapter.bsv | 2 + src/PrimUtils.bsv | 1 - test/TestCompletionFifo.bsv | 2 +- 9 files changed, 370 insertions(+), 29 deletions(-) create mode 100644 cocotb/dma_wr_rd_tb.py delete mode 100644 img/streamConcat.drawio.svg diff --git a/backend/Makefile b/backend/Makefile index 0c3c559..0be2d3e 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -8,8 +8,8 @@ OUTPUTDIR ?= output LOGFILE ?= run.log RUNTOPHASE ?= place # synth place route all PARTNAME = xcvu13p-fhgb2104-2-i -TARGETFILE ?= ../test/TestCompletionFifo.bsv -TOPMODULE ?= mkCompletionFifoInst +TARGETFILE ?= ../src/DmaWrapper.bsv +TOPMODULE ?= mkRawDmaController export TOP = $(TOPMODULE) export RTL = $(VLOGDIR) diff --git a/cocotb/dma_wr_rd_tb.py b/cocotb/dma_wr_rd_tb.py new file mode 100644 index 0000000..b70af5b --- /dev/null +++ b/cocotb/dma_wr_rd_tb.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python +import itertools +import logging +import os + +import cocotb_test.simulator +import pytest + +import cocotb +from cocotb.triggers import RisingEdge, FallingEdge, Timer +from cocotb.regression import TestFactory + +from cocotbext.axi import AxiStreamBus +from cocotbext.pcie.core import RootComplex +from cocotbext.pcie.xilinx.us import UltraScalePlusPcieDevice +from cocotbext.axi import AxiReadBus, AxiRamRead + +class TB(object): + def __init__(self, dut): + self.dut = dut + + self.log = logging.getLogger("cocotb.tb") + self.log.setLevel(logging.DEBUG) + + # PCIe + self.rc = RootComplex() + + self.dev = UltraScalePlusPcieDevice( + # configuration options + pcie_generation=3, + # pcie_link_width=2, + # user_clk_frequency=250e6, + alignment="dword", + cq_straddle=False, + cc_straddle=False, + rq_straddle=True, + rc_straddle=True, + rc_4tlp_straddle=False, + pf_count=1, + max_payload_size=1024, + enable_client_tag=True, + enable_extended_tag=True, + enable_parity=False, + enable_rx_msg_interface=False, + enable_sriov=False, + enable_extended_configuration=False, + + pf0_msi_enable=True, + pf0_msi_count=32, + pf1_msi_enable=False, + pf1_msi_count=1, + pf2_msi_enable=False, + pf2_msi_count=1, + pf3_msi_enable=False, + pf3_msi_count=1, + pf0_msix_enable=False, + pf0_msix_table_size=0, + pf0_msix_table_bir=0, + pf0_msix_table_offset=0x00000000, + pf0_msix_pba_bir=0, + pf0_msix_pba_offset=0x00000000, + pf1_msix_enable=False, + pf1_msix_table_size=0, + pf1_msix_table_bir=0, + pf1_msix_table_offset=0x00000000, + pf1_msix_pba_bir=0, + pf1_msix_pba_offset=0x00000000, + pf2_msix_enable=False, + pf2_msix_table_size=0, + pf2_msix_table_bir=0, + pf2_msix_table_offset=0x00000000, + pf2_msix_pba_bir=0, + pf2_msix_pba_offset=0x00000000, + pf3_msix_enable=False, + pf3_msix_table_size=0, + pf3_msix_table_bir=0, + pf3_msix_table_offset=0x00000000, + pf3_msix_pba_bir=0, + pf3_msix_pba_offset=0x00000000, + + # signals + user_clk=dut.CLK, + user_reset=dut.RST_N, + + rq_bus=AxiStreamBus.from_prefix(dut, "m_axis_rq"), + pcie_rq_seq_num0=dut.pcie_rq_seq_num0, + pcie_rq_seq_num_vld0=dut.pcie_rq_seq_num_vld0, + pcie_rq_seq_num1=dut.pcie_rq_seq_num1, + pcie_rq_seq_num_vld1=dut.pcie_rq_seq_num_vld1, + + rc_bus=AxiStreamBus.from_prefix(dut, "s_axis_rc"), + + cfg_max_payload=dut.cfg_max_payload, + cfg_max_read_req=dut.cfg_max_read_req, + + cfg_fc_sel=0b100, + cfg_fc_ph=dut.cfg_fc_ph, + cfg_fc_pd=dut.cfg_fc_pd, + ) + + self.dev.log.setLevel(logging.DEBUG) + + self.rc.make_port().connect(self.dev) + + dut.requester_id.setimmediatevalue(0) + dut.requester_id_enable.setimmediatevalue(0) + + dut.enable.setimmediatevalue(0) + +# no input now +@cocotb.test(timeout_time=1000000000, timeout_unit="ns") +async def run_test_write(dut): + + tb = TB(dut) + + await FallingEdge(dut.RST_N) + await RisingEdge(dut.clk) + await RisingEdge(dut.clk) + + await tb.rc.enumerate() + + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + await dev.enable_device() + await dev.set_master() + + mem = tb.rc.mem_pool.alloc_region(16*1024*1024) + mem_base = mem.get_absolute_address(0) + + await RisingEdge(dut.clk) + await RisingEdge(dut.clk) + + +if cocotb.SIM_NAME: + + factory = TestFactory(run_test_write) + factory.generate_tests() + + +# cocotb-test + +tests_dir = os.path.dirname(__file__) +rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', 'backend', 'verilog')) + + +def test_dma_wr(request,): + dut = "mkRawDmaController" + module = os.path.splitext(os.path.basename(__file__))[0] + toplevel = dut + + verilog_sources = [ + os.path.join(rtl_dir, f"{dut}.v"), + os.path.join(rtl_dir, f"FIFO2.v"), + os.path.join(rtl_dir, f"SizedFIFO.v"), + os.path.join(rtl_dir, f"BRAM2.v"), + os.path.join(rtl_dir, f"Counter.v"), + # os.path.join(rtl_dir, f"mkRequesterAxiStreamAdapter.v") + ] + + sim_build = os.path.join(tests_dir, "sim_build", + request.node.name.replace('[', '-').replace(']', '')) + + cocotb_test.simulator.run( + python_search=[tests_dir], + verilog_sources=verilog_sources, + toplevel=toplevel, + module=module, + timescale="1ns/1ps", + sim_build=sim_build + ) + \ No newline at end of file diff --git a/img/streamConcat.drawio.svg b/img/streamConcat.drawio.svg deleted file mode 100644 index 6901206..0000000 --- a/img/streamConcat.drawio.svg +++ /dev/null @@ -1 +0,0 @@ -
stage1
stage1
stage1
stage1
output
output
remainDataReg
remainDataReg
mux
mux
Concat
Concat
isFirstA
isLastB
isFirstB
isLastB
hasRemain
isFirstA...
getPtr
getPtr
getPtr
getPtr
Input
Input
Input
Input
Text is not SVG - cannot display
\ No newline at end of file diff --git a/src/DmaC2HPipe.bsv b/src/DmaC2HPipe.bsv index 60b5ebb..4fe57eb 100644 --- a/src/DmaC2HPipe.bsv +++ b/src/DmaC2HPipe.bsv @@ -28,6 +28,7 @@ interface DmaC2HPipe; endinterface // Single Path module +// (* synthesize *) // module mkDmaC2HPipe#(DmaPathNo pathIdx)(DmaC2HPipe); C2HReadCore readCore <- mkC2HReadCore(pathIdx); C2HWriteCore writeCore <- mkC2HWriteCore; @@ -202,7 +203,7 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); }; let stream = DataStream { data : zeroExtend(pack(descriptor)), - byteEn : convertBytePtr2ByteEn(fromInteger(valueOf(DES_RQ_DESCRIPTOR_WIDTH))), + byteEn : convertBytePtr2ByteEn(fromInteger(valueOf(TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH)))), isFirst : True, isLast : True }; diff --git a/src/DmaH2CPipe.bsv b/src/DmaH2CPipe.bsv index 5d451c8..fd214dc 100644 --- a/src/DmaH2CPipe.bsv +++ b/src/DmaH2CPipe.bsv @@ -6,6 +6,7 @@ import PrimUtils::*; import PcieAxiStreamTypes::*; import PcieTypes::*; import PcieDescriptorTypes::*; +import PcieAdapter::*; import DmaTypes::*; typedef 1 IDEA_CQ_CSR_DWORD_CNT; @@ -24,7 +25,7 @@ interface DmaH2CPipe; // TODO: Cfg Ifc endinterface -(* synthesize *) +// (* synthesize *) // module mkDmaH2CPipe(DmaH2CPipe); FIFOF#(DataStream) tlpInFifo <- mkFIFOF; @@ -32,7 +33,7 @@ module mkDmaH2CPipe(DmaH2CPipe); FIFOF#(DmaRequest) reqOutFifo <- mkFIFOF; FIFOF#(DmaCsrValue) dataInFifo <- mkFIFOF; - FIFOF#(DmaCsrValue) dataOutFifo <- mkFIFOF + FIFOF#(DmaCsrValue) dataOutFifo <- mkFIFOF; FIFOF#(Tuple2#(DmaRequest, PcieCompleterRequestDescriptor)) pendingFifo <- mkSizedFIFOF(valueOf(CMPL_NPREQ_INFLIGHT_NUM)); @@ -47,7 +48,19 @@ module mkDmaH2CPipe(DmaH2CPipe); Reg#(Bool) isInPacket <- mkReg(False); Reg#(UInt#(32)) illegalPcieReqCntReg <- mkReg(0); - BytePtr csrBytes = fromInteger(valueOf(TDiv#(DMA_CSR_DATA_WIDTH, BYTE_WIDTH))); + DataBytePtr csrBytes = fromInteger(valueOf(TDiv#(DMA_CSR_DATA_WIDTH, BYTE_WIDTH))); + + function DmaCsrAddr getCsrAddrFromCqDescriptor(PcieCompleterRequestDescriptor descriptor); + let addr = getAddrLowBits(zeroExtend(descriptor.address), descriptor.barAperture); + // Only support one BAR now, no operation + if (descriptor.barId == 0) begin + addr = addr; + end + else begin + addr = 0; + end + return truncate(addr << valueOf(TSub#(DMA_MEM_ADDR_WIDTH, DES_ADDR_WIDTH))); + endfunction rule parseTlp; tlpInFifo.deq; @@ -58,13 +71,13 @@ module mkDmaH2CPipe(DmaH2CPipe); case (descriptor.reqType) fromInteger(valueOf(MEM_WRITE_REQ)): begin $display("SIM INFO @ mkDmaH2CPipe: MemWrite Detect!"); + let firstData = getDataFromFirstBeat(stream); + DmaCsrValue wrValue = truncate(firstData); + let wrAddr = getCsrAddrFromCqDescriptor(descriptor); if (descriptor.dwordCnt == fromInteger(valueOf(IDEA_CQ_CSR_DWORD_CNT))) begin - let firstData = getDataFromFirstBeat(stream); - DmaCsrValue wrValue = firstData[valueOf(DMA_CSR_ADDR_WIDTH)-1:0]; - DmaCsrAddr wrAddr = getCsrAddrFromCqDescriptor(descriptor); $display("SIM INFO @ mkDmaH2CPipe: Valid wrReq with Addr %h, data %h", wrAddr, wrValue); let req = DmaRequest { - startAddr : wrAddr, + startAddr : zeroExtend(wrAddr), length : zeroExtend(csrBytes), isWrite : True }; @@ -72,8 +85,6 @@ module mkDmaH2CPipe(DmaH2CPipe); dataOutFifo.enq(wrValue); end else begin - DmaCsrValue wrValue = firstData; - DmaCsrAddr wrAddr = getCsrAddrFromCqDescriptor(descriptor); $display("SIM INFO @ mkDmaH2CPipe: Invalid wrReq with Addr %h, data %h", wrAddr, wrValue); illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; end @@ -81,8 +92,8 @@ module mkDmaH2CPipe(DmaH2CPipe); fromInteger(valueOf(MEM_READ_REQ)): begin $display("SIM INFO @ mkDmaH2CPipe: MemRead Detect!"); let rdAddr = getCsrAddrFromCqDescriptor(descriptor); - let req = CsrReadReq{ - startAddr : rdAddr, + let req = DmaRequest{ + startAddr : zeroExtend(rdAddr), length : zeroExtend(csrBytes), isWrite : False }; diff --git a/src/DmaWrapper.bsv b/src/DmaWrapper.bsv index 7f4ea90..1d94a6e 100755 --- a/src/DmaWrapper.bsv +++ b/src/DmaWrapper.bsv @@ -1,10 +1,19 @@ import FIFOF::*; +import Vector::*; +import SemiFifo::*; +import BusConversion::*; +import AxiStreamTypes::*; import PcieTypes::*; import PcieConfigurator::*; +import PcieAxiStreamTypes::*; +import PcieAdapter::*; import DmaTypes::*; -import DmaCompleter::*; -import DmaRequester::*; +import DmaUtils::*; +import DmaC2HPipe::*; +import DmaH2CPipe::*; + +// For Bsv User interface DmaController; // User Logic Ifc @@ -14,20 +23,20 @@ interface DmaController; interface FifoIn#(DmaCsrValue) h2cDataFifoIn; interface FifoOut#(DmaCsrValue) h2cDataFifoOut; - interface FifoOut#(DmaCsrAddr) h2cReqFifoOut; + interface FifoOut#(DmaRequest) h2cReqFifoOut; // Raw PCIe interfaces, connected to the Xilinx PCIe IP - interface RawXilinxPcieIp rawPcie; + (* prefix = "" *)interface RawXilinxPcieIp rawPcie; endinterface // TODO : connect Configurator to other modules -(* synthesize *) +// (* synthesize *) // module mkDmaController(DmaController); Vector#(DMA_PATH_NUM, DmaC2HPipe) c2hPipes = newVector; for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1) begin c2hPipes[pathIdx] <- mkDmaC2HPipe(pathIdx); end - DmaH2CPipe h2cPipe <- mkDmaH2cPipe; + DmaH2CPipe h2cPipe <- mkDmaH2CPipe; RequesterAxiStreamAdapter reqAdapter <- mkRequesterAxiStreamAdapter; CompleterAxiStreamAdapter cmplAdapter <- mkCompleterAxiStreamAdapter; @@ -39,10 +48,10 @@ module mkDmaController(DmaController); Vector#(DMA_PATH_NUM, FifoIn#(DmaRequest)) c2hReqInIfc = newVector; for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1) begin + c2hDataInIfc[pathIdx] = c2hPipes[pathIdx].wrDataFifoIn; + c2hDataOutIfc[pathIdx] = c2hPipes[pathIdx].rdDataFifoOut; + c2hReqInIfc[pathIdx] = c2hPipes[pathIdx].reqFifoIn; rule conncetC2HToAdapter; - c2hDataInIfc[pathIdx] = c2hPipes[pathIdx].wrDataFifoIn; - c2hDataOutIfc[pathIdx] = c2hPipes[pathIdx].rdDataFifoOut; - c2hReqInIfc[pathIdx] = c2hPipes[pathIdx].reqFifoIn; if (c2hPipes[pathIdx].tlpDataFifoOut.notEmpty) begin reqAdapter.dmaDataFifoIn[pathIdx].enq(c2hPipes[pathIdx].tlpDataFifoOut.first); c2hPipes[pathIdx].tlpDataFifoOut.deq; @@ -53,7 +62,7 @@ module mkDmaController(DmaController); end if (reqAdapter.dmaDataFifoOut[pathIdx].notEmpty) begin c2hPipes[pathIdx].tlpDataFifoIn.enq(reqAdapter.dmaDataFifoOut[pathIdx].first); - dmaDataFifoOut[pathIdx].deq; + reqAdapter.dmaDataFifoOut[pathIdx].deq; end endrule end @@ -73,7 +82,7 @@ module mkDmaController(DmaController); // User Logic Ifc interface c2hDataFifoIn = c2hDataInIfc; interface c2hDataFifoOut = c2hDataOutIfc; - interface c2hReqFifoIn = c2hReqFifoIn; + interface c2hReqFifoIn = c2hReqInIfc; interface h2cDataFifoIn = h2cPipe.rdDataFifoIn; interface h2cDataFifoOut = h2cPipe.wrDataFifoOut; interface h2cReqFifoOut = h2cPipe.reqFifoOut; @@ -84,9 +93,159 @@ module mkDmaController(DmaController); interface requesterComplete = reqAdapter.rawRequesterComplete; interface completerRequest = cmplAdapter.rawCompleterRequest; interface completerComplete = cmplAdapter.rawCompleterComplete; - interface configuration = pcieConfigurator.rawConfiguration; + interface configuration = configurator.rawConfiguration; method Action linkUp(Bool isLinkUp); + // let cfgs = configurator.get; + // c2hpipes[pathIdx].setCfg(cfgs); endmethod endinterface endmodule +// For Verilog User + +(* always_ready, always_enabled *) +interface RawDmaReqSlave; + (* prefix = "" *) + method Action validReq( + (* port = "valid" *) Bool valid, + (* port = "start_addr" *) DmaMemAddr startAddr, + (* port = "byte_cnt" *) DmaMemAddr length, + (* port = "is_write" *) Bool isWrite + ); + (* result = "ready" *) method Bool ready; +endinterface + +(* always_ready, always_enabled *) +interface RawDmaCsrReqMaster; + (* result = "address" *) method DmaCsrAddr address; + (* result = "is_write" *) method Bool isWrite; + (* result = "valid" *) method Bool valid; + (* prefix = "" *) method Action ready((* port = "ready" *) Bool rdy); +endinterface + +typedef TDiv#(DATA_WIDTH, BYTE_WIDTH) DMA_DATA_KEEP_WIDTH; +typedef 1 DMA_DATA_USER_WIDTH; +typedef RawAxiStreamSlave#(DMA_DATA_KEEP_WIDTH, DMA_DATA_USER_WIDTH) RawDmaDataSlave; +typedef RawAxiStreamMaster#(DMA_DATA_KEEP_WIDTH, DMA_DATA_USER_WIDTH) RawDmaDataMaster; +typedef AxiStream#(DMA_DATA_KEEP_WIDTH, DMA_DATA_USER_WIDTH) DmaAxiStream; +typedef RawBusMaster#(DmaCsrValue) RawDmaCsrMaster; +typedef RawBusSlave#(DmaCsrValue) RawDmaCsrSlave; + +module mkFifoInToRawDmaDataSlave#(FifoIn#(DataStream) pipe)(RawDmaDataSlave); + Reg#(Bool) isFirstReg <- mkReg(False); + let rawBus <- mkFifoInToRawBusSlave(pipe); + method Bool tReady = rawBus.ready; + method Action tValid( + Bool valid, + Bit#(DATA_WIDTH) tData, + Bit#(DMA_DATA_KEEP_WIDTH) tKeep, + Bool tLast, + Bit#(DMA_DATA_USER_WIDTH) tUser + ); + if (valid) begin + if (tLast) begin + isFirstReg <= True; + end + else if (isFirstReg) begin + isFirstReg <= False; + end + end + let stream = DataStream { + data : tData, + byteEn : tKeep, + isFirst : isFirstReg && valid, + isLast : tLast + }; + rawBus.validData(valid, stream); + endmethod +endmodule + +module mkFifoOutToRawDmaDataMaster#(FifoOut#(DataStream) pipe)(RawDmaDataMaster); + let rawBus <- mkFifoOutToRawBusMaster(pipe); + method Bool tValid = rawBus.valid; + method Bit#(DATA_WIDTH) tData = rawBus.data.data; + method Bit#(DMA_DATA_KEEP_WIDTH) tKeep = rawBus.data.byteEn; + method Bool tLast = rawBus.data.isLast; + method Bit#(DMA_DATA_USER_WIDTH) tUser = 0; + method Action tReady(Bool rdy); + rawBus.ready(rdy); + endmethod +endmodule + +module mkFifoInToRawDmaReqSlave#(FifoIn#(DmaRequest) pipe)(RawDmaReqSlave); + let rawBus <- mkFifoInToRawBusSlave(pipe); + method Action validReq( + Bool valid, + DmaMemAddr startAddr, + DmaMemAddr length, + Bool isWrite + ); + let request = DmaRequest { + startAddr : startAddr, + length : length, + isWrite : isWrite + }; + rawBus.validData(valid, request); + endmethod + method Bool ready = rawBus.ready; +endmodule + +module mkFifoOutToRawCsrReqMaster#(FifoOut#(DmaRequest) pipe)(RawDmaCsrReqMaster); + let rawBus <- mkFifoOutToRawBusMaster(pipe); + method DmaCsrAddr address = truncate(rawBus.data.startAddr); + method Bool isWrite = rawBus.data.isWrite; + method Bool valid = rawBus.valid; + method Action ready(Bool rdy); + rawBus.ready(rdy); + endmethod +endmodule + +// Raw verilog Wrapper of Dma User Logic Ifc +interface RawDmaController; + // User Logic Ifc + (* prefix = "s_axis_c2h_0" *) interface RawDmaDataSlave dmaWrData0; + (* prefix = "s_desc_c2h_0" *) interface RawDmaReqSlave dmaDesc0; + (* prefix = "m_axis_c2h_0" *) interface RawDmaDataMaster dmaRdData0; + + (* prefix = "s_axis_c2h_1" *) interface RawDmaDataSlave dmaWrData1; + (* prefix = "s_desc_c2h_1" *) interface RawDmaReqSlave dmaDesc1; + (* prefix = "m_axis_c2h_1" *) interface RawDmaDataMaster dmaRdData1; + + (* prefix = "s_h2c_value" *) interface RawDmaCsrSlave dmaRdCsr; + (* prefix = "m_h2c_value" *) interface RawDmaCsrReqMaster dmaCsrDesc; + (* prefix = "m_h2c_desc" *) interface RawDmaCsrMaster dmaWrCsr; + + // Raw PCIe interfaces, connected to the Xilinx PCIe IP + (* prefix = "" *) interface RawXilinxPcieIp rawPcie; +endinterface + +// (* synthesize *) // +module mkRawDmaController(RawDmaController); + DmaController dmac <- mkDmaController; + + let dmaWrData0Ifc <- mkFifoInToRawDmaDataSlave(dmac.c2hDataFifoIn[0]); + let dmaDesc0Ifc <- mkFifoInToRawDmaReqSlave(dmac.c2hReqFifoIn[0]); + let dmaRdData0Ifc <- mkFifoOutToRawDmaDataMaster(dmac.c2hDataFifoOut[0]); + + let dmaWrData1Ifc <- mkFifoInToRawDmaDataSlave(dmac.c2hDataFifoIn[1]); + let dmaDesc1Ifc <- mkFifoInToRawDmaReqSlave(dmac.c2hReqFifoIn[1]); + let dmaRdData1Ifc <- mkFifoOutToRawDmaDataMaster(dmac.c2hDataFifoOut[1]); + + let dmaRdCsrIfc <- mkFifoInToRawBusSlave(dmac.h2cDataFifoIn); + let dmaWrCsrIfc <- mkFifoOutToRawBusMaster(dmac.h2cDataFifoOut); + let dmaCsrDescIfc <- mkFifoOutToRawCsrReqMaster(dmac.h2cReqFifoOut); + + interface dmaWrData0 = dmaWrData0Ifc; + interface dmaDesc0 = dmaDesc0Ifc; + interface dmaRdData0 = dmaRdData0Ifc; + interface dmaWrData1 = dmaWrData1Ifc; + interface dmaDesc1 = dmaDesc1Ifc; + interface dmaRdData1 = dmaRdData1Ifc; + interface dmaRdCsr = dmaRdCsrIfc; + interface dmaCsrDesc = dmaCsrDescIfc; + interface dmaWrCsr = dmaWrCsrIfc; + + interface rawPcie = dmac.rawPcie; +endmodule + + diff --git a/src/PcieAdapter.bsv b/src/PcieAdapter.bsv index a0b08c5..dc07f33 100644 --- a/src/PcieAdapter.bsv +++ b/src/PcieAdapter.bsv @@ -34,6 +34,7 @@ interface RequesterAxiStreamAdapter; (* prefix = "" *) interface RawPcieRequesterComplete rawRequesterComplete; endinterface +// (* synthesize *) // module mkRequesterAxiStreamAdapter(RequesterAxiStreamAdapter); ConvertDataStreamsToStraddleAxis dmaToAxisConverter <- mkConvertDataStreamsToStraddleAxis; ConvertStraddleAxisToDataStream axisToDmaConverter <- mkConvertStraddleAxisToDataStream; @@ -89,6 +90,7 @@ interface CompleterAxiStreamAdapter; endinterface // Completer Only Receives and Transmits One Beat TLP, in which isFirst = isLast = True +// (* synthesize *) // module mkCompleterAxiStreamAdapter(CompleterAxiStreamAdapter); FIFOF#(DataStream) inFifo <- mkFIFOF; FIFOF#(DataStream) outFifo <- mkFIFOF; diff --git a/src/PrimUtils.bsv b/src/PrimUtils.bsv index 292876f..bb0c8e2 100644 --- a/src/PrimUtils.bsv +++ b/src/PrimUtils.bsv @@ -413,7 +413,6 @@ function DmaMemAddr getAddrLowBits(DmaMemAddr addr, Bit#(TLog#(DMA_MEM_ADDR_WIDT 61: temp = zeroExtend(DmaMemAddr'(addr[61-1:0])); 62: temp = zeroExtend(DmaMemAddr'(addr[62-1:0])); 63: temp = zeroExtend(DmaMemAddr'(addr[63-1:0])); - 64: temp = zeroExtend(DmaMemAddr'(addr[64-1:0])); default: temp = 0; endcase return temp; diff --git a/test/TestCompletionFifo.bsv b/test/TestCompletionFifo.bsv index fbb01d4..ab36404 100644 --- a/test/TestCompletionFifo.bsv +++ b/test/TestCompletionFifo.bsv @@ -152,7 +152,7 @@ interface CFifoInstTb; interface FifoOut#(DataStream) drain; endinterface -(* synthesize *) +// (* synthesize *) // module mkCompletionFifoInst(CFifoInstTb); CompletionFifo#(TEST_SLOT_NUM, DataStream) cFifo <- mkCompletionFifo(valueOf(MAX_STREAM_NUM_PER_COMPLETION)); interface reserve = cFifo.reserve; From 9426c06a4141aae7c514ec72455eb38a4f7b1e64 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Tue, 27 Aug 2024 19:52:47 +0800 Subject: [PATCH 37/53] Add cocotb testbench --- cocotb/Makefile | 33 ++++ cocotb/dma_wr_rd_tb.py | 289 ++++++++++++++++++++++++++++++------ run_one.sh | 2 +- src/DmaC2HPipe.bsv | 36 +++-- src/DmaH2CPipe.bsv | 2 +- src/DmaWrapper.bsv | 37 ++--- src/PcieAdapter.bsv | 4 +- src/PcieConfigurator.bsv | 2 +- src/PcieTypes.bsv | 4 +- src/StreamUtils.bsv | 2 +- test/TestCompletionFifo.bsv | 2 +- test/TestDmaCore.bsv | 93 +++++++++++- test/TestStreamUtils.bsv | 2 - 13 files changed, 409 insertions(+), 99 deletions(-) create mode 100644 cocotb/Makefile diff --git a/cocotb/Makefile b/cocotb/Makefile new file mode 100644 index 0000000..f44df22 --- /dev/null +++ b/cocotb/Makefile @@ -0,0 +1,33 @@ +ROOT_DIR = $(abspath ../) +BACKEND_DIR = $(ROOT_DIR)/backend +TB_DIR = $(abspath ./) +include $(ROOT_DIR)/Makefile.base +VBUILD_DIR = $(BACKEND_DIR)/build +VSRC_DIR = $(BACKEND_DIR)/verilog + +TARGET = RawDmaController +TOP_MODULE = mk$(TARGET) +TOP_FILE = $(TOP_MODULE).v +VLOG_FILE = $(TB_DIR)/$(TOP_FILE) + +TB_CASE = dma_wr_rd +TB_FILE = $(TB_CASE)_tb.py +DATE = $(shell date "+%Y%m%d") +LOG_FILE = $(TB_DIR)/log/$(DATE)_$(TOP_MODULE).log + +verilog: + cd $(BACKEND_DIR) && make verilog + bluetcl $(BACKEND_DIR)/listVlogFiles.tcl -bdir $(VBUILD_DIR) -vdir $(VSRC_DIR) $(TOP_MODULE) $(TOP_MODULE) | grep -i '\.v' | xargs -I {} cat {} >> $(VLOG_FILE) + sed -i '1i `timescale 1ns/1ps' $(VLOG_FILE) + +cocotb:clean verilog run + +run: + cd $(TB_DIR) + mkdir -p log + python3 $(TB_FILE) 2>&1 | tee $(LOG_FILE) + +clean: + cd $(BACKEND_DIR) && make clean + cd $(TB_DIR) && rm -rf $(VLOG_FILE) __pycache__ .pytest_cache sim_build *.log + \ No newline at end of file diff --git a/cocotb/dma_wr_rd_tb.py b/cocotb/dma_wr_rd_tb.py index b70af5b..29be393 100644 --- a/cocotb/dma_wr_rd_tb.py +++ b/cocotb/dma_wr_rd_tb.py @@ -9,37 +9,58 @@ import cocotb from cocotb.triggers import RisingEdge, FallingEdge, Timer from cocotb.regression import TestFactory +from cocotb.clock import Clock -from cocotbext.axi import AxiStreamBus from cocotbext.pcie.core import RootComplex from cocotbext.pcie.xilinx.us import UltraScalePlusPcieDevice -from cocotbext.axi import AxiReadBus, AxiRamRead +from cocotbext.axi.stream import define_stream +from cocotbext.axi import (AxiStreamBus, AxiStreamSource, AxiStreamSink, AxiStreamMonitor, AxiStreamFrame) + +# class TB architecture +# -------------- ------------- ----------- +# | Root Complex | <-> | End Pointer | <-> | Dut(DMAC) | +# -------------- ------------- ----------- + +DescBus, DescTransaction, DescSource, DescSink, DescMonitor = define_stream("Desc", + signals=["start_addr", "byte_cnt", "is_write", "valid", "ready"] +) class TB(object): - def __init__(self, dut): + def __init__(self, dut, msix=False): self.dut = dut self.log = logging.getLogger("cocotb.tb") self.log.setLevel(logging.DEBUG) - + + self.clock = dut.CLK + self.resetn = dut.RST_N + # PCIe self.rc = RootComplex() + cq_straddle = False + cc_straddle = False + rq_straddle = True + rc_straddle = True + rc_4tlp_straddle = False + + self.client_tag = bool(int(os.getenv("CLIENT_TAG", "1"))) + self.dev = UltraScalePlusPcieDevice( # configuration options pcie_generation=3, # pcie_link_width=2, # user_clk_frequency=250e6, alignment="dword", - cq_straddle=False, - cc_straddle=False, - rq_straddle=True, - rc_straddle=True, - rc_4tlp_straddle=False, + cq_straddle=cq_straddle, + cc_straddle=cc_straddle, + rq_straddle=rq_straddle, + rc_straddle=rc_straddle, + rc_4tlp_straddle=rc_4tlp_straddle, pf_count=1, max_payload_size=1024, - enable_client_tag=True, - enable_extended_tag=True, + enable_client_tag=self.client_tag, + enable_extended_tag=False, enable_parity=False, enable_rx_msg_interface=False, enable_sriov=False, @@ -53,12 +74,12 @@ def __init__(self, dut): pf2_msi_count=1, pf3_msi_enable=False, pf3_msi_count=1, - pf0_msix_enable=False, - pf0_msix_table_size=0, - pf0_msix_table_bir=0, + pf0_msix_enable=msix, + pf0_msix_table_size=63, + pf0_msix_table_bir=4, pf0_msix_table_offset=0x00000000, - pf0_msix_pba_bir=0, - pf0_msix_pba_offset=0x00000000, + pf0_msix_pba_bir=4, + pf0_msix_pba_offset=0x00008000, pf1_msix_enable=False, pf1_msix_table_size=0, pf1_msix_table_bir=0, @@ -79,43 +100,219 @@ def __init__(self, dut): pf3_msix_pba_offset=0x00000000, # signals - user_clk=dut.CLK, - user_reset=dut.RST_N, + user_clk=self.clock, + # user_reset=~self.resetn, + user_lnk_up=dut.user_lnk_up, + # sys_clk=dut.sys_clk, + # sys_clk_gt=dut.sys_clk_gt, + # sys_reset=dut.sys_reset, + # phy_rdy_out=dut.phy_rdy_out, rq_bus=AxiStreamBus.from_prefix(dut, "m_axis_rq"), pcie_rq_seq_num0=dut.pcie_rq_seq_num0, pcie_rq_seq_num_vld0=dut.pcie_rq_seq_num_vld0, pcie_rq_seq_num1=dut.pcie_rq_seq_num1, pcie_rq_seq_num_vld1=dut.pcie_rq_seq_num_vld1, - + pcie_rq_tag0=dut.pcie_rq_tag0, + pcie_rq_tag1=dut.pcie_rq_tag1, + # pcie_rq_tag_av=dut.pcie_rq_tag_av, + pcie_rq_tag_vld0=dut.pcie_rq_tag_vld0, + pcie_rq_tag_vld1=dut.pcie_rq_tag_vld1, + rc_bus=AxiStreamBus.from_prefix(dut, "s_axis_rc"), + cq_bus=AxiStreamBus.from_prefix(dut, "s_axis_cq"), + pcie_cq_np_req=dut.pcie_cq_np_req, + pcie_cq_np_req_count=dut.pcie_cq_np_req_count, + + cc_bus=AxiStreamBus.from_prefix(dut, "m_axis_cc"), + + pcie_tfc_nph_av=dut.pcie_tfc_nph_av, + pcie_tfc_npd_av=dut.pcie_tfc_npd_av, + cfg_phy_link_down=dut.cfg_phy_link_down, + cfg_phy_link_status=dut.cfg_phy_link_status, + cfg_negotiated_width=dut.cfg_negotiated_width, + cfg_current_speed=dut.cfg_current_speed, cfg_max_payload=dut.cfg_max_payload, cfg_max_read_req=dut.cfg_max_read_req, - - cfg_fc_sel=0b100, + cfg_function_status=dut.cfg_function_status, + cfg_function_power_state=dut.cfg_function_power_state, + cfg_vf_status=dut.cfg_vf_status, + cfg_vf_power_state=dut.cfg_vf_power_state, + cfg_link_power_state=dut.cfg_link_power_state, + cfg_mgmt_addr=dut.cfg_mgmt_addr, + cfg_mgmt_function_number=dut.cfg_mgmt_function_number, + cfg_mgmt_write=dut.cfg_mgmt_write, + cfg_mgmt_write_data=dut.cfg_mgmt_write_data, + cfg_mgmt_byte_enable=dut.cfg_mgmt_byte_enable, + cfg_mgmt_read=dut.cfg_mgmt_read, + cfg_mgmt_read_data=dut.cfg_mgmt_read_data, + cfg_mgmt_read_write_done=dut.cfg_mgmt_read_write_done, + cfg_mgmt_debug_access=dut.cfg_mgmt_debug_access, + cfg_err_cor_out=dut.cfg_err_cor_out, + cfg_err_nonfatal_out=dut.cfg_err_nonfatal_out, + cfg_err_fatal_out=dut.cfg_err_fatal_out, + cfg_local_error_valid=dut.cfg_local_error_valid, + cfg_local_error_out=dut.cfg_local_error_out, + cfg_ltssm_state=dut.cfg_ltssm_state, + cfg_rx_pm_state=dut.cfg_rx_pm_state, + cfg_tx_pm_state=dut.cfg_tx_pm_state, + cfg_rcb_status=dut.cfg_rcb_status, + cfg_obff_enable=dut.cfg_obff_enable, + # cfg_pl_status_change=dut.cfg_pl_status_change, + # cfg_tph_requester_enable=dut.cfg_tph_requester_enable, + # cfg_tph_st_mode=dut.cfg_tph_st_mode, + # cfg_vf_tph_requester_enable=dut.cfg_vf_tph_requester_enable, + # cfg_vf_tph_st_mode=dut.cfg_vf_tph_st_mode, + cfg_msg_received=dut.cfg_msg_received, + cfg_msg_received_data=dut.cfg_msg_received_data, + cfg_msg_received_type=dut.cfg_msg_received_type, + cfg_msg_transmit=dut.cfg_msg_transmit, + cfg_msg_transmit_type=dut.cfg_msg_transmit_type, + cfg_msg_transmit_data=dut.cfg_msg_transmit_data, + cfg_msg_transmit_done=dut.cfg_msg_transmit_done, cfg_fc_ph=dut.cfg_fc_ph, cfg_fc_pd=dut.cfg_fc_pd, + cfg_fc_nph=dut.cfg_fc_nph, + cfg_fc_npd=dut.cfg_fc_npd, + cfg_fc_cplh=dut.cfg_fc_cplh, + cfg_fc_cpld=dut.cfg_fc_cpld, + cfg_fc_sel=dut.cfg_fc_sel, + cfg_dsn=dut.cfg_dsn, + cfg_bus_number=dut.cfg_bus_number, + cfg_power_state_change_ack=dut.cfg_power_state_change_ack, + cfg_power_state_change_interrupt=dut.cfg_power_state_change_interrupt, + cfg_err_cor_in=dut.cfg_err_cor_in, + cfg_err_uncor_in=dut.cfg_err_uncor_in, + cfg_flr_in_process=dut.cfg_flr_in_process, + cfg_flr_done=dut.cfg_flr_done, + cfg_vf_flr_in_process=dut.cfg_vf_flr_in_process, + cfg_vf_flr_func_num=dut.cfg_vf_flr_func_num, + cfg_vf_flr_done=dut.cfg_vf_flr_done, + cfg_link_training_enable=dut.cfg_link_training_enable, + cfg_interrupt_int=dut.cfg_interrupt_int, + cfg_interrupt_pending=dut.cfg_interrupt_pending, + cfg_interrupt_sent=dut.cfg_interrupt_sent, + cfg_interrupt_msi_enable=dut.cfg_interrupt_msi_enable, + cfg_interrupt_msi_mmenable=dut.cfg_interrupt_msi_mmenable, + cfg_interrupt_msi_mask_update=dut.cfg_interrupt_msi_mask_update, + cfg_interrupt_msi_data=dut.cfg_interrupt_msi_data, + cfg_interrupt_msi_select=dut.cfg_interrupt_msi_select, + cfg_interrupt_msi_int=dut.cfg_interrupt_msi_int, + cfg_interrupt_msi_pending_status=dut.cfg_interrupt_msi_pending_status, + cfg_interrupt_msi_pending_status_data_enable=dut.cfg_interrupt_msi_pending_status_data_enable, + cfg_interrupt_msi_pending_status_function_num=dut.cfg_interrupt_msi_pending_status_function_num, + cfg_interrupt_msi_sent=dut.cfg_interrupt_msi_sent, + cfg_interrupt_msi_fail=dut.cfg_interrupt_msi_fail, + cfg_interrupt_msi_attr=dut.cfg_interrupt_msi_attr, + cfg_interrupt_msi_tph_present=dut.cfg_interrupt_msi_tph_present, + cfg_interrupt_msi_tph_type=dut.cfg_interrupt_msi_tph_type, + cfg_interrupt_msi_tph_st_tag=dut.cfg_interrupt_msi_tph_st_tag, + cfg_interrupt_msi_function_number=dut.cfg_interrupt_msi_function_number, + cfg_pm_aspm_l1_entry_reject=dut.cfg_pm_aspm_l1_entry_reject, + cfg_pm_aspm_tx_l0s_entry_disable=dut.cfg_pm_aspm_tx_l0s_entry_disable, + cfg_hot_reset_out=dut.cfg_hot_reset_out, + cfg_config_space_enable=dut.cfg_config_space_enable, + cfg_req_pm_transition_l23_ready=dut.cfg_req_pm_transition_l23_ready, + cfg_hot_reset_in=dut.cfg_hot_reset_in, + cfg_ds_port_number=dut.cfg_ds_port_number, + cfg_ds_bus_number=dut.cfg_ds_bus_number, + cfg_ds_device_number=dut.cfg_ds_device_number, ) - self.dev.log.setLevel(logging.DEBUG) + self.dev.log.setLevel(logging.INFO) + + dut.pcie_cq_np_req.setimmediatevalue(1) + dut.cfg_mgmt_addr.setimmediatevalue(0) + dut.cfg_mgmt_function_number.setimmediatevalue(0) + dut.cfg_mgmt_write.setimmediatevalue(0) + dut.cfg_mgmt_write_data.setimmediatevalue(0) + dut.cfg_mgmt_byte_enable.setimmediatevalue(0) + dut.cfg_mgmt_read.setimmediatevalue(0) + dut.cfg_mgmt_debug_access.setimmediatevalue(0) + dut.cfg_msg_transmit.setimmediatevalue(0) + dut.cfg_msg_transmit_type.setimmediatevalue(0) + dut.cfg_msg_transmit_data.setimmediatevalue(0) + dut.cfg_fc_sel.setimmediatevalue(0) + dut.cfg_dsn.setimmediatevalue(0) + dut.cfg_power_state_change_ack.setimmediatevalue(0) + dut.cfg_err_cor_in.setimmediatevalue(0) + dut.cfg_err_uncor_in.setimmediatevalue(0) + dut.cfg_flr_done.setimmediatevalue(0) + dut.cfg_vf_flr_func_num.setimmediatevalue(0) + dut.cfg_vf_flr_done.setimmediatevalue(0) + dut.cfg_link_training_enable.setimmediatevalue(1) + dut.cfg_interrupt_int.setimmediatevalue(0) + dut.cfg_interrupt_pending.setimmediatevalue(0) + dut.cfg_interrupt_msi_select.setimmediatevalue(0) + dut.cfg_interrupt_msi_int.setimmediatevalue(0) + dut.cfg_interrupt_msi_pending_status.setimmediatevalue(0) + dut.cfg_interrupt_msi_pending_status_data_enable.setimmediatevalue(0) + dut.cfg_interrupt_msi_pending_status_function_num.setimmediatevalue(0) + dut.cfg_interrupt_msi_attr.setimmediatevalue(0) + dut.cfg_interrupt_msi_tph_present.setimmediatevalue(0) + dut.cfg_interrupt_msi_tph_type.setimmediatevalue(0) + dut.cfg_interrupt_msi_tph_st_tag.setimmediatevalue(0) + dut.cfg_interrupt_msi_function_number.setimmediatevalue(0) + dut.cfg_pm_aspm_l1_entry_reject.setimmediatevalue(0) + dut.cfg_pm_aspm_tx_l0s_entry_disable.setimmediatevalue(0) + dut.cfg_config_space_enable.setimmediatevalue(1) + dut.cfg_req_pm_transition_l23_ready.setimmediatevalue(0) + dut.cfg_hot_reset_in.setimmediatevalue(0) + dut.cfg_ds_port_number.setimmediatevalue(0) + dut.cfg_ds_bus_number.setimmediatevalue(0) + dut.cfg_ds_device_number.setimmediatevalue(0) self.rc.make_port().connect(self.dev) - - dut.requester_id.setimmediatevalue(0) - dut.requester_id_enable.setimmediatevalue(0) - - dut.enable.setimmediatevalue(0) + + # DMA + self.c2h_write_source_0 = AxiStreamSource(AxiStreamBus.from_prefix(dut, "s_axis_c2h_0"), self.clock, self.resetn, False) + self.c2h_desc_source_0 = DescSource(DescBus.from_prefix(dut, "s_desc_c2h_0"),self.clock, self.resetn, False) + self.c2h_read_sink_0 = AxiStreamSink(AxiStreamBus.from_prefix(dut, "m_axis_c2h_0"), self.clock, self.resetn, False) + self.c2h_write_source_1 = AxiStreamSource(AxiStreamBus.from_prefix(dut, "s_axis_c2h_1"), self.clock, self.resetn, False) + self.c2h_desc_source_1 = DescSource(DescBus.from_prefix(dut, "s_desc_c2h_1"), self.clock, self.resetn, False) + self.c2h_read_sink_1 = AxiStreamSink(AxiStreamBus.from_prefix(dut, "m_axis_c2h_1"), self.clock, self.resetn, False) + + #monitor + self.rq_monitor = AxiStreamMonitor(AxiStreamBus.from_prefix(dut, "m_axis_rq"), self.clock, self.resetn, False) + + #Do not use user_rst but gen rstn for bsv + async def gen_reset(self): + self.resetn.value = 0 + await RisingEdge(self.clock) + await RisingEdge(self.clock) + await RisingEdge(self.clock) + self.resetn.value = 1 + await RisingEdge(self.clock) + await RisingEdge(self.clock) + await RisingEdge(self.clock) + self.log.info("Generated DMA RST_N") + + async def send_desc(self, channel, startAddr, length, isWrite): + desc = DescTransaction() + desc.start_addr = startAddr + desc.byte_cnt = length + desc.is_write = isWrite + self.log.debug("Send a DMA Request, isWrite:%s, startAddr:%s", isWrite, startAddr) + if channel == 0: + await self.c2h_desc_source_0.send(desc) + else: + await self.c2h_desc_source_1.send(desc) + + async def run_single_write_once(self, channel, addr, length): + await self.c2h_write_source_0.send(b'ABCD') + self.log.debug("Send test write request!") + await self.send_desc(channel, addr, length, True) + + async def run_single_read_once(self, channel, addr, length): + await self.send_desc(channel, addr, length, False) # no input now -@cocotb.test(timeout_time=1000000000, timeout_unit="ns") +@cocotb.test(timeout_time=6000, timeout_unit="ns") async def run_test_write(dut): tb = TB(dut) - - await FallingEdge(dut.RST_N) - await RisingEdge(dut.clk) - await RisingEdge(dut.clk) + await tb.gen_reset() await tb.rc.enumerate() @@ -125,10 +322,16 @@ async def run_test_write(dut): mem = tb.rc.mem_pool.alloc_region(16*1024*1024) mem_base = mem.get_absolute_address(0) + + await tb.run_single_write_once(0, mem_base, 4) - await RisingEdge(dut.clk) - await RisingEdge(dut.clk) + await Timer(500, units='ns') + await tb.run_single_read_once(0, mem_base, 4) + frame = await tb.c2h_read_sink_0.recv() + print(frame) + await RisingEdge(tb.clock) + if cocotb.SIM_NAME: @@ -139,25 +342,19 @@ async def run_test_write(dut): # cocotb-test tests_dir = os.path.dirname(__file__) -rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', 'backend', 'verilog')) +rtl_dir = tests_dir -def test_dma_wr(request,): +def test_dma_wr(): dut = "mkRawDmaController" module = os.path.splitext(os.path.basename(__file__))[0] toplevel = dut verilog_sources = [ - os.path.join(rtl_dir, f"{dut}.v"), - os.path.join(rtl_dir, f"FIFO2.v"), - os.path.join(rtl_dir, f"SizedFIFO.v"), - os.path.join(rtl_dir, f"BRAM2.v"), - os.path.join(rtl_dir, f"Counter.v"), - # os.path.join(rtl_dir, f"mkRequesterAxiStreamAdapter.v") + os.path.join(rtl_dir, f"{dut}.v") ] - sim_build = os.path.join(tests_dir, "sim_build", - request.node.name.replace('[', '-').replace(']', '')) + sim_build = os.path.join(tests_dir, "sim_build", dut) cocotb_test.simulator.run( python_search=[tests_dir], @@ -167,4 +364,6 @@ def test_dma_wr(request,): timescale="1ns/1ps", sim_build=sim_build ) - \ No newline at end of file + +if __name__ == "__main__": + test_dma_wr() \ No newline at end of file diff --git a/run_one.sh b/run_one.sh index f049c0d..ad1eac1 100755 --- a/run_one.sh +++ b/run_one.sh @@ -13,7 +13,7 @@ TEST_LOG=run.log TEST_DIR=test cd $TEST_DIR truncate -s 0 $TEST_LOG -FILES=`ls TestDmaCore.bsv` +FILES=`ls TestStreamUtils.bsv` ########################################################################### for FILE in $FILES; do # echo $FILE diff --git a/src/DmaC2HPipe.bsv b/src/DmaC2HPipe.bsv index 4fe57eb..0d744f8 100644 --- a/src/DmaC2HPipe.bsv +++ b/src/DmaC2HPipe.bsv @@ -1,5 +1,6 @@ import FIFOF::*; import GetPut::*; +import Connectable::*; import SemiFifo::*; import PrimUtils::*; @@ -28,7 +29,7 @@ interface DmaC2HPipe; endinterface // Single Path module -// (* synthesize *) // +(* synthesize *) module mkDmaC2HPipe#(DmaPathNo pathIdx)(DmaC2HPipe); C2HReadCore readCore <- mkC2HReadCore(pathIdx); C2HWriteCore writeCore <- mkC2HWriteCore; @@ -38,6 +39,8 @@ module mkDmaC2HPipe#(DmaPathNo pathIdx)(DmaC2HPipe); FIFOF#(DataStream) tlpOutFifo <- mkFIFOF; FIFOF#(SideBandByteEn) tlpSideBandFifo <- mkFIFOF; + mkConnection(dataInFifo, writeCore.dataFifoIn); + rule reqDeMux; let req = reqInFifo.first; reqInFifo.deq; @@ -47,16 +50,10 @@ module mkDmaC2HPipe#(DmaPathNo pathIdx)(DmaC2HPipe); else begin readCore.rdReqFifoIn.enq(req); end - $display("SIM INFO @ mkDmaC2HPipe%d: New Request isWrite:%b startAddr:%h length:%h", + $display($time, "ns SIM INFO @ mkDmaC2HPipe%d: New Request isWrite:%b startAddr:%h length:%d", pathIdx, pack(req.isWrite), req.startAddr, req.length); endrule - rule dataPipe; - let stream = dataInFifo.first; - dataInFifo.deq; - writeCore.dataFifoIn.enq(stream); - endrule - rule tlpOutMux; if (readCore.tlpFifoOut.notEmpty) begin tlpOutFifo.enq(readCore.tlpFifoOut.first); @@ -105,7 +102,7 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); FIFOF#(SlotToken) tagFifo <- mkSizedFIFOF(valueOf(TAdd#(1, STREAM_HEADER_REMOVE_LATENCY))); FIFOF#(Bool) completedFifo <- mkSizedFIFOF(valueOf(TAdd#(1, STREAM_HEADER_REMOVE_LATENCY))); - StreamPipe descRemove <- mkStreamHeaderRemove(fromInteger(valueOf(DES_RC_DESCRIPTOR_WIDTH))); + StreamPipe descRemove <- mkStreamHeaderRemove(fromInteger(valueOf(TDiv#(DES_RC_DESCRIPTOR_WIDTH, BYTE_WIDTH)))); StreamPipe streamReshape <- mkStreamReshape; ChunkCompute chunkSplitor <- mkChunkComputer(DMA_RX); CompletionFifo#(SLOT_PER_PATH, DataStream) cBuffer <- mkCompletionFifo(valueOf(MAX_STREAM_NUM_PER_COMPLETION)); @@ -115,7 +112,10 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); // Pipeline stage 1: convert StraddleStream to DataStream, may cost 2 cycle for one StraddleStream rule convertStraddleToDataStream; let sdStream = tlpInFifo.first; + $display($time, "ns SIM INFO @ mkDmaC2HReadCore: recv new stream from straddle adapter, isDouble:%b", pack(sdStream.isDoubleFrame)); let stream = getEmptyStream; + SlotToken tag = 0; + Bool isCompleted = False; if (sdStream.isDoubleFrame) begin PcieTlpCtlIsSopPtr isSopPtr = 0; if (hasReadOnce) begin @@ -132,8 +132,8 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); isFirst : sdStream.isFirst[isSopPtr], isLast : sdStream.isLast[isSopPtr] }; - let tag = sdStream.tag[isSopPtr]; - tagFifo.enq(tag); + tag = sdStream.tag[isSopPtr]; + isCompleted = sdStream.isCompleted[isSopPtr]; end else begin tlpInFifo.deq; @@ -144,10 +144,13 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); isFirst : sdStream.isFirst[0], isLast : sdStream.isLast[0] }; - let tag = sdStream.tag[0]; - tagFifo.enq(tag); + tag = sdStream.tag[0]; + isCompleted = sdStream.isCompleted[0]; end descRemove.streamFifoIn.enq(stream); + tagFifo.enq(tag); + completedFifo.enq(isCompleted); + $display("parse from straddle", fshow(stream)); endrule // Pipeline stage 2: remove the descriptor in the head of each TLP @@ -157,6 +160,8 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); let stream = descRemove.streamFifoOut.first; let isCompleted = completedFifo.first; let tag = tagFifo.first; + $display($time, "ns SIM INFO @ mkDmaC2HReadCore: recv new tlp tag%d, isCompleted:%b", tag, pack(isCompleted)); + $display("desc remove output", fshow(stream)); descRemove.streamFifoOut.deq; completedFifo.deq; tagFifo.deq; @@ -173,6 +178,7 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); let stream = cBuffer.drain.first; cBuffer.drain.deq; streamReshape.streamFifoIn.enq(stream); + $display("cbuf output", fshow(stream)); endrule // Pipeline stage 1: split to req to MRRS chunks @@ -214,6 +220,7 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); let firstByteEn = convertDWordOffset2FirstByteEn(startAddrOffset); let lastByteEn = convertDWordOffset2LastByteEn(endAddrOffset); tlpByteEnFifo.enq(tuple2(firstByteEn, lastByteEn)); + $display($time, "ns SIM INFO @ mkDmaC2HReadCore: output new tlp, BE:%h/%h", firstByteEn, lastByteEn); endrule // User Logic Ifc @@ -249,11 +256,13 @@ module mkC2HWriteCore(C2HWriteCore); // Pipeline stage 1: split the whole write request to chunks, latency = 3 rule splitToChunks; let wrStream = dataInFifo.first; + $display($time, "ns SIM INFO @ mkDmaC2HWriteCore: get new writing stream"); if (wrStream.isFirst && wrReqInFifo.notEmpty) begin wrReqInFifo.deq; chunkSplit.reqFifoIn.enq(wrReqInFifo.first); dataInFifo.deq; chunkSplit.dataFifoIn.enq(wrStream); + $display($time, "ns SIM INFO @ mkDmaC2HWriteCore: new write start, startAddr:%h, length:%d ", wrReqInFifo.first.startAddr, wrReqInFifo.first.length); end else if (!wrStream.isFirst) begin dataInFifo.deq; @@ -288,6 +297,7 @@ module mkC2HWriteCore(C2HWriteCore); let sideBandByteEn = streamAlign.byteEnFifoOut.first; streamAlign.byteEnFifoOut.deq; byteEnOutFifo.enq(sideBandByteEn); + $display($time, "ns SIM INFO @ mkDmaC2HWriteCore: output new tlp, BE:%h/%h", tpl_1(sideBandByteEn), tpl_2(sideBandByteEn)); end if (streamAlign.dataFifoOut.notEmpty) begin let stream = streamAlign.dataFifoOut.first; diff --git a/src/DmaH2CPipe.bsv b/src/DmaH2CPipe.bsv index fd214dc..d7dd6f6 100644 --- a/src/DmaH2CPipe.bsv +++ b/src/DmaH2CPipe.bsv @@ -25,7 +25,7 @@ interface DmaH2CPipe; // TODO: Cfg Ifc endinterface -// (* synthesize *) // +(* synthesize *) module mkDmaH2CPipe(DmaH2CPipe); FIFOF#(DataStream) tlpInFifo <- mkFIFOF; diff --git a/src/DmaWrapper.bsv b/src/DmaWrapper.bsv index 1d94a6e..0bf04fc 100755 --- a/src/DmaWrapper.bsv +++ b/src/DmaWrapper.bsv @@ -1,5 +1,6 @@ import FIFOF::*; import Vector::*; +import Connectable :: *; import SemiFifo::*; import BusConversion::*; @@ -30,7 +31,7 @@ interface DmaController; endinterface // TODO : connect Configurator to other modules -// (* synthesize *) // +(* synthesize *) module mkDmaController(DmaController); Vector#(DMA_PATH_NUM, DmaC2HPipe) c2hPipes = newVector; for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1) begin @@ -51,33 +52,13 @@ module mkDmaController(DmaController); c2hDataInIfc[pathIdx] = c2hPipes[pathIdx].wrDataFifoIn; c2hDataOutIfc[pathIdx] = c2hPipes[pathIdx].rdDataFifoOut; c2hReqInIfc[pathIdx] = c2hPipes[pathIdx].reqFifoIn; - rule conncetC2HToAdapter; - if (c2hPipes[pathIdx].tlpDataFifoOut.notEmpty) begin - reqAdapter.dmaDataFifoIn[pathIdx].enq(c2hPipes[pathIdx].tlpDataFifoOut.first); - c2hPipes[pathIdx].tlpDataFifoOut.deq; - end - if (c2hPipes[pathIdx].tlpSideBandFifoOut.notEmpty) begin - reqAdapter.dmaSideBandFifoIn[pathIdx].enq(c2hPipes[pathIdx].tlpSideBandFifoOut.first); - c2hPipes[pathIdx].tlpSideBandFifoOut.deq; - end - if (reqAdapter.dmaDataFifoOut[pathIdx].notEmpty) begin - c2hPipes[pathIdx].tlpDataFifoIn.enq(reqAdapter.dmaDataFifoOut[pathIdx].first); - reqAdapter.dmaDataFifoOut[pathIdx].deq; - end - endrule + mkConnection(c2hPipes[pathIdx].tlpDataFifoOut, reqAdapter.dmaDataFifoIn[pathIdx]); + mkConnection(c2hPipes[pathIdx].tlpSideBandFifoOut, reqAdapter.dmaSideBandFifoIn[pathIdx]); + mkConnection(reqAdapter.dmaDataFifoOut[pathIdx], c2hPipes[pathIdx].tlpDataFifoIn); end - rule connectH2CToAdapter; - if (cmplAdapter.dmaDataFifoOut.notEmpty) begin - h2cPipe.tlpDataFifoIn.enq(cmplAdapter.dmaDataFifoOut.first); - cmplAdapter.dmaDataFifoOut.deq; - end - if (h2cPipe.tlpDataFifoOut.notEmpty) begin - cmplAdapter.dmaDataFifoIn.enq(h2cPipe.tlpDataFifoOut.first); - h2cPipe.tlpDataFifoOut.deq; - end - - endrule + mkConnection(cmplAdapter.dmaDataFifoOut, h2cPipe.tlpDataFifoIn); + mkConnection(h2cPipe.tlpDataFifoOut, cmplAdapter.dmaDataFifoIn); // User Logic Ifc interface c2hDataFifoIn = c2hDataInIfc; @@ -132,7 +113,7 @@ typedef RawBusMaster#(DmaCsrValue) RawDmaCsrMaster; typedef RawBusSlave#(DmaCsrValue) RawDmaCsrSlave; module mkFifoInToRawDmaDataSlave#(FifoIn#(DataStream) pipe)(RawDmaDataSlave); - Reg#(Bool) isFirstReg <- mkReg(False); + Reg#(Bool) isFirstReg <- mkReg(True); let rawBus <- mkFifoInToRawBusSlave(pipe); method Bool tReady = rawBus.ready; method Action tValid( @@ -219,7 +200,7 @@ interface RawDmaController; (* prefix = "" *) interface RawXilinxPcieIp rawPcie; endinterface -// (* synthesize *) // +(* synthesize *) module mkRawDmaController(RawDmaController); DmaController dmac <- mkDmaController; diff --git a/src/PcieAdapter.bsv b/src/PcieAdapter.bsv index dc07f33..4aa8c7f 100644 --- a/src/PcieAdapter.bsv +++ b/src/PcieAdapter.bsv @@ -34,7 +34,7 @@ interface RequesterAxiStreamAdapter; (* prefix = "" *) interface RawPcieRequesterComplete rawRequesterComplete; endinterface -// (* synthesize *) // +(* synthesize *) module mkRequesterAxiStreamAdapter(RequesterAxiStreamAdapter); ConvertDataStreamsToStraddleAxis dmaToAxisConverter <- mkConvertDataStreamsToStraddleAxis; ConvertStraddleAxisToDataStream axisToDmaConverter <- mkConvertStraddleAxisToDataStream; @@ -90,7 +90,7 @@ interface CompleterAxiStreamAdapter; endinterface // Completer Only Receives and Transmits One Beat TLP, in which isFirst = isLast = True -// (* synthesize *) // +(* synthesize *) module mkCompleterAxiStreamAdapter(CompleterAxiStreamAdapter); FIFOF#(DataStream) inFifo <- mkFIFOF; FIFOF#(DataStream) outFifo <- mkFIFOF; diff --git a/src/PcieConfigurator.bsv b/src/PcieConfigurator.bsv index c0553c9..f68ac2c 100644 --- a/src/PcieConfigurator.bsv +++ b/src/PcieConfigurator.bsv @@ -121,7 +121,7 @@ module mkPcieConfigurator(PcieConfigurator); endmethod method Action getMsiSignals( - Bool msiEn, + PcieCfgMsiEn msiEn, Bool msiSent, Bool msiFail, PcieCfgMsiMmEn msiMmEn, diff --git a/src/PcieTypes.bsv b/src/PcieTypes.bsv index 17399da..2b1a685 100755 --- a/src/PcieTypes.bsv +++ b/src/PcieTypes.bsv @@ -261,7 +261,7 @@ interface RawPcieCfgMsi; (* result = "tph_type" *) method PcieCfgMsiTphType tphType; (* result = "tph_st_tag" *) method PcieCfgMsiTphStTag tphStTag; (* prefix = "" *) method Action getMsiSignals( - (* port = "enable" *) Bool msiEn, + (* port = "enable" *) PcieCfgMsiEn msiEn, (* port = "sent" *) Bool msiSent, (* port = "fail" *) Bool msiFail, (* port = "mmenable" *) PcieCfgMsiMmEn msiMmEn, @@ -414,7 +414,7 @@ typedef Bit#(PCIE_CFG_PHY_LINK_DOWN_WIDTH) PcieCfgPhyLinkDown; typedef Bit#(PCIE_CFG_PHY_LINK_STATUS_WIDTH) PcieCfgPhyLinkStatus; typedef 3 PCIE_CFG_NEGOTIATED_WIDTH_WIDTH; -typedef 3 PCIE_CFG_CURRENT_SPEED_WIDTH; +typedef 2 PCIE_CFG_CURRENT_SPEED_WIDTH; typedef 2 PCIE_CFG_MAX_PAYLOAD_WIDTH; typedef 3 PCIE_CFG_MAX_READ_REQ_WIDTH; typedef Bit#(PCIE_CFG_NEGOTIATED_WIDTH_WIDTH) PcieCfgNegotiatedWidth; diff --git a/src/StreamUtils.bsv b/src/StreamUtils.bsv index d314686..ead5db3 100755 --- a/src/StreamUtils.bsv +++ b/src/StreamUtils.bsv @@ -424,7 +424,7 @@ module mkStreamHeaderRemove#(DataBytePtr headerLen)(StreamPipe); Reg#(DataStream) remainStreamReg <- mkReg(getEmptyStream); Reg#(Bool) hasLastRemainReg <- mkReg(False); - DataBitPtr headerBitLen = zeroExtend(headerLen) >> valueOf(BYTE_WIDTH_WIDTH); + DataBitPtr headerBitLen = zeroExtend(headerLen) << valueOf(BYTE_WIDTH_WIDTH); rule removeHeader; if (hasLastRemainReg) begin diff --git a/test/TestCompletionFifo.bsv b/test/TestCompletionFifo.bsv index ab36404..fbb01d4 100644 --- a/test/TestCompletionFifo.bsv +++ b/test/TestCompletionFifo.bsv @@ -152,7 +152,7 @@ interface CFifoInstTb; interface FifoOut#(DataStream) drain; endinterface -// (* synthesize *) // +(* synthesize *) module mkCompletionFifoInst(CFifoInstTb); CompletionFifo#(TEST_SLOT_NUM, DataStream) cFifo <- mkCompletionFifo(valueOf(MAX_STREAM_NUM_PER_COMPLETION)); interface reserve = cFifo.reserve; diff --git a/test/TestDmaCore.bsv b/test/TestDmaCore.bsv index 1c99962..3e4de81 100755 --- a/test/TestDmaCore.bsv +++ b/test/TestDmaCore.bsv @@ -1,6 +1,7 @@ import GetPut::*; import Randomizable::*; import Vector::*; +import Connectable::*; import SemiFifo::*; import PcieAxiStreamTypes::*; @@ -22,7 +23,6 @@ typedef 2'b00 DEFAULT_TLP_SIZE_SETTING; typedef 4 CHUNK_TX_TEST_SETTING_NUM; typedef 6 CHUNK_RX_TEST_SETTING_NUM; -(* doc = "testcase" *) module mkChunkComputerTb(Empty); ChunkCompute dut <- mkChunkComputer(DMA_TX); @@ -102,10 +102,11 @@ module mkChunkComputerTb(Empty); endmodule +// Do not use any simple tests, run cocotb for whole verification + typedef 60 SIMPLE_TEST_BYTELEN; typedef 'hABCDEF SIMPLE_TEST_ADDR; -(* doc = "testcase" *) module mkSimpleC2HWriteCoreTb(Empty); C2HWriteCore dut <- mkC2HWriteCore; Reg#(UInt#(32)) testCntReg <- mkReg(0); @@ -215,5 +216,93 @@ module mkSimpleConvertStraddleAxisToDataStreamTb(Empty); endmodule +module mkSimpleConvertDataStreamsToStraddleAxisTb(Empty); + ConvertDataStreamsToStraddleAxis dut <- mkConvertDataStreamsToStraddleAxis; + Reg#(UInt#(32)) testCntReg <- mkReg(0); + rule testInput if (testCntReg < 1); + let stream = generatePsuedoStream(fromInteger(valueOf(SIMPLE_TEST_BYTELEN)), True, True); + let sideBandByteEn = tuple2(4'b1111, 4'b1111); + dut.dataFifoIn[0].enq(stream); + dut.byteEnFifoIn[0].enq(sideBandByteEn); + dut.dataFifoIn[1].enq(stream); + dut.byteEnFifoIn[1].enq(sideBandByteEn); + testCntReg <= testCntReg + 1; + endrule + rule testOutput; + let axiStream = dut.axiStreamFifoOut.first; + dut.axiStreamFifoOut.deq; + $display("tData: %h", axiStream.tData); + $display("tKeep: %h", axiStream.tKeep); + PcieRequesterRequestSideBandFrame sideBand = unpack(axiStream.tUser); + $display("isSop: %d", sideBand.isSop.isSop); + if (axiStream.tLast) begin + $finish(); + end + endrule +endmodule + +module mkSimpleC2HReadCoreTb(Empty); + C2HReadCore dut <- mkC2HReadCore(0); + Reg#(UInt#(32)) testCntReg <- mkReg(0); + + rule testInput if (testCntReg < 1); + let req = DmaRequest { + startAddr : fromInteger(valueOf(SIMPLE_TEST_ADDR)), + length : fromInteger(valueOf(SIMPLE_TEST_BYTELEN)), + isWrite : False + }; + dut.rdReqFifoIn.enq(req); + testCntReg <= testCntReg + 1; + endrule + + rule testOutput; + let stream = dut.tlpFifoOut.first; + dut.tlpFifoOut.deq; + $display(fshow(stream)); + if (stream.isFirst) begin + let {firstByteEn, lastByteEn} = dut.tlpSideBandFifoOut.first; + dut.tlpSideBandFifoOut.deq; + $display("firstByteEn:%b, lastByteEn:%b", firstByteEn, lastByteEn); + PcieRequesterRequestDescriptor desc = unpack(truncate(stream.data)); + $display("Descriptor Elements: dwordCnt:%d, address:%h", desc.dwordCnt, desc.address << 2); + end + if (stream.isLast) begin + $finish(); + end + endrule +endmodule + +module simpleWritePathTb(Empty); + C2HWriteCore c2hWriteCore <- mkC2HWriteCore; + ConvertDataStreamsToStraddleAxis adapter <- mkConvertDataStreamsToStraddleAxis; + mkConnection(c2hWriteCore.tlpFifoOut, adapter.dataFifoIn[0]); + mkConnection(c2hWriteCore.tlpSideBandFifoOut, adapter.byteEnFifoIn[0]); + Reg#(UInt#(32)) testCntReg <- mkReg(0); + + rule testInput if (testCntReg < 1); + let req = DmaRequest { + startAddr : fromInteger(valueOf(SIMPLE_TEST_ADDR)), + length : fromInteger(valueOf(SIMPLE_TEST_BYTELEN)), + isWrite : True + }; + let stream = generatePsuedoStream(fromInteger(valueOf(SIMPLE_TEST_BYTELEN)), True, True); + c2hWriteCore.wrReqFifoIn.enq(req); + c2hWriteCore.dataFifoIn.enq(stream); + testCntReg <= testCntReg + 1; + endrule + + rule testOutput; + let axiStream = adapter.axiStreamFifoOut.first; + adapter.axiStreamFifoOut.deq; + $display("tData: %h", axiStream.tData); + $display("tKeep: %h", axiStream.tKeep); + PcieRequesterRequestSideBandFrame sideBand = unpack(axiStream.tUser); + $display("isSop: %d", sideBand.isSop.isSop); + if (axiStream.tLast) begin + $finish(); + end + endrule + +endmodule diff --git a/test/TestStreamUtils.bsv b/test/TestStreamUtils.bsv index bae36fb..01b9f33 100755 --- a/test/TestStreamUtils.bsv +++ b/test/TestStreamUtils.bsv @@ -77,7 +77,6 @@ module mkRandomStreamSize(StreamSize seed, StreamSizeBitPtr maxSizeBitPtr, Rando endmethod endmodule -(* doc = "testcase" *) module mkStreamSplitTb(Empty); StreamSplit dut <- mkStreamSplit; @@ -179,7 +178,6 @@ module mkStreamSplitTb(Empty); endmodule -(* doc = "testcase" *) module mkStreamShiftTb(Empty); RandomStreamSize streamSizeRandomValue <- mkRandomStreamSize(fromInteger(valueOf(SEED_1)), fromInteger(valueOf(MAX_STREAM_SIZE_PTR))); Vector#(TAdd#(BYTE_EN_WIDTH, 1), FIFOF#(StreamSize)) setSizeFifo <- replicateM(mkSizedFIFOF(10)); From db450a5c84d46fa0ee5ea81b69585780b7b792ae Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Fri, 30 Aug 2024 14:20:34 +0800 Subject: [PATCH 38/53] Pass cocotb write tb Pass cocotb read tb Pass single path cocotb test --- cocotb/dma_wr_rd_tb.py | 96 ++++++++++----- run | 29 +++++ script.py | 7 ++ src/DmaC2HPipe.bsv | 217 +++++++++++++++++++++----------- src/DmaTypes.bsv | 2 + src/DmaUtils.bsv | 150 ++++++++++++---------- src/DmaWrapper.bsv | 3 +- src/PcieAdapter.bsv | 7 +- src/PrimUtils.bsv | 132 ++++++++++---------- src/StreamUtils.bsv | 260 ++++++++++++++++++++++++++++++--------- test/TestStreamUtils.bsv | 1 + 11 files changed, 610 insertions(+), 294 deletions(-) create mode 100755 run create mode 100644 script.py diff --git a/cocotb/dma_wr_rd_tb.py b/cocotb/dma_wr_rd_tb.py index 29be393..52655ce 100644 --- a/cocotb/dma_wr_rd_tb.py +++ b/cocotb/dma_wr_rd_tb.py @@ -2,6 +2,8 @@ import itertools import logging import os +import random +import queue import cocotb_test.simulator import pytest @@ -35,13 +37,16 @@ def __init__(self, dut, msix=False): self.clock = dut.CLK self.resetn = dut.RST_N + self._bus_width = 512 + self._bus_bytes = 64 + # PCIe self.rc = RootComplex() cq_straddle = False cc_straddle = False rq_straddle = True - rc_straddle = True + rc_straddle = False rc_4tlp_straddle = False self.client_tag = bool(int(os.getenv("CLIENT_TAG", "1"))) @@ -293,59 +298,88 @@ async def send_desc(self, channel, startAddr, length, isWrite): desc.start_addr = startAddr desc.byte_cnt = length desc.is_write = isWrite - self.log.debug("Send a DMA Request, isWrite:%s, startAddr:%s", isWrite, startAddr) if channel == 0: await self.c2h_desc_source_0.send(desc) else: await self.c2h_desc_source_1.send(desc) + + async def send_data(self, channel, data): + assert len(data) <= self._bus_bytes + if channel == 0: + await self.c2h_write_source_0.send(data) + else: + await self.c2h_write_source_1.send(data) - async def run_single_write_once(self, channel, addr, length): - await self.c2h_write_source_0.send(b'ABCD') - self.log.debug("Send test write request!") + async def run_single_write_once(self, channel, addr, data): + length = len(data) + self.log.info("Conduct DMA single write: addr %d, length %d, char %c", addr, length, data[0]) await self.send_desc(channel, addr, length, True) + await self.c2h_write_source_0.send(data) async def run_single_read_once(self, channel, addr, length): + self.log.info("Conduct DMA single read: addr %d, length %d", addr, length) await self.send_desc(channel, addr, length, False) - -# no input now -@cocotb.test(timeout_time=6000, timeout_unit="ns") -async def run_test_write(dut): + data = await self.c2h_read_sink_0.read() + data = bytes(''.join([chr(item) for item in data]), encoding='UTF-8') + self.log.info("Read data from RootComplex successfully, recv length %d, req length %d", len(data), length) + return data + +@cocotb.test(timeout_time=100000000, timeout_unit="ns") +async def random_write_test(dut): tb = TB(dut) await tb.gen_reset() - + await tb.rc.enumerate() - dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + await dev.enable_device() await dev.set_master() - - mem = tb.rc.mem_pool.alloc_region(16*1024*1024) + + mem = tb.rc.mem_pool.alloc_region(1024*1024) mem_base = mem.get_absolute_address(0) - await tb.run_single_write_once(0, mem_base, 4) - - await Timer(500, units='ns') - - await tb.run_single_read_once(0, mem_base, 4) - frame = await tb.c2h_read_sink_0.recv() - print(frame) - await RisingEdge(tb.clock) + dma_channel = 0 + for _ in range(10): + addr_offset = random.randint(0, 8192) + length = random.randint(0, 8192) + char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") + addr = addr_offset + mem_base + data = char * length + await tb.run_single_write_once(dma_channel, addr, data) + await Timer(100+length, units='ns') + assert mem[addr:addr+length] == char * length + await RisingEdge(tb.clock) - -if cocotb.SIM_NAME: - - factory = TestFactory(run_test_write) - factory.generate_tests() - - -# cocotb-test +@cocotb.test(timeout_time=10000000, timeout_unit="ns") +async def random_read_test(dut): + tb = TB(dut) + await tb.gen_reset() + + await tb.rc.enumerate() + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + + await dev.enable_device() + await dev.set_master() + + mem = tb.rc.mem_pool.alloc_region(1024*1024) + mem_base = mem.get_absolute_address(0) + + dma_channel = 0 + for _ in range(100): + addr_offset = random.randint(0, 8192) + addr = addr_offset + mem_base + length = random.randint(0, 8192) + char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") + mem[addr:addr+length] = char * length + data = await tb.run_single_read_once(dma_channel, addr, length) + assert data == char * length tests_dir = os.path.dirname(__file__) rtl_dir = tests_dir -def test_dma_wr(): +def test_dma(): dut = "mkRawDmaController" module = os.path.splitext(os.path.basename(__file__))[0] toplevel = dut @@ -366,4 +400,4 @@ def test_dma_wr(): ) if __name__ == "__main__": - test_dma_wr() \ No newline at end of file + test_dma() \ No newline at end of file diff --git a/run b/run new file mode 100755 index 0000000..86169fa --- /dev/null +++ b/run @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +set -o errexit +set -o nounset +set -o xtrace + +BASH_PROFILE=$HOME/.bash_profile +if [ -f "$BASH_PROFILE" ]; then + source $BASH_PROFILE +fi + +TEST_DIR=`realpath ./test` +LOG_DIR=`realpath ./tmp` +ALL_LOG=$TEST_DIR/run.log + +mkdir -p $LOG_DIR + +make -j8 -f Makefile.test all TESTDIR=$TEST_DIR LOGDIR=$LOG_DIR +cat $LOG_DIR/*.log | tee $ALL_LOG + +FAIL_KEYWORKS='Error\|ImmAssert' +grep -w $FAIL_KEYWORKS $LOG_DIR/*.log | cat +ERR_NUM=`grep -c -w $FAIL_KEYWORKS $ALL_LOG | cat` +if [ $ERR_NUM -gt 0 ]; then + echo "FAIL" + false +else + echo "PASS" +fi diff --git a/script.py b/script.py new file mode 100644 index 0000000..89077a8 --- /dev/null +++ b/script.py @@ -0,0 +1,7 @@ +for i in range(64): + s = ['0'] * 64 + s[i] = 1 + if (i > 0): + s[i-1:0] = '?' * (i-1) + s = ''.join(s) + print("%64s" % s) \ No newline at end of file diff --git a/src/DmaC2HPipe.bsv b/src/DmaC2HPipe.bsv index 0d744f8..d25bf06 100644 --- a/src/DmaC2HPipe.bsv +++ b/src/DmaC2HPipe.bsv @@ -1,5 +1,6 @@ import FIFOF::*; import GetPut::*; +import Vector::*; import Connectable::*; import SemiFifo::*; @@ -50,8 +51,9 @@ module mkDmaC2HPipe#(DmaPathNo pathIdx)(DmaC2HPipe); else begin readCore.rdReqFifoIn.enq(req); end - $display($time, "ns SIM INFO @ mkDmaC2HPipe%d: New Request isWrite:%b startAddr:%h length:%d", - pathIdx, pack(req.isWrite), req.startAddr, req.length); + $display(" "); + $display($time, "ns SIM INFO @ mkDmaC2HPipe%d: recv new request, startAddr:%d length:%d isWrite:%b", + pathIdx, req.startAddr, req.length, pack(req.isWrite)); endrule rule tlpOutMux; @@ -93,6 +95,8 @@ interface C2HReadCore; interface FifoOut#(SideBandByteEn) tlpSideBandFifoOut; endinterface +// Total Latency(Tlp Output): 1 + 2 + 1 + 1 = 5 +// Total Latency(Tlp Input) : 1\2 + 2 + n + 2 + 1 = 5/6 + n (depends on the order) module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); FIFOF#(StraddleStream) tlpInFifo <- mkFIFOF; FIFOF#(DmaRequest) reqInFifo <- mkFIFOF; @@ -100,19 +104,30 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); FIFOF#(SideBandByteEn) tlpByteEnFifo <- mkFIFOF; FIFOF#(SlotToken) tagFifo <- mkSizedFIFOF(valueOf(TAdd#(1, STREAM_HEADER_REMOVE_LATENCY))); - FIFOF#(Bool) completedFifo <- mkSizedFIFOF(valueOf(TAdd#(1, STREAM_HEADER_REMOVE_LATENCY))); + FIFOF#(Bool) completedFifo <- mkSizedFIFOF(valueOf(TAdd#(1, STREAM_HEADER_REMOVE_LATENCY))); + FIFOF#(DmaMemAddr) expectTlpCntFifo <-mkSizedFIFOF(valueOf(SLOT_PER_PATH)); StreamPipe descRemove <- mkStreamHeaderRemove(fromInteger(valueOf(TDiv#(DES_RC_DESCRIPTOR_WIDTH, BYTE_WIDTH)))); - StreamPipe streamReshape <- mkStreamReshape; + StreamPipe dwRemove <- mkStreamRemoveFromDW; + StreamPipe reshapeStrad <- mkStreamReshape; + StreamPipe reshapeRcb <- mkStreamReshape; + StreamPipe reshapeMrrs <- mkStreamReshape; ChunkCompute chunkSplitor <- mkChunkComputer(DMA_RX); CompletionFifo#(SLOT_PER_PATH, DataStream) cBuffer <- mkCompletionFifo(valueOf(MAX_STREAM_NUM_PER_COMPLETION)); + RqDescriptorGenerator rqDescGenerator <- mkRqDescriptorGenerator(False); Reg#(Bool) hasReadOnce <- mkReg(False); + Reg#(DmaMemAddr) recvTlpCntReg <- mkReg(0); + Reg#(DmaMemAddr) recvBytesReg <- mkReg(0); + Vector#(SLOT_PER_PATH, Reg#(DmaMemAddr)) chunkBytesRegs <- replicateM(mkReg(0)); + + mkConnection(chunkSplitor.chunkCntFifoOut, expectTlpCntFifo); + mkConnection(reshapeStrad.streamFifoOut, descRemove.streamFifoIn); + mkConnection(descRemove.streamFifoOut, dwRemove.streamFifoIn); // Pipeline stage 1: convert StraddleStream to DataStream, may cost 2 cycle for one StraddleStream rule convertStraddleToDataStream; let sdStream = tlpInFifo.first; - $display($time, "ns SIM INFO @ mkDmaC2HReadCore: recv new stream from straddle adapter, isDouble:%b", pack(sdStream.isDoubleFrame)); let stream = getEmptyStream; SlotToken tag = 0; Bool isCompleted = False; @@ -147,45 +162,95 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); tag = sdStream.tag[0]; isCompleted = sdStream.isCompleted[0]; end - descRemove.streamFifoIn.enq(stream); - tagFifo.enq(tag); - completedFifo.enq(isCompleted); - $display("parse from straddle", fshow(stream)); + stream.byteEn = stream.byteEn; + reshapeStrad.streamFifoIn.enq(stream); + $display($time, "ns SIM INFO @ mkDmaC2HReadCore: recv new stream from straddle adapter, tag: %d, isCompleted:%b" ,tag, isCompleted, fshow(stream)); + if (stream.isFirst) begin + tagFifo.enq(tag); + completedFifo.enq(isCompleted); + end + // $display("parse from straddle", fshow(stream)); endrule // Pipeline stage 2: remove the descriptor in the head of each TLP - // Pipeline stage 3: Buffer the received DataStreams and reorder the, + // Pipeline stage 3: Buffer the received DataStreams and reorder them rule reorderStream; - let stream = descRemove.streamFifoOut.first; + let stream = dwRemove.streamFifoOut.first; + let byteInStream = convertByteEn2BytePtr(stream.byteEn); let isCompleted = completedFifo.first; let tag = tagFifo.first; - $display($time, "ns SIM INFO @ mkDmaC2HReadCore: recv new tlp tag%d, isCompleted:%b", tag, pack(isCompleted)); - $display("desc remove output", fshow(stream)); - descRemove.streamFifoOut.deq; - completedFifo.deq; - tagFifo.deq; + let chunkBytes = zeroExtend(byteInStream) + chunkBytesRegs[tag]; + dwRemove.streamFifoOut.deq; + if (stream.isLast) begin + completedFifo.deq; + tagFifo.deq; + end stream.isLast = isCompleted && stream.isLast; cBuffer.append.enq(tuple2(tag, stream)); if (stream.isLast) begin cBuffer.complete.put(tag); + $display($time, "ns SIM INFO @ mkDmaC2HReadCore: a chunk is completed in cBuffer, tag: %d, recv bytes: %d", tag, chunkBytes); + chunkBytes = 0; end + chunkBytesRegs[tag] <= chunkBytes; + $display("tag%d", tag, fshow(stream)); endrule - // Pipeline stage 4: there may be bubbles in the first and last DataStream of a TLP because of RCB - // Reshape the DataStream and make sure it is continuous - rule reshapeStream; + // Pipeline stage 4: there may be a bubble ibetween the first and last DataStream of cBUffer drain output + // Reshape the DataStream from RCB chunks to MRRS chunks + rule reshapeRCB; let stream = cBuffer.drain.first; cBuffer.drain.deq; - streamReshape.streamFifoIn.enq(stream); - $display("cbuf output", fshow(stream)); + reshapeRcb.streamFifoIn.enq(stream); + // $display("cbuf output", fshow(stream)); endrule + // Pipeline stage 4: there may be bubbles in the first and last DataStream of a request because of MRRS chunk compute + // Reshape the DataStream from MRRS chunks to a whole DataStream + rule reshapeMRRS; + let stream = reshapeRcb.streamFifoOut.first; + let byteInStream = convertByteEn2BytePtr(stream.byteEn); + let recvBytesCnt = recvBytesReg + zeroExtend(byteInStream); + reshapeRcb.streamFifoOut.deq; + let recvTlpCnt = recvTlpCntReg; + if (stream.isFirst) begin + if (recvTlpCnt > 0) begin + stream.isFirst = False; + end + recvTlpCnt = recvTlpCntReg + 1; + end + if (stream.isLast) begin + if (expectTlpCntFifo.first == recvTlpCnt) begin + recvTlpCnt = 0; + expectTlpCntFifo.deq; + $display($time, "ns SIM INFO @ mkDmaC2HReadCore: a read request is done, total tlps counts : %5d, total recvd bytes: %d", expectTlpCntFifo.first, recvBytesCnt); + recvBytesCnt = 0; + end + else begin + stream.isLast = False; + end + end + recvTlpCntReg <= recvTlpCnt; + recvBytesReg <= recvBytesCnt; + reshapeMrrs.streamFifoIn.enq(stream); + endrule + + // rule log; + // let stream = dwRemove.streamFifoOut.first; + // $display("dwRemove output stream", fshow(stream)); + // endrule // Pipeline stage 1: split to req to MRRS chunks rule reqSplit; let req = reqInFifo.first; reqInFifo.deq; - chunkSplitor.dmaRequestFifoIn.enq(req); + let exReq = DmaExtendRequest { + startAddr : req.startAddr, + endAddr : req.startAddr + req.length - 1, + length : req.length, + tag : 0 + }; + chunkSplitor.dmaRequestFifoIn.enq(exReq); endrule // Pipeline stage 2: generate read descriptor @@ -193,39 +258,32 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); let req = chunkSplitor.chunkRequestFifoOut.first; chunkSplitor.chunkRequestFifoOut.deq; let token <- cBuffer.reserve.get; - let descriptor = PcieRequesterRequestDescriptor { - forceECRC : False, - attributes : 0, - trafficClass : 0, - requesterIdEn : False, - completerId : 0, - tag : zeroExtend(token) | (zeroExtend(pathIdx) << (valueOf(DES_NONEXTENDED_TAG_WIDTH)-1)), - requesterId : 0, - isPoisoned : False, - reqType : fromInteger(valueOf(MEM_READ_REQ)), - dwordCnt : truncate(req.length >> valueOf(BYTE_DWORD_SHIFT_WIDTH)) + zeroExtend(req.length[0]|req.length[1]), - address : truncate(req.startAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)), - addrType : fromInteger(valueOf(TRANSLATED_ADDR)) - }; - let stream = DataStream { - data : zeroExtend(pack(descriptor)), - byteEn : convertBytePtr2ByteEn(fromInteger(valueOf(TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH)))), - isFirst : True, - isLast : True - }; + let exReq = DmaExtendRequest { + startAddr: req.startAddr, + endAddr : req.startAddr + req.length - 1, + length : req.length, + tag : zeroExtend(token) | (zeroExtend(pathIdx) << (valueOf(DES_NONEXTENDED_TAG_WIDTH)-1)) + }; + rqDescGenerator.exReqFifoIn.enq(exReq); + $display($time, "ns SIM INFO @ mkDmaC2HReadCore: tx a new read chunk, tag:%d, addr:%d, length:%d", exReq.tag, req.startAddr, req.length); + endrule + + // Pipeline stage 3: generate Tlp to PCIe Adapter + rule tlpGen; + let stream = rqDescGenerator.descFifoOut.first; + let sideBandByteEn = rqDescGenerator.byteEnFifoOut.first; + rqDescGenerator.descFifoOut.deq; + rqDescGenerator.byteEnFifoOut.deq; + stream.isFirst = True; + stream.isLast = True; tlpOutFifo.enq(stream); - let endAddr = req.startAddr + req.length; - ByteModDWord startAddrOffset = byteModDWord(req.startAddr); - ByteModDWord endAddrOffset = byteModDWord(endAddr); - let firstByteEn = convertDWordOffset2FirstByteEn(startAddrOffset); - let lastByteEn = convertDWordOffset2LastByteEn(endAddrOffset); - tlpByteEnFifo.enq(tuple2(firstByteEn, lastByteEn)); - $display($time, "ns SIM INFO @ mkDmaC2HReadCore: output new tlp, BE:%h/%h", firstByteEn, lastByteEn); + tlpByteEnFifo.enq(sideBandByteEn); + $display($time, "ns SIM INFO @ mkDmaC2HReadCore: output new tlp, BE:%h/%h", tpl_1(sideBandByteEn), tpl_2(sideBandByteEn)); endrule // User Logic Ifc interface rdReqFifoIn = convertFifoToFifoIn(reqInFifo); - interface dataFifoOut = streamReshape.streamFifoOut; + interface dataFifoOut = reshapeMrrs.streamFifoOut; // PCIe IP Ifc interface tlpFifoIn = convertFifoToFifoIn(tlpInFifo); interface tlpFifoOut = convertFifoToFifoOut(tlpOutFifo); @@ -243,12 +301,15 @@ interface C2HWriteCore; interface FifoOut#(SideBandByteEn) tlpSideBandFifoOut; endinterface +// Total Latency: 1 + 3 + 2 + 1 = 7 module mkC2HWriteCore(C2HWriteCore); FIFOF#(DataStream) dataInFifo <- mkFIFOF; FIFOF#(DmaRequest) wrReqInFifo <- mkFIFOF; FIFOF#(DataStream) dataOutFifo <- mkFIFOF; FIFOF#(SideBandByteEn) byteEnOutFifo <- mkFIFOF; + Reg#(Tag) tagReg <- mkReg(0); + ChunkSplit chunkSplit <- mkChunkSplit(DMA_TX); StreamShiftAlignToDw streamAlign <- mkStreamShiftAlignToDw(fromInteger(valueOf(TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH)))); RqDescriptorGenerator rqDescGenerator <- mkRqDescriptorGenerator(True); @@ -256,13 +317,19 @@ module mkC2HWriteCore(C2HWriteCore); // Pipeline stage 1: split the whole write request to chunks, latency = 3 rule splitToChunks; let wrStream = dataInFifo.first; - $display($time, "ns SIM INFO @ mkDmaC2HWriteCore: get new writing stream"); + // if (wrStream.isLast || wrStream.isFirst) begin $display($time, "ns SIM INFO @ mkC2HWriteCore: ", fshow(wrStream)); end if (wrStream.isFirst && wrReqInFifo.notEmpty) begin wrReqInFifo.deq; - chunkSplit.reqFifoIn.enq(wrReqInFifo.first); + let wrReq = wrReqInFifo.first; + let exReq = DmaExtendRequest { + startAddr : wrReq.startAddr, + endAddr : wrReq.startAddr + wrReq.length - 1, + length : wrReq.length, + tag : 0 + }; + chunkSplit.reqFifoIn.enq(exReq); dataInFifo.deq; chunkSplit.dataFifoIn.enq(wrStream); - $display($time, "ns SIM INFO @ mkDmaC2HWriteCore: new write start, startAddr:%h, length:%d ", wrReqInFifo.first.startAddr, wrReqInFifo.first.length); end else if (!wrStream.isFirst) begin dataInFifo.deq; @@ -275,41 +342,47 @@ module mkC2HWriteCore(C2HWriteCore); if (chunkSplit.chunkReqFifoOut.notEmpty) begin let chunkReq = chunkSplit.chunkReqFifoOut.first; chunkSplit.chunkReqFifoOut.deq; - let endAddr = chunkReq.startAddr + chunkReq.length; let exReq = DmaExtendRequest { startAddr: chunkReq.startAddr, - endAddr : endAddr, - length : chunkReq.length + endAddr : chunkReq.startAddr + chunkReq.length - 1, + length : chunkReq.length, + tag : tagReg }; - streamAlign.reqFifoIn.enq(exReq); + tagReg <= tagReg + 1; + let startAddrOffset = byteModDWord(exReq.startAddr); + streamAlign.setAlignMode(unpack(startAddrOffset)); rqDescGenerator.exReqFifoIn.enq(exReq); + $display($time, "ns SIM INFO @ mkDmaC2HWriteCore: tx a new write chunk, tag:%d, addr:%d, length:%d", tagReg, chunkReq.startAddr, chunkReq.length); end if (chunkSplit.chunkDataFifoOut.notEmpty) begin let chunkDataStream = chunkSplit.chunkDataFifoOut.first; chunkSplit.chunkDataFifoOut.deq; streamAlign.dataFifoIn.enq(chunkDataStream); + if (chunkDataStream.isLast && chunkDataStream.isFirst) begin + $display($time, "ns SIM INFO @ mkDmaC2HWriteCore: tx write chunk end , tag:%d", tagReg); + end + else if (chunkDataStream.isLast) begin + $display($time, "ns SIM INFO @ mkDmaC2HWriteCore: tx write chunk end , tag:%d", tagReg - 1); + end end endrule // Pipeline stage 3: Add descriptor and add to the axis convert module rule addDescriptorToAxis; - if (streamAlign.byteEnFifoOut.notEmpty) begin - let sideBandByteEn = streamAlign.byteEnFifoOut.first; - streamAlign.byteEnFifoOut.deq; + let stream = streamAlign.dataFifoOut.first; + streamAlign.dataFifoOut.deq; + if (stream.isFirst) begin + let descStream = rqDescGenerator.descFifoOut.first; + let sideBandByteEn = rqDescGenerator.byteEnFifoOut.first; + rqDescGenerator.descFifoOut.deq; + rqDescGenerator.byteEnFifoOut.deq; + stream.data = stream.data | descStream.data; + stream.byteEn = stream.byteEn | descStream.byteEn; byteEnOutFifo.enq(sideBandByteEn); - $display($time, "ns SIM INFO @ mkDmaC2HWriteCore: output new tlp, BE:%h/%h", tpl_1(sideBandByteEn), tpl_2(sideBandByteEn)); - end - if (streamAlign.dataFifoOut.notEmpty) begin - let stream = streamAlign.dataFifoOut.first; - streamAlign.dataFifoOut.deq; - if (stream.isFirst) begin - let descStream = rqDescGenerator.descFifoOut.first; - rqDescGenerator.descFifoOut.deq; - stream.data = stream.data | descStream.data; - stream.byteEn = stream.byteEn | descStream.byteEn; - end - dataOutFifo.enq(stream); + $display($time, "ns SIM INFO @ mkDmaC2HWriteCore: tx a new tlp, BE:%b/%b", tpl_1(sideBandByteEn), tpl_2(sideBandByteEn)); end + dataOutFifo.enq(stream); + // $display($time, "ns SIM INFO @ mkDmaC2HWriteCore: tlp stream", fshow(stream)); endrule interface dataFifoIn = convertFifoToFifoIn(dataInFifo); diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index 4c54c20..9162262 100755 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -41,6 +41,7 @@ typedef Bit#(TAdd#(1, TLog#(DWORD_EN_WIDTH))) DataDwordPtr; typedef Bit#(TAdd#(1, TLog#(DWORD_BYTES))) DWordBytePtr; typedef Bit#(BYTE_DWORD_SHIFT_WIDTH) ByteModDWord; +typedef 2'b11 MaxByteModDword; typedef struct { DmaMemAddr startAddr; @@ -52,6 +53,7 @@ typedef struct { DmaMemAddr startAddr; DmaMemAddr endAddr; DmaMemAddr length; + Tag tag; } DmaExtendRequest deriving(Bits, Bounded, Eq); typedef enum { diff --git a/src/DmaUtils.bsv b/src/DmaUtils.bsv index bb420d5..66d1774 100644 --- a/src/DmaUtils.bsv +++ b/src/DmaUtils.bsv @@ -15,49 +15,53 @@ typedef Bit#(BUS_BOUNDARY_WIDTH) PcieTlpMaxMaxPayloadSize; typedef Bit#(TLog#(BUS_BOUNDARY_WIDTH)) PcieTlpSizeWidth; typedef 128 DEFAULT_TLP_SIZE; -typedef TAdd#(1, TLog#(DEFAULT_TLP_SIZE)) DEFAULT_TLP_SIZE_WIDTH; +typedef TLog#(DEFAULT_TLP_SIZE) DEFAULT_TLP_SIZE_WIDTH; typedef 3 PCIE_TLP_SIZE_SETTING_WIDTH; typedef Bit#(PCIE_TLP_SIZE_SETTING_WIDTH) PcieTlpSizeSetting; typedef TAdd#(1, TLog#(TDiv#(BUS_BOUNDARY, BYTE_EN_WIDTH))) DATA_BEATS_WIDTH; -typedef Bit#(DATA_BEATS_WIDTH) DataBeats; - -typedef struct { - DmaRequest dmaRequest; - DmaMemAddr firstChunkLen; -} ChunkRequestFrame deriving(Bits, Eq); +typedef Bit#(DATA_BEATS_WIDTH) DataBeats; +typedef 4 CHUNK_COMPUTE_LATENCY; // Split the input DmaRequest Info MRRS aligned chunkReqs interface ChunkCompute; - interface FifoIn#(DmaRequest) dmaRequestFifoIn; - interface FifoOut#(DmaRequest) chunkRequestFifoOut; - interface Put#(PcieTlpSizeSetting) setTlpMaxSize; + interface FifoIn#(DmaExtendRequest) dmaRequestFifoIn; + interface FifoOut#(DmaRequest) chunkRequestFifoOut; + interface FifoOut#(DmaMemAddr) chunkCntFifoOut; + interface Put#(PcieTlpSizeSetting) setTlpMaxSize; endinterface module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); - FIFOF#(DmaRequest) inputFifo <- mkFIFOF; - FIFOF#(DmaRequest) outputFifo <- mkFIFOF; - FIFOF#(ChunkRequestFrame) splitFifo <- mkFIFOF; + FIFOF#(DmaExtendRequest) inputFifo <- mkFIFOF; + FIFOF#(DmaRequest) outputFifo <- mkFIFOF; + FIFOF#(Tuple2#(DmaExtendRequest, DmaMemAddr)) pipeFifo <- mkFIFOF; + FIFOF#(DmaMemAddr) tlpCntFifo <- mkSizedFIFOF(valueOf(CHUNK_COMPUTE_LATENCY)); Reg#(DmaMemAddr) newChunkPtrReg <- mkReg(0); Reg#(DmaMemAddr) totalLenRemainReg <- mkReg(0); Reg#(Bool) isSplittingReg <- mkReg(False); - Reg#(DmaMemAddr) tlpMaxSize <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE))); - Reg#(PcieTlpSizeWidth) tlpMaxSizeWidth <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH))); + Reg#(DmaMemAddr) tlpMaxSizeReg <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE))); + Reg#(PcieTlpSizeWidth) tlpMaxSizeWidthReg <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH))); - function Bool hasBoundary(DmaRequest request); - let highIdx = (request.startAddr + request.length - 1) >> valueOf(BUS_BOUNDARY_WIDTH); - let lowIdx = request.startAddr >> valueOf(BUS_BOUNDARY_WIDTH); + function Bool hasBoundary(DmaExtendRequest request); + let highIdx = request.endAddr >> tlpMaxSizeWidthReg; + let lowIdx = request.startAddr >> tlpMaxSizeWidthReg; return (highIdx > lowIdx); endfunction - function DmaMemAddr getOffset(DmaRequest request); + function DmaMemAddr getTlpCnts(DmaExtendRequest request); + let highIdx = request.endAddr >> tlpMaxSizeWidthReg; + let lowIdx = request.startAddr >> tlpMaxSizeWidthReg; + return (highIdx - lowIdx + 1); + endfunction + + function DmaMemAddr getOffset(DmaExtendRequest request); // MPS - startAddr % MPS, MPS means MRRS when the module is set to RX mode - DmaMemAddr remainderOfMps = zeroExtend(PcieTlpMaxMaxPayloadSize'(request.startAddr[tlpMaxSizeWidth-1:0])); - DmaMemAddr offsetOfMps = tlpMaxSize - remainderOfMps; + DmaMemAddr remainderOfMps = zeroExtend(PcieTlpMaxMaxPayloadSize'(request.startAddr[tlpMaxSizeWidthReg-1:0])); + DmaMemAddr offsetOfMps = tlpMaxSizeReg - remainderOfMps; return offsetOfMps; endfunction @@ -65,65 +69,66 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); let request = inputFifo.first; inputFifo.deq; let offset = getOffset(request); - let firstLen = (request.length > tlpMaxSize) ? tlpMaxSize : request.length; - splitFifo.enq(ChunkRequestFrame { - dmaRequest: request, - firstChunkLen: hasBoundary(request) ? offset : firstLen - }); + let firstLen = (request.length > tlpMaxSizeReg) ? tlpMaxSizeReg : request.length; + let firstChunkLen = hasBoundary(request) ? offset : firstLen; + pipeFifo.enq(tuple2(request, firstChunkLen)); + let tlpCnt = getTlpCnts(request); + tlpCntFifo.enq(tlpCnt); endrule rule execChunkCompute; - let splitRequest = splitFifo.first; + let {request, firstChunkLen} = pipeFifo.first; if (isSplittingReg) begin // !isFirst - if (totalLenRemainReg <= tlpMaxSize) begin + if (totalLenRemainReg <= tlpMaxSizeReg) begin isSplittingReg <= False; outputFifo.enq(DmaRequest { startAddr : newChunkPtrReg, length : totalLenRemainReg, isWrite : False }); - splitFifo.deq; + pipeFifo.deq; totalLenRemainReg <= 0; end else begin isSplittingReg <= True; outputFifo.enq(DmaRequest { startAddr : newChunkPtrReg, - length : tlpMaxSize, + length : tlpMaxSizeReg, isWrite : False }); - newChunkPtrReg <= newChunkPtrReg + tlpMaxSize; - totalLenRemainReg <= totalLenRemainReg - tlpMaxSize; + newChunkPtrReg <= newChunkPtrReg + tlpMaxSizeReg; + totalLenRemainReg <= totalLenRemainReg - tlpMaxSizeReg; end end else begin // isFirst - let remainderLength = splitRequest.dmaRequest.length - splitRequest.firstChunkLen; + let remainderLength = request.length - firstChunkLen; Bool isSplittingNextCycle = (remainderLength > 0); isSplittingReg <= isSplittingNextCycle; outputFifo.enq(DmaRequest { - startAddr : splitRequest.dmaRequest.startAddr, - length : splitRequest.firstChunkLen, + startAddr : request.startAddr, + length : firstChunkLen, isWrite : False }); if (!isSplittingNextCycle) begin - splitFifo.deq; + pipeFifo.deq; end - newChunkPtrReg <= splitRequest.dmaRequest.startAddr + splitRequest.firstChunkLen; + newChunkPtrReg <= request.startAddr + firstChunkLen; totalLenRemainReg <= remainderLength; end endrule interface dmaRequestFifoIn = convertFifoToFifoIn(inputFifo); interface chunkRequestFifoOut = convertFifoToFifoOut(outputFifo); + interface chunkCntFifoOut = convertFifoToFifoOut(tlpCntFifo); interface Put setTlpMaxSize; method Action put (PcieTlpSizeSetting tlpSizeSetting); let setting = tlpSizeSetting; setting[valueOf(PCIE_TLP_SIZE_SETTING_WIDTH)-1] = (direction == DMA_TX) ? 0 : setting[valueOf(PCIE_TLP_SIZE_SETTING_WIDTH)-1]; DmaMemAddr defaultTlpMaxSize = fromInteger(valueOf(DEFAULT_TLP_SIZE)); - tlpMaxSize <= DmaMemAddr'(defaultTlpMaxSize << setting); + tlpMaxSizeReg <= DmaMemAddr'(defaultTlpMaxSize << setting); PcieTlpSizeWidth defaultTlpMaxSizeWidth = fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH)); - tlpMaxSizeWidth <= PcieTlpSizeWidth'(defaultTlpMaxSizeWidth + zeroExtend(setting)); + tlpMaxSizeWidthReg <= PcieTlpSizeWidth'(defaultTlpMaxSizeWidth + zeroExtend(setting)); endmethod endinterface @@ -136,7 +141,7 @@ endmodule // - The module may block the pipeline if one input beat is splited to two beats interface ChunkSplit; interface FifoIn#(DataStream) dataFifoIn; - interface FifoIn#(DmaRequest) reqFifoIn; + interface FifoIn#(DmaExtendRequest) reqFifoIn; interface FifoOut#(DataStream) chunkDataFifoOut; interface FifoOut#(DmaRequest) chunkReqFifoOut; interface Put#(PcieTlpSizeSetting) setTlpMaxSize; @@ -144,11 +149,12 @@ endinterface module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); FIFOF#(DataStream) dataInFifo <- mkFIFOF; - FIFOF#(DmaRequest) reqInFifo <- mkFIFOF; FIFOF#(DataStream) chunkOutFifo <- mkFIFOF; FIFOF#(DmaRequest) reqOutFifo <- mkFIFOF; FIFOF#(DmaRequest) firstReqPipeFifo <- mkSizedFIFOF(valueOf(STREAM_SPLIT_LATENCY)); - FIFOF#(DmaRequest) inputReqPipeFifo <- mkSizedFIFOF(valueOf(STREAM_SPLIT_LATENCY)); + + FIFOF#(DmaExtendRequest) reqInFifo <- mkFIFOF; + FIFOF#(DmaExtendRequest) inputReqPipeFifo <- mkSizedFIFOF(valueOf(STREAM_SPLIT_LATENCY)); StreamSplit firstChunkSplitor <- mkStreamSplit; @@ -163,13 +169,13 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); Reg#(DmaMemAddr) remainLenReg <- mkReg(0); - function Bool hasBoundary(DmaRequest request); - let highIdx = (request.startAddr + request.length - 1) >> valueOf(BUS_BOUNDARY_WIDTH); - let lowIdx = request.startAddr >> valueOf(BUS_BOUNDARY_WIDTH); + function Bool hasBoundary(DmaExtendRequest request); + let highIdx = request.endAddr >> tlpMaxSizeWidthReg; + let lowIdx = request.startAddr >> tlpMaxSizeWidthReg; return (highIdx > lowIdx); endfunction - function DmaMemAddr getOffset(DmaRequest request); + function DmaMemAddr getOffset(DmaExtendRequest request); // MPS - startAddr % MPS, MPS means MRRS when the module is set to RX mode DmaMemAddr remainderOfMps = zeroExtend(PcieTlpMaxMaxPayloadSize'(request.startAddr[tlpMaxSizeWidthReg-1:0])); DmaMemAddr offsetOfMps = tlpMaxSizeReg - remainderOfMps; @@ -187,11 +193,12 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); let offset = getOffset(request); let firstLen = (request.length > tlpMaxSizeReg) ? tlpMaxSizeReg : request.length; let firstChunkLen = hasBoundary(request) ? offset : firstLen; + // $display($time, "ns SIM INFO @ mkChunkSplit: get first chunkLen, offset %d, remainder %d", offset, PcieTlpMaxMaxPayloadSize'(request.startAddr[tlpMaxSizeWidthReg-1:0])); firstChunkSplitor.splitLocationFifoIn.enq(unpack(truncate(firstChunkLen))); let firstReq = DmaRequest { startAddr : request.startAddr, length : firstChunkLen, - isWrite : request.isWrite + isWrite : True }; firstReqPipeFifo.enq(firstReq); firstChunkSplitor.inputStreamFifoIn.enq(stream); @@ -215,7 +222,7 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); let stream = firstChunkSplitor.outputStreamFifoOut.first; firstChunkSplitor.outputStreamFifoOut.deq; // End of a TLP, reset beatsReg and tag isLast=True - if (stream.isLast || beatsReg == tlpMaxBeatsReg) begin + if (stream.isLast || beatsReg == tlpMaxBeatsReg - 1) begin stream.isLast = True; beatsReg <= 0; end @@ -224,43 +231,48 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); end // Start of a TLP, get Req Infos and tag isFirst=True if (beatsReg == 0) begin + // $display($time, "ns SIM INFO @ mkChunkSplit: start a new chunk, next addr %d, remainBytesLen %d", nextStartAddrReg, remainLenReg); stream.isFirst = True; // The first TLP of chunks if (firstReqPipeFifo.notEmpty) begin let chunkReq = firstReqPipeFifo.first; let oriReq = inputReqPipeFifo.first; firstReqPipeFifo.deq; - nextStartAddrReg <= oriReq.startAddr + chunkReq.length; - remainLenReg <= oriReq.length - chunkReq.length; + inputReqPipeFifo.deq; + if (chunkReq.length == oriReq.length) begin + nextStartAddrReg <= 0; + remainLenReg <= 0; + end + else begin + nextStartAddrReg <= oriReq.startAddr + chunkReq.length; + remainLenReg <= oriReq.length - chunkReq.length; + end reqOutFifo.enq(chunkReq); end // The following chunks else begin + let chunkReq = DmaRequest { + startAddr: nextStartAddrReg, + length : tlpMaxSizeReg, + isWrite : True + }; if (remainLenReg == 0) begin // Do nothing end else if (remainLenReg <= tlpMaxSizeReg) begin nextStartAddrReg <= 0; remainLenReg <= 0; - let chunkReq = DmaRequest { - startAddr: nextStartAddrReg, - length : remainLenReg, - isWrite : True - }; + chunkReq.length = remainLenReg; reqOutFifo.enq(chunkReq); end else begin nextStartAddrReg <= nextStartAddrReg + tlpMaxSizeReg; remainLenReg <= remainLenReg - tlpMaxSizeReg; - let chunkReq = DmaRequest { - startAddr: nextStartAddrReg, - length : tlpMaxSizeReg, - isWrite : True - }; reqOutFifo.enq(chunkReq); end end end + chunkOutFifo.enq(stream); endrule @@ -288,18 +300,19 @@ endmodule interface RqDescriptorGenerator; interface FifoIn#(DmaExtendRequest) exReqFifoIn; interface FifoOut#(DataStream) descFifoOut; + interface FifoOut#(SideBandByteEn) byteEnFifoOut; endinterface module mkRqDescriptorGenerator#(Bool isWrite)(RqDescriptorGenerator); FIFOF#(DmaExtendRequest) exReqInFifo <- mkFIFOF; FIFOF#(DataStream) descOutFifo <- mkFIFOF; + FIFOF#(SideBandByteEn) byteEnOutFifo <- mkFIFOF; rule genRqDesc; let exReq = exReqInFifo.first; exReqInFifo.deq; let endOffset = byteModDWord(exReq.endAddr); - DwordCount dwCnt = truncate((exReq.endAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)) - (exReq.startAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH))); - dwCnt = (endOffset == 0) ? dwCnt : dwCnt + 1; + DwordCount dwCnt = truncate((exReq.endAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)) - (exReq.startAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH))) + 1; dwCnt = (exReq.length == 0) ? 1 : dwCnt; DataBytePtr bytePtr = fromInteger(valueOf(TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH))); let descriptor = PcieRequesterRequestDescriptor { @@ -308,7 +321,7 @@ module mkRqDescriptorGenerator#(Bool isWrite)(RqDescriptorGenerator); trafficClass : 0, requesterIdEn : False, completerId : 0, - tag : 0, + tag : exReq.tag, requesterId : 0, isPoisoned : False, reqType : isWrite ? fromInteger(valueOf(MEM_WRITE_REQ)) : fromInteger(valueOf(MEM_READ_REQ)), @@ -323,9 +336,20 @@ module mkRqDescriptorGenerator#(Bool isWrite)(RqDescriptorGenerator); isLast : True }; descOutFifo.enq(stream); + let startAddrOffset = byteModDWord(exReq.startAddr); + let endAddrOffset = byteModDWord(exReq.endAddr); + let firstByteEn = convertDWordOffset2FirstByteEn(startAddrOffset); + let lastByteEn = convertDWordOffset2LastByteEn(endAddrOffset); + if (exReq.length <= fromInteger(valueOf(DWORD_BYTES))) begin + firstByteEn = firstByteEn & lastByteEn; + lastByteEn = 0; + end + byteEnOutFifo.enq(tuple2(firstByteEn, lastByteEn)); + // $display($time, "ns SIM INFO @ mkRqDescriptorGenerator: generate, dwcnt %d, start:%d, end:%d, byteCnt:%d ", dwCnt, exReq.startAddr, exReq.endAddr, exReq.length); endrule interface exReqFifoIn = convertFifoToFifoIn(exReqInFifo); interface descFifoOut = convertFifoToFifoOut(descOutFifo); + interface byteEnFifoOut = convertFifoToFifoOut(byteEnOutFifo); endmodule diff --git a/src/DmaWrapper.bsv b/src/DmaWrapper.bsv index 0bf04fc..bd3bbee 100755 --- a/src/DmaWrapper.bsv +++ b/src/DmaWrapper.bsv @@ -115,6 +115,7 @@ typedef RawBusSlave#(DmaCsrValue) RawDmaCsrSlave; module mkFifoInToRawDmaDataSlave#(FifoIn#(DataStream) pipe)(RawDmaDataSlave); Reg#(Bool) isFirstReg <- mkReg(True); let rawBus <- mkFifoInToRawBusSlave(pipe); + method Bool tReady = rawBus.ready; method Action tValid( Bool valid, @@ -123,7 +124,7 @@ module mkFifoInToRawDmaDataSlave#(FifoIn#(DataStream) pipe)(RawDmaDataSlave); Bool tLast, Bit#(DMA_DATA_USER_WIDTH) tUser ); - if (valid) begin + if (valid && rawBus.ready) begin if (tLast) begin isFirstReg <= True; end diff --git a/src/PcieAdapter.bsv b/src/PcieAdapter.bsv index 4aa8c7f..b90fc75 100644 --- a/src/PcieAdapter.bsv +++ b/src/PcieAdapter.bsv @@ -245,7 +245,7 @@ module mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); shiftB.streamFifoOut.deq; end end - // In streamB sendging epoch, waiting streamB until isLast + // In streamB sending epoch, waiting streamB until isLast else if (isInStreamBReg) begin let {oriStreamB, shiftStreamB} = shiftB.streamFifoOut.first; sendingStream = isInShiftBReg ? shiftStreamB : oriStreamB; @@ -372,6 +372,10 @@ module mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); tUser : pack(sideBand) }; axiStreamOutFifo.enq(axiStream); + // $display($time, "ns SIM INFO @ mkDataStreamToAxis: tx a AXIS frame, isSop:%h, isEop:%d, isEopPtr:%d, tData:%h", isSop.isSop, isEop.isEop, isEop.isEopPtrs[0], axiStream.tData); + if (isEop.isEop == 1 && isEop.isEopPtrs[0] == 0) begin + $display($time, "ns SIM Warning @ mkDataStreamToAxis: stream byteEn %b", sendingStream.byteEn); + end end endrule @@ -421,6 +425,7 @@ module mkConvertStraddleAxisToDataStream(ConvertStraddleAxisToDataStream); PcieRequesterCompleteSideBandFrame sideBand = unpack(axiStream.tUser); let isEop = sideBand.isEop; let isSop = sideBand.isSop; + $display($time, "ns SIM INFO @ mkAxisToDataStream: rx a AXIS frame, isSop:%h, isEop:%d, tData:%h", isSop.isSop, isEop.isEop, axiStream.tData); for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1) begin let sdStream = getEmptyStraddleStream; // 2 New TLP diff --git a/src/PrimUtils.bsv b/src/PrimUtils.bsv index bb0c8e2..e21fd18 100644 --- a/src/PrimUtils.bsv +++ b/src/PrimUtils.bsv @@ -181,10 +181,10 @@ endfunction function DWordByteEn convertDWordOffset2LastByteEn (ByteModDWord dwOffset); DWordByteEn dwByteEn = 0; case(dwOffset) - 0: dwByteEn = 'b1111; - 1: dwByteEn = 'b0001; - 2: dwByteEn = 'b0011; - 3: dwByteEn = 'b0111; + 0: dwByteEn = 'b0001; + 1: dwByteEn = 'b0011; + 2: dwByteEn = 'b0111; + 3: dwByteEn = 'b1111; default: dwByteEn = 'b0000; endcase return dwByteEn; @@ -205,71 +205,71 @@ endfunction // DWordPtr strarts from 0 not 1 to align to PcieTlpIsEop function DataDwordPtr convertByteEn2DwordPtr (ByteEn byteEn); DataDwordPtr ptr = 0; - case(byteEn) + case(byteEn) matches 'h0000000000000001: ptr = 0; 'h0000000000000003: ptr = 0; 'h0000000000000007: ptr = 0; - 'h000000000000000F: ptr = 0; - 'h000000000000001F: ptr = 1; - 'h000000000000003F: ptr = 1; - 'h000000000000007F: ptr = 1; - 'h00000000000000FF: ptr = 1; - 'h00000000000001FF: ptr = 2; - 'h00000000000003FF: ptr = 2; - 'h00000000000007FF: ptr = 2; - 'h0000000000000FFF: ptr = 2; - 'h0000000000001FFF: ptr = 3; - 'h0000000000003FFF: ptr = 3; - 'h0000000000007FFF: ptr = 3; - 'h000000000000FFFF: ptr = 3; - 'h000000000001FFFF: ptr = 4; - 'h000000000003FFFF: ptr = 4; - 'h000000000007FFFF: ptr = 4; - 'h00000000000FFFFF: ptr = 4; - 'h00000000001FFFFF: ptr = 5; - 'h00000000003FFFFF: ptr = 5; - 'h00000000007FFFFF: ptr = 5; - 'h0000000000FFFFFF: ptr = 5; - 'h0000000001FFFFFF: ptr = 6; - 'h0000000003FFFFFF: ptr = 6; - 'h0000000007FFFFFF: ptr = 6; - 'h000000000FFFFFFF: ptr = 6; - 'h000000001FFFFFFF: ptr = 7; - 'h000000003FFFFFFF: ptr = 7; - 'h000000007FFFFFFF: ptr = 7; - 'h00000000FFFFFFFF: ptr = 7; - 'h00000001FFFFFFFF: ptr = 8; - 'h00000003FFFFFFFF: ptr = 8; - 'h00000007FFFFFFFF: ptr = 8; - 'h0000000FFFFFFFFF: ptr = 8; - 'h0000001FFFFFFFFF: ptr = 9; - 'h0000003FFFFFFFFF: ptr = 9; - 'h0000007FFFFFFFFF: ptr = 9; - 'h000000FFFFFFFFFF: ptr = 9; - 'h000001FFFFFFFFFF: ptr = 10; - 'h000003FFFFFFFFFF: ptr = 10; - 'h000007FFFFFFFFFF: ptr = 10; - 'h00000FFFFFFFFFFF: ptr = 10; - 'h00001FFFFFFFFFFF: ptr = 11; - 'h00003FFFFFFFFFFF: ptr = 11; - 'h00007FFFFFFFFFFF: ptr = 11; - 'h0000FFFFFFFFFFFF: ptr = 11; - 'h0001FFFFFFFFFFFF: ptr = 12; - 'h0003FFFFFFFFFFFF: ptr = 12; - 'h0007FFFFFFFFFFFF: ptr = 12; - 'h000FFFFFFFFFFFFF: ptr = 12; - 'h001FFFFFFFFFFFFF: ptr = 13; - 'h003FFFFFFFFFFFFF: ptr = 13; - 'h007FFFFFFFFFFFFF: ptr = 13; - 'h00FFFFFFFFFFFFFF: ptr = 13; - 'h01FFFFFFFFFFFFFF: ptr = 14; - 'h03FFFFFFFFFFFFFF: ptr = 14; - 'h07FFFFFFFFFFFFFF: ptr = 14; - 'h0FFFFFFFFFFFFFFF: ptr = 14; - 'h1FFFFFFFFFFFFFFF: ptr = 15; - 'h3FFFFFFFFFFFFFFF: ptr = 15; - 'h7FFFFFFFFFFFFFFF: ptr = 15; - 'hFFFFFFFFFFFFFFFF: ptr = 15; + 'h000000000000000?: ptr = 0; + 'h000000000000001?: ptr = 1; + 'h000000000000003?: ptr = 1; + 'h000000000000007?: ptr = 1; + 'h00000000000000F?: ptr = 1; + 'h00000000000001F?: ptr = 2; + 'h00000000000003F?: ptr = 2; + 'h00000000000007F?: ptr = 2; + 'h0000000000000FF?: ptr = 2; + 'h0000000000001FF?: ptr = 3; + 'h0000000000003FF?: ptr = 3; + 'h0000000000007FF?: ptr = 3; + 'h000000000000FFF?: ptr = 3; + 'h000000000001FFF?: ptr = 4; + 'h000000000003FFF?: ptr = 4; + 'h000000000007FFF?: ptr = 4; + 'h00000000000FFFF?: ptr = 4; + 'h00000000001FFFF?: ptr = 5; + 'h00000000003FFFF?: ptr = 5; + 'h00000000007FFFF?: ptr = 5; + 'h0000000000FFFFF?: ptr = 5; + 'h0000000001FFFFF?: ptr = 6; + 'h0000000003FFFFF?: ptr = 6; + 'h0000000007FFFFF?: ptr = 6; + 'h000000000FFFFFF?: ptr = 6; + 'h000000001FFFFFF?: ptr = 7; + 'h000000003FFFFFF?: ptr = 7; + 'h000000007FFFFFF?: ptr = 7; + 'h00000000FFFFFFF?: ptr = 7; + 'h00000001FFFFFFF?: ptr = 8; + 'h00000003FFFFFFF?: ptr = 8; + 'h00000007FFFFFFF?: ptr = 8; + 'h0000000FFFFFFFF?: ptr = 8; + 'h0000001FFFFFFFF?: ptr = 9; + 'h0000003FFFFFFFF?: ptr = 9; + 'h0000007FFFFFFFF?: ptr = 9; + 'h000000FFFFFFFFF?: ptr = 9; + 'h000001FFFFFFFFF?: ptr = 10; + 'h000003FFFFFFFFF?: ptr = 10; + 'h000007FFFFFFFFF?: ptr = 10; + 'h00000FFFFFFFFFF?: ptr = 10; + 'h00001FFFFFFFFFF?: ptr = 11; + 'h00003FFFFFFFFFF?: ptr = 11; + 'h00007FFFFFFFFFF?: ptr = 11; + 'h0000FFFFFFFFFFF?: ptr = 11; + 'h0001FFFFFFFFFFF?: ptr = 12; + 'h0003FFFFFFFFFFF?: ptr = 12; + 'h0007FFFFFFFFFFF?: ptr = 12; + 'h000FFFFFFFFFFFF?: ptr = 12; + 'h001FFFFFFFFFFFF?: ptr = 13; + 'h003FFFFFFFFFFFF?: ptr = 13; + 'h007FFFFFFFFFFFF?: ptr = 13; + 'h00FFFFFFFFFFFFF?: ptr = 13; + 'h01FFFFFFFFFFFFF?: ptr = 14; + 'h03FFFFFFFFFFFFF?: ptr = 14; + 'h07FFFFFFFFFFFFF?: ptr = 14; + 'h0FFFFFFFFFFFFFF?: ptr = 14; + 'h1FFFFFFFFFFFFFF?: ptr = 15; + 'h3FFFFFFFFFFFFFF?: ptr = 15; + 'h7FFFFFFFFFFFFFF?: ptr = 15; + 'hFFFFFFFFFFFFFFF?: ptr = 15; default : ptr = 0; endcase return ptr; diff --git a/src/StreamUtils.bsv b/src/StreamUtils.bsv index ead5db3..c6245cd 100755 --- a/src/StreamUtils.bsv +++ b/src/StreamUtils.bsv @@ -1,6 +1,7 @@ import Vector::*; import FIFOF::*; import GetPut::*; +import Connectable::*; import SemiFifo::*; import PrimUtils::*; @@ -162,9 +163,15 @@ module mkStreamSplit(StreamSplit ifc); end DataBytePtr offsetBytePtr = 0; let curLocation = unpack(zeroExtend(bytePtr)) + streamByteCntReg; - if (!isSplittedReg && curLocation >= splitLocation) begin - offsetBytePtr = truncate(pack(splitLocation - curLocation)); + if (!isSplittedReg) begin + if (curLocation > splitLocation) begin + offsetBytePtr = truncate(pack(splitLocation - streamByteCntReg)); + end + else if (curLocation == splitLocation) begin + offsetBytePtr = bytePtr; + end end + // $display($time, "ns SIM INFO @ mkStreamSplit: curLocation:%d, splitLocation:%d, offset:%d", curLocation, splitLocation, offsetBytePtr); splitPtrFifo.enq(offsetBytePtr); if (offsetBytePtr > 0 && !stream.isLast) begin isSplittedReg <= True; @@ -212,7 +219,7 @@ module mkStreamSplit(StreamSplit ifc); isFirst: True, isLast: True }; - hasRemainReg <= True; + hasRemainReg <= streamWp.stream.isLast ? !isByteEnZero(remainStream.byteEn) : True; hasLastRemainReg <= streamWp.stream.isLast; remainStreamWpReg <= StreamWithPtr { stream : remainStream, @@ -355,63 +362,106 @@ module mkStreamShiftComplex#(DataBytePtr offset)(StreamShiftComplex); interface streamFifoOut = convertFifoToFifoOut(outFifo); endmodule +typedef enum { + Align0 = 0, + Align1 = 1, + Align2 = 2, + Align3 = 3 +} AlignDwMode deriving(Bits, Eq, Bounded, FShow); + interface StreamShiftAlignToDw; interface FifoIn#(DataStream) dataFifoIn; - interface FifoIn#(DmaExtendRequest) reqFifoIn; interface FifoOut#(DataStream) dataFifoOut; - interface FifoOut#(SideBandByteEn) byteEnFifoOut; + method Action setAlignMode(AlignDwMode align); endinterface typedef 2 STREAM_ALIGN_DW_LATENCY; module mkStreamShiftAlignToDw#(DataBytePtr offset)(StreamShiftAlignToDw); - FIFOF#(DataStream) dataInFifo <- mkFIFOF; - FIFOF#(DmaExtendRequest) reqInFifo <- mkFIFOF; - FIFOF#(DataStream) dataOutFifo <- mkFIFOF; - FIFOF#(SideBandByteEn) byteEnOutFifo <- mkFIFOF; + FIFOF#(DataStream) dataInFifo <- mkFIFOF; + FIFOF#(DataStream) pipeFifo <- mkFIFOF; + FIFOF#(DataStream) dataOutFifo <- mkFIFOF; + FIFOF#(AlignDwMode) alignModeFifo <- mkFIFOF; - FIFOF#(DataBytePtr) shiftSetFifo <- mkSizedFIFOF(valueOf(TMul#(2, STREAM_SHIFT_LATENCY))); + Reg#(DataStream) remainStreamReg <- mkReg(getEmptyStream); + Reg#(Bool) hasLastRemainReg <- mkReg(False); - Vector#(DWORD_BYTES, StreamPipe) shifts = newVector; - for (DataBytePtr idx = 0; idx < fromInteger(valueOf(DWORD_BYTES)); idx = idx + 1 ) begin - shifts[idx] <- mkStreamShift(offset + idx); - end - - rule getOffset; - let exReq = reqInFifo.first; - reqInFifo.deq; - ByteModDWord startAddrOffset = byteModDWord(exReq.startAddr); - shiftSetFifo.enq(zeroExtend(startAddrOffset)); - ByteModDWord endAddrOffset = byteModDWord(exReq.endAddr); - let firstByteEn = convertDWordOffset2FirstByteEn(startAddrOffset); - let lastByteEn = convertDWordOffset2LastByteEn(endAddrOffset); - byteEnOutFifo.enq(tuple2(firstByteEn, lastByteEn)); - let stream = dataInFifo.first; + DataBytePtr resByte = getMaxBytePtr - (offset + 3); + DataBitPtr offsetBits = zeroExtend(offset) << valueOf(BYTE_WIDTH_WIDTH); + DataBitPtr resBits = zeroExtend(resByte) << valueOf(BYTE_WIDTH_WIDTH); + ByteEn byteEnMask1 = 1 << (offset); + ByteEn byteEnMask2 = 1 << (offset + 1) | byteEnMask1 ; + ByteEn byteEnMask3 = 1 << (offset + 2) | byteEnMask2; + + rule pipe; + pipeFifo.enq(dataInFifo.first); dataInFifo.deq; - for (DataBytePtr idx = 0; idx < fromInteger(valueOf(DWORD_BYTES)); idx = idx + 1 ) begin - shifts[idx].streamFifoIn.enq(stream); - end endrule - rule getShiftData; - DataStream stream = getEmptyStream; - let offset = shiftSetFifo.first; - for (DataBytePtr idx = 0; idx < fromInteger(valueOf(DWORD_BYTES)); idx = idx + 1 ) begin - shifts[idx].streamFifoOut.deq; - if (idx == offset) begin - stream = shifts[idx].streamFifoOut.first; - end + rule execShift; + if (hasLastRemainReg) begin + dataOutFifo.enq(remainStreamReg); + hasLastRemainReg <= False; + remainStreamReg <= getEmptyStream; end - if (stream.isLast) begin - shiftSetFifo.deq; + else begin + let stream = pipeFifo.first; + pipeFifo.deq; + let shiftStream = DataStream { + data : stream.data << offsetBits, + byteEn : stream.byteEn << offset , + isFirst : stream.isFirst, + isLast : stream.isLast + }; + let remainStream = DataStream { + data : stream.data >> resBits, + byteEn : stream.byteEn >> resByte, + isFirst : False, + isLast : True + }; + let alignMode = alignModeFifo.first; + if (stream.isLast) begin + alignModeFifo.deq; + end + case (alignMode) + Align1: begin + shiftStream.data = shiftStream.data << valueOf(TMul#(1, BYTE_WIDTH)) | remainStreamReg.data; + shiftStream.byteEn = shiftStream.byteEn << 1 | byteEnMask1 | remainStreamReg.byteEn; + remainStream.data = remainStream.data >> valueOf(TMul#(2, BYTE_WIDTH)); + remainStream.byteEn = remainStream.byteEn >> 2; + end + Align2: begin + shiftStream.data = shiftStream.data << valueOf(TMul#(2, BYTE_WIDTH)) | remainStreamReg.data; + shiftStream.byteEn = shiftStream.byteEn << 2 | byteEnMask2 | remainStreamReg.byteEn; + remainStream.data = remainStream.data >> valueOf(TMul#(1, BYTE_WIDTH)); + remainStream.byteEn = remainStream.byteEn >> 1; + end + Align3: begin + shiftStream.data = shiftStream.data << valueOf(TMul#(3, BYTE_WIDTH)) | remainStreamReg.data; + shiftStream.byteEn = shiftStream.byteEn << 3 | byteEnMask3 | remainStreamReg.byteEn; + remainStream.data = remainStream.data; + remainStream.byteEn = remainStream.byteEn; + end + default: begin + shiftStream.data = shiftStream.data | remainStreamReg.data; + shiftStream.byteEn = shiftStream.byteEn | remainStreamReg.byteEn; + remainStream.data = remainStream.data >> valueOf(TMul#(3, BYTE_WIDTH)); + remainStream.byteEn = remainStream.byteEn >> 3; + end + endcase + shiftStream.isLast = shiftStream.isLast && isByteEnZero(remainStream.byteEn); + dataOutFifo.enq(shiftStream); + remainStreamReg <= remainStream; + hasLastRemainReg <= stream.isLast && !isByteEnZero(remainStream.byteEn); end - dataOutFifo.enq(stream); endrule + method Action setAlignMode(AlignDwMode align); + alignModeFifo.enq(align); + endmethod + interface dataFifoIn = convertFifoToFifoIn(dataInFifo); - interface reqFifoIn = convertFifoToFifoIn(reqInFifo); interface dataFifoOut = convertFifoToFifoOut(dataOutFifo); - interface byteEnFifoOut = convertFifoToFifoOut(byteEnOutFifo); endmodule typedef 3 STREAM_HEADER_REMOVE_LATENCY; @@ -424,7 +474,9 @@ module mkStreamHeaderRemove#(DataBytePtr headerLen)(StreamPipe); Reg#(DataStream) remainStreamReg <- mkReg(getEmptyStream); Reg#(Bool) hasLastRemainReg <- mkReg(False); - DataBitPtr headerBitLen = zeroExtend(headerLen) << valueOf(BYTE_WIDTH_WIDTH); + DataBitPtr headerBitLen = zeroExtend(headerLen) << valueOf(BYTE_WIDTH_WIDTH); + DataBytePtr shiftLen = getMaxBytePtr - headerLen; + DataBitPtr shiftBitLen = zeroExtend(shiftLen) << valueOf(BYTE_WIDTH_WIDTH); rule removeHeader; if (hasLastRemainReg) begin @@ -435,42 +487,42 @@ module mkStreamHeaderRemove#(DataBytePtr headerLen)(StreamPipe); else begin let stream = inFifo.first; inFifo.deq; - let resStream = DataStream { + let remainStream = DataStream { data : stream.data >> headerBitLen, byteEn : stream.byteEn >> headerLen, isFirst : stream.isFirst, isLast : stream.isLast }; - let removeStream = DataStream { - data : zeroExtend(Data'(stream.data[headerBitLen-1:0])), - byteEn : zeroExtend(ByteEn'(stream.byteEn[headerLen-1:0])), - isFirst : False, - isLast : False - }; let newStream = DataStream { - data : remainStreamReg.data | stream.data << headerBitLen, - byteEn : remainStreamReg.byteEn | stream.byteEn << headerLen, - isFirst : stream.isFirst, - isLast : stream.isLast + data : remainStreamReg.data | stream.data << shiftBitLen, + byteEn : remainStreamReg.byteEn | stream.byteEn << shiftLen, + isFirst : remainStreamReg.isFirst, + isLast : isByteEnZero(remainStream.byteEn) }; + if (stream.isLast && stream.isFirst) begin - outFifo.enq(resStream); + outFifo.enq(remainStream); + hasLastRemainReg <= False; + remainStreamReg <= getEmptyStream; end else if (stream.isFirst) begin - remainStreamReg <= resStream; + remainStreamReg <= remainStream; end else begin outFifo.enq(newStream); if (stream.isLast) begin - if(isByteEnZero(resStream.byteEn)) begin + if(isByteEnZero(remainStream.byteEn)) begin remainStreamReg <= getEmptyStream; hasLastRemainReg <= False; end else begin - remainStreamReg <= resStream; + remainStreamReg <= remainStream; hasLastRemainReg <= True; end end + else begin + remainStreamReg <= remainStream; + end end end endrule @@ -505,13 +557,16 @@ module mkStreamReshape(StreamPipe); Bool isDetect = !stream.isLast && !isByteEnFull(stream.byteEn) && (!isDetectedReg); if (isDetect) begin let bytePtr = convertByteEn2BytePtr(stream.byteEn); - DataBitPtr bitPtr = zeroExtend(bytePtr) >> valueOf(BYTE_WIDTH_WIDTH); + DataBitPtr bitPtr = zeroExtend(bytePtr) << valueOf(BYTE_WIDTH_WIDTH); rmBytePtrReg <= bytePtr; rmBitPtrReg <= bitPtr; rsBytePtrReg <= getMaxBytePtr - bytePtr; rsBitPtrReg <= getMaxBitPtr - bitPtr; remainStreamReg <= stream; isDetectedReg <= True; + if (bytePtr == 0) begin + $display($time, "ns SIM Warning @ mkStreamReshape: detect bubble, bytePtr:%d, byteEn: %b", bytePtr, stream.byteEn); + end end else begin if (isDetectedReg) begin @@ -522,7 +577,7 @@ module mkStreamReshape(StreamPipe); isLast : True }; remainStreamReg <= remainStream; - let isLast = isByteEnZero(remainStream.byteEn); + let isLast = isByteEnZero(remainStream.byteEn) && stream.isLast; let outStream = DataStream { data : (stream.data << rmBitPtrReg) | remainStreamReg.data, byteEn : (stream.byteEn << rmBytePtrReg) | remainStreamReg.byteEn, @@ -530,7 +585,7 @@ module mkStreamReshape(StreamPipe); isLast : isLast }; outFifo.enq(outStream); - hasLastRemainReg <= !isLast; + hasLastRemainReg <= !isByteEnZero(remainStream.byteEn) && stream.isLast; isDetectedReg <= isLast ? False : isDetectedReg; end else begin @@ -543,3 +598,88 @@ module mkStreamReshape(StreamPipe); interface streamFifoIn = convertFifoToFifoIn(inFifo); interface streamFifoOut = convertFifoToFifoOut(outFifo); endmodule + +typedef Bit#(DWORD_BYTES) DWordByteEn; + +module mkStreamRemoveFromDW(StreamPipe); + FIFOF#(DataStream) inFifo <- mkFIFOF; + FIFOF#(DataStream) outFifo <- mkFIFOF; + + Reg#(DataStream) remainStreamReg <- mkReg(getEmptyStream); + Reg#(Bool) hasLastRemainReg <- mkReg(False); + Reg#(DataBytePtr) removeByteReg <- mkReg(0); + Reg#(DataBytePtr) resByteReg <- mkReg(getMaxBytePtr); + + + function Tuple2#(DataBytePtr, DataBytePtr) getRemoveOffset(DWordByteEn dwByteEn); + case (dwByteEn) matches + 4'b??10: return tuple2(1, getMaxBytePtr - 1); + 4'b?100: return tuple2(2, getMaxBytePtr - 2); + 4'b1000: return tuple2(3, getMaxBytePtr - 3); + default: return tuple2(0, getMaxBytePtr); + endcase + endfunction + + rule removeHeader; + if (hasLastRemainReg) begin + outFifo.enq(remainStreamReg); + hasLastRemainReg <= False; + remainStreamReg <= getEmptyStream; + end + else begin + let stream = inFifo.first; + inFifo.deq; + let removeByte = removeByteReg; + let resByte = resByteReg; + + if (stream.isFirst) begin + {removeByte, resByte} = getRemoveOffset(truncate(stream.byteEn)); + end + DataBitPtr removeBits = zeroExtend(removeByte) << valueOf(BYTE_WIDTH_WIDTH); + DataBitPtr resBits = zeroExtend(resByte) << valueOf(BYTE_WIDTH_WIDTH); + + let remainStream = DataStream { + data : stream.data >> removeBits, + byteEn : stream.byteEn >> removeByte, + isFirst : stream.isFirst, + isLast : stream.isLast + }; + let newStream = DataStream { + data : remainStreamReg.data | stream.data << resBits, + byteEn : remainStreamReg.byteEn | stream.byteEn << resByte, + isFirst : remainStreamReg.isFirst, + isLast : isByteEnZero(remainStream.byteEn) + }; + removeByteReg <= removeByte; + resByteReg <= resByte; + + if (stream.isLast && stream.isFirst) begin + outFifo.enq(remainStream); + hasLastRemainReg <= False; + remainStreamReg <= getEmptyStream; + end + else if (stream.isFirst) begin + remainStreamReg <= remainStream; + end + else begin + outFifo.enq(newStream); + if (stream.isLast) begin + if(isByteEnZero(remainStream.byteEn)) begin + remainStreamReg <= getEmptyStream; + hasLastRemainReg <= False; + end + else begin + remainStreamReg <= remainStream; + hasLastRemainReg <= True; + end + end + else begin + remainStreamReg <= remainStream; + end + end + end + endrule + + interface streamFifoIn = convertFifoToFifoIn(inFifo); + interface streamFifoOut = convertFifoToFifoOut(outFifo); +endmodule diff --git a/test/TestStreamUtils.bsv b/test/TestStreamUtils.bsv index 01b9f33..d5996ca 100755 --- a/test/TestStreamUtils.bsv +++ b/test/TestStreamUtils.bsv @@ -77,6 +77,7 @@ module mkRandomStreamSize(StreamSize seed, StreamSizeBitPtr maxSizeBitPtr, Rando endmethod endmodule +(* doc = "testcase" *) module mkStreamSplitTb(Empty); StreamSplit dut <- mkStreamSplit; From 1ad356c838472c52fe46d59cbd26b5069e1359e1 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Tue, 3 Sep 2024 17:49:12 +0800 Subject: [PATCH 39/53] Pass straddle mode cocotb test --- cocotb/Makefile | 9 +- cocotb/dma_straddle_tb.py | 438 ++++++++++++++++++++++++++++++++++++++ cocotb/dma_wr_rd_tb.py | 16 +- script.py | 6 +- src/DmaC2HPipe.bsv | 104 +++++---- src/DmaTypes.bsv | 12 ++ src/DmaUtils.bsv | 73 +++---- src/DmaWrapper.bsv | 26 ++- src/PcieAdapter.bsv | 86 ++++++-- src/PcieConfigurator.bsv | 56 ++++- src/PrimUtils.bsv | 260 +++++++++++----------- 11 files changed, 830 insertions(+), 256 deletions(-) create mode 100644 cocotb/dma_straddle_tb.py diff --git a/cocotb/Makefile b/cocotb/Makefile index f44df22..e7a64cc 100644 --- a/cocotb/Makefile +++ b/cocotb/Makefile @@ -10,23 +10,26 @@ TOP_MODULE = mk$(TARGET) TOP_FILE = $(TOP_MODULE).v VLOG_FILE = $(TB_DIR)/$(TOP_FILE) -TB_CASE = dma_wr_rd +TB_CASE = dma_straddle TB_FILE = $(TB_CASE)_tb.py DATE = $(shell date "+%Y%m%d") LOG_FILE = $(TB_DIR)/log/$(DATE)_$(TOP_MODULE).log verilog: cd $(BACKEND_DIR) && make verilog + +prepare: + rm -rf $(VLOG_FILE) bluetcl $(BACKEND_DIR)/listVlogFiles.tcl -bdir $(VBUILD_DIR) -vdir $(VSRC_DIR) $(TOP_MODULE) $(TOP_MODULE) | grep -i '\.v' | xargs -I {} cat {} >> $(VLOG_FILE) sed -i '1i `timescale 1ns/1ps' $(VLOG_FILE) -cocotb:clean verilog run - run: cd $(TB_DIR) mkdir -p log python3 $(TB_FILE) 2>&1 | tee $(LOG_FILE) +cocotb:clean verilog prepare run + clean: cd $(BACKEND_DIR) && make clean cd $(TB_DIR) && rm -rf $(VLOG_FILE) __pycache__ .pytest_cache sim_build *.log diff --git a/cocotb/dma_straddle_tb.py b/cocotb/dma_straddle_tb.py new file mode 100644 index 0000000..afe22d5 --- /dev/null +++ b/cocotb/dma_straddle_tb.py @@ -0,0 +1,438 @@ +#!/usr/bin/env python +import itertools +import logging +import os +import random +import queue + +import cocotb_test.simulator +import pytest + +import cocotb +from cocotb.triggers import RisingEdge, FallingEdge, Timer +from cocotb.regression import TestFactory +from cocotb.clock import Clock + +from cocotbext.pcie.core import RootComplex +from cocotbext.pcie.xilinx.us import UltraScalePlusPcieDevice +from cocotbext.axi.stream import define_stream +from cocotbext.axi import (AxiStreamBus, AxiStreamSource, AxiStreamSink, AxiStreamMonitor, AxiStreamFrame) + +# class TB architecture +# -------------- ------------- ----------- +# | Root Complex | <-> | End Pointer | <-> | Dut(DMAC) | +# -------------- ------------- ----------- + +DescBus, DescTransaction, DescSource, DescSink, DescMonitor = define_stream("Desc", + signals=["start_addr", "byte_cnt", "is_write", "valid", "ready"] +) + +class TB(object): + def __init__(self, dut, msix=False): + self.dut = dut + + self.log = logging.getLogger("cocotb.tb") + self.log.setLevel(logging.DEBUG) + + self.clock = dut.CLK + self.resetn = dut.RST_N + + self._bus_width = 512 + self._bus_bytes = 64 + + # PCIe + self.rc = RootComplex() + + cq_straddle = False + cc_straddle = False + rq_straddle = True + rc_straddle = True + rc_4tlp_straddle = False + + self.client_tag = bool(int(os.getenv("CLIENT_TAG", "1"))) + + self.dev = UltraScalePlusPcieDevice( + # configuration options + pcie_generation=3, + # pcie_link_width=2, + # user_clk_frequency=250e6, + alignment="dword", + cq_straddle=cq_straddle, + cc_straddle=cc_straddle, + rq_straddle=rq_straddle, + rc_straddle=rc_straddle, + rc_4tlp_straddle=rc_4tlp_straddle, + pf_count=1, + max_payload_size=1024, + enable_client_tag=self.client_tag, + enable_extended_tag=False, + enable_parity=False, + enable_rx_msg_interface=False, + enable_sriov=False, + enable_extended_configuration=False, + + pf0_msi_enable=True, + pf0_msi_count=32, + pf1_msi_enable=False, + pf1_msi_count=1, + pf2_msi_enable=False, + pf2_msi_count=1, + pf3_msi_enable=False, + pf3_msi_count=1, + pf0_msix_enable=msix, + pf0_msix_table_size=63, + pf0_msix_table_bir=4, + pf0_msix_table_offset=0x00000000, + pf0_msix_pba_bir=4, + pf0_msix_pba_offset=0x00008000, + pf1_msix_enable=False, + pf1_msix_table_size=0, + pf1_msix_table_bir=0, + pf1_msix_table_offset=0x00000000, + pf1_msix_pba_bir=0, + pf1_msix_pba_offset=0x00000000, + pf2_msix_enable=False, + pf2_msix_table_size=0, + pf2_msix_table_bir=0, + pf2_msix_table_offset=0x00000000, + pf2_msix_pba_bir=0, + pf2_msix_pba_offset=0x00000000, + pf3_msix_enable=False, + pf3_msix_table_size=0, + pf3_msix_table_bir=0, + pf3_msix_table_offset=0x00000000, + pf3_msix_pba_bir=0, + pf3_msix_pba_offset=0x00000000, + + # signals + user_clk=self.clock, + # user_reset=~self.resetn, + user_lnk_up=dut.user_lnk_up, + # sys_clk=dut.sys_clk, + # sys_clk_gt=dut.sys_clk_gt, + # sys_reset=dut.sys_reset, + # phy_rdy_out=dut.phy_rdy_out, + + rq_bus=AxiStreamBus.from_prefix(dut, "m_axis_rq"), + pcie_rq_seq_num0=dut.pcie_rq_seq_num0, + pcie_rq_seq_num_vld0=dut.pcie_rq_seq_num_vld0, + pcie_rq_seq_num1=dut.pcie_rq_seq_num1, + pcie_rq_seq_num_vld1=dut.pcie_rq_seq_num_vld1, + pcie_rq_tag0=dut.pcie_rq_tag0, + pcie_rq_tag1=dut.pcie_rq_tag1, + # pcie_rq_tag_av=dut.pcie_rq_tag_av, + pcie_rq_tag_vld0=dut.pcie_rq_tag_vld0, + pcie_rq_tag_vld1=dut.pcie_rq_tag_vld1, + + rc_bus=AxiStreamBus.from_prefix(dut, "s_axis_rc"), + + cq_bus=AxiStreamBus.from_prefix(dut, "s_axis_cq"), + pcie_cq_np_req=dut.pcie_cq_np_req, + pcie_cq_np_req_count=dut.pcie_cq_np_req_count, + + cc_bus=AxiStreamBus.from_prefix(dut, "m_axis_cc"), + + pcie_tfc_nph_av=dut.pcie_tfc_nph_av, + pcie_tfc_npd_av=dut.pcie_tfc_npd_av, + cfg_phy_link_down=dut.cfg_phy_link_down, + cfg_phy_link_status=dut.cfg_phy_link_status, + cfg_negotiated_width=dut.cfg_negotiated_width, + cfg_current_speed=dut.cfg_current_speed, + cfg_max_payload=dut.cfg_max_payload, + cfg_max_read_req=dut.cfg_max_read_req, + cfg_function_status=dut.cfg_function_status, + cfg_function_power_state=dut.cfg_function_power_state, + cfg_vf_status=dut.cfg_vf_status, + cfg_vf_power_state=dut.cfg_vf_power_state, + cfg_link_power_state=dut.cfg_link_power_state, + cfg_mgmt_addr=dut.cfg_mgmt_addr, + cfg_mgmt_function_number=dut.cfg_mgmt_function_number, + cfg_mgmt_write=dut.cfg_mgmt_write, + cfg_mgmt_write_data=dut.cfg_mgmt_write_data, + cfg_mgmt_byte_enable=dut.cfg_mgmt_byte_enable, + cfg_mgmt_read=dut.cfg_mgmt_read, + cfg_mgmt_read_data=dut.cfg_mgmt_read_data, + cfg_mgmt_read_write_done=dut.cfg_mgmt_read_write_done, + cfg_mgmt_debug_access=dut.cfg_mgmt_debug_access, + cfg_err_cor_out=dut.cfg_err_cor_out, + cfg_err_nonfatal_out=dut.cfg_err_nonfatal_out, + cfg_err_fatal_out=dut.cfg_err_fatal_out, + cfg_local_error_valid=dut.cfg_local_error_valid, + cfg_local_error_out=dut.cfg_local_error_out, + cfg_ltssm_state=dut.cfg_ltssm_state, + cfg_rx_pm_state=dut.cfg_rx_pm_state, + cfg_tx_pm_state=dut.cfg_tx_pm_state, + cfg_rcb_status=dut.cfg_rcb_status, + cfg_obff_enable=dut.cfg_obff_enable, + # cfg_pl_status_change=dut.cfg_pl_status_change, + # cfg_tph_requester_enable=dut.cfg_tph_requester_enable, + # cfg_tph_st_mode=dut.cfg_tph_st_mode, + # cfg_vf_tph_requester_enable=dut.cfg_vf_tph_requester_enable, + # cfg_vf_tph_st_mode=dut.cfg_vf_tph_st_mode, + cfg_msg_received=dut.cfg_msg_received, + cfg_msg_received_data=dut.cfg_msg_received_data, + cfg_msg_received_type=dut.cfg_msg_received_type, + cfg_msg_transmit=dut.cfg_msg_transmit, + cfg_msg_transmit_type=dut.cfg_msg_transmit_type, + cfg_msg_transmit_data=dut.cfg_msg_transmit_data, + cfg_msg_transmit_done=dut.cfg_msg_transmit_done, + cfg_fc_ph=dut.cfg_fc_ph, + cfg_fc_pd=dut.cfg_fc_pd, + cfg_fc_nph=dut.cfg_fc_nph, + cfg_fc_npd=dut.cfg_fc_npd, + cfg_fc_cplh=dut.cfg_fc_cplh, + cfg_fc_cpld=dut.cfg_fc_cpld, + cfg_fc_sel=dut.cfg_fc_sel, + cfg_dsn=dut.cfg_dsn, + cfg_bus_number=dut.cfg_bus_number, + cfg_power_state_change_ack=dut.cfg_power_state_change_ack, + cfg_power_state_change_interrupt=dut.cfg_power_state_change_interrupt, + cfg_err_cor_in=dut.cfg_err_cor_in, + cfg_err_uncor_in=dut.cfg_err_uncor_in, + cfg_flr_in_process=dut.cfg_flr_in_process, + cfg_flr_done=dut.cfg_flr_done, + cfg_vf_flr_in_process=dut.cfg_vf_flr_in_process, + cfg_vf_flr_func_num=dut.cfg_vf_flr_func_num, + cfg_vf_flr_done=dut.cfg_vf_flr_done, + cfg_link_training_enable=dut.cfg_link_training_enable, + cfg_interrupt_int=dut.cfg_interrupt_int, + cfg_interrupt_pending=dut.cfg_interrupt_pending, + cfg_interrupt_sent=dut.cfg_interrupt_sent, + cfg_interrupt_msi_enable=dut.cfg_interrupt_msi_enable, + cfg_interrupt_msi_mmenable=dut.cfg_interrupt_msi_mmenable, + cfg_interrupt_msi_mask_update=dut.cfg_interrupt_msi_mask_update, + cfg_interrupt_msi_data=dut.cfg_interrupt_msi_data, + cfg_interrupt_msi_select=dut.cfg_interrupt_msi_select, + cfg_interrupt_msi_int=dut.cfg_interrupt_msi_int, + cfg_interrupt_msi_pending_status=dut.cfg_interrupt_msi_pending_status, + cfg_interrupt_msi_pending_status_data_enable=dut.cfg_interrupt_msi_pending_status_data_enable, + cfg_interrupt_msi_pending_status_function_num=dut.cfg_interrupt_msi_pending_status_function_num, + cfg_interrupt_msi_sent=dut.cfg_interrupt_msi_sent, + cfg_interrupt_msi_fail=dut.cfg_interrupt_msi_fail, + cfg_interrupt_msi_attr=dut.cfg_interrupt_msi_attr, + cfg_interrupt_msi_tph_present=dut.cfg_interrupt_msi_tph_present, + cfg_interrupt_msi_tph_type=dut.cfg_interrupt_msi_tph_type, + cfg_interrupt_msi_tph_st_tag=dut.cfg_interrupt_msi_tph_st_tag, + cfg_interrupt_msi_function_number=dut.cfg_interrupt_msi_function_number, + cfg_pm_aspm_l1_entry_reject=dut.cfg_pm_aspm_l1_entry_reject, + cfg_pm_aspm_tx_l0s_entry_disable=dut.cfg_pm_aspm_tx_l0s_entry_disable, + cfg_hot_reset_out=dut.cfg_hot_reset_out, + cfg_config_space_enable=dut.cfg_config_space_enable, + cfg_req_pm_transition_l23_ready=dut.cfg_req_pm_transition_l23_ready, + cfg_hot_reset_in=dut.cfg_hot_reset_in, + cfg_ds_port_number=dut.cfg_ds_port_number, + cfg_ds_bus_number=dut.cfg_ds_bus_number, + cfg_ds_device_number=dut.cfg_ds_device_number, + ) + + self.dev.log.setLevel(logging.INFO) + + dut.pcie_cq_np_req.setimmediatevalue(1) + dut.cfg_mgmt_addr.setimmediatevalue(0) + dut.cfg_mgmt_function_number.setimmediatevalue(0) + dut.cfg_mgmt_write.setimmediatevalue(0) + dut.cfg_mgmt_write_data.setimmediatevalue(0) + dut.cfg_mgmt_byte_enable.setimmediatevalue(0) + dut.cfg_mgmt_read.setimmediatevalue(0) + dut.cfg_mgmt_debug_access.setimmediatevalue(0) + dut.cfg_msg_transmit.setimmediatevalue(0) + dut.cfg_msg_transmit_type.setimmediatevalue(0) + dut.cfg_msg_transmit_data.setimmediatevalue(0) + dut.cfg_fc_sel.setimmediatevalue(0) + dut.cfg_dsn.setimmediatevalue(0) + dut.cfg_power_state_change_ack.setimmediatevalue(0) + dut.cfg_err_cor_in.setimmediatevalue(0) + dut.cfg_err_uncor_in.setimmediatevalue(0) + dut.cfg_flr_done.setimmediatevalue(0) + dut.cfg_vf_flr_func_num.setimmediatevalue(0) + dut.cfg_vf_flr_done.setimmediatevalue(0) + dut.cfg_link_training_enable.setimmediatevalue(1) + dut.cfg_interrupt_int.setimmediatevalue(0) + dut.cfg_interrupt_pending.setimmediatevalue(0) + dut.cfg_interrupt_msi_select.setimmediatevalue(0) + dut.cfg_interrupt_msi_int.setimmediatevalue(0) + dut.cfg_interrupt_msi_pending_status.setimmediatevalue(0) + dut.cfg_interrupt_msi_pending_status_data_enable.setimmediatevalue(0) + dut.cfg_interrupt_msi_pending_status_function_num.setimmediatevalue(0) + dut.cfg_interrupt_msi_attr.setimmediatevalue(0) + dut.cfg_interrupt_msi_tph_present.setimmediatevalue(0) + dut.cfg_interrupt_msi_tph_type.setimmediatevalue(0) + dut.cfg_interrupt_msi_tph_st_tag.setimmediatevalue(0) + dut.cfg_interrupt_msi_function_number.setimmediatevalue(0) + dut.cfg_pm_aspm_l1_entry_reject.setimmediatevalue(0) + dut.cfg_pm_aspm_tx_l0s_entry_disable.setimmediatevalue(0) + dut.cfg_config_space_enable.setimmediatevalue(1) + dut.cfg_req_pm_transition_l23_ready.setimmediatevalue(0) + dut.cfg_hot_reset_in.setimmediatevalue(0) + dut.cfg_ds_port_number.setimmediatevalue(0) + dut.cfg_ds_bus_number.setimmediatevalue(0) + dut.cfg_ds_device_number.setimmediatevalue(0) + + self.rc.make_port().connect(self.dev) + + # DMA + self.c2h_write_source_0 = AxiStreamSource(AxiStreamBus.from_prefix(dut, "s_axis_c2h_0"), self.clock, self.resetn, False) + self.c2h_desc_source_0 = DescSource(DescBus.from_prefix(dut, "s_desc_c2h_0"),self.clock, self.resetn, False) + self.c2h_read_sink_0 = AxiStreamSink(AxiStreamBus.from_prefix(dut, "m_axis_c2h_0"), self.clock, self.resetn, False) + self.c2h_write_source_1 = AxiStreamSource(AxiStreamBus.from_prefix(dut, "s_axis_c2h_1"), self.clock, self.resetn, False) + self.c2h_desc_source_1 = DescSource(DescBus.from_prefix(dut, "s_desc_c2h_1"), self.clock, self.resetn, False) + self.c2h_read_sink_1 = AxiStreamSink(AxiStreamBus.from_prefix(dut, "m_axis_c2h_1"), self.clock, self.resetn, False) + + #monitor + self.rq_monitor = AxiStreamMonitor(AxiStreamBus.from_prefix(dut, "m_axis_rq"), self.clock, self.resetn, False) + + def gen_random_req(self, channel): + low_boundry = channel * 8192 + high_boundry = (channel + 1) * 8192 + idxs = random.sample(range(low_boundry, high_boundry), 2) + lo_idx, hi_idx = idxs[0], idxs[1] + if (hi_idx < lo_idx): + temp = hi_idx + hi_idx = lo_idx + lo_idx = temp + length = hi_idx - lo_idx + 1 + return (lo_idx, length) + + #Do not use user_rst but gen rstn for bsv + async def gen_reset(self): + self.resetn.value = 0 + await RisingEdge(self.clock) + await RisingEdge(self.clock) + await RisingEdge(self.clock) + self.resetn.value = 1 + await RisingEdge(self.clock) + await RisingEdge(self.clock) + await RisingEdge(self.clock) + self.log.info("Generated DMA RST_N") + + async def send_desc(self, channel, startAddr, length, isWrite): + desc = DescTransaction() + desc.start_addr = startAddr + desc.byte_cnt = length + desc.is_write = isWrite + if channel == 0: + await self.c2h_desc_source_0.send(desc) + else: + await self.c2h_desc_source_1.send(desc) + + async def send_data(self, channel, data): + if channel == 0: + await self.c2h_write_source_0.send(data) + else: + await self.c2h_write_source_1.send(data) + + async def recv_data(self, channel): + if channel == 0 : + data = await self.c2h_read_sink_0.read() + else: + data = await self.c2h_read_sink_1.read() + data = bytes(''.join([chr(item) for item in data]), encoding='UTF-8') + return data + + async def run_single_write_once(self, channel, addr, data): + length = len(data) + self.log.info("Conduct DMA single write: channel %d addr %d, length %d, char %c", channel, addr, length, data[0]) + await self.send_desc(channel, addr, length, True) + await self.send_data(channel, data) + + async def run_single_read_once(self, channel, addr, length): + self.log.info("Conduct DMA single read: channel %d addr %d, length %d", channel, addr, length) + await self.send_desc(channel, addr, length, False) + data = await self.recv_data(channel) + self.log.info("Read data from RootComplex successfully, recv length %d, req length %d", len(data), length) + return data + +async def single_path_random_write_test(pcie_tb, dma_channel, mem): + for _ in range(100): + addr, length = pcie_tb.gen_random_req(dma_channel) + addr = mem.get_absolute_address(addr) + char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") + data = char * length + await pcie_tb.run_single_write_once(dma_channel, addr, data) + await Timer(100+length, units='ns') + assert mem[addr:addr+length] == data + + +async def single_path_random_read_test(pcie_tb, dma_channel, mem): + for _ in range(100): + addr, length = pcie_tb.gen_random_req(dma_channel) + addr = mem.get_absolute_address(addr) + char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") + mem[addr:addr+length] = char * length + data = await pcie_tb.run_single_read_once(dma_channel, addr, length) + assert data == char * length + + +@cocotb.test(timeout_time=100000000, timeout_unit="ns") +async def straddle_write_test(dut): + + tb = TB(dut) + await tb.gen_reset() + + await tb.rc.enumerate() + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + + await dev.enable_device() + await dev.set_master() + + mem = tb.rc.mem_pool.alloc_region(1024*1024) + + channel0 = cocotb.start_soon(single_path_random_write_test(tb, 0, mem)) + channel1 = cocotb.start_soon(single_path_random_write_test(tb, 1, mem)) + + tb.log.info("Start write test in straddle mode!") + + await channel0 + await channel1 + + tb.log.info("End write test in straddle mode succesfully!") + +@cocotb.test(timeout_time=10000000, timeout_unit="ns") +async def random_read_test(dut): + tb = TB(dut) + await tb.gen_reset() + + await tb.rc.enumerate() + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + + await dev.enable_device() + await dev.set_master() + + mem = tb.rc.mem_pool.alloc_region(1024*1024) + + channel0 = cocotb.start_soon(single_path_random_read_test(tb, 0, mem)) + channel1 = cocotb.start_soon(single_path_random_read_test(tb, 1, mem)) + + tb.log.info("Start Read test in straddle mode!") + + await channel0 + await channel1 + + tb.log.info("End Read test in straddle mode succesfully!") + +tests_dir = os.path.dirname(__file__) +rtl_dir = tests_dir + + +def test_dma(): + dut = "mkRawDmaController" + module = os.path.splitext(os.path.basename(__file__))[0] + toplevel = dut + + verilog_sources = [ + os.path.join(rtl_dir, f"{dut}.v") + ] + + sim_build = os.path.join(tests_dir, "sim_build", dut) + + cocotb_test.simulator.run( + python_search=[tests_dir], + verilog_sources=verilog_sources, + toplevel=toplevel, + module=module, + timescale="1ns/1ps", + sim_build=sim_build + ) + +if __name__ == "__main__": + test_dma() \ No newline at end of file diff --git a/cocotb/dma_wr_rd_tb.py b/cocotb/dma_wr_rd_tb.py index 52655ce..02160e2 100644 --- a/cocotb/dma_wr_rd_tb.py +++ b/cocotb/dma_wr_rd_tb.py @@ -304,23 +304,29 @@ async def send_desc(self, channel, startAddr, length, isWrite): await self.c2h_desc_source_1.send(desc) async def send_data(self, channel, data): - assert len(data) <= self._bus_bytes if channel == 0: await self.c2h_write_source_0.send(data) else: await self.c2h_write_source_1.send(data) + + async def recv_data(self, channel): + if channel == 0 : + data = await self.c2h_read_sink_0.read() + else: + data = await self.c2h_read_sink_1.read() + data = bytes(''.join([chr(item) for item in data]), encoding='UTF-8') + return data async def run_single_write_once(self, channel, addr, data): length = len(data) self.log.info("Conduct DMA single write: addr %d, length %d, char %c", addr, length, data[0]) await self.send_desc(channel, addr, length, True) - await self.c2h_write_source_0.send(data) + await self.send_data(channel, data) async def run_single_read_once(self, channel, addr, length): self.log.info("Conduct DMA single read: addr %d, length %d", addr, length) await self.send_desc(channel, addr, length, False) - data = await self.c2h_read_sink_0.read() - data = bytes(''.join([chr(item) for item in data]), encoding='UTF-8') + data = await self.recv_data(channel) self.log.info("Read data from RootComplex successfully, recv length %d, req length %d", len(data), length) return data @@ -339,7 +345,7 @@ async def random_write_test(dut): mem = tb.rc.mem_pool.alloc_region(1024*1024) mem_base = mem.get_absolute_address(0) - dma_channel = 0 + dma_channel = 1 for _ in range(10): addr_offset = random.randint(0, 8192) length = random.randint(0, 8192) diff --git a/script.py b/script.py index 89077a8..365f2d7 100644 --- a/script.py +++ b/script.py @@ -1,7 +1,7 @@ for i in range(64): s = ['0'] * 64 - s[i] = 1 + s[63-i] = '1' if (i > 0): - s[i-1:0] = '?' * (i-1) + s[63-i+1:63] = '?' * i s = ''.join(s) - print("%64s" % s) \ No newline at end of file + print("%s" % s) \ No newline at end of file diff --git a/src/DmaC2HPipe.bsv b/src/DmaC2HPipe.bsv index d25bf06..8822b59 100644 --- a/src/DmaC2HPipe.bsv +++ b/src/DmaC2HPipe.bsv @@ -24,8 +24,8 @@ interface DmaC2HPipe; interface FifoOut#(DataStream) tlpDataFifoOut; interface FifoOut#(SideBandByteEn) tlpSideBandFifoOut; interface FifoIn#(StraddleStream) tlpDataFifoIn; - // TODO: Cfg Ifc - // interface Put#(DmaConfig) configuration; + // TODO: CSR Ifc + interface Put#(TlpSizeCfg) tlpSizeCfg; // interface Client#(DmaCsrValue, DmaCsrValue) statusReg; endinterface @@ -33,7 +33,9 @@ endinterface (* synthesize *) module mkDmaC2HPipe#(DmaPathNo pathIdx)(DmaC2HPipe); C2HReadCore readCore <- mkC2HReadCore(pathIdx); - C2HWriteCore writeCore <- mkC2HWriteCore; + C2HWriteCore writeCore <- mkC2HWriteCore(pathIdx); + + Reg#(Bool) isInitDoneReg <- mkReg(False); FIFOF#(DataStream) dataInFifo <- mkFIFOF; FIFOF#(DmaRequest) reqInFifo <- mkFIFOF; @@ -42,7 +44,7 @@ module mkDmaC2HPipe#(DmaPathNo pathIdx)(DmaC2HPipe); mkConnection(dataInFifo, writeCore.dataFifoIn); - rule reqDeMux; + rule reqDeMux if (isInitDoneReg); let req = reqInFifo.first; reqInFifo.deq; if (req.isWrite) begin @@ -81,8 +83,14 @@ module mkDmaC2HPipe#(DmaPathNo pathIdx)(DmaC2HPipe); interface tlpDataFifoOut = convertFifoToFifoOut(tlpOutFifo); interface tlpSideBandFifoOut = convertFifoToFifoOut(tlpSideBandFifo); interface tlpDataFifoIn = readCore.tlpFifoIn; - // TODO: Cfg Ifc - + // TODO: CSR Ifc + interface Put tlpSizeCfg; + method Action put(sizeCfg); + writeCore.maxPayloadSize.put(tuple2(sizeCfg.mps, sizeCfg.mpsWidth)); + readCore.maxReadReqSize.put(tuple2(sizeCfg.mrrs, sizeCfg.mrrsWidth)); + isInitDoneReg <= True; + endmethod + endinterface endmodule interface C2HReadCore; @@ -93,6 +101,8 @@ interface C2HReadCore; interface FifoIn#(StraddleStream) tlpFifoIn; interface FifoOut#(DataStream) tlpFifoOut; interface FifoOut#(SideBandByteEn) tlpSideBandFifoOut; + + interface Put#(Tuple2#(TlpPayloadSize, TlpPayloadSizeWidth)) maxReadReqSize; endinterface // Total Latency(Tlp Output): 1 + 2 + 1 + 1 = 5 @@ -103,9 +113,9 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); FIFOF#(DataStream) tlpOutFifo <- mkFIFOF; FIFOF#(SideBandByteEn) tlpByteEnFifo <- mkFIFOF; - FIFOF#(SlotToken) tagFifo <- mkSizedFIFOF(valueOf(TAdd#(1, STREAM_HEADER_REMOVE_LATENCY))); - FIFOF#(Bool) completedFifo <- mkSizedFIFOF(valueOf(TAdd#(1, STREAM_HEADER_REMOVE_LATENCY))); - FIFOF#(DmaMemAddr) expectTlpCntFifo <-mkSizedFIFOF(valueOf(SLOT_PER_PATH)); + FIFOF#(SlotToken) tagFifo <- mkSizedFIFOF(valueOf(TAdd#(1, STREAM_HEADER_REMOVE_LATENCY))); + FIFOF#(Bool) completedFifo <- mkSizedFIFOF(valueOf(TAdd#(1, STREAM_HEADER_REMOVE_LATENCY))); + FIFOF#(DmaRequest) reqInflightFifo <- mkSizedFIFOF(valueOf(SLOT_PER_PATH)); StreamPipe descRemove <- mkStreamHeaderRemove(fromInteger(valueOf(TDiv#(DES_RC_DESCRIPTOR_WIDTH, BYTE_WIDTH)))); StreamPipe dwRemove <- mkStreamRemoveFromDW; @@ -117,11 +127,11 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); RqDescriptorGenerator rqDescGenerator <- mkRqDescriptorGenerator(False); Reg#(Bool) hasReadOnce <- mkReg(False); - Reg#(DmaMemAddr) recvTlpCntReg <- mkReg(0); + // Reg#(DmaMemAddr) recvTlpCntReg <- mkReg(0); Reg#(DmaMemAddr) recvBytesReg <- mkReg(0); Vector#(SLOT_PER_PATH, Reg#(DmaMemAddr)) chunkBytesRegs <- replicateM(mkReg(0)); - mkConnection(chunkSplitor.chunkCntFifoOut, expectTlpCntFifo); + // mkConnection(chunkSplitor.chunkCntFifoOut, expectTlpCntFifo); mkConnection(reshapeStrad.streamFifoOut, descRemove.streamFifoIn); mkConnection(descRemove.streamFifoOut, dwRemove.streamFifoIn); @@ -164,7 +174,7 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); end stream.byteEn = stream.byteEn; reshapeStrad.streamFifoIn.enq(stream); - $display($time, "ns SIM INFO @ mkDmaC2HReadCore: recv new stream from straddle adapter, tag: %d, isCompleted:%b" ,tag, isCompleted, fshow(stream)); + // $display($time, "ns SIM INFO @ mkDmaC2HReadCore%d: recv new stream from straddle adapter, tag: %d, isCompleted:%b" , pathIdx, tag, isCompleted, fshow(stream)); if (stream.isFirst) begin tagFifo.enq(tag); completedFifo.enq(isCompleted); @@ -190,11 +200,11 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); cBuffer.append.enq(tuple2(tag, stream)); if (stream.isLast) begin cBuffer.complete.put(tag); - $display($time, "ns SIM INFO @ mkDmaC2HReadCore: a chunk is completed in cBuffer, tag: %d, recv bytes: %d", tag, chunkBytes); + $display($time, "ns SIM INFO @ mkDmaC2HReadCore%d: a chunk is completed in cBuffer, tag: %d, recv bytes: %d", pathIdx, tag, chunkBytes); chunkBytes = 0; end chunkBytesRegs[tag] <= chunkBytes; - $display("tag%d", tag, fshow(stream)); + // $display("tag%d", tag, fshow(stream)); endrule // Pipeline stage 4: there may be a bubble ibetween the first and last DataStream of cBUffer drain output @@ -213,33 +223,30 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); let byteInStream = convertByteEn2BytePtr(stream.byteEn); let recvBytesCnt = recvBytesReg + zeroExtend(byteInStream); reshapeRcb.streamFifoOut.deq; - let recvTlpCnt = recvTlpCntReg; - if (stream.isFirst) begin - if (recvTlpCnt > 0) begin - stream.isFirst = False; - end - recvTlpCnt = recvTlpCntReg + 1; - end + // let recvTlpCnt = recvTlpCntReg; + // if (stream.isFirst) begin + // if (recvTlpCnt > 0) begin + // stream.isFirst = False; + // end + // recvTlpCnt = recvTlpCntReg + 1; + // end if (stream.isLast) begin - if (expectTlpCntFifo.first == recvTlpCnt) begin - recvTlpCnt = 0; - expectTlpCntFifo.deq; - $display($time, "ns SIM INFO @ mkDmaC2HReadCore: a read request is done, total tlps counts : %5d, total recvd bytes: %d", expectTlpCntFifo.first, recvBytesCnt); + if (reqInflightFifo.first.length == recvBytesCnt) begin + // recvTlpCnt = 0; + reqInflightFifo.deq; + $display($time, "ns SIM INFO @ mkDmaC2HReadCore%d: a read request is done, total recvd bytes: %d", pathIdx, recvBytesCnt); recvBytesCnt = 0; end else begin stream.isLast = False; + $display($time, "ns SIM DEBUG @ mkDmaC2HReadCore%d, expect bytes %drecv bytes %d", pathIdx, reqInflightFifo.first.length, recvBytesCnt); end end - recvTlpCntReg <= recvTlpCnt; + // recvTlpCntReg <= recvTlpCnt; recvBytesReg <= recvBytesCnt; reshapeMrrs.streamFifoIn.enq(stream); endrule - // rule log; - // let stream = dwRemove.streamFifoOut.first; - // $display("dwRemove output stream", fshow(stream)); - // endrule // Pipeline stage 1: split to req to MRRS chunks rule reqSplit; let req = reqInFifo.first; @@ -251,6 +258,7 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); tag : 0 }; chunkSplitor.dmaRequestFifoIn.enq(exReq); + reqInflightFifo.enq(req); endrule // Pipeline stage 2: generate read descriptor @@ -262,10 +270,10 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); startAddr: req.startAddr, endAddr : req.startAddr + req.length - 1, length : req.length, - tag : zeroExtend(token) | (zeroExtend(pathIdx) << (valueOf(DES_NONEXTENDED_TAG_WIDTH)-1)) + tag : convertSlotTokenToTag(token, pathIdx) }; rqDescGenerator.exReqFifoIn.enq(exReq); - $display($time, "ns SIM INFO @ mkDmaC2HReadCore: tx a new read chunk, tag:%d, addr:%d, length:%d", exReq.tag, req.startAddr, req.length); + $display($time, "ns SIM INFO @ mkDmaC2HReadCore%d: tx a new read chunk, tag:%d, addr:%d, length:%d", pathIdx, exReq.tag, req.startAddr, req.length); endrule // Pipeline stage 3: generate Tlp to PCIe Adapter @@ -278,7 +286,7 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); stream.isLast = True; tlpOutFifo.enq(stream); tlpByteEnFifo.enq(sideBandByteEn); - $display($time, "ns SIM INFO @ mkDmaC2HReadCore: output new tlp, BE:%h/%h", tpl_1(sideBandByteEn), tpl_2(sideBandByteEn)); + // $display($time, "ns SIM INFO @ mkDmaC2HReadCore: output new tlp, BE:%h/%h", tpl_1(sideBandByteEn), tpl_2(sideBandByteEn)); endrule // User Logic Ifc @@ -288,6 +296,12 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); interface tlpFifoIn = convertFifoToFifoIn(tlpInFifo); interface tlpFifoOut = convertFifoToFifoOut(tlpOutFifo); interface tlpSideBandFifoOut = convertFifoToFifoOut(tlpByteEnFifo); + // Cfg Ifc + interface Put maxReadReqSize; + method Action put(Tuple2#(TlpPayloadSize, TlpPayloadSizeWidth) mrrsCfg); + chunkSplitor.maxReadReqSize.put(mrrsCfg); + endmethod + endinterface endmodule // Core path of a single stream, from (DataStream, DmaRequest) ==> (DataStream, SideBandByteEn) @@ -299,16 +313,18 @@ interface C2HWriteCore; // PCIe IP Ifc interface FifoOut#(DataStream) tlpFifoOut; interface FifoOut#(SideBandByteEn) tlpSideBandFifoOut; + + interface Put#(Tuple2#(TlpPayloadSize, TlpPayloadSizeWidth)) maxPayloadSize; endinterface // Total Latency: 1 + 3 + 2 + 1 = 7 -module mkC2HWriteCore(C2HWriteCore); +module mkC2HWriteCore#(DmaPathNo pathIdx)(C2HWriteCore); FIFOF#(DataStream) dataInFifo <- mkFIFOF; FIFOF#(DmaRequest) wrReqInFifo <- mkFIFOF; FIFOF#(DataStream) dataOutFifo <- mkFIFOF; FIFOF#(SideBandByteEn) byteEnOutFifo <- mkFIFOF; - Reg#(Tag) tagReg <- mkReg(0); + Reg#(SlotToken) tagReg <- mkReg(0); ChunkSplit chunkSplit <- mkChunkSplit(DMA_TX); StreamShiftAlignToDw streamAlign <- mkStreamShiftAlignToDw(fromInteger(valueOf(TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH)))); @@ -346,23 +362,23 @@ module mkC2HWriteCore(C2HWriteCore); startAddr: chunkReq.startAddr, endAddr : chunkReq.startAddr + chunkReq.length - 1, length : chunkReq.length, - tag : tagReg + tag : convertSlotTokenToTag(tagReg, pathIdx) }; tagReg <= tagReg + 1; let startAddrOffset = byteModDWord(exReq.startAddr); streamAlign.setAlignMode(unpack(startAddrOffset)); rqDescGenerator.exReqFifoIn.enq(exReq); - $display($time, "ns SIM INFO @ mkDmaC2HWriteCore: tx a new write chunk, tag:%d, addr:%d, length:%d", tagReg, chunkReq.startAddr, chunkReq.length); + $display($time, "ns SIM INFO @ mkDmaC2HWriteCore%d: tx a new write chunk, tag:%d, addr:%d, length:%d", pathIdx, convertSlotTokenToTag(tagReg, pathIdx), chunkReq.startAddr, chunkReq.length); end if (chunkSplit.chunkDataFifoOut.notEmpty) begin let chunkDataStream = chunkSplit.chunkDataFifoOut.first; chunkSplit.chunkDataFifoOut.deq; streamAlign.dataFifoIn.enq(chunkDataStream); if (chunkDataStream.isLast && chunkDataStream.isFirst) begin - $display($time, "ns SIM INFO @ mkDmaC2HWriteCore: tx write chunk end , tag:%d", tagReg); + $display($time, "ns SIM INFO @ mkDmaC2HWriteCore%d: tx write chunk end , tag:%d", pathIdx, convertSlotTokenToTag(tagReg, pathIdx)); end else if (chunkDataStream.isLast) begin - $display($time, "ns SIM INFO @ mkDmaC2HWriteCore: tx write chunk end , tag:%d", tagReg - 1); + $display($time, "ns SIM INFO @ mkDmaC2HWriteCore%d: tx write chunk end , tag:%d", pathIdx, convertSlotTokenToTag(tagReg-1, pathIdx)); end end endrule @@ -379,14 +395,22 @@ module mkC2HWriteCore(C2HWriteCore); stream.data = stream.data | descStream.data; stream.byteEn = stream.byteEn | descStream.byteEn; byteEnOutFifo.enq(sideBandByteEn); - $display($time, "ns SIM INFO @ mkDmaC2HWriteCore: tx a new tlp, BE:%b/%b", tpl_1(sideBandByteEn), tpl_2(sideBandByteEn)); + // $display($time, "ns SIM INFO @ mkDmaC2HWriteCore%d: tx a new tlp, BE:%b/%b", pathIdx, tpl_1(sideBandByteEn), tpl_2(sideBandByteEn)); end dataOutFifo.enq(stream); // $display($time, "ns SIM INFO @ mkDmaC2HWriteCore: tlp stream", fshow(stream)); endrule + // User Logic Ifc interface dataFifoIn = convertFifoToFifoIn(dataInFifo); interface wrReqFifoIn = convertFifoToFifoIn(wrReqInFifo); + // PCIe Adapter Ifc interface tlpFifoOut = convertFifoToFifoOut(dataOutFifo); interface tlpSideBandFifoOut = convertFifoToFifoOut(byteEnOutFifo); + // Cfg Ifc + interface Put maxPayloadSize; + method Action put(Tuple2#(TlpPayloadSize, TlpPayloadSizeWidth) mpsCfg); + chunkSplit.maxPayloadSize.put(mpsCfg); + endmethod + endinterface endmodule diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index 9162262..0d70ea2 100755 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -26,6 +26,18 @@ typedef Bit#(1) ByteParity; typedef 4096 BUS_BOUNDARY; typedef TAdd#(1, TLog#(BUS_BOUNDARY)) BUS_BOUNDARY_WIDTH; +typedef 128 DEFAULT_TLP_SIZE; +typedef TLog#(DEFAULT_TLP_SIZE) DEFAULT_TLP_SIZE_WIDTH; +typedef Bit#(BUS_BOUNDARY_WIDTH) TlpPayloadSize; +typedef Bit#(TLog#(BUS_BOUNDARY_WIDTH)) TlpPayloadSizeWidth; + +typedef struct { + TlpPayloadSize mps; + TlpPayloadSizeWidth mpsWidth; + TlpPayloadSize mrrs; + TlpPayloadSizeWidth mrrsWidth; +} TlpSizeCfg deriving(Bits, Eq, Bounded, FShow); + typedef 2 CONCAT_STREAM_NUM; typedef TDiv#(DATA_WIDTH, BYTE_WIDTH) BYTE_EN_WIDTH; diff --git a/src/DmaUtils.bsv b/src/DmaUtils.bsv index 66d1774..8916e64 100644 --- a/src/DmaUtils.bsv +++ b/src/DmaUtils.bsv @@ -10,26 +10,21 @@ import PrimUtils::*; import StreamUtils::*; import PcieDescriptorTypes::*; - -typedef Bit#(BUS_BOUNDARY_WIDTH) PcieTlpMaxMaxPayloadSize; -typedef Bit#(TLog#(BUS_BOUNDARY_WIDTH)) PcieTlpSizeWidth; - -typedef 128 DEFAULT_TLP_SIZE; -typedef TLog#(DEFAULT_TLP_SIZE) DEFAULT_TLP_SIZE_WIDTH; - -typedef 3 PCIE_TLP_SIZE_SETTING_WIDTH; -typedef Bit#(PCIE_TLP_SIZE_SETTING_WIDTH) PcieTlpSizeSetting; - typedef TAdd#(1, TLog#(TDiv#(BUS_BOUNDARY, BYTE_EN_WIDTH))) DATA_BEATS_WIDTH; typedef Bit#(DATA_BEATS_WIDTH) DataBeats; +function Tag convertSlotTokenToTag(SlotToken token, DmaPathNo pathIdx); + Tag tag = zeroExtend(token) | (zeroExtend(pathIdx) << (valueOf(DES_NONEXTENDED_TAG_WIDTH)-1)); + return tag; +endfunction + typedef 4 CHUNK_COMPUTE_LATENCY; // Split the input DmaRequest Info MRRS aligned chunkReqs interface ChunkCompute; interface FifoIn#(DmaExtendRequest) dmaRequestFifoIn; interface FifoOut#(DmaRequest) chunkRequestFifoOut; - interface FifoOut#(DmaMemAddr) chunkCntFifoOut; - interface Put#(PcieTlpSizeSetting) setTlpMaxSize; + // interface FifoOut#(DmaMemAddr) chunkCntFifoOut; + interface Put#(Tuple2#(TlpPayloadSize, TlpPayloadSizeWidth)) maxReadReqSize; endinterface module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); @@ -37,14 +32,14 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); FIFOF#(DmaExtendRequest) inputFifo <- mkFIFOF; FIFOF#(DmaRequest) outputFifo <- mkFIFOF; FIFOF#(Tuple2#(DmaExtendRequest, DmaMemAddr)) pipeFifo <- mkFIFOF; - FIFOF#(DmaMemAddr) tlpCntFifo <- mkSizedFIFOF(valueOf(CHUNK_COMPUTE_LATENCY)); + // FIFOF#(DmaMemAddr) tlpCntFifo <- mkSizedFIFOF(valueOf(CHUNK_COMPUTE_LATENCY)); Reg#(DmaMemAddr) newChunkPtrReg <- mkReg(0); Reg#(DmaMemAddr) totalLenRemainReg <- mkReg(0); Reg#(Bool) isSplittingReg <- mkReg(False); - Reg#(DmaMemAddr) tlpMaxSizeReg <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE))); - Reg#(PcieTlpSizeWidth) tlpMaxSizeWidthReg <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH))); + Reg#(DmaMemAddr) tlpMaxSizeReg <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE))); + Reg#(TlpPayloadSizeWidth) tlpMaxSizeWidthReg <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH))); function Bool hasBoundary(DmaExtendRequest request); let highIdx = request.endAddr >> tlpMaxSizeWidthReg; @@ -60,7 +55,7 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); function DmaMemAddr getOffset(DmaExtendRequest request); // MPS - startAddr % MPS, MPS means MRRS when the module is set to RX mode - DmaMemAddr remainderOfMps = zeroExtend(PcieTlpMaxMaxPayloadSize'(request.startAddr[tlpMaxSizeWidthReg-1:0])); + DmaMemAddr remainderOfMps = zeroExtend(TlpPayloadSize'(request.startAddr[tlpMaxSizeWidthReg-1:0])); DmaMemAddr offsetOfMps = tlpMaxSizeReg - remainderOfMps; return offsetOfMps; endfunction @@ -72,8 +67,8 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); let firstLen = (request.length > tlpMaxSizeReg) ? tlpMaxSizeReg : request.length; let firstChunkLen = hasBoundary(request) ? offset : firstLen; pipeFifo.enq(tuple2(request, firstChunkLen)); - let tlpCnt = getTlpCnts(request); - tlpCntFifo.enq(tlpCnt); + // let tlpCnt = getTlpCnts(request); + // tlpCntFifo.enq(tlpCnt); endrule rule execChunkCompute; @@ -119,16 +114,12 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); interface dmaRequestFifoIn = convertFifoToFifoIn(inputFifo); interface chunkRequestFifoOut = convertFifoToFifoOut(outputFifo); - interface chunkCntFifoOut = convertFifoToFifoOut(tlpCntFifo); - - interface Put setTlpMaxSize; - method Action put (PcieTlpSizeSetting tlpSizeSetting); - let setting = tlpSizeSetting; - setting[valueOf(PCIE_TLP_SIZE_SETTING_WIDTH)-1] = (direction == DMA_TX) ? 0 : setting[valueOf(PCIE_TLP_SIZE_SETTING_WIDTH)-1]; - DmaMemAddr defaultTlpMaxSize = fromInteger(valueOf(DEFAULT_TLP_SIZE)); - tlpMaxSizeReg <= DmaMemAddr'(defaultTlpMaxSize << setting); - PcieTlpSizeWidth defaultTlpMaxSizeWidth = fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH)); - tlpMaxSizeWidthReg <= PcieTlpSizeWidth'(defaultTlpMaxSizeWidth + zeroExtend(setting)); + // interface chunkCntFifoOut = convertFifoToFifoOut(tlpCntFifo); + + interface Put maxReadReqSize; + method Action put (Tuple2#(TlpPayloadSize, TlpPayloadSizeWidth) mrrsCfg); + tlpMaxSizeReg <= zeroExtend(tpl_1(mrrsCfg)); + tlpMaxSizeWidthReg <= zeroExtend(tpl_2(mrrsCfg)); endmethod endinterface @@ -144,7 +135,7 @@ interface ChunkSplit; interface FifoIn#(DmaExtendRequest) reqFifoIn; interface FifoOut#(DataStream) chunkDataFifoOut; interface FifoOut#(DmaRequest) chunkReqFifoOut; - interface Put#(PcieTlpSizeSetting) setTlpMaxSize; + interface Put#(Tuple2#(TlpPayloadSize, TlpPayloadSizeWidth)) maxPayloadSize; endinterface module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); @@ -158,9 +149,9 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); StreamSplit firstChunkSplitor <- mkStreamSplit; - Reg#(DmaMemAddr) tlpMaxSizeReg <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE))); - Reg#(PcieTlpSizeWidth) tlpMaxSizeWidthReg <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH))); - Reg#(DataBeats) tlpMaxBeatsReg <- mkReg(fromInteger(valueOf(TDiv#(DEFAULT_TLP_SIZE, BYTE_EN_WIDTH)))); + Reg#(DmaMemAddr) tlpMaxSizeReg <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE))); + Reg#(TlpPayloadSizeWidth) tlpMaxSizeWidthReg <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH))); + Reg#(DataBeats) tlpMaxBeatsReg <- mkReg(fromInteger(valueOf(TDiv#(DEFAULT_TLP_SIZE, BYTE_EN_WIDTH)))); Reg#(Bool) isInProcReg <- mkReg(False); Reg#(DataBeats) beatsReg <- mkReg(0); @@ -177,7 +168,7 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); function DmaMemAddr getOffset(DmaExtendRequest request); // MPS - startAddr % MPS, MPS means MRRS when the module is set to RX mode - DmaMemAddr remainderOfMps = zeroExtend(PcieTlpMaxMaxPayloadSize'(request.startAddr[tlpMaxSizeWidthReg-1:0])); + DmaMemAddr remainderOfMps = zeroExtend(TlpPayloadSize'(request.startAddr[tlpMaxSizeWidthReg-1:0])); DmaMemAddr offsetOfMps = tlpMaxSizeReg - remainderOfMps; return offsetOfMps; endfunction @@ -193,7 +184,7 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); let offset = getOffset(request); let firstLen = (request.length > tlpMaxSizeReg) ? tlpMaxSizeReg : request.length; let firstChunkLen = hasBoundary(request) ? offset : firstLen; - // $display($time, "ns SIM INFO @ mkChunkSplit: get first chunkLen, offset %d, remainder %d", offset, PcieTlpMaxMaxPayloadSize'(request.startAddr[tlpMaxSizeWidthReg-1:0])); + // $display($time, "ns SIM INFO @ mkChunkSplit: get first chunkLen, offset %d, remainder %d", offset, TlpPayloadSize'(request.startAddr[tlpMaxSizeWidthReg-1:0])); firstChunkSplitor.splitLocationFifoIn.enq(unpack(truncate(firstChunkLen))); let firstReq = DmaRequest { startAddr : request.startAddr, @@ -282,16 +273,12 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); interface chunkDataFifoOut = convertFifoToFifoOut(chunkOutFifo); interface chunkReqFifoOut = convertFifoToFifoOut(reqOutFifo); - interface Put setTlpMaxSize; - method Action put (PcieTlpSizeSetting tlpSizeSetting); - let setting = tlpSizeSetting; - setting[valueOf(PCIE_TLP_SIZE_SETTING_WIDTH)-1] = (direction == DMA_TX) ? 0 : setting[valueOf(PCIE_TLP_SIZE_SETTING_WIDTH)-1]; - DmaMemAddr defaultTlpMaxSize = fromInteger(valueOf(DEFAULT_TLP_SIZE)); - tlpMaxSizeReg <= DmaMemAddr'(defaultTlpMaxSize << setting); - PcieTlpSizeWidth defaultTlpMaxSizeWidth = fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH)); - tlpMaxSizeWidthReg <= PcieTlpSizeWidth'(defaultTlpMaxSizeWidth + zeroExtend(setting)); + interface Put maxPayloadSize; + method Action put (Tuple2#(TlpPayloadSize, TlpPayloadSizeWidth) mpsCfg); + tlpMaxSizeReg <= zeroExtend(tpl_1(mpsCfg)); + tlpMaxSizeWidthReg <= tpl_2(mpsCfg); // BeatsNum = (MaxPayloadSize + DescriptorSize) / BytesPerBeat - tlpMaxBeatsReg <= truncate(DmaMemAddr'(defaultTlpMaxSize << setting) >> valueOf(BYTE_EN_WIDTH)); + tlpMaxBeatsReg <= truncate(tpl_1(mpsCfg) >> valueOf(TLog#(BYTE_EN_WIDTH))); endmethod endinterface endmodule diff --git a/src/DmaWrapper.bsv b/src/DmaWrapper.bsv index bd3bbee..aa23fb5 100755 --- a/src/DmaWrapper.bsv +++ b/src/DmaWrapper.bsv @@ -1,6 +1,8 @@ import FIFOF::*; import Vector::*; import Connectable :: *; +import DReg::*; +import GetPut::*; import SemiFifo::*; import BusConversion::*; @@ -34,6 +36,12 @@ endinterface (* synthesize *) module mkDmaController(DmaController); Vector#(DMA_PATH_NUM, DmaC2HPipe) c2hPipes = newVector; + + Wire#(Bool) linkUpWire <- mkWire; + + Reg#(Bool) linkUpReg <- mkReg(False); + Reg#(Bool) cfgFlagReg <- mkDReg(False); + for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1) begin c2hPipes[pathIdx] <- mkDmaC2HPipe(pathIdx); end @@ -60,6 +68,21 @@ module mkDmaController(DmaController); mkConnection(cmplAdapter.dmaDataFifoOut, h2cPipe.tlpDataFifoIn); mkConnection(h2cPipe.tlpDataFifoOut, cmplAdapter.dmaDataFifoIn); + rule detectLink if (linkUpWire && !linkUpReg); + configurator.initCfg; + cfgFlagReg <= True; + linkUpReg <= True; + $display($time, "ns SIM INFO @ BLUE-DMAC: PCIe link is up!"); + endrule + + rule setCfg if (cfgFlagReg); + let tlpSizeCfg <- configurator.tlpSizeCfg.get; + for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1) begin + c2hPipes[pathIdx].tlpSizeCfg.put(tlpSizeCfg); + end + $display($time, "ns SIM INFO @ BLUE-DMAC: Get PCIe configurations, mps:%d, mrrs:%d", tlpSizeCfg.mps, tlpSizeCfg.mrrs); + endrule + // User Logic Ifc interface c2hDataFifoIn = c2hDataInIfc; interface c2hDataFifoOut = c2hDataOutIfc; @@ -76,8 +99,7 @@ module mkDmaController(DmaController); interface completerComplete = cmplAdapter.rawCompleterComplete; interface configuration = configurator.rawConfiguration; method Action linkUp(Bool isLinkUp); - // let cfgs = configurator.get; - // c2hpipes[pathIdx].setCfg(cfgs); + linkUpWire <= isLinkUp; endmethod endinterface endmodule diff --git a/src/PcieAdapter.bsv b/src/PcieAdapter.bsv index b90fc75..6f1edca 100644 --- a/src/PcieAdapter.bsv +++ b/src/PcieAdapter.bsv @@ -194,6 +194,11 @@ module mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); return !unpack(sdStream.byteEn[valueOf(STRADDLE_THRESH_BYTE_WIDTH)]); endfunction + function Bool isValidShiftStream(DataStream shiftStream); + Bool valid = !unpack(shiftStream.byteEn[0]) && unpack(shiftStream.byteEn[valueOf(STRADDLE_THRESH_BYTE_WIDTH)]); + return valid; + endfunction + function PcieRequesterRequestSideBandFrame genRQSideBand( PcieTlpCtlIsEopCommon isEop, PcieTlpCtlIsSopCommon isSop, SideBandByteEn byteEnA, SideBandByteEn byteEnB ); @@ -241,8 +246,10 @@ module mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); isSendingA = True; if (shiftB.streamFifoOut.notEmpty && sendingStream.isLast && hasStraddleSpace(sendingStream)) begin let {oriStreamB, shiftStreamB} = shiftB.streamFifoOut.first; - pendingStream = shiftStreamB; shiftB.streamFifoOut.deq; + if (isValidShiftStream(shiftStreamB)) begin + pendingStream = shiftStreamB; + end end end // In streamB sending epoch, waiting streamB until isLast @@ -253,8 +260,10 @@ module mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); isSendingA = False; if (shiftA.streamFifoOut.notEmpty && sendingStream.isLast && hasStraddleSpace(sendingStream)) begin let {oriStreamA, shiftStreamA} = shiftA.streamFifoOut.first; - pendingStream = shiftStreamA; shiftA.streamFifoOut.deq; + if (isValidShiftStream(shiftStreamA)) begin + pendingStream = shiftStreamA; + end end end // In Idle, choose one stream to enter new epoch @@ -268,8 +277,10 @@ module mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); isSendingA = True; if (sendingStream.isLast && hasStraddleSpace(sendingStream)) begin let {oriStreamB, shiftStreamB} = shiftB.streamFifoOut.first; - pendingStream = shiftStreamB; shiftB.streamFifoOut.deq; + if (isValidShiftStream(shiftStreamB)) begin + pendingStream = shiftStreamB; + end end end else begin @@ -279,8 +290,10 @@ module mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); isSendingA = False; if (sendingStream.isLast && hasStraddleSpace(sendingStream)) begin let {oriStreamA, shiftStreamA} = shiftA.streamFifoOut.first; - pendingStream = shiftStreamA; shiftA.streamFifoOut.deq; + if (isValidShiftStream(shiftStreamA)) begin + pendingStream = shiftStreamA; + end end end end @@ -305,33 +318,54 @@ module mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); if (!isByteEnZero(sendingStream.byteEn)) begin // Change the registers and generate PcieAxiStream - let sideBandByteEnA = tuple2(0, 0); - let sideBandByteEnB = tuple2(0, 0); + let sideBandByteEn0 = tuple2(0, 0); + let sideBandByteEn1 = tuple2(0, 0); if (isSendingA) begin isInStreamAReg <= !sendingStream.isLast; isInShiftAReg <= sendingStream.isLast ? False : isInShiftAReg; - if (sendingStream.isFirst) begin - sideBandByteEnA = byteEnAFifo.first; + // Only A sop + if (sendingStream.isFirst && !pendingStream.isFirst) begin + sideBandByteEn0 = byteEnAFifo.first; + byteEnAFifo.deq; + end + // A sop and B sop + else if (sendingStream.isFirst && hasStraddleSpace(sendingStream) && pendingStream.isFirst) begin + isInStreamBReg <= !pendingStream.isLast; + isInShiftBReg <= !pendingStream.isLast; + sideBandByteEn0 = byteEnAFifo.first; byteEnAFifo.deq; + sideBandByteEn1 = byteEnBFifo.first; + byteEnBFifo.deq; end - if (sendingStream.isLast && hasStraddleSpace(sendingStream) && !isByteEnZero(pendingStream.byteEn)) begin + // Only B sop + else if (sendingStream.isLast && hasStraddleSpace(sendingStream) && pendingStream.isFirst) begin isInStreamBReg <= !pendingStream.isLast; isInShiftBReg <= !pendingStream.isLast; - sideBandByteEnB = byteEnBFifo.first; + sideBandByteEn0 = byteEnBFifo.first; byteEnBFifo.deq; end end else begin isInStreamBReg <= !sendingStream.isLast; isInShiftBReg <= sendingStream.isLast ? False : isInShiftBReg; - if (sendingStream.isFirst) begin - sideBandByteEnB = byteEnBFifo.first; + // Only B sop + if (sendingStream.isFirst && !pendingStream.isFirst) begin + sideBandByteEn0 = byteEnBFifo.first; byteEnBFifo.deq; end - if (sendingStream.isLast && hasStraddleSpace(sendingStream) && !isByteEnZero(pendingStream.byteEn)) begin + // B sop and A sop + else if (sendingStream.isFirst && hasStraddleSpace(sendingStream) && pendingStream.isFirst) begin isInStreamAReg <= !pendingStream.isLast; isInShiftAReg <= !pendingStream.isLast; - sideBandByteEnA = byteEnAFifo.first; + sideBandByteEn0 = byteEnBFifo.first; + byteEnBFifo.deq; + sideBandByteEn1 = byteEnAFifo.first; + byteEnAFifo.deq; + end + else if (sendingStream.isLast && hasStraddleSpace(sendingStream) && pendingStream.isFirst) begin + isInStreamAReg <= !pendingStream.isLast; + isInShiftAReg <= !pendingStream.isLast; + sideBandByteEn0 = byteEnAFifo.first; byteEnAFifo.deq; end end @@ -350,21 +384,25 @@ module mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); isSop.isSopPtrs[0] = fromInteger(valueOf(ISSOP_LANE_0)); isSop.isSopPtrs[1] = fromInteger(valueOf(ISSOP_LANE_32)); end - else if (sendingStream.isFirst || pendingStream.isFirst) begin + else if (sendingStream.isFirst) begin isSop.isSop = fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT)); isSop.isSopPtrs[0] = fromInteger(valueOf(ISSOP_LANE_0)); end - if (pendingStream.isLast && !isByteEnZero(pendingStream.byteEn)) begin + else if (pendingStream.isFirst) begin + isSop.isSop = fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT)); + isSop.isSopPtrs[0] = fromInteger(valueOf(ISSOP_LANE_32)); + end + if (pendingStream.isLast && isValidShiftStream(pendingStream)) begin isEop.isEop = fromInteger(valueOf(DOUBLE_TLP_IN_THIS_BEAT)); isEop.isEopPtrs[0] = truncate(convertByteEn2DwordPtr(sendingStream.byteEn)); - isEop.isEopPtrs[1] = fromInteger(valueOf(STRADDLE_THRESH_DWORD_WIDTH)) + truncate(convertByteEn2DwordPtr(pendingStream.byteEn)); + isEop.isEopPtrs[1] = truncate(convertByteEn2DwordPtr(pendingStream.byteEn)); end else if (sendingStream.isLast) begin isEop.isEop = fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT)); isEop.isEopPtrs[0] = truncate(convertByteEn2DwordPtr(sendingStream.byteEn)); end - let sideBand = genRQSideBand(isEop, isSop, sideBandByteEnA, sideBandByteEnB); + let sideBand = genRQSideBand(isEop, isSop, sideBandByteEn0, sideBandByteEn1); let axiStream = ReqReqAxiStream { tData : sendingStream.data | pendingStream.data, tKeep : -1, @@ -372,9 +410,13 @@ module mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); tUser : pack(sideBand) }; axiStreamOutFifo.enq(axiStream); - // $display($time, "ns SIM INFO @ mkDataStreamToAxis: tx a AXIS frame, isSop:%h, isEop:%d, isEopPtr:%d, tData:%h", isSop.isSop, isEop.isEop, isEop.isEopPtrs[0], axiStream.tData); - if (isEop.isEop == 1 && isEop.isEopPtrs[0] == 0) begin - $display($time, "ns SIM Warning @ mkDataStreamToAxis: stream byteEn %b", sendingStream.byteEn); + $display($time, "ns SIM INFO @ mkDataStreamToAxis: tx a AXIS frame, isSop:%d, isSopPtr:%d/%d, isEop:%d, isEopPtr:%d/%d, BE0:%b/%b, BE1:%b/%b, tData:%h", + isSop.isSop, isSop.isSopPtrs[0], isSop.isSopPtrs[1], isEop.isEop, isEop.isEopPtrs[0], isEop.isEopPtrs[1], tpl_1(sideBandByteEn0), tpl_2(sideBandByteEn0), tpl_1(sideBandByteEn1), tpl_2(sideBandByteEn1), axiStream.tData); + if (isEop.isEop >= fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT)) && isEop.isEopPtrs[0] == 0) begin + $display($time, "ns SIM Warning @ mkDataStreamToAxis: sendingstream byteEn %b", sendingStream.byteEn); + end + else if (isEop.isEop == fromInteger(valueOf(DOUBLE_TLP_IN_THIS_BEAT)) && isEop.isEopPtrs[1] == 0) begin + $display($time, "ns SIM Warning @ mkDataStreamToAxis: pendingstream byteEn %b", pendingStream.byteEn); end end endrule @@ -526,7 +568,7 @@ module mkConvertStraddleAxisToDataStream(ConvertStraddleAxisToDataStream); sdStream.isFirst[0] = True; sdStream.isLast[0] = unpack(isEop.isEop[1]); sdStream.tag[0] = truncate(desc.tag); - sdStream.isCompleted[0] = isCompleted[pathIdx]; + sdStream.isCompleted[0] = desc.isRequestCompleted; outFifos[pathIdx].enq(sdStream); tagReg[pathIdx] <= sdStream.tag[0]; isInTlpRegs[pathIdx] <= !sdStream.isLast[0]; diff --git a/src/PcieConfigurator.bsv b/src/PcieConfigurator.bsv index f68ac2c..a045033 100644 --- a/src/PcieConfigurator.bsv +++ b/src/PcieConfigurator.bsv @@ -1,18 +1,37 @@ - +import GetPut::*; +import DReg::*; import PcieTypes::*; import PcieAxiStreamTypes::*; +import DmaTypes::*; typedef 256 PCIE_CFG_VF_FLR_INPROC_EXTEND_WIDTH; interface PcieConfigurator; interface RawPcieConfiguration rawConfiguration; // TODO: translate raw Ifcs to bluespec style Get Ifcs - method PcieCfgLtssmState getPcieLtssmState(); + interface Get#(TlpSizeCfg) tlpSizeCfg; + method Action initCfg; endinterface module mkPcieConfigurator(PcieConfigurator); // TODO: the powerStateChangeAck must waitng for completing Done Reg#(Bool) powerStateChangeIntrReg <- mkReg(False); + Reg#(Bool) isInitDoneReg <- mkDReg(False); + + // status wires + Wire#(PcieCfgMaxPayloadSize) mpsSettingWire <- mkWire; + Wire#(PCieCfgMaxReadReqSize) mrrsSettingWire <- mkWire; + Wire#(PCieCfgCurrentSpeed) speedSettingWire <- mkWire; + Wire#(PcieCfgNegotiatedWidth) linkWidthSettingWire <- mkWire; + + // Cfg Regs + Reg#(TlpSizeCfg) tlpSizeCfgReg <- mkReg(TlpSizeCfg { + mps : fromInteger(valueOf(DEFAULT_TLP_SIZE)), + mpsWidth : fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH)), + mrrs : fromInteger(valueOf(DEFAULT_TLP_SIZE)), + mrrsWidth : fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH)) + }); + // Here has a 2-stage pipeline for FLR, according to the Xilinx PCIe Example Design // Reg0 means stage0, and Reg1 means stage1 @@ -22,7 +41,7 @@ module mkPcieConfigurator(PcieConfigurator); Reg#(PcieCfgVFFlrFuncNum) cfgVFFlrFuncNumReg1 <- mkReg(0); Reg#(Bool) cfgVFFlrDoneReg1 <- mkReg(False); Reg#(Bit#(PCIE_CFG_VF_FLR_INPROC_EXTEND_WIDTH)) cfgVfFlrInprocReg0 <- mkReg(0); - + rule functionLevelRst; cfgVFFlrFuncNumReg <= cfgVFFlrFuncNumReg + 1; cfgFlrDoneReg1 <= cfgFlrDoneReg0; @@ -30,6 +49,28 @@ module mkPcieConfigurator(PcieConfigurator); cfgVFFlrFuncNumReg1 <= cfgVFFlrFuncNumReg; endrule + method Action initCfg; + TlpPayloadSize defaultTlpMaxSize = fromInteger(valueOf(DEFAULT_TLP_SIZE)); + TlpPayloadSizeWidth defaultTlpMaxSizeWidth = fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH)); + let mps = defaultTlpMaxSize << mpsSettingWire; + let mpsWidth = defaultTlpMaxSizeWidth + zeroExtend(mpsSettingWire); + let mrrs = defaultTlpMaxSize << mrrsSettingWire; + let mrrsWidth = defaultTlpMaxSizeWidth + zeroExtend(mrrsSettingWire); + tlpSizeCfgReg <= TlpSizeCfg { + mps : mps, + mpsWidth : mpsWidth, + mrrs : mrrs, + mrrsWidth : mrrsWidth + }; + isInitDoneReg <= True; + endmethod + + interface Get tlpSizeCfg; + method ActionValue#(TlpSizeCfg) get(); + return tlpSizeCfgReg; + endmethod + endinterface + interface RawPcieConfiguration rawConfiguration; // not use mgmt @@ -352,7 +393,10 @@ module mkPcieConfigurator(PcieConfigurator); PcieCfgLtssmState ltssmState, PcieCfgRcbStatus rcbStatus, PcieCfgDpaSubstageChange dpaSubstageChange, - PcieCfgObffEn obffEnable); + PcieCfgObffEn obffEnable + ); + mpsSettingWire <= maxPayloadSize; + mrrsSettingWire <= maxReadReqSize; endmethod endinterface @@ -365,8 +409,4 @@ module mkPcieConfigurator(PcieConfigurator); endinterface - method PcieCfgLtssmState getPcieLtssmState(); - return 0; - endmethod - endmodule diff --git a/src/PrimUtils.bsv b/src/PrimUtils.bsv index e21fd18..5c35e5e 100644 --- a/src/PrimUtils.bsv +++ b/src/PrimUtils.bsv @@ -24,72 +24,72 @@ endfunction function DataBytePtr convertByteEn2BytePtr (ByteEn byteEn); DataBytePtr ptr = 0; - case(byteEn) - 'h0000000000000001: ptr = 1; - 'h0000000000000003: ptr = 2; - 'h0000000000000007: ptr = 3; - 'h000000000000000F: ptr = 4; - 'h000000000000001F: ptr = 5; - 'h000000000000003F: ptr = 6; - 'h000000000000007F: ptr = 7; - 'h00000000000000FF: ptr = 8; - 'h00000000000001FF: ptr = 9; - 'h00000000000003FF: ptr = 10; - 'h00000000000007FF: ptr = 11; - 'h0000000000000FFF: ptr = 12; - 'h0000000000001FFF: ptr = 13; - 'h0000000000003FFF: ptr = 14; - 'h0000000000007FFF: ptr = 15; - 'h000000000000FFFF: ptr = 16; - 'h000000000001FFFF: ptr = 17; - 'h000000000003FFFF: ptr = 18; - 'h000000000007FFFF: ptr = 19; - 'h00000000000FFFFF: ptr = 20; - 'h00000000001FFFFF: ptr = 21; - 'h00000000003FFFFF: ptr = 22; - 'h00000000007FFFFF: ptr = 23; - 'h0000000000FFFFFF: ptr = 24; - 'h0000000001FFFFFF: ptr = 25; - 'h0000000003FFFFFF: ptr = 26; - 'h0000000007FFFFFF: ptr = 27; - 'h000000000FFFFFFF: ptr = 28; - 'h000000001FFFFFFF: ptr = 29; - 'h000000003FFFFFFF: ptr = 30; - 'h000000007FFFFFFF: ptr = 31; - 'h00000000FFFFFFFF: ptr = 32; - 'h00000001FFFFFFFF: ptr = 33; - 'h00000003FFFFFFFF: ptr = 34; - 'h00000007FFFFFFFF: ptr = 35; - 'h0000000FFFFFFFFF: ptr = 36; - 'h0000001FFFFFFFFF: ptr = 37; - 'h0000003FFFFFFFFF: ptr = 38; - 'h0000007FFFFFFFFF: ptr = 39; - 'h000000FFFFFFFFFF: ptr = 40; - 'h000001FFFFFFFFFF: ptr = 41; - 'h000003FFFFFFFFFF: ptr = 42; - 'h000007FFFFFFFFFF: ptr = 43; - 'h00000FFFFFFFFFFF: ptr = 44; - 'h00001FFFFFFFFFFF: ptr = 45; - 'h00003FFFFFFFFFFF: ptr = 46; - 'h00007FFFFFFFFFFF: ptr = 47; - 'h0000FFFFFFFFFFFF: ptr = 48; - 'h0001FFFFFFFFFFFF: ptr = 49; - 'h0003FFFFFFFFFFFF: ptr = 50; - 'h0007FFFFFFFFFFFF: ptr = 51; - 'h000FFFFFFFFFFFFF: ptr = 52; - 'h001FFFFFFFFFFFFF: ptr = 53; - 'h003FFFFFFFFFFFFF: ptr = 54; - 'h007FFFFFFFFFFFFF: ptr = 55; - 'h00FFFFFFFFFFFFFF: ptr = 56; - 'h01FFFFFFFFFFFFFF: ptr = 57; - 'h03FFFFFFFFFFFFFF: ptr = 58; - 'h07FFFFFFFFFFFFFF: ptr = 59; - 'h0FFFFFFFFFFFFFFF: ptr = 60; - 'h1FFFFFFFFFFFFFFF: ptr = 61; - 'h3FFFFFFFFFFFFFFF: ptr = 62; - 'h7FFFFFFFFFFFFFFF: ptr = 63; - 'hFFFFFFFFFFFFFFFF: ptr = 64; - default : ptr = 0; + case(byteEn) matches + 'b0000000000000000000000000000000000000000000000000000000000000001: ptr = 1; + 'b000000000000000000000000000000000000000000000000000000000000001?: ptr = 2; + 'b00000000000000000000000000000000000000000000000000000000000001??: ptr = 3; + 'b0000000000000000000000000000000000000000000000000000000000001???: ptr = 4; + 'b000000000000000000000000000000000000000000000000000000000001????: ptr = 5; + 'b00000000000000000000000000000000000000000000000000000000001?????: ptr = 6; + 'b0000000000000000000000000000000000000000000000000000000001??????: ptr = 7; + 'b000000000000000000000000000000000000000000000000000000001???????: ptr = 8; + 'b00000000000000000000000000000000000000000000000000000001????????: ptr = 9; + 'b0000000000000000000000000000000000000000000000000000001?????????: ptr = 10; + 'b000000000000000000000000000000000000000000000000000001??????????: ptr = 11; + 'b00000000000000000000000000000000000000000000000000001???????????: ptr = 12; + 'b0000000000000000000000000000000000000000000000000001????????????: ptr = 13; + 'b000000000000000000000000000000000000000000000000001?????????????: ptr = 14; + 'b00000000000000000000000000000000000000000000000001??????????????: ptr = 15; + 'b0000000000000000000000000000000000000000000000001???????????????: ptr = 16; + 'b000000000000000000000000000000000000000000000001????????????????: ptr = 17; + 'b00000000000000000000000000000000000000000000001?????????????????: ptr = 18; + 'b0000000000000000000000000000000000000000000001??????????????????: ptr = 19; + 'b000000000000000000000000000000000000000000001???????????????????: ptr = 20; + 'b00000000000000000000000000000000000000000001????????????????????: ptr = 21; + 'b0000000000000000000000000000000000000000001?????????????????????: ptr = 22; + 'b000000000000000000000000000000000000000001??????????????????????: ptr = 23; + 'b00000000000000000000000000000000000000001???????????????????????: ptr = 24; + 'b0000000000000000000000000000000000000001????????????????????????: ptr = 25; + 'b000000000000000000000000000000000000001?????????????????????????: ptr = 26; + 'b00000000000000000000000000000000000001??????????????????????????: ptr = 27; + 'b0000000000000000000000000000000000001???????????????????????????: ptr = 28; + 'b000000000000000000000000000000000001????????????????????????????: ptr = 29; + 'b00000000000000000000000000000000001?????????????????????????????: ptr = 30; + 'b0000000000000000000000000000000001??????????????????????????????: ptr = 31; + 'b000000000000000000000000000000001???????????????????????????????: ptr = 32; + 'b00000000000000000000000000000001????????????????????????????????: ptr = 33; + 'b0000000000000000000000000000001?????????????????????????????????: ptr = 34; + 'b000000000000000000000000000001??????????????????????????????????: ptr = 35; + 'b00000000000000000000000000001???????????????????????????????????: ptr = 36; + 'b0000000000000000000000000001????????????????????????????????????: ptr = 37; + 'b000000000000000000000000001?????????????????????????????????????: ptr = 38; + 'b00000000000000000000000001??????????????????????????????????????: ptr = 39; + 'b0000000000000000000000001???????????????????????????????????????: ptr = 40; + 'b000000000000000000000001????????????????????????????????????????: ptr = 41; + 'b00000000000000000000001?????????????????????????????????????????: ptr = 42; + 'b0000000000000000000001??????????????????????????????????????????: ptr = 43; + 'b000000000000000000001???????????????????????????????????????????: ptr = 44; + 'b00000000000000000001????????????????????????????????????????????: ptr = 45; + 'b0000000000000000001?????????????????????????????????????????????: ptr = 46; + 'b000000000000000001??????????????????????????????????????????????: ptr = 47; + 'b00000000000000001???????????????????????????????????????????????: ptr = 48; + 'b0000000000000001????????????????????????????????????????????????: ptr = 49; + 'b000000000000001?????????????????????????????????????????????????: ptr = 50; + 'b00000000000001??????????????????????????????????????????????????: ptr = 51; + 'b0000000000001???????????????????????????????????????????????????: ptr = 52; + 'b000000000001????????????????????????????????????????????????????: ptr = 53; + 'b00000000001?????????????????????????????????????????????????????: ptr = 54; + 'b0000000001??????????????????????????????????????????????????????: ptr = 55; + 'b000000001???????????????????????????????????????????????????????: ptr = 56; + 'b00000001????????????????????????????????????????????????????????: ptr = 57; + 'b0000001?????????????????????????????????????????????????????????: ptr = 58; + 'b000001??????????????????????????????????????????????????????????: ptr = 59; + 'b00001???????????????????????????????????????????????????????????: ptr = 60; + 'b0001????????????????????????????????????????????????????????????: ptr = 61; + 'b001?????????????????????????????????????????????????????????????: ptr = 62; + 'b01??????????????????????????????????????????????????????????????: ptr = 63; + 'b1???????????????????????????????????????????????????????????????: ptr = 64; + default : ptr = 0; endcase return ptr; endfunction @@ -206,70 +206,70 @@ endfunction function DataDwordPtr convertByteEn2DwordPtr (ByteEn byteEn); DataDwordPtr ptr = 0; case(byteEn) matches - 'h0000000000000001: ptr = 0; - 'h0000000000000003: ptr = 0; - 'h0000000000000007: ptr = 0; - 'h000000000000000?: ptr = 0; - 'h000000000000001?: ptr = 1; - 'h000000000000003?: ptr = 1; - 'h000000000000007?: ptr = 1; - 'h00000000000000F?: ptr = 1; - 'h00000000000001F?: ptr = 2; - 'h00000000000003F?: ptr = 2; - 'h00000000000007F?: ptr = 2; - 'h0000000000000FF?: ptr = 2; - 'h0000000000001FF?: ptr = 3; - 'h0000000000003FF?: ptr = 3; - 'h0000000000007FF?: ptr = 3; - 'h000000000000FFF?: ptr = 3; - 'h000000000001FFF?: ptr = 4; - 'h000000000003FFF?: ptr = 4; - 'h000000000007FFF?: ptr = 4; - 'h00000000000FFFF?: ptr = 4; - 'h00000000001FFFF?: ptr = 5; - 'h00000000003FFFF?: ptr = 5; - 'h00000000007FFFF?: ptr = 5; - 'h0000000000FFFFF?: ptr = 5; - 'h0000000001FFFFF?: ptr = 6; - 'h0000000003FFFFF?: ptr = 6; - 'h0000000007FFFFF?: ptr = 6; - 'h000000000FFFFFF?: ptr = 6; - 'h000000001FFFFFF?: ptr = 7; - 'h000000003FFFFFF?: ptr = 7; - 'h000000007FFFFFF?: ptr = 7; - 'h00000000FFFFFFF?: ptr = 7; - 'h00000001FFFFFFF?: ptr = 8; - 'h00000003FFFFFFF?: ptr = 8; - 'h00000007FFFFFFF?: ptr = 8; - 'h0000000FFFFFFFF?: ptr = 8; - 'h0000001FFFFFFFF?: ptr = 9; - 'h0000003FFFFFFFF?: ptr = 9; - 'h0000007FFFFFFFF?: ptr = 9; - 'h000000FFFFFFFFF?: ptr = 9; - 'h000001FFFFFFFFF?: ptr = 10; - 'h000003FFFFFFFFF?: ptr = 10; - 'h000007FFFFFFFFF?: ptr = 10; - 'h00000FFFFFFFFFF?: ptr = 10; - 'h00001FFFFFFFFFF?: ptr = 11; - 'h00003FFFFFFFFFF?: ptr = 11; - 'h00007FFFFFFFFFF?: ptr = 11; - 'h0000FFFFFFFFFFF?: ptr = 11; - 'h0001FFFFFFFFFFF?: ptr = 12; - 'h0003FFFFFFFFFFF?: ptr = 12; - 'h0007FFFFFFFFFFF?: ptr = 12; - 'h000FFFFFFFFFFFF?: ptr = 12; - 'h001FFFFFFFFFFFF?: ptr = 13; - 'h003FFFFFFFFFFFF?: ptr = 13; - 'h007FFFFFFFFFFFF?: ptr = 13; - 'h00FFFFFFFFFFFFF?: ptr = 13; - 'h01FFFFFFFFFFFFF?: ptr = 14; - 'h03FFFFFFFFFFFFF?: ptr = 14; - 'h07FFFFFFFFFFFFF?: ptr = 14; - 'h0FFFFFFFFFFFFFF?: ptr = 14; - 'h1FFFFFFFFFFFFFF?: ptr = 15; - 'h3FFFFFFFFFFFFFF?: ptr = 15; - 'h7FFFFFFFFFFFFFF?: ptr = 15; - 'hFFFFFFFFFFFFFFF?: ptr = 15; + 'b0000000000000000000000000000000000000000000000000000000000000001: ptr = 0; + 'b000000000000000000000000000000000000000000000000000000000000001?: ptr = 0; + 'b00000000000000000000000000000000000000000000000000000000000001??: ptr = 0; + 'b0000000000000000000000000000000000000000000000000000000000001???: ptr = 0; + 'b000000000000000000000000000000000000000000000000000000000001????: ptr = 1; + 'b00000000000000000000000000000000000000000000000000000000001?????: ptr = 1; + 'b0000000000000000000000000000000000000000000000000000000001??????: ptr = 1; + 'b000000000000000000000000000000000000000000000000000000001???????: ptr = 1; + 'b00000000000000000000000000000000000000000000000000000001????????: ptr = 2; + 'b0000000000000000000000000000000000000000000000000000001?????????: ptr = 2; + 'b000000000000000000000000000000000000000000000000000001??????????: ptr = 2; + 'b00000000000000000000000000000000000000000000000000001???????????: ptr = 2; + 'b0000000000000000000000000000000000000000000000000001????????????: ptr = 3; + 'b000000000000000000000000000000000000000000000000001?????????????: ptr = 3; + 'b00000000000000000000000000000000000000000000000001??????????????: ptr = 3; + 'b0000000000000000000000000000000000000000000000001???????????????: ptr = 3; + 'b000000000000000000000000000000000000000000000001????????????????: ptr = 4; + 'b00000000000000000000000000000000000000000000001?????????????????: ptr = 4; + 'b0000000000000000000000000000000000000000000001??????????????????: ptr = 4; + 'b000000000000000000000000000000000000000000001???????????????????: ptr = 4; + 'b00000000000000000000000000000000000000000001????????????????????: ptr = 5; + 'b0000000000000000000000000000000000000000001?????????????????????: ptr = 5; + 'b000000000000000000000000000000000000000001??????????????????????: ptr = 5; + 'b00000000000000000000000000000000000000001???????????????????????: ptr = 5; + 'b0000000000000000000000000000000000000001????????????????????????: ptr = 6; + 'b000000000000000000000000000000000000001?????????????????????????: ptr = 6; + 'b00000000000000000000000000000000000001??????????????????????????: ptr = 6; + 'b0000000000000000000000000000000000001???????????????????????????: ptr = 6; + 'b000000000000000000000000000000000001????????????????????????????: ptr = 7; + 'b00000000000000000000000000000000001?????????????????????????????: ptr = 7; + 'b0000000000000000000000000000000001??????????????????????????????: ptr = 7; + 'b000000000000000000000000000000001???????????????????????????????: ptr = 7; + 'b00000000000000000000000000000001????????????????????????????????: ptr = 8; + 'b0000000000000000000000000000001?????????????????????????????????: ptr = 8; + 'b000000000000000000000000000001??????????????????????????????????: ptr = 8; + 'b00000000000000000000000000001???????????????????????????????????: ptr = 8; + 'b0000000000000000000000000001????????????????????????????????????: ptr = 9; + 'b000000000000000000000000001?????????????????????????????????????: ptr = 9; + 'b00000000000000000000000001??????????????????????????????????????: ptr = 9; + 'b0000000000000000000000001???????????????????????????????????????: ptr = 9; + 'b000000000000000000000001????????????????????????????????????????: ptr = 10; + 'b00000000000000000000001?????????????????????????????????????????: ptr = 10; + 'b0000000000000000000001??????????????????????????????????????????: ptr = 10; + 'b000000000000000000001???????????????????????????????????????????: ptr = 10; + 'b00000000000000000001????????????????????????????????????????????: ptr = 11; + 'b0000000000000000001?????????????????????????????????????????????: ptr = 11; + 'b000000000000000001??????????????????????????????????????????????: ptr = 11; + 'b00000000000000001???????????????????????????????????????????????: ptr = 11; + 'b0000000000000001????????????????????????????????????????????????: ptr = 12; + 'b000000000000001?????????????????????????????????????????????????: ptr = 12; + 'b00000000000001??????????????????????????????????????????????????: ptr = 12; + 'b0000000000001???????????????????????????????????????????????????: ptr = 12; + 'b000000000001????????????????????????????????????????????????????: ptr = 13; + 'b00000000001?????????????????????????????????????????????????????: ptr = 13; + 'b0000000001??????????????????????????????????????????????????????: ptr = 13; + 'b000000001???????????????????????????????????????????????????????: ptr = 13; + 'b00000001????????????????????????????????????????????????????????: ptr = 14; + 'b0000001?????????????????????????????????????????????????????????: ptr = 14; + 'b000001??????????????????????????????????????????????????????????: ptr = 14; + 'b00001???????????????????????????????????????????????????????????: ptr = 14; + 'b0001????????????????????????????????????????????????????????????: ptr = 15; + 'b001?????????????????????????????????????????????????????????????: ptr = 15; + 'b01??????????????????????????????????????????????????????????????: ptr = 15; + 'b1???????????????????????????????????????????????????????????????: ptr = 15; default : ptr = 0; endcase return ptr; From d1f9974ac4bb4c6d6d53f25ac12cf4bcec92aaca Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Sat, 21 Sep 2024 14:10:18 +0800 Subject: [PATCH 40/53] Add H2C functions --- backend/Makefile | 2 +- cocotb/Makefile | 2 +- cocotb/dma_fullypipeline_tb.py | 384 +++++++++++++++++++++++++++++++++ cocotb/dma_loop_tb.py | 382 ++++++++++++++++++++++++++++++++ src/DmaC2HPipe.bsv | 13 +- src/DmaH2CPipe.bsv | 121 ++++++----- src/DmaTypes.bsv | 77 ++++++- src/DmaUtils.bsv | 109 ++++++---- src/DmaWrapper.bsv | 102 ++++++--- src/PcieAdapter.bsv | 1 + src/SimpleModeUtils.bsv | 312 +++++++++++++++++++++++++++ 11 files changed, 1367 insertions(+), 138 deletions(-) create mode 100644 cocotb/dma_fullypipeline_tb.py create mode 100644 cocotb/dma_loop_tb.py create mode 100644 src/SimpleModeUtils.bsv diff --git a/backend/Makefile b/backend/Makefile index 0be2d3e..b92fad2 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -9,7 +9,7 @@ LOGFILE ?= run.log RUNTOPHASE ?= place # synth place route all PARTNAME = xcvu13p-fhgb2104-2-i TARGETFILE ?= ../src/DmaWrapper.bsv -TOPMODULE ?= mkRawDmaController +TOPMODULE ?= mkRawSimpleDmaController export TOP = $(TOPMODULE) export RTL = $(VLOGDIR) diff --git a/cocotb/Makefile b/cocotb/Makefile index e7a64cc..adb081e 100644 --- a/cocotb/Makefile +++ b/cocotb/Makefile @@ -10,7 +10,7 @@ TOP_MODULE = mk$(TARGET) TOP_FILE = $(TOP_MODULE).v VLOG_FILE = $(TB_DIR)/$(TOP_FILE) -TB_CASE = dma_straddle +TB_CASE = dma_fullypipeline TB_FILE = $(TB_CASE)_tb.py DATE = $(shell date "+%Y%m%d") LOG_FILE = $(TB_DIR)/log/$(DATE)_$(TOP_MODULE).log diff --git a/cocotb/dma_fullypipeline_tb.py b/cocotb/dma_fullypipeline_tb.py new file mode 100644 index 0000000..79589a5 --- /dev/null +++ b/cocotb/dma_fullypipeline_tb.py @@ -0,0 +1,384 @@ +#!/usr/bin/env python +import itertools +import logging +import os +import random +import queue + +import cocotb_test.simulator +import pytest + +import cocotb +from cocotb.triggers import RisingEdge, FallingEdge, Timer +from cocotb.regression import TestFactory +from cocotb.clock import Clock + +from cocotbext.pcie.core import RootComplex +from cocotbext.pcie.xilinx.us import UltraScalePlusPcieDevice +from cocotbext.axi.stream import define_stream +from cocotbext.axi import (AxiStreamBus, AxiStreamSource, AxiStreamSink, AxiStreamMonitor, AxiStreamFrame) + +# class TB architecture +# -------------- ------------- ----------- +# | Root Complex | <-> | End Pointer | <-> | Dut(DMAC) | +# -------------- ------------- ----------- + +DescBus, DescTransaction, DescSource, DescSink, DescMonitor = define_stream("Desc", + signals=["start_addr", "byte_cnt", "is_write", "valid", "ready"] +) + +class TB(object): + def __init__(self, dut, msix=False): + self.dut = dut + + self.log = logging.getLogger("cocotb.tb") + self.log.setLevel(logging.DEBUG) + + self.clock = dut.CLK + self.resetn = dut.RST_N + + self._bus_width = 512 + self._bus_bytes = 64 + + # PCIe + self.rc = RootComplex() + + cq_straddle = False + cc_straddle = False + rq_straddle = True + rc_straddle = True + rc_4tlp_straddle = False + + self.client_tag = bool(int(os.getenv("CLIENT_TAG", "1"))) + + self.dev = UltraScalePlusPcieDevice( + # configuration options + pcie_generation=3, + # pcie_link_width=2, + # user_clk_frequency=250e6, + alignment="dword", + cq_straddle=cq_straddle, + cc_straddle=cc_straddle, + rq_straddle=rq_straddle, + rc_straddle=rc_straddle, + rc_4tlp_straddle=rc_4tlp_straddle, + pf_count=1, + max_payload_size=1024, + enable_client_tag=self.client_tag, + enable_extended_tag=False, + enable_parity=False, + enable_rx_msg_interface=False, + enable_sriov=False, + enable_extended_configuration=False, + + pf0_msi_enable=True, + pf0_msi_count=32, + pf1_msi_enable=False, + pf1_msi_count=1, + pf2_msi_enable=False, + pf2_msi_count=1, + pf3_msi_enable=False, + pf3_msi_count=1, + pf0_msix_enable=msix, + pf0_msix_table_size=63, + pf0_msix_table_bir=4, + pf0_msix_table_offset=0x00000000, + pf0_msix_pba_bir=4, + pf0_msix_pba_offset=0x00008000, + pf1_msix_enable=False, + pf1_msix_table_size=0, + pf1_msix_table_bir=0, + pf1_msix_table_offset=0x00000000, + pf1_msix_pba_bir=0, + pf1_msix_pba_offset=0x00000000, + pf2_msix_enable=False, + pf2_msix_table_size=0, + pf2_msix_table_bir=0, + pf2_msix_table_offset=0x00000000, + pf2_msix_pba_bir=0, + pf2_msix_pba_offset=0x00000000, + pf3_msix_enable=False, + pf3_msix_table_size=0, + pf3_msix_table_bir=0, + pf3_msix_table_offset=0x00000000, + pf3_msix_pba_bir=0, + pf3_msix_pba_offset=0x00000000, + + # signals + user_clk=self.clock, + # user_reset=~self.resetn, + user_lnk_up=dut.user_lnk_up, + # sys_clk=dut.sys_clk, + # sys_clk_gt=dut.sys_clk_gt, + # sys_reset=dut.sys_reset, + # phy_rdy_out=dut.phy_rdy_out, + + rq_bus=AxiStreamBus.from_prefix(dut, "m_axis_rq"), + pcie_rq_seq_num0=dut.pcie_rq_seq_num0, + pcie_rq_seq_num_vld0=dut.pcie_rq_seq_num_vld0, + pcie_rq_seq_num1=dut.pcie_rq_seq_num1, + pcie_rq_seq_num_vld1=dut.pcie_rq_seq_num_vld1, + pcie_rq_tag0=dut.pcie_rq_tag0, + pcie_rq_tag1=dut.pcie_rq_tag1, + # pcie_rq_tag_av=dut.pcie_rq_tag_av, + pcie_rq_tag_vld0=dut.pcie_rq_tag_vld0, + pcie_rq_tag_vld1=dut.pcie_rq_tag_vld1, + + rc_bus=AxiStreamBus.from_prefix(dut, "s_axis_rc"), + + cq_bus=AxiStreamBus.from_prefix(dut, "s_axis_cq"), + pcie_cq_np_req=dut.pcie_cq_np_req, + pcie_cq_np_req_count=dut.pcie_cq_np_req_count, + + cc_bus=AxiStreamBus.from_prefix(dut, "m_axis_cc"), + + pcie_tfc_nph_av=dut.pcie_tfc_nph_av, + pcie_tfc_npd_av=dut.pcie_tfc_npd_av, + cfg_phy_link_down=dut.cfg_phy_link_down, + cfg_phy_link_status=dut.cfg_phy_link_status, + cfg_negotiated_width=dut.cfg_negotiated_width, + cfg_current_speed=dut.cfg_current_speed, + cfg_max_payload=dut.cfg_max_payload, + cfg_max_read_req=dut.cfg_max_read_req, + cfg_function_status=dut.cfg_function_status, + cfg_function_power_state=dut.cfg_function_power_state, + cfg_vf_status=dut.cfg_vf_status, + cfg_vf_power_state=dut.cfg_vf_power_state, + cfg_link_power_state=dut.cfg_link_power_state, + cfg_mgmt_addr=dut.cfg_mgmt_addr, + cfg_mgmt_function_number=dut.cfg_mgmt_function_number, + cfg_mgmt_write=dut.cfg_mgmt_write, + cfg_mgmt_write_data=dut.cfg_mgmt_write_data, + cfg_mgmt_byte_enable=dut.cfg_mgmt_byte_enable, + cfg_mgmt_read=dut.cfg_mgmt_read, + cfg_mgmt_read_data=dut.cfg_mgmt_read_data, + cfg_mgmt_read_write_done=dut.cfg_mgmt_read_write_done, + cfg_mgmt_debug_access=dut.cfg_mgmt_debug_access, + cfg_err_cor_out=dut.cfg_err_cor_out, + cfg_err_nonfatal_out=dut.cfg_err_nonfatal_out, + cfg_err_fatal_out=dut.cfg_err_fatal_out, + cfg_local_error_valid=dut.cfg_local_error_valid, + cfg_local_error_out=dut.cfg_local_error_out, + cfg_ltssm_state=dut.cfg_ltssm_state, + cfg_rx_pm_state=dut.cfg_rx_pm_state, + cfg_tx_pm_state=dut.cfg_tx_pm_state, + cfg_rcb_status=dut.cfg_rcb_status, + cfg_obff_enable=dut.cfg_obff_enable, + # cfg_pl_status_change=dut.cfg_pl_status_change, + # cfg_tph_requester_enable=dut.cfg_tph_requester_enable, + # cfg_tph_st_mode=dut.cfg_tph_st_mode, + # cfg_vf_tph_requester_enable=dut.cfg_vf_tph_requester_enable, + # cfg_vf_tph_st_mode=dut.cfg_vf_tph_st_mode, + cfg_msg_received=dut.cfg_msg_received, + cfg_msg_received_data=dut.cfg_msg_received_data, + cfg_msg_received_type=dut.cfg_msg_received_type, + cfg_msg_transmit=dut.cfg_msg_transmit, + cfg_msg_transmit_type=dut.cfg_msg_transmit_type, + cfg_msg_transmit_data=dut.cfg_msg_transmit_data, + cfg_msg_transmit_done=dut.cfg_msg_transmit_done, + cfg_fc_ph=dut.cfg_fc_ph, + cfg_fc_pd=dut.cfg_fc_pd, + cfg_fc_nph=dut.cfg_fc_nph, + cfg_fc_npd=dut.cfg_fc_npd, + cfg_fc_cplh=dut.cfg_fc_cplh, + cfg_fc_cpld=dut.cfg_fc_cpld, + cfg_fc_sel=dut.cfg_fc_sel, + cfg_dsn=dut.cfg_dsn, + cfg_bus_number=dut.cfg_bus_number, + cfg_power_state_change_ack=dut.cfg_power_state_change_ack, + cfg_power_state_change_interrupt=dut.cfg_power_state_change_interrupt, + cfg_err_cor_in=dut.cfg_err_cor_in, + cfg_err_uncor_in=dut.cfg_err_uncor_in, + cfg_flr_in_process=dut.cfg_flr_in_process, + cfg_flr_done=dut.cfg_flr_done, + cfg_vf_flr_in_process=dut.cfg_vf_flr_in_process, + cfg_vf_flr_func_num=dut.cfg_vf_flr_func_num, + cfg_vf_flr_done=dut.cfg_vf_flr_done, + cfg_link_training_enable=dut.cfg_link_training_enable, + cfg_interrupt_int=dut.cfg_interrupt_int, + cfg_interrupt_pending=dut.cfg_interrupt_pending, + cfg_interrupt_sent=dut.cfg_interrupt_sent, + cfg_interrupt_msi_enable=dut.cfg_interrupt_msi_enable, + cfg_interrupt_msi_mmenable=dut.cfg_interrupt_msi_mmenable, + cfg_interrupt_msi_mask_update=dut.cfg_interrupt_msi_mask_update, + cfg_interrupt_msi_data=dut.cfg_interrupt_msi_data, + cfg_interrupt_msi_select=dut.cfg_interrupt_msi_select, + cfg_interrupt_msi_int=dut.cfg_interrupt_msi_int, + cfg_interrupt_msi_pending_status=dut.cfg_interrupt_msi_pending_status, + cfg_interrupt_msi_pending_status_data_enable=dut.cfg_interrupt_msi_pending_status_data_enable, + cfg_interrupt_msi_pending_status_function_num=dut.cfg_interrupt_msi_pending_status_function_num, + cfg_interrupt_msi_sent=dut.cfg_interrupt_msi_sent, + cfg_interrupt_msi_fail=dut.cfg_interrupt_msi_fail, + cfg_interrupt_msi_attr=dut.cfg_interrupt_msi_attr, + cfg_interrupt_msi_tph_present=dut.cfg_interrupt_msi_tph_present, + cfg_interrupt_msi_tph_type=dut.cfg_interrupt_msi_tph_type, + cfg_interrupt_msi_tph_st_tag=dut.cfg_interrupt_msi_tph_st_tag, + cfg_interrupt_msi_function_number=dut.cfg_interrupt_msi_function_number, + cfg_pm_aspm_l1_entry_reject=dut.cfg_pm_aspm_l1_entry_reject, + cfg_pm_aspm_tx_l0s_entry_disable=dut.cfg_pm_aspm_tx_l0s_entry_disable, + cfg_hot_reset_out=dut.cfg_hot_reset_out, + cfg_config_space_enable=dut.cfg_config_space_enable, + cfg_req_pm_transition_l23_ready=dut.cfg_req_pm_transition_l23_ready, + cfg_hot_reset_in=dut.cfg_hot_reset_in, + cfg_ds_port_number=dut.cfg_ds_port_number, + cfg_ds_bus_number=dut.cfg_ds_bus_number, + cfg_ds_device_number=dut.cfg_ds_device_number, + ) + + self.dev.log.setLevel(logging.INFO) + + dut.pcie_cq_np_req.setimmediatevalue(1) + dut.cfg_mgmt_addr.setimmediatevalue(0) + dut.cfg_mgmt_function_number.setimmediatevalue(0) + dut.cfg_mgmt_write.setimmediatevalue(0) + dut.cfg_mgmt_write_data.setimmediatevalue(0) + dut.cfg_mgmt_byte_enable.setimmediatevalue(0) + dut.cfg_mgmt_read.setimmediatevalue(0) + dut.cfg_mgmt_debug_access.setimmediatevalue(0) + dut.cfg_msg_transmit.setimmediatevalue(0) + dut.cfg_msg_transmit_type.setimmediatevalue(0) + dut.cfg_msg_transmit_data.setimmediatevalue(0) + dut.cfg_fc_sel.setimmediatevalue(0) + dut.cfg_dsn.setimmediatevalue(0) + dut.cfg_power_state_change_ack.setimmediatevalue(0) + dut.cfg_err_cor_in.setimmediatevalue(0) + dut.cfg_err_uncor_in.setimmediatevalue(0) + dut.cfg_flr_done.setimmediatevalue(0) + dut.cfg_vf_flr_func_num.setimmediatevalue(0) + dut.cfg_vf_flr_done.setimmediatevalue(0) + dut.cfg_link_training_enable.setimmediatevalue(1) + dut.cfg_interrupt_int.setimmediatevalue(0) + dut.cfg_interrupt_pending.setimmediatevalue(0) + dut.cfg_interrupt_msi_select.setimmediatevalue(0) + dut.cfg_interrupt_msi_int.setimmediatevalue(0) + dut.cfg_interrupt_msi_pending_status.setimmediatevalue(0) + dut.cfg_interrupt_msi_pending_status_data_enable.setimmediatevalue(0) + dut.cfg_interrupt_msi_pending_status_function_num.setimmediatevalue(0) + dut.cfg_interrupt_msi_attr.setimmediatevalue(0) + dut.cfg_interrupt_msi_tph_present.setimmediatevalue(0) + dut.cfg_interrupt_msi_tph_type.setimmediatevalue(0) + dut.cfg_interrupt_msi_tph_st_tag.setimmediatevalue(0) + dut.cfg_interrupt_msi_function_number.setimmediatevalue(0) + dut.cfg_pm_aspm_l1_entry_reject.setimmediatevalue(0) + dut.cfg_pm_aspm_tx_l0s_entry_disable.setimmediatevalue(0) + dut.cfg_config_space_enable.setimmediatevalue(1) + dut.cfg_req_pm_transition_l23_ready.setimmediatevalue(0) + dut.cfg_hot_reset_in.setimmediatevalue(0) + dut.cfg_ds_port_number.setimmediatevalue(0) + dut.cfg_ds_bus_number.setimmediatevalue(0) + dut.cfg_ds_device_number.setimmediatevalue(0) + + self.rc.make_port().connect(self.dev) + + # DMA + self.c2h_write_source_0 = AxiStreamSource(AxiStreamBus.from_prefix(dut, "s_axis_c2h_0"), self.clock, self.resetn, False) + self.c2h_desc_source_0 = DescSource(DescBus.from_prefix(dut, "s_desc_c2h_0"),self.clock, self.resetn, False) + self.c2h_read_sink_0 = AxiStreamSink(AxiStreamBus.from_prefix(dut, "m_axis_c2h_0"), self.clock, self.resetn, False) + self.c2h_write_source_1 = AxiStreamSource(AxiStreamBus.from_prefix(dut, "s_axis_c2h_1"), self.clock, self.resetn, False) + self.c2h_desc_source_1 = DescSource(DescBus.from_prefix(dut, "s_desc_c2h_1"), self.clock, self.resetn, False) + self.c2h_read_sink_1 = AxiStreamSink(AxiStreamBus.from_prefix(dut, "m_axis_c2h_1"), self.clock, self.resetn, False) + + #monitor + self.rq_monitor = AxiStreamMonitor(AxiStreamBus.from_prefix(dut, "m_axis_rq"), self.clock, self.resetn, False) + + #Do not use user_rst but gen rstn for bsv + async def gen_reset(self): + self.resetn.value = 0 + await RisingEdge(self.clock) + await RisingEdge(self.clock) + await RisingEdge(self.clock) + self.resetn.value = 1 + await RisingEdge(self.clock) + await RisingEdge(self.clock) + await RisingEdge(self.clock) + self.log.info("Generated DMA RST_N") + + async def send_desc(self, channel, startAddr, length, isWrite): + desc = DescTransaction() + desc.start_addr = startAddr + desc.byte_cnt = length + desc.is_write = isWrite + if channel == 0: + await self.c2h_desc_source_0.send(desc) + else: + await self.c2h_desc_source_1.send(desc) + + async def send_data(self, channel, data): + if channel == 0: + await self.c2h_write_source_0.send(data) + else: + await self.c2h_write_source_1.send(data) + + async def recv_data(self, channel): + if channel == 0 : + data = await self.c2h_read_sink_0.read() + else: + data = await self.c2h_read_sink_1.read() + data = bytes(''.join([chr(item) for item in data]), encoding='UTF-8') + return data + + async def run_single_write_once(self, channel, addr, data): + length = len(data) + self.log.info("Conduct DMA single write: channel %d addr %d, length %d, char %c", channel, addr, length, data[0]) + await self.send_desc(channel, addr, length, True) + await self.send_data(channel, data) + + async def run_single_read_once(self, channel, addr, length): + self.log.info("Conduct DMA single read: channel %d addr %d, length %d", channel, addr, length) + await self.send_desc(channel, addr, length, False) + data = await self.recv_data(channel) + self.log.info("Read data from RootComplex successfully, recv length %d, req length %d", len(data), length) + return data + +async def small_write_drive(pcie_tb, dma_channel, mem): + for _ in range(100): + addr= random.randint(0, 8192) + addr = mem.get_absolute_address(addr) + length = random.randint(1, 64) + char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") + data = char * length + await pcie_tb.run_single_write_once(dma_channel, addr, data) + +@cocotb.test(timeout_time=100000000, timeout_unit="ns") +async def small_desc_fp_test(dut): + + tb = TB(dut) + await tb.gen_reset() + + await tb.rc.enumerate() + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + + await dev.enable_device() + await dev.set_master() + + mem = tb.rc.mem_pool.alloc_region(1024*1024) + + await small_write_drive(tb, 0, mem) + await Timer(1000, units='ns') + +tests_dir = os.path.dirname(__file__) +rtl_dir = tests_dir + + +def test_dma(): + dut = "mkRawDmaController" + module = os.path.splitext(os.path.basename(__file__))[0] + toplevel = dut + + verilog_sources = [ + os.path.join(rtl_dir, f"{dut}.v") + ] + + sim_build = os.path.join(tests_dir, "sim_build", dut) + + cocotb_test.simulator.run( + python_search=[tests_dir], + verilog_sources=verilog_sources, + toplevel=toplevel, + module=module, + timescale="1ns/1ps", + sim_build=sim_build + ) + +if __name__ == "__main__": + test_dma() \ No newline at end of file diff --git a/cocotb/dma_loop_tb.py b/cocotb/dma_loop_tb.py new file mode 100644 index 0000000..666db6c --- /dev/null +++ b/cocotb/dma_loop_tb.py @@ -0,0 +1,382 @@ +#!/usr/bin/env python +import itertools +import logging +import os +import random +import queue + +import cocotb_test.simulator +import pytest + +import cocotb +from cocotb.triggers import RisingEdge, FallingEdge, Timer +from cocotb.regression import TestFactory +from cocotb.clock import Clock + +from cocotbext.pcie.core import RootComplex +from cocotbext.pcie.xilinx.us import UltraScalePlusPcieDevice +from cocotbext.axi.stream import define_stream +from cocotbext.axi import (AxiStreamBus, AxiStreamSource, AxiStreamSink, AxiStreamMonitor, AxiStreamFrame) + +# class TB architecture +# -------------- ------------- ----------- +# | Root Complex | <-> | End Pointer | <-> | Dut(DMAC) | +# -------------- ------------- ----------- +# Loop TB only runs for Simple Mode + +DescBus, DescTransaction, DescSource, DescSink, DescMonitor = define_stream("Desc", + signals=["start_addr", "byte_cnt", "is_write", "valid", "ready"] +) +class h2cHelper(object): + def __init__(pcie_tb, dut, self): + self._desc_addr_lo_0 = 0 + self._desc_addr_hi_0 = 1 + self._desc_len_0 = 2 + self._desc_cntl_0 = 3 + self._desc_addr_lo_1 = 4 + self._desc_addr_hi_1 = 5 + self._desc_len_1 = 6 + self._desc_cntl_1 = 7 + self._flag_addr_lo_0 = 8 + self._flag_addr_hi_0 = 9 + self._flag_addr_lo_1 = 10 + self._flag_addr_hi_1 = 11 + self._va_head_lo_0 = 12 + self._va_head_hi_0 = 13 + self._va_head_lo_1 = 12 + self._va_head_hi_1 = 13 + + self._bar_offset = 0 + self._reg_block_size = 512 + self._phy_addr_table_size = 512 + + self.tb = TB(dut) + self.mem = self.tb.rc.mem_pool.alloc_region(1024*1024) + + async def init(self): + await self.tb.gen_reset() + + await self.tb.rc.enumerate() + dev = self.tb.rc.find_device(self.tb.dev.functions[0].pcie_id) + + await dev.enable_device() + await dev.set_master() + + async def ioWrite32(self, address, value): + raise NotImplementedError + + async def send_desc(self, channel, address, length, isWrite): + if (channel == 0): + addrLo, addrHi = self.splitDword2Word(address) + await self.ioWrite32(self._desc_addr_lo_0, addrLo.to_bytes()) + await self.ioWrite32(self._desc_addr_hi_0, addrHi.to_bytes()) + await self.ioWrite32(self._desc_len_0, length.to_bytes()) + + + + def set_bar_offset(self, offset): + self._bar_offset = offset + + def splitDword2Word(dword): + lo = dword & 0xFFFF + hi = (dword >> 32) & 0xFFFF + return (lo, hi) + + +class TB(object): + def __init__(self, dut, msix=False): + self.dut = dut + + self.log = logging.getLogger("cocotb.tb") + self.log.setLevel(logging.DEBUG) + + self.clock = dut.CLK + self.resetn = dut.RST_N + + self._bus_width = 512 + self._bus_bytes = 64 + + # PCIe + self.rc = RootComplex() + + cq_straddle = False + cc_straddle = False + rq_straddle = True + rc_straddle = True + rc_4tlp_straddle = False + + self.client_tag = bool(int(os.getenv("CLIENT_TAG", "1"))) + + self.dev = UltraScalePlusPcieDevice( + # configuration options + pcie_generation=3, + # pcie_link_width=2, + # user_clk_frequency=250e6, + alignment="dword", + cq_straddle=cq_straddle, + cc_straddle=cc_straddle, + rq_straddle=rq_straddle, + rc_straddle=rc_straddle, + rc_4tlp_straddle=rc_4tlp_straddle, + pf_count=1, + max_payload_size=1024, + enable_client_tag=self.client_tag, + enable_extended_tag=False, + enable_parity=False, + enable_rx_msg_interface=False, + enable_sriov=False, + enable_extended_configuration=False, + + pf0_msi_enable=True, + pf0_msi_count=32, + pf1_msi_enable=False, + pf1_msi_count=1, + pf2_msi_enable=False, + pf2_msi_count=1, + pf3_msi_enable=False, + pf3_msi_count=1, + pf0_msix_enable=msix, + pf0_msix_table_size=63, + pf0_msix_table_bir=4, + pf0_msix_table_offset=0x00000000, + pf0_msix_pba_bir=4, + pf0_msix_pba_offset=0x00008000, + pf1_msix_enable=False, + pf1_msix_table_size=0, + pf1_msix_table_bir=0, + pf1_msix_table_offset=0x00000000, + pf1_msix_pba_bir=0, + pf1_msix_pba_offset=0x00000000, + pf2_msix_enable=False, + pf2_msix_table_size=0, + pf2_msix_table_bir=0, + pf2_msix_table_offset=0x00000000, + pf2_msix_pba_bir=0, + pf2_msix_pba_offset=0x00000000, + pf3_msix_enable=False, + pf3_msix_table_size=0, + pf3_msix_table_bir=0, + pf3_msix_table_offset=0x00000000, + pf3_msix_pba_bir=0, + pf3_msix_pba_offset=0x00000000, + + # signals + user_clk=self.clock, + # user_reset=~self.resetn, + user_lnk_up=dut.user_lnk_up, + # sys_clk=dut.sys_clk, + # sys_clk_gt=dut.sys_clk_gt, + # sys_reset=dut.sys_reset, + # phy_rdy_out=dut.phy_rdy_out, + + rq_bus=AxiStreamBus.from_prefix(dut, "m_axis_rq"), + pcie_rq_seq_num0=dut.pcie_rq_seq_num0, + pcie_rq_seq_num_vld0=dut.pcie_rq_seq_num_vld0, + pcie_rq_seq_num1=dut.pcie_rq_seq_num1, + pcie_rq_seq_num_vld1=dut.pcie_rq_seq_num_vld1, + pcie_rq_tag0=dut.pcie_rq_tag0, + pcie_rq_tag1=dut.pcie_rq_tag1, + # pcie_rq_tag_av=dut.pcie_rq_tag_av, + pcie_rq_tag_vld0=dut.pcie_rq_tag_vld0, + pcie_rq_tag_vld1=dut.pcie_rq_tag_vld1, + + rc_bus=AxiStreamBus.from_prefix(dut, "s_axis_rc"), + + cq_bus=AxiStreamBus.from_prefix(dut, "s_axis_cq"), + pcie_cq_np_req=dut.pcie_cq_np_req, + pcie_cq_np_req_count=dut.pcie_cq_np_req_count, + + cc_bus=AxiStreamBus.from_prefix(dut, "m_axis_cc"), + + pcie_tfc_nph_av=dut.pcie_tfc_nph_av, + pcie_tfc_npd_av=dut.pcie_tfc_npd_av, + cfg_phy_link_down=dut.cfg_phy_link_down, + cfg_phy_link_status=dut.cfg_phy_link_status, + cfg_negotiated_width=dut.cfg_negotiated_width, + cfg_current_speed=dut.cfg_current_speed, + cfg_max_payload=dut.cfg_max_payload, + cfg_max_read_req=dut.cfg_max_read_req, + cfg_function_status=dut.cfg_function_status, + cfg_function_power_state=dut.cfg_function_power_state, + cfg_vf_status=dut.cfg_vf_status, + cfg_vf_power_state=dut.cfg_vf_power_state, + cfg_link_power_state=dut.cfg_link_power_state, + cfg_mgmt_addr=dut.cfg_mgmt_addr, + cfg_mgmt_function_number=dut.cfg_mgmt_function_number, + cfg_mgmt_write=dut.cfg_mgmt_write, + cfg_mgmt_write_data=dut.cfg_mgmt_write_data, + cfg_mgmt_byte_enable=dut.cfg_mgmt_byte_enable, + cfg_mgmt_read=dut.cfg_mgmt_read, + cfg_mgmt_read_data=dut.cfg_mgmt_read_data, + cfg_mgmt_read_write_done=dut.cfg_mgmt_read_write_done, + cfg_mgmt_debug_access=dut.cfg_mgmt_debug_access, + cfg_err_cor_out=dut.cfg_err_cor_out, + cfg_err_nonfatal_out=dut.cfg_err_nonfatal_out, + cfg_err_fatal_out=dut.cfg_err_fatal_out, + cfg_local_error_valid=dut.cfg_local_error_valid, + cfg_local_error_out=dut.cfg_local_error_out, + cfg_ltssm_state=dut.cfg_ltssm_state, + cfg_rx_pm_state=dut.cfg_rx_pm_state, + cfg_tx_pm_state=dut.cfg_tx_pm_state, + cfg_rcb_status=dut.cfg_rcb_status, + cfg_obff_enable=dut.cfg_obff_enable, + # cfg_pl_status_change=dut.cfg_pl_status_change, + # cfg_tph_requester_enable=dut.cfg_tph_requester_enable, + # cfg_tph_st_mode=dut.cfg_tph_st_mode, + # cfg_vf_tph_requester_enable=dut.cfg_vf_tph_requester_enable, + # cfg_vf_tph_st_mode=dut.cfg_vf_tph_st_mode, + cfg_msg_received=dut.cfg_msg_received, + cfg_msg_received_data=dut.cfg_msg_received_data, + cfg_msg_received_type=dut.cfg_msg_received_type, + cfg_msg_transmit=dut.cfg_msg_transmit, + cfg_msg_transmit_type=dut.cfg_msg_transmit_type, + cfg_msg_transmit_data=dut.cfg_msg_transmit_data, + cfg_msg_transmit_done=dut.cfg_msg_transmit_done, + cfg_fc_ph=dut.cfg_fc_ph, + cfg_fc_pd=dut.cfg_fc_pd, + cfg_fc_nph=dut.cfg_fc_nph, + cfg_fc_npd=dut.cfg_fc_npd, + cfg_fc_cplh=dut.cfg_fc_cplh, + cfg_fc_cpld=dut.cfg_fc_cpld, + cfg_fc_sel=dut.cfg_fc_sel, + cfg_dsn=dut.cfg_dsn, + cfg_bus_number=dut.cfg_bus_number, + cfg_power_state_change_ack=dut.cfg_power_state_change_ack, + cfg_power_state_change_interrupt=dut.cfg_power_state_change_interrupt, + cfg_err_cor_in=dut.cfg_err_cor_in, + cfg_err_uncor_in=dut.cfg_err_uncor_in, + cfg_flr_in_process=dut.cfg_flr_in_process, + cfg_flr_done=dut.cfg_flr_done, + cfg_vf_flr_in_process=dut.cfg_vf_flr_in_process, + cfg_vf_flr_func_num=dut.cfg_vf_flr_func_num, + cfg_vf_flr_done=dut.cfg_vf_flr_done, + cfg_link_training_enable=dut.cfg_link_training_enable, + cfg_interrupt_int=dut.cfg_interrupt_int, + cfg_interrupt_pending=dut.cfg_interrupt_pending, + cfg_interrupt_sent=dut.cfg_interrupt_sent, + cfg_interrupt_msi_enable=dut.cfg_interrupt_msi_enable, + cfg_interrupt_msi_mmenable=dut.cfg_interrupt_msi_mmenable, + cfg_interrupt_msi_mask_update=dut.cfg_interrupt_msi_mask_update, + cfg_interrupt_msi_data=dut.cfg_interrupt_msi_data, + cfg_interrupt_msi_select=dut.cfg_interrupt_msi_select, + cfg_interrupt_msi_int=dut.cfg_interrupt_msi_int, + cfg_interrupt_msi_pending_status=dut.cfg_interrupt_msi_pending_status, + cfg_interrupt_msi_pending_status_data_enable=dut.cfg_interrupt_msi_pending_status_data_enable, + cfg_interrupt_msi_pending_status_function_num=dut.cfg_interrupt_msi_pending_status_function_num, + cfg_interrupt_msi_sent=dut.cfg_interrupt_msi_sent, + cfg_interrupt_msi_fail=dut.cfg_interrupt_msi_fail, + cfg_interrupt_msi_attr=dut.cfg_interrupt_msi_attr, + cfg_interrupt_msi_tph_present=dut.cfg_interrupt_msi_tph_present, + cfg_interrupt_msi_tph_type=dut.cfg_interrupt_msi_tph_type, + cfg_interrupt_msi_tph_st_tag=dut.cfg_interrupt_msi_tph_st_tag, + cfg_interrupt_msi_function_number=dut.cfg_interrupt_msi_function_number, + cfg_pm_aspm_l1_entry_reject=dut.cfg_pm_aspm_l1_entry_reject, + cfg_pm_aspm_tx_l0s_entry_disable=dut.cfg_pm_aspm_tx_l0s_entry_disable, + cfg_hot_reset_out=dut.cfg_hot_reset_out, + cfg_config_space_enable=dut.cfg_config_space_enable, + cfg_req_pm_transition_l23_ready=dut.cfg_req_pm_transition_l23_ready, + cfg_hot_reset_in=dut.cfg_hot_reset_in, + cfg_ds_port_number=dut.cfg_ds_port_number, + cfg_ds_bus_number=dut.cfg_ds_bus_number, + cfg_ds_device_number=dut.cfg_ds_device_number, + ) + + self.dev.log.setLevel(logging.INFO) + + dut.pcie_cq_np_req.setimmediatevalue(1) + dut.cfg_mgmt_addr.setimmediatevalue(0) + dut.cfg_mgmt_function_number.setimmediatevalue(0) + dut.cfg_mgmt_write.setimmediatevalue(0) + dut.cfg_mgmt_write_data.setimmediatevalue(0) + dut.cfg_mgmt_byte_enable.setimmediatevalue(0) + dut.cfg_mgmt_read.setimmediatevalue(0) + dut.cfg_mgmt_debug_access.setimmediatevalue(0) + dut.cfg_msg_transmit.setimmediatevalue(0) + dut.cfg_msg_transmit_type.setimmediatevalue(0) + dut.cfg_msg_transmit_data.setimmediatevalue(0) + dut.cfg_fc_sel.setimmediatevalue(0) + dut.cfg_dsn.setimmediatevalue(0) + dut.cfg_power_state_change_ack.setimmediatevalue(0) + dut.cfg_err_cor_in.setimmediatevalue(0) + dut.cfg_err_uncor_in.setimmediatevalue(0) + dut.cfg_flr_done.setimmediatevalue(0) + dut.cfg_vf_flr_func_num.setimmediatevalue(0) + dut.cfg_vf_flr_done.setimmediatevalue(0) + dut.cfg_link_training_enable.setimmediatevalue(1) + dut.cfg_interrupt_int.setimmediatevalue(0) + dut.cfg_interrupt_pending.setimmediatevalue(0) + dut.cfg_interrupt_msi_select.setimmediatevalue(0) + dut.cfg_interrupt_msi_int.setimmediatevalue(0) + dut.cfg_interrupt_msi_pending_status.setimmediatevalue(0) + dut.cfg_interrupt_msi_pending_status_data_enable.setimmediatevalue(0) + dut.cfg_interrupt_msi_pending_status_function_num.setimmediatevalue(0) + dut.cfg_interrupt_msi_attr.setimmediatevalue(0) + dut.cfg_interrupt_msi_tph_present.setimmediatevalue(0) + dut.cfg_interrupt_msi_tph_type.setimmediatevalue(0) + dut.cfg_interrupt_msi_tph_st_tag.setimmediatevalue(0) + dut.cfg_interrupt_msi_function_number.setimmediatevalue(0) + dut.cfg_pm_aspm_l1_entry_reject.setimmediatevalue(0) + dut.cfg_pm_aspm_tx_l0s_entry_disable.setimmediatevalue(0) + dut.cfg_config_space_enable.setimmediatevalue(1) + dut.cfg_req_pm_transition_l23_ready.setimmediatevalue(0) + dut.cfg_hot_reset_in.setimmediatevalue(0) + dut.cfg_ds_port_number.setimmediatevalue(0) + dut.cfg_ds_bus_number.setimmediatevalue(0) + dut.cfg_ds_device_number.setimmediatevalue(0) + + self.rc.make_port().connect(self.dev) + + #Do not use user_rst but gen rstn for bsv + async def gen_reset(self): + self.resetn.value = 0 + await RisingEdge(self.clock) + await RisingEdge(self.clock) + await RisingEdge(self.clock) + self.resetn.value = 1 + await RisingEdge(self.clock) + await RisingEdge(self.clock) + await RisingEdge(self.clock) + self.log.info("Generated DMA RST_N") + +@cocotb.test(timeout_time=100000000, timeout_unit="ns") +async def small_desc_fp_test(dut): + + tb = TB(dut) + await tb.gen_reset() + + await tb.rc.enumerate() + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + + await dev.enable_device() + await dev.set_master() + + mem = tb.rc.mem_pool.alloc_region(1024*1024) + + await Timer(1000, units='ns') + +tests_dir = os.path.dirname(__file__) +rtl_dir = tests_dir + + +def test_dma(): + dut = "mkRawDmaController" + module = os.path.splitext(os.path.basename(__file__))[0] + toplevel = dut + + verilog_sources = [ + os.path.join(rtl_dir, f"{dut}.v") + ] + + sim_build = os.path.join(tests_dir, "sim_build", dut) + + cocotb_test.simulator.run( + python_search=[tests_dir], + verilog_sources=verilog_sources, + toplevel=toplevel, + module=module, + timescale="1ns/1ps", + sim_build=sim_build + ) + +if __name__ == "__main__": + test_dma() \ No newline at end of file diff --git a/src/DmaC2HPipe.bsv b/src/DmaC2HPipe.bsv index 8822b59..4e4cc8f 100644 --- a/src/DmaC2HPipe.bsv +++ b/src/DmaC2HPipe.bsv @@ -127,9 +127,8 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); RqDescriptorGenerator rqDescGenerator <- mkRqDescriptorGenerator(False); Reg#(Bool) hasReadOnce <- mkReg(False); - // Reg#(DmaMemAddr) recvTlpCntReg <- mkReg(0); - Reg#(DmaMemAddr) recvBytesReg <- mkReg(0); - Vector#(SLOT_PER_PATH, Reg#(DmaMemAddr)) chunkBytesRegs <- replicateM(mkReg(0)); + Reg#(DmaReqLen) recvBytesReg <- mkReg(0); + Vector#(SLOT_PER_PATH, Reg#(DmaReqLen)) chunkBytesRegs <- replicateM(mkReg(0)); // mkConnection(chunkSplitor.chunkCntFifoOut, expectTlpCntFifo); mkConnection(reshapeStrad.streamFifoOut, descRemove.streamFifoIn); @@ -253,7 +252,7 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); reqInFifo.deq; let exReq = DmaExtendRequest { startAddr : req.startAddr, - endAddr : req.startAddr + req.length - 1, + endAddr : req.startAddr + zeroExtend(req.length - 1), length : req.length, tag : 0 }; @@ -268,7 +267,7 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); let token <- cBuffer.reserve.get; let exReq = DmaExtendRequest { startAddr: req.startAddr, - endAddr : req.startAddr + req.length - 1, + endAddr : req.startAddr + zeroExtend(req.length - 1), length : req.length, tag : convertSlotTokenToTag(token, pathIdx) }; @@ -339,7 +338,7 @@ module mkC2HWriteCore#(DmaPathNo pathIdx)(C2HWriteCore); let wrReq = wrReqInFifo.first; let exReq = DmaExtendRequest { startAddr : wrReq.startAddr, - endAddr : wrReq.startAddr + wrReq.length - 1, + endAddr : wrReq.startAddr + zeroExtend(wrReq.length - 1), length : wrReq.length, tag : 0 }; @@ -360,7 +359,7 @@ module mkC2HWriteCore#(DmaPathNo pathIdx)(C2HWriteCore); chunkSplit.chunkReqFifoOut.deq; let exReq = DmaExtendRequest { startAddr: chunkReq.startAddr, - endAddr : chunkReq.startAddr + chunkReq.length - 1, + endAddr : chunkReq.startAddr + zeroExtend(chunkReq.length - 1), length : chunkReq.length, tag : convertSlotTokenToTag(tagReg, pathIdx) }; diff --git a/src/DmaH2CPipe.bsv b/src/DmaH2CPipe.bsv index d7dd6f6..903a64b 100644 --- a/src/DmaH2CPipe.bsv +++ b/src/DmaH2CPipe.bsv @@ -1,5 +1,6 @@ import FIFOF::*; import Vector::*; +import RegFile::*; import SemiFifo::*; import PrimUtils::*; @@ -16,9 +17,8 @@ typedef 4 IDEA_FIRST_BE_HIGH_VALID_PTR_OF_CSR; interface DmaH2CPipe; // User Logic Ifc - interface FifoOut#(DmaRequest) reqFifoOut; - interface FifoIn#(DmaCsrValue) rdDataFifoIn; - interface FifoOut#(DmaCsrValue) wrDataFifoOut; + interface FifoOut#(CsrRequest) csrReqFifoOut; + interface FifoIn#(CsrResponse) csrRespFifoIn; // Pcie Adapter Ifc interface FifoIn#(DataStream) tlpDataFifoIn; interface FifoOut#(DataStream) tlpDataFifoOut; @@ -31,11 +31,10 @@ module mkDmaH2CPipe(DmaH2CPipe); FIFOF#(DataStream) tlpInFifo <- mkFIFOF; FIFOF#(DataStream) tlpOutFifo <- mkFIFOF; - FIFOF#(DmaRequest) reqOutFifo <- mkFIFOF; - FIFOF#(DmaCsrValue) dataInFifo <- mkFIFOF; - FIFOF#(DmaCsrValue) dataOutFifo <- mkFIFOF; + FIFOF#(CsrRequest) reqOutFifo <- mkFIFOF; + FIFOF#(CsrResponse) respInFifo <- mkFIFOF; - FIFOF#(Tuple2#(DmaRequest, PcieCompleterRequestDescriptor)) pendingFifo <- mkSizedFIFOF(valueOf(CMPL_NPREQ_INFLIGHT_NUM)); + FIFOF#(Tuple2#(CsrRequest, PcieCompleterRequestDescriptor)) pendingFifo <- mkSizedFIFOF(valueOf(CMPL_NPREQ_INFLIGHT_NUM)); function PcieCompleterRequestDescriptor getDescriptorFromFirstBeat(DataStream stream); return unpack(truncate(stream.data)); @@ -50,7 +49,10 @@ module mkDmaH2CPipe(DmaH2CPipe); DataBytePtr csrBytes = fromInteger(valueOf(TDiv#(DMA_CSR_DATA_WIDTH, BYTE_WIDTH))); + // This function returns DW addr pointing to inner registers, where byteAddr = DWordAddr << 2 + // The registers in the hw are all of 32bit DW type function DmaCsrAddr getCsrAddrFromCqDescriptor(PcieCompleterRequestDescriptor descriptor); + // Only care about low bits, because the offset is allocated. let addr = getAddrLowBits(zeroExtend(descriptor.address), descriptor.barAperture); // Only support one BAR now, no operation if (descriptor.barId == 0) begin @@ -59,7 +61,7 @@ module mkDmaH2CPipe(DmaH2CPipe); else begin addr = 0; end - return truncate(addr << valueOf(TSub#(DMA_MEM_ADDR_WIDTH, DES_ADDR_WIDTH))); + return truncate(addr); endfunction rule parseTlp; @@ -70,34 +72,33 @@ module mkDmaH2CPipe(DmaH2CPipe); let descriptor = getDescriptorFromFirstBeat(stream); case (descriptor.reqType) fromInteger(valueOf(MEM_WRITE_REQ)): begin - $display("SIM INFO @ mkDmaH2CPipe: MemWrite Detect!"); + $display($time, "ns SIM INFO @ mkDmaH2CPipe: MemWrite Detect!"); let firstData = getDataFromFirstBeat(stream); DmaCsrValue wrValue = truncate(firstData); let wrAddr = getCsrAddrFromCqDescriptor(descriptor); if (descriptor.dwordCnt == fromInteger(valueOf(IDEA_CQ_CSR_DWORD_CNT))) begin - $display("SIM INFO @ mkDmaH2CPipe: Valid wrReq with Addr %h, data %h", wrAddr, wrValue); - let req = DmaRequest { - startAddr : zeroExtend(wrAddr), - length : zeroExtend(csrBytes), + $display($time, "ns SIM INFO @ mkDmaH2CPipe: Valid wrReq with Addr %h, data %h", wrAddr << valueOf(TLog#(DWORD_BYTES)), wrValue); + let req = CsrRequest { + addr : wrAddr, + value : wrValue, isWrite : True }; reqOutFifo.enq(req); - dataOutFifo.enq(wrValue); end else begin - $display("SIM INFO @ mkDmaH2CPipe: Invalid wrReq with Addr %h, data %h", wrAddr, wrValue); + $display($time, "ns SIM INFO @ mkDmaH2CPipe: Invalid wrReq with Addr %h, data %h", wrAddr << valueOf(TLog#(DWORD_BYTES)), wrValue); illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; end end fromInteger(valueOf(MEM_READ_REQ)): begin - $display("SIM INFO @ mkDmaH2CPipe: MemRead Detect!"); + $display($time, "ns SIM INFO @ mkDmaH2CPipe: MemRead Detect!"); let rdAddr = getCsrAddrFromCqDescriptor(descriptor); - let req = DmaRequest{ - startAddr : zeroExtend(rdAddr), - length : zeroExtend(csrBytes), + let req = CsrRequest{ + addr : rdAddr, + value : zeroExtend(csrBytes), isWrite : False }; - $display("SIM INFO @ mkDmaH2CPipe: Valid rdReq with Addr %h", rdAddr); + $display($time, "ns SIM INFO @ mkDmaH2CPipe: Valid rdReq with Addr %h", rdAddr << valueOf(TLog#(DWORD_BYTES))); reqOutFifo.enq(req); pendingFifo.enq(tuple2(req, descriptor)); end @@ -107,47 +108,55 @@ module mkDmaH2CPipe(DmaH2CPipe); endrule rule genTlp; - let value = dataInFifo.first; - dataInFifo.deq; + let resp = respInFifo.first; + let addr = resp.addr; + let value = resp.value; + respInFifo.deq; let {req, cqDescriptor} = pendingFifo.first; - pendingFifo.deq; - let addr = req.startAddr; - $display("SIM INFO @ mkDmaH2CPipe: Valid rdResp with Addr %h, data %h", addr, value); - let ccDescriptor = PcieCompleterCompleteDescriptor { - reserve0 : 0, - attributes : cqDescriptor.attributes, - trafficClass : cqDescriptor.trafficClass, - completerIdEn : False, - completerId : 0, - tag : cqDescriptor.tag, - requesterId : cqDescriptor.requesterId, - reserve1 : 0, - isPoisoned : False, - status : fromInteger(valueOf(DES_CC_STAUS_SUCCESS)), - dwordCnt : fromInteger(valueOf(IDEA_CC_CSR_DWORD_CNT)), - reserve2 : 0, - isLockedReadCmpl: False, - byteCnt : fromInteger(valueOf(IDEA_BYTE_CNT_OF_CSR)), - reserve3 : 0, - addrType : cqDescriptor.addrType, - lowerAddr : truncate(addr) - }; - Data data = zeroExtend(pack(ccDescriptor)); - data = data | (zeroExtend(value) << valueOf(DES_CC_DESCRIPTOR_WIDTH)); - let stream = DataStream { - data : data, - byteEn : convertBytePtr2ByteEn(csrBytes), - isFirst : True, - isLast : True - }; - tlpOutFifo.enq(stream); + if (addr == req.addr) begin + pendingFifo.deq; + $display($time, "ns SIM INFO @ mkDmaH2CPipe: Valid rdResp with Addr %h, data %h", addr, value); + let ccDescriptor = PcieCompleterCompleteDescriptor { + reserve0 : 0, + attributes : cqDescriptor.attributes, + trafficClass : cqDescriptor.trafficClass, + completerIdEn : False, + completerId : 0, + tag : cqDescriptor.tag, + requesterId : cqDescriptor.requesterId, + reserve1 : 0, + isPoisoned : False, + status : fromInteger(valueOf(DES_CC_STAUS_SUCCESS)), + dwordCnt : fromInteger(valueOf(IDEA_CC_CSR_DWORD_CNT)), + reserve2 : 0, + isLockedReadCmpl: False, + byteCnt : fromInteger(valueOf(IDEA_BYTE_CNT_OF_CSR)), + reserve3 : 0, + addrType : cqDescriptor.addrType, + lowerAddr : truncate(addr) + }; + Data data = zeroExtend(pack(ccDescriptor)); + data = data | (zeroExtend(value) << valueOf(DES_CC_DESCRIPTOR_WIDTH)); + let stream = DataStream { + data : data, + byteEn : convertBytePtr2ByteEn(csrBytes), + isFirst : True, + isLast : True + }; + tlpOutFifo.enq(stream); + end + else begin + $display($time, "ns SIM ERROR @ mkDmaH2CPipe: InValid rdResp with Addr %h, data %h and Expect Addr %h", addr, value, req.addr); + end endrule // User Logic Ifc - interface reqFifoOut = convertFifoToFifoOut(reqOutFifo); - interface rdDataFifoIn = convertFifoToFifoIn(dataInFifo); - interface wrDataFifoOut = convertFifoToFifoOut(dataOutFifo); + interface csrReqFifoOut = convertFifoToFifoOut(reqOutFifo); + interface csrRespFifoIn = convertFifoToFifoIn(respInFifo); // Pcie Adapter Ifc interface tlpDataFifoIn = convertFifoToFifoIn(tlpInFifo); interface tlpDataFifoOut = convertFifoToFifoOut(tlpOutFifo); endmodule + + + diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index 0d70ea2..0db6ed8 100755 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -8,11 +8,13 @@ import PcieDescriptorTypes::*; typedef PCIE_AXIS_DATA_WIDTH DATA_WIDTH; typedef 64 DMA_MEM_ADDR_WIDTH; +typedef 32 DMA_REQ_LEN_WIDTH; typedef 32 DMA_CSR_ADDR_WIDTH; typedef 32 DMA_CSR_DATA_WIDTH; typedef Bit#(DMA_MEM_ADDR_WIDTH) DmaMemAddr; +typedef Bit#(DMA_REQ_LEN_WIDTH) DmaReqLen; typedef Bit#(DMA_CSR_ADDR_WIDTH) DmaCsrAddr; typedef Bit#(DMA_CSR_DATA_WIDTH) DmaCsrValue; @@ -57,17 +59,28 @@ typedef 2'b11 MaxByteModDword; typedef struct { DmaMemAddr startAddr; - DmaMemAddr length; + DmaReqLen length; Bool isWrite; } DmaRequest deriving(Bits, Bounded, Eq); typedef struct { DmaMemAddr startAddr; DmaMemAddr endAddr; - DmaMemAddr length; + DmaReqLen length; Tag tag; } DmaExtendRequest deriving(Bits, Bounded, Eq); +typedef struct { + DmaCsrAddr addr; + DmaCsrValue value; + Bool isWrite; +} CsrRequest deriving(Bits, Bounded, Eq); + +typedef struct { + DmaCsrAddr addr; + DmaCsrValue value; +} CsrResponse deriving(Bits, Bounded, Eq); + typedef enum { DMA_RX, DMA_TX @@ -178,6 +191,66 @@ typedef Bit#(SLOT_TOKEN_WIDTH) SlotToken; typedef 16 SLOT_PER_PATH; typedef TAdd#(1, TDiv#(BUS_BOUNDARY, BYTE_EN_WIDTH)) MAX_STREAM_NUM_PER_COMPLETION; +// Internal Registers +/* Block 1 - DMA inner Ctrl Regs + * Block 2 - Addr Table Lo Addr Path0 + * Block 3 - Addr Table Hi Addr Path0 + * Block 4 - Addr Table Lo Addr Path1 + * Block 5 - Addr Table Hi Addr Path1 + * Block 6 ~ 7 - Reserved Or External Modules Use + * 4K Boundary + * Block 8 ~ N - External Modules Use + */ +typedef 512 DMA_INTERNAL_REG_BLOCK; +typedef TLog#(DMA_INTERNAL_REG_BLOCK) DMA_INTERNAL_REG_BLOCK_WIDTH; + +typedef 16 DMA_INTERNAL_REG_BLOCK_NUM; +typedef Bit#(TLog#(DMA_INTERNAL_REG_BLOCK_NUM)) DmaRegBlockIdx; + +typedef TMul#(DMA_INTERNAL_REG_BLOCK, 1) DMA_PA_TABLE0_OFFSET; +typedef TMul#(DMA_INTERNAL_REG_BLOCK, 3) DMA_PA_TABLE1_OFFSET; +typedef TMul#(DMA_INTERNAL_REG_BLOCK, 5) DMA_EX_REG_OFFSET; + +// Control Reg offset of Block 0 +typedef Bit#(TLog#(DMA_INTERNAL_REG_BLOCK)) DmaRegIndex; +typedef 16 DMA_USING_REG_LEN; + +typedef 0 REG_DESC_ADDR_LO_0; +typedef 1 REG_DESC_ADDR_HI_0; +typedef 2 REG_DESC_LEN_0; +typedef 3 REG_DESC_CNTL_0; // Doorbell 0 + +typedef 4 REG_DESC_ADDR_LO_1; +typedef 5 REG_DESC_ADDR_HI_1; +typedef 6 REG_DESC_LEN_1; +typedef 7 REG_DESC_CNTL_1; // Doorbell 1 + +typedef 8 REG_FLAG_ADDR_LO_0; // request status write back address +typedef 9 REG_FLAG_ADDR_HI_0; + +typedef 10 REG_FLAG_ADDR_LO_1; // request status write back address +typedef 11 REG_FLAG_ADDR_HI_1; + +typedef 12 REG_VA_HEADER_LO_0; +typedef 13 REG_VA_HEADER_HI_0; +typedef 14 REG_VA_HEADER_LO_1; +typedef 15 REG_VA_HEADER_HI_1; + +// VA-PA Table, allow 512 VA-PA Page Elements, i.e. 2M(4K page) or 1G(2M huge page, recommend configuration) +typedef DMA_INTERNAL_REG_BLOCK PA_NUM; +typedef TMul#(PA_NUM, 2) DMA_PHY_ADDR_REG_LEN; + +typedef Bit#(TLog#(PA_NUM)) PaBramAddr; +typedef 2 PA_TABLE0_BLOCK_OFFSET; +typedef 4 PA_TABLE1_BLOCK_OFFSET; + +typedef 4096 PAGE_SIZE; +typedef TLog#(PAGE_SIZE) PAGE_SIZE_WIDTH; + +typedef 'h200000 HUGE_PAGE_SIZE; +typedef TLog#(HUGE_PAGE_SIZE) HUGE_PAGE_SIZE_WIDTH; + + diff --git a/src/DmaUtils.bsv b/src/DmaUtils.bsv index 8916e64..9a1525f 100644 --- a/src/DmaUtils.bsv +++ b/src/DmaUtils.bsv @@ -31,32 +31,31 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); FIFOF#(DmaExtendRequest) inputFifo <- mkFIFOF; FIFOF#(DmaRequest) outputFifo <- mkFIFOF; - FIFOF#(Tuple2#(DmaExtendRequest, DmaMemAddr)) pipeFifo <- mkFIFOF; - // FIFOF#(DmaMemAddr) tlpCntFifo <- mkSizedFIFOF(valueOf(CHUNK_COMPUTE_LATENCY)); + FIFOF#(Tuple2#(DmaExtendRequest, DmaReqLen)) pipeFifo <- mkFIFOF; Reg#(DmaMemAddr) newChunkPtrReg <- mkReg(0); - Reg#(DmaMemAddr) totalLenRemainReg <- mkReg(0); + Reg#(DmaReqLen) totalLenRemainReg <- mkReg(0); Reg#(Bool) isSplittingReg <- mkReg(False); - Reg#(DmaMemAddr) tlpMaxSizeReg <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE))); + Reg#(DmaReqLen) tlpMaxSizeReg <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE))); Reg#(TlpPayloadSizeWidth) tlpMaxSizeWidthReg <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH))); - function Bool hasBoundary(DmaExtendRequest request); - let highIdx = request.endAddr >> tlpMaxSizeWidthReg; - let lowIdx = request.startAddr >> tlpMaxSizeWidthReg; + function Bool has4KBoundary(DmaExtendRequest request); + let highIdx = request.endAddr >> valueOf(TLog#(BUS_BOUNDARY)); + let lowIdx = request.startAddr >> valueOf(TLog#(BUS_BOUNDARY)); return (highIdx > lowIdx); endfunction - function DmaMemAddr getTlpCnts(DmaExtendRequest request); + function Bool hasBoundary(DmaExtendRequest request); let highIdx = request.endAddr >> tlpMaxSizeWidthReg; let lowIdx = request.startAddr >> tlpMaxSizeWidthReg; - return (highIdx - lowIdx + 1); + return (highIdx > lowIdx); endfunction - function DmaMemAddr getOffset(DmaExtendRequest request); - // MPS - startAddr % MPS, MPS means MRRS when the module is set to RX mode - DmaMemAddr remainderOfMps = zeroExtend(TlpPayloadSize'(request.startAddr[tlpMaxSizeWidthReg-1:0])); - DmaMemAddr offsetOfMps = tlpMaxSizeReg - remainderOfMps; + function DmaReqLen getOffset(DmaExtendRequest request); + // offset = MPS - startAddr % MPS + DmaReqLen remainderOfMps = zeroExtend(TlpPayloadSize'(request.startAddr[tlpMaxSizeWidthReg-1:0])); + DmaReqLen offsetOfMps = tlpMaxSizeReg - remainderOfMps; return offsetOfMps; endfunction @@ -64,11 +63,14 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); let request = inputFifo.first; inputFifo.deq; let offset = getOffset(request); - let firstLen = (request.length > tlpMaxSizeReg) ? tlpMaxSizeReg : request.length; - let firstChunkLen = hasBoundary(request) ? offset : firstLen; + let firstChunkLen = tlpMaxSizeReg; + if (request.length > tlpMaxSizeReg || has4KBoundary(request)) begin + firstChunkLen = offset; + end + else begin + firstChunkLen = request.length; + end pipeFifo.enq(tuple2(request, firstChunkLen)); - // let tlpCnt = getTlpCnts(request); - // tlpCntFifo.enq(tlpCnt); endrule rule execChunkCompute; @@ -91,7 +93,7 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); length : tlpMaxSizeReg, isWrite : False }); - newChunkPtrReg <= newChunkPtrReg + tlpMaxSizeReg; + newChunkPtrReg <= newChunkPtrReg + zeroExtend(tlpMaxSizeReg); totalLenRemainReg <= totalLenRemainReg - tlpMaxSizeReg; end end @@ -107,14 +109,13 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); if (!isSplittingNextCycle) begin pipeFifo.deq; end - newChunkPtrReg <= request.startAddr + firstChunkLen; + newChunkPtrReg <= request.startAddr + zeroExtend(firstChunkLen); totalLenRemainReg <= remainderLength; end endrule interface dmaRequestFifoIn = convertFifoToFifoIn(inputFifo); interface chunkRequestFifoOut = convertFifoToFifoOut(outputFifo); - // interface chunkCntFifoOut = convertFifoToFifoOut(tlpCntFifo); interface Put maxReadReqSize; method Action put (Tuple2#(TlpPayloadSize, TlpPayloadSizeWidth) mrrsCfg); @@ -149,16 +150,22 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); StreamSplit firstChunkSplitor <- mkStreamSplit; - Reg#(DmaMemAddr) tlpMaxSizeReg <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE))); + Reg#(DmaReqLen) tlpMaxSizeReg <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE))); Reg#(TlpPayloadSizeWidth) tlpMaxSizeWidthReg <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH))); Reg#(DataBeats) tlpMaxBeatsReg <- mkReg(fromInteger(valueOf(TDiv#(DEFAULT_TLP_SIZE, BYTE_EN_WIDTH)))); - Reg#(Bool) isInProcReg <- mkReg(False); - Reg#(DataBeats) beatsReg <- mkReg(0); + Reg#(Bool) isInProcReg <- mkReg(False); + Reg#(Bool) isInSplitReg <- mkReg(False); + Reg#(DataBeats) beatsReg <- mkReg(0); Reg#(DmaMemAddr) nextStartAddrReg <- mkReg(0); - Reg#(DmaMemAddr) remainLenReg <- mkReg(0); - + Reg#(DmaReqLen) remainLenReg <- mkReg(0); + + function Bool has4KBoundary(DmaExtendRequest request); + let highIdx = request.endAddr >> valueOf(TLog#(BUS_BOUNDARY)); + let lowIdx = request.startAddr >> valueOf(TLog#(BUS_BOUNDARY)); + return (highIdx > lowIdx); + endfunction function Bool hasBoundary(DmaExtendRequest request); let highIdx = request.endAddr >> tlpMaxSizeWidthReg; @@ -166,10 +173,10 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); return (highIdx > lowIdx); endfunction - function DmaMemAddr getOffset(DmaExtendRequest request); + function DmaReqLen getOffset(DmaExtendRequest request); // MPS - startAddr % MPS, MPS means MRRS when the module is set to RX mode - DmaMemAddr remainderOfMps = zeroExtend(TlpPayloadSize'(request.startAddr[tlpMaxSizeWidthReg-1:0])); - DmaMemAddr offsetOfMps = tlpMaxSizeReg - remainderOfMps; + DmaReqLen remainderOfMps = zeroExtend(TlpPayloadSize'(request.startAddr[tlpMaxSizeWidthReg-1:0])); + DmaReqLen offsetOfMps = tlpMaxSizeReg - remainderOfMps; return offsetOfMps; endfunction @@ -182,9 +189,14 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); let stream = dataInFifo.first; dataInFifo.deq; let offset = getOffset(request); - let firstLen = (request.length > tlpMaxSizeReg) ? tlpMaxSizeReg : request.length; - let firstChunkLen = hasBoundary(request) ? offset : firstLen; - // $display($time, "ns SIM INFO @ mkChunkSplit: get first chunkLen, offset %d, remainder %d", offset, TlpPayloadSize'(request.startAddr[tlpMaxSizeWidthReg-1:0])); + let firstChunkLen = tlpMaxSizeReg; + if (request.length > tlpMaxSizeReg || has4KBoundary(request)) begin + firstChunkLen = offset; + end + else begin + firstChunkLen = request.length; + end + $display($time, "ns SIM INFO @ mkChunkSplit: get first chunkLen, offset %d, remainder %d", offset, TlpPayloadSize'(request.startAddr[tlpMaxSizeWidthReg-1:0])); firstChunkSplitor.splitLocationFifoIn.enq(unpack(truncate(firstChunkLen))); let firstReq = DmaRequest { startAddr : request.startAddr, @@ -222,46 +234,51 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); end // Start of a TLP, get Req Infos and tag isFirst=True if (beatsReg == 0) begin - // $display($time, "ns SIM INFO @ mkChunkSplit: start a new chunk, next addr %d, remainBytesLen %d", nextStartAddrReg, remainLenReg); stream.isFirst = True; + let nextStartAddr = nextStartAddrReg; + let remainLen = remainLenReg; // The first TLP of chunks - if (firstReqPipeFifo.notEmpty) begin + if (firstReqPipeFifo.notEmpty && !isInSplitReg) begin let chunkReq = firstReqPipeFifo.first; let oriReq = inputReqPipeFifo.first; firstReqPipeFifo.deq; inputReqPipeFifo.deq; if (chunkReq.length == oriReq.length) begin - nextStartAddrReg <= 0; - remainLenReg <= 0; + nextStartAddr = 0; + remainLen = 0; end else begin - nextStartAddrReg <= oriReq.startAddr + chunkReq.length; - remainLenReg <= oriReq.length - chunkReq.length; + nextStartAddr = oriReq.startAddr + zeroExtend(chunkReq.length); + remainLen = oriReq.length - chunkReq.length; end reqOutFifo.enq(chunkReq); end // The following chunks else begin let chunkReq = DmaRequest { - startAddr: nextStartAddrReg, + startAddr: nextStartAddr, length : tlpMaxSizeReg, isWrite : True }; - if (remainLenReg == 0) begin + if (!isInSplitReg) begin // Do nothing end - else if (remainLenReg <= tlpMaxSizeReg) begin - nextStartAddrReg <= 0; - remainLenReg <= 0; - chunkReq.length = remainLenReg; + else if (remainLen <= tlpMaxSizeReg) begin + chunkReq.length = remainLen; reqOutFifo.enq(chunkReq); + nextStartAddr = 0; + remainLen = 0; end else begin - nextStartAddrReg <= nextStartAddrReg + tlpMaxSizeReg; - remainLenReg <= remainLenReg - tlpMaxSizeReg; + nextStartAddr = nextStartAddr + zeroExtend(tlpMaxSizeReg); + remainLen = remainLen - tlpMaxSizeReg; reqOutFifo.enq(chunkReq); end end + $display($time, "ns SIM INFO @ mkChunkSplit: debug, next addr %d, remainBytesLen %d", nextStartAddr, remainLen); + nextStartAddrReg <= nextStartAddr; + remainLenReg <= remainLen; + isInSplitReg <= (remainLen != 0); end chunkOutFifo.enq(stream); @@ -332,7 +349,7 @@ module mkRqDescriptorGenerator#(Bool isWrite)(RqDescriptorGenerator); lastByteEn = 0; end byteEnOutFifo.enq(tuple2(firstByteEn, lastByteEn)); - // $display($time, "ns SIM INFO @ mkRqDescriptorGenerator: generate, dwcnt %d, start:%d, end:%d, byteCnt:%d ", dwCnt, exReq.startAddr, exReq.endAddr, exReq.length); + $display($time, "ns SIM INFO @ mkRqDescriptorGenerator: generate desc, tag %d, dwcnt %d, start:%d, end:%d, byteCnt:%d ", exReq.tag, dwCnt, exReq.startAddr, exReq.endAddr, exReq.length); endrule interface exReqFifoIn = convertFifoToFifoIn(exReqInFifo); diff --git a/src/DmaWrapper.bsv b/src/DmaWrapper.bsv index aa23fb5..6aaaa68 100755 --- a/src/DmaWrapper.bsv +++ b/src/DmaWrapper.bsv @@ -1,8 +1,10 @@ import FIFOF::*; +import FIFO::*; import Vector::*; import Connectable :: *; import DReg::*; import GetPut::*; +import BRAMFIFO::*; import SemiFifo::*; import BusConversion::*; @@ -15,18 +17,18 @@ import DmaTypes::*; import DmaUtils::*; import DmaC2HPipe::*; import DmaH2CPipe::*; +import SimpleModeUtils::*; // For Bsv User - +// Native Blue-DMA Interface, the addrs in the req should be pa interface DmaController; // User Logic Ifc interface Vector#(DMA_PATH_NUM, FifoIn#(DataStream)) c2hDataFifoIn; interface Vector#(DMA_PATH_NUM, FifoOut#(DataStream)) c2hDataFifoOut; interface Vector#(DMA_PATH_NUM, FifoIn#(DmaRequest)) c2hReqFifoIn; - interface FifoIn#(DmaCsrValue) h2cDataFifoIn; - interface FifoOut#(DmaCsrValue) h2cDataFifoOut; - interface FifoOut#(DmaRequest) h2cReqFifoOut; + interface FifoIn#(CsrResponse) h2cRespFifoIn; + interface FifoOut#(CsrRequest) h2cReqFifoOut; // Raw PCIe interfaces, connected to the Xilinx PCIe IP (* prefix = "" *)interface RawXilinxPcieIp rawPcie; @@ -87,9 +89,8 @@ module mkDmaController(DmaController); interface c2hDataFifoIn = c2hDataInIfc; interface c2hDataFifoOut = c2hDataOutIfc; interface c2hReqFifoIn = c2hReqInIfc; - interface h2cDataFifoIn = h2cPipe.rdDataFifoIn; - interface h2cDataFifoOut = h2cPipe.wrDataFifoOut; - interface h2cReqFifoOut = h2cPipe.reqFifoOut; + interface h2cRespFifoIn = h2cPipe.csrRespFifoIn; + interface h2cReqFifoOut = h2cPipe.csrReqFifoOut; // Raw PCIe Ifc interface RawXilinxPcieIp rawPcie; @@ -112,27 +113,37 @@ interface RawDmaReqSlave; method Action validReq( (* port = "valid" *) Bool valid, (* port = "start_addr" *) DmaMemAddr startAddr, - (* port = "byte_cnt" *) DmaMemAddr length, + (* port = "byte_cnt" *) DmaReqLen length, (* port = "is_write" *) Bool isWrite ); (* result = "ready" *) method Bool ready; endinterface (* always_ready, always_enabled *) -interface RawDmaCsrReqMaster; +interface RawDmaCsrMaster; (* result = "address" *) method DmaCsrAddr address; + (* result = "value" *) method DmaCsrValue value; (* result = "is_write" *) method Bool isWrite; (* result = "valid" *) method Bool valid; (* prefix = "" *) method Action ready((* port = "ready" *) Bool rdy); endinterface +(* always_ready, always_enabled *) +interface RawDmaCsrSlave; + (* prefix = "" *) + method Action validResp( + (* port = "valid" *) Bool valid, + (* port = "address" *) DmaCsrAddr address, + (* port = "value" *) DmaCsrValue value + ); + (* result = "ready" *) method Bool ready; +endinterface + typedef TDiv#(DATA_WIDTH, BYTE_WIDTH) DMA_DATA_KEEP_WIDTH; typedef 1 DMA_DATA_USER_WIDTH; typedef RawAxiStreamSlave#(DMA_DATA_KEEP_WIDTH, DMA_DATA_USER_WIDTH) RawDmaDataSlave; typedef RawAxiStreamMaster#(DMA_DATA_KEEP_WIDTH, DMA_DATA_USER_WIDTH) RawDmaDataMaster; typedef AxiStream#(DMA_DATA_KEEP_WIDTH, DMA_DATA_USER_WIDTH) DmaAxiStream; -typedef RawBusMaster#(DmaCsrValue) RawDmaCsrMaster; -typedef RawBusSlave#(DmaCsrValue) RawDmaCsrSlave; module mkFifoInToRawDmaDataSlave#(FifoIn#(DataStream) pipe)(RawDmaDataSlave); Reg#(Bool) isFirstReg <- mkReg(True); @@ -181,7 +192,7 @@ module mkFifoInToRawDmaReqSlave#(FifoIn#(DmaRequest) pipe)(RawDmaReqSlave); method Action validReq( Bool valid, DmaMemAddr startAddr, - DmaMemAddr length, + DmaReqLen length, Bool isWrite ); let request = DmaRequest { @@ -194,9 +205,10 @@ module mkFifoInToRawDmaReqSlave#(FifoIn#(DmaRequest) pipe)(RawDmaReqSlave); method Bool ready = rawBus.ready; endmodule -module mkFifoOutToRawCsrReqMaster#(FifoOut#(DmaRequest) pipe)(RawDmaCsrReqMaster); +module mkFifoOutToRawCsrMaster#(FifoOut#(CsrRequest) pipe)(RawDmaCsrMaster); let rawBus <- mkFifoOutToRawBusMaster(pipe); - method DmaCsrAddr address = truncate(rawBus.data.startAddr); + method DmaCsrAddr address = rawBus.data.addr; + method DmaCsrValue value = rawBus.data.value; method Bool isWrite = rawBus.data.isWrite; method Bool valid = rawBus.valid; method Action ready(Bool rdy); @@ -204,8 +216,25 @@ module mkFifoOutToRawCsrReqMaster#(FifoOut#(DmaRequest) pipe)(RawDmaCsrReqMaster endmethod endmodule +module mkFifoInToRawCsrClient#(FifoIn#(CsrResponse) pipe)(RawDmaCsrSlave); + let rawBus <- mkFifoInToRawBusSlave(pipe); + method Action validResp( + Bool valid, + DmaCsrAddr addr, + DmaCsrValue value + ); + let resp = CsrResponse { + addr : addr, + value : value + }; + rawBus.validData(valid, resp); + endmethod + method Bool ready = rawBus.ready; +endmodule + +// Bypass Mode // Raw verilog Wrapper of Dma User Logic Ifc -interface RawDmaController; +interface RawBypassDmaController; // User Logic Ifc (* prefix = "s_axis_c2h_0" *) interface RawDmaDataSlave dmaWrData0; (* prefix = "s_desc_c2h_0" *) interface RawDmaReqSlave dmaDesc0; @@ -215,16 +244,15 @@ interface RawDmaController; (* prefix = "s_desc_c2h_1" *) interface RawDmaReqSlave dmaDesc1; (* prefix = "m_axis_c2h_1" *) interface RawDmaDataMaster dmaRdData1; - (* prefix = "s_h2c_value" *) interface RawDmaCsrSlave dmaRdCsr; - (* prefix = "m_h2c_value" *) interface RawDmaCsrReqMaster dmaCsrDesc; - (* prefix = "m_h2c_desc" *) interface RawDmaCsrMaster dmaWrCsr; + (* prefix = "s_h2c_csr" *) interface RawDmaCsrSlave dmaCsrResp; + (* prefix = "m_h2c_csr" *) interface RawDmaCsrMaster dmaCsrReq; // Raw PCIe interfaces, connected to the Xilinx PCIe IP (* prefix = "" *) interface RawXilinxPcieIp rawPcie; endinterface (* synthesize *) -module mkRawDmaController(RawDmaController); +module mkRawBypassDmaController(RawBypassDmaController); DmaController dmac <- mkDmaController; let dmaWrData0Ifc <- mkFifoInToRawDmaDataSlave(dmac.c2hDataFifoIn[0]); @@ -235,9 +263,8 @@ module mkRawDmaController(RawDmaController); let dmaDesc1Ifc <- mkFifoInToRawDmaReqSlave(dmac.c2hReqFifoIn[1]); let dmaRdData1Ifc <- mkFifoOutToRawDmaDataMaster(dmac.c2hDataFifoOut[1]); - let dmaRdCsrIfc <- mkFifoInToRawBusSlave(dmac.h2cDataFifoIn); - let dmaWrCsrIfc <- mkFifoOutToRawBusMaster(dmac.h2cDataFifoOut); - let dmaCsrDescIfc <- mkFifoOutToRawCsrReqMaster(dmac.h2cReqFifoOut); + let csrRespIfc <- mkFifoInToRawCsrClient(dmac.h2cRespFifoIn); + let csrReqIfc <- mkFifoOutToRawCsrMaster(dmac.h2cReqFifoOut); interface dmaWrData0 = dmaWrData0Ifc; interface dmaDesc0 = dmaDesc0Ifc; @@ -245,11 +272,36 @@ module mkRawDmaController(RawDmaController); interface dmaWrData1 = dmaWrData1Ifc; interface dmaDesc1 = dmaDesc1Ifc; interface dmaRdData1 = dmaRdData1Ifc; - interface dmaRdCsr = dmaRdCsrIfc; - interface dmaCsrDesc = dmaCsrDescIfc; - interface dmaWrCsr = dmaWrCsrIfc; + interface dmaCsrResp = csrRespIfc; + interface dmaCsrReq = csrReqIfc; interface rawPcie = dmac.rawPcie; endmodule +interface RawSimpleDmaController; + // User Logic Ifc + + // Raw PCIe interfaces, connected to the Xilinx PCIe IP + (* prefix = "" *) interface RawXilinxPcieIp rawPcie; +endinterface +// Simple Mode For Read-Write Loop Testing, which has no external ports +module mkRawSimpleDmaController(RawSimpleDmaController); + DmaController dmac <- mkDmaController; + DmaSimpleCore simpleCore <- mkDmaSimpleCore; + GenericCsr dummyCsr <- mkDummyCsr; + Vector#(DMA_PATH_NUM, FIFOF#(DataStream)) dataFifo <- replicateM(mkSizedBRAMFIFOF(valueOf(BUS_BOUNDARY))); + + for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1 ) begin + mkConnection(dataFifo[pathIdx], dmac.c2hDataFifoIn[pathIdx]); + mkConnection(dmac.c2hDataFifoOut[pathIdx], dataFifo[pathIdx]); + mkConnection(dmac.c2hReqFifoIn[pathIdx], simpleCore.c2hReqFifoOut[pathIdx]); + end + + mkConnection(dmac.h2cReqFifoOut, simpleCore.reqFifoIn); + mkConnection(dmac.h2cRespFifoIn, simpleCore.respFifoOut); + mkConnection(simpleCore.externalReqFifoOut, dummyCsr.reqFifoIn); + mkConnection(simpleCore.externalRespFifoIn, dummyCsr.respFifoOut); + + interface rawPcie = dmac.rawPcie; +endmodule diff --git a/src/PcieAdapter.bsv b/src/PcieAdapter.bsv index 6f1edca..8b5c73f 100644 --- a/src/PcieAdapter.bsv +++ b/src/PcieAdapter.bsv @@ -34,6 +34,7 @@ interface RequesterAxiStreamAdapter; (* prefix = "" *) interface RawPcieRequesterComplete rawRequesterComplete; endinterface +// TODO: optimize fully-pipeline performance (* synthesize *) module mkRequesterAxiStreamAdapter(RequesterAxiStreamAdapter); ConvertDataStreamsToStraddleAxis dmaToAxisConverter <- mkConvertDataStreamsToStraddleAxis; diff --git a/src/SimpleModeUtils.bsv b/src/SimpleModeUtils.bsv new file mode 100644 index 0000000..4664147 --- /dev/null +++ b/src/SimpleModeUtils.bsv @@ -0,0 +1,312 @@ +import Vector::*; +import RegFile::*; +import GetPut::*; +import SemiFifo::*; +import FIFOF::*; +import BRAM::*; + +import DmaTypes::*; +import StreamUtils::*; + +function Bit#(TMul#(2,n)) doubleExtend(Bit#(n) lo, Bit#(n) hi) provisos(Add#(1, _a, n), Add#(_b, n, TMul#(2, n))); + return zeroExtend(lo) & (zeroExtend(hi) << valueOf(n)); +endfunction + +interface DmaSimpleCore; + // from H2C user Ifc, where the addr is already aligned to DWord + interface FifoIn#(CsrRequest) reqFifoIn; + interface FifoOut#(CsrResponse) respFifoOut; + // to external peripherals (connect to dummy reg in test) + interface FifoIn#(CsrResponse) externalRespFifoIn; + interface FifoOut#(CsrRequest) externalReqFifoOut; + // new dma descriptor (conncet to H2C user Ifc) + interface Vector#(DMA_PATH_NUM, FifoOut#(DmaRequest)) c2hReqFifoOut; +endinterface + +module mkDmaSimpleCore(DmaSimpleCore); + FIFOF#(CsrRequest) internalReqFifo <- mkFIFOF; + FIFOF#(CsrRequest) externalReqFifo <- mkFIFOF; + + FIFOF#(CsrResponse) internalRespFifo <- mkFIFOF; + FIFOF#(CsrResponse) externalRespFifo <- mkFIFOF; + FIFOF#(CsrResponse) tempRespFifo <- mkFIFOF; + + RegFile#(DmaRegIndex, DmaCsrValue) controlRegFile <- mkRegFileFull; + + Vector#(DMA_PATH_NUM, FifoOut#(DmaRequest)) c2hReqFifoOutIfc = newVector; + Vector#(DMA_PATH_NUM, PhyAddrBram) paTableBram = newVector; + for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1) begin + paTableBram[pathIdx] <- mkPhyAddrBram; + c2hReqFifoOutIfc[pathIdx] = paTableBram[pathIdx].paReqFifoOut; + end + + function DmaRegBlockIdx getRegBlockIdx (DmaCsrAddr csrAddr); + DmaRegBlockIdx idx = truncate(csrAddr >> valueOf(DMA_INTERNAL_REG_BLOCK_WIDTH)); + return idx; + endfunction + + rule map; + let req = internalReqFifo.first; + internalReqFifo.deq; + let blockIdx = getRegBlockIdx(req.addr); + DmaRegIndex regIdx = truncate(req.addr); + // Write Request + if (req.isWrite) begin + // Block 0 : DMA Inner Ctrl Regs + if (blockIdx == 0) begin + if (regIdx == fromInteger(valueOf(REG_DESC_CNTL_0))) begin + let addrLo = controlRegFile.sub(fromInteger(valueOf(REG_DESC_ADDR_LO_0))); + let addrHi = controlRegFile.sub(fromInteger(valueOf(REG_DESC_ADDR_HI_0))); + let length = controlRegFile.sub(fromInteger(valueOf(REG_DESC_LEN_0))); + let vaHeaderLo = controlRegFile.sub(fromInteger(valueOf(REG_VA_HEADER_LO_0))); + let vaHeaderHi = controlRegFile.sub(fromInteger(valueOf(REG_VA_HEADER_HI_0))); + let vaHeader = doubleExtend(vaHeaderLo, vaHeaderHi); + Bool isWrite = unpack(truncate(req.value)); + let desc = DmaRequest { + startAddr: doubleExtend(addrLo, addrHi), + length : length, + isWrite : isWrite + }; + if (desc.startAddr > 0) begin + paTableBram[0].vaReqFifoIn.enq(desc); + paTableBram[0].vaHeader.put(vaHeader); + end + end + else if (regIdx == fromInteger(valueOf(REG_DESC_CNTL_1))) begin + let addrLo = controlRegFile.sub(fromInteger(valueOf(REG_DESC_ADDR_LO_1))); + let addrHi = controlRegFile.sub(fromInteger(valueOf(REG_DESC_ADDR_HI_1))); + let length = controlRegFile.sub(fromInteger(valueOf(REG_DESC_LEN_1))); + let vaHeaderLo = controlRegFile.sub(fromInteger(valueOf(REG_VA_HEADER_LO_1))); + let vaHeaderHi = controlRegFile.sub(fromInteger(valueOf(REG_VA_HEADER_HI_1))); + let vaHeader = doubleExtend(vaHeaderLo, vaHeaderHi); + Bool isWrite = unpack(truncate(req.value)); + let desc = DmaRequest { + startAddr: doubleExtend(addrLo, addrHi), + length : length, + isWrite : isWrite + }; + if (desc.startAddr > 0) begin + paTableBram[1].vaReqFifoIn.enq(desc); + paTableBram[1].vaHeader.put(vaHeader); + end + end + // if not doorbell, write the register + else begin + controlRegFile.upd(regIdx, req.value); + end + end + // Block 1~2 : Channel 0 Va-Pa Table + else if (blockIdx <= fromInteger(valueOf(PA_TABLE0_BLOCK_OFFSET))) begin + let vaReq = CsrRequest { + addr : req.addr - fromInteger(valueOf(DMA_PA_TABLE0_OFFSET)), + value : req.value, + isWrite : True + }; + paTableBram[0].paSetFifoIn.enq(vaReq); + end + // Block 3~4 : Channel 1 Va-Pa Table + else if (blockIdx <= fromInteger(valueOf(PA_TABLE1_BLOCK_OFFSET))) begin + let vaReq = CsrRequest { + addr : req.addr - fromInteger(valueOf(DMA_PA_TABLE1_OFFSET)), + value : req.value, + isWrite : True + }; + paTableBram[1].paSetFifoIn.enq(vaReq); + end + // Block 5~N : External Peripherals Regs + else begin + req.addr = req.addr - fromInteger(valueOf(DMA_EX_REG_OFFSET)); + externalReqFifo.enq(req); + end + end + // Read Request + else begin + if (blockIdx == 0) begin + if (regIdx <= fromInteger(valueOf(DMA_USING_REG_LEN))) begin + let value = controlRegFile.sub(regIdx); + let resp = CsrResponse { + addr : req.addr, + value : value + }; + tempRespFifo.enq(resp); + end + else begin + let resp = CsrResponse { + addr : req.addr, + value : 0 + }; + tempRespFifo.enq(resp); + end + end + else if (blockIdx > fromInteger(valueOf(PA_TABLE1_BLOCK_OFFSET))) begin + req.addr = req.addr - fromInteger(valueOf(DMA_EX_REG_OFFSET)); + externalReqFifo.enq(req); + end + else begin + let resp = CsrResponse { + addr : req.addr, + value : 0 + }; + tempRespFifo.enq(resp); + end + end + endrule + + rule muxResp; + if (tempRespFifo.notEmpty) begin + tempRespFifo.deq; + internalRespFifo.enq(tempRespFifo.first); + end + else if (externalRespFifo.notEmpty) begin + externalRespFifo.deq; + let extResp = externalRespFifo.first; + extResp.addr = extResp.addr + fromInteger(valueOf(DMA_EX_REG_OFFSET)); + internalRespFifo.enq(extResp); + end + endrule + + interface reqFifoIn = convertFifoToFifoIn(internalReqFifo); + interface respFifoOut = convertFifoToFifoOut(internalRespFifo); + + interface externalReqFifoOut = convertFifoToFifoOut(externalReqFifo); + interface externalRespFifoIn = convertFifoToFifoIn(externalRespFifo); + + interface c2hReqFifoOut = c2hReqFifoOutIfc; +endmodule + +typedef 3 BRAM_LATENCY; + +interface PhyAddrBram; + // Address transfer + interface FifoIn#(DmaRequest) vaReqFifoIn; + interface FifoOut#(DmaRequest) paReqFifoOut; + // va-pa table set + interface FifoIn#(CsrRequest) paSetFifoIn; + interface Put#(DmaMemAddr) vaHeader; +endinterface + +// This module does not check if the request address is valid(in MR) +module mkPhyAddrBram(PhyAddrBram); + FIFOF#(DmaRequest) vaReqFifo <- mkFIFOF; + FIFOF#(DmaRequest) paReqFifo <- mkFIFOF; + FIFOF#(CsrRequest) paSetFifo <- mkFIFOF; + FIFOF#(DmaRequest) pendingFifo <- mkSizedFIFOF(valueOf(BRAM_LATENCY)); + + Reg#(DmaMemAddr) vaHeaderReg <- mkReg(0); + + BRAM1Port#(PaBramAddr, DmaCsrValue) phyAddrLoBram <- mkBRAM1Server(defaultValue); + BRAM1Port#(PaBramAddr, DmaCsrValue) phyAddrHiBram <- mkBRAM1Server(defaultValue); + + function Bool isLoAddr(DmaCsrAddr addr); + return unpack(addr[0]); + endfunction + + function PaBramAddr convertCsrAddrToBramAddr(DmaCsrAddr csrAddr); + let addr = csrAddr >> 1; + return truncate(addr); + endfunction + + function PaBramAddr convertDmaAddrToBramAddr(DmaMemAddr dmaAddr); + let pageIdx = (dmaAddr - vaHeaderReg) >> valueOf(HUGE_PAGE_SIZE_WIDTH); + return truncate(pageIdx); + endfunction + + rule putVaReq; + // if is setting va-pa table + if (paSetFifo.notEmpty) begin + let paSet = paSetFifo.first; + let bramAddr = convertCsrAddrToBramAddr(paSet.addr); + if (isLoAddr(paSet.addr)) begin + let bramReq = BRAMRequest { + write : True, + responseOnWrite : False, + address : bramAddr, + datain : paSet.value + }; + phyAddrLoBram.portA.request.put(bramReq); + end + else begin + let bramReq = BRAMRequest { + write : True, + responseOnWrite : False, + address : bramAddr, + datain : paSet.value + }; + phyAddrHiBram.portA.request.put(bramReq); + end + end + // if is getting phy address + else begin + let vaReq = vaReqFifo.first; + let bramReq = BRAMRequest { + write : False, + responseOnWrite : False, + address : convertDmaAddrToBramAddr(vaReq.startAddr), + datain : 0 + }; + phyAddrLoBram.portA.request.put(bramReq); + phyAddrHiBram.portA.request.put(bramReq); + pendingFifo.enq(vaReq); + end + endrule + + rule getPaReq; + let pa_lo <- phyAddrLoBram.portA.response.get; + let pa_hi <- phyAddrHiBram.portA.response.get; + let pa = zeroExtend(pa_hi << valueOf(DMA_CSR_ADDR_WIDTH)) & zeroExtend(pa_lo); + let oriReq = pendingFifo.first; + pendingFifo.deq; + oriReq.startAddr = pa; + paReqFifo.enq(oriReq); + endrule + + interface vaReqFifoIn = convertFifoToFifoIn(vaReqFifo); + interface paReqFifoOut = convertFifoToFifoOut(paReqFifo); + interface paSetFifoIn = convertFifoToFifoIn(paSetFifo); + interface Put vaHeader; + method Action put(DmaMemAddr vaHeadAddr); + vaHeaderReg <= vaHeadAddr; + endmethod + endinterface +endmodule + +typedef 12 DUMMY_ADDR_WIDTH; +typedef Bit#(DUMMY_ADDR_WIDTH) DummyAddr; + +interface GenericCsr; + interface FifoIn#(CsrRequest) reqFifoIn; + interface FifoOut#(CsrResponse) respFifoOut; +endinterface + +module mkDummyCsr(GenericCsr); + FIFOF#(CsrRequest) reqFifo <- mkFIFOF; + FIFOF#(CsrResponse) respFifo <- mkFIFOF; + FIFOF#(DmaCsrAddr) pendingFifo <- mkSizedFIFOF(valueOf(BRAM_LATENCY)); + BRAM1Port#(DummyAddr, DmaCsrValue) bram <- mkBRAM1Server(defaultValue); + + rule request; + let req = reqFifo.first; + reqFifo.deq; + let bramReq = BRAMRequest { + write : req.isWrite, + responseOnWrite : False, + address : truncate(req.addr), + datain : req.value + }; + bram.portA.request.put(bramReq); + endrule + + rule response; + let value <- bram.portA.response.get; + let addr = pendingFifo.first; + pendingFifo.deq; + let resp = CsrResponse { + addr : addr, + value : value + }; + respFifo.enq(resp); + endrule + + interface reqFifoIn = convertFifoToFifoIn(reqFifo); + interface respFifoOut = convertFifoToFifoOut(respFifo); +endmodule From 7af1a43c7ffcfe366f0fde58881d5dbacbff8d38 Mon Sep 17 00:00:00 2001 From: BIGWJZ Date: Tue, 8 Oct 2024 20:16:48 +0800 Subject: [PATCH 41/53] Add Simple Mode --- backend/Makefile | 3 +- cocotb/.gitignore | 4 ++ cocotb/Makefile | 4 +- cocotb/dma_wr_rd_tb.py | 2 +- src/DmaC2HPipe.bsv | 26 ++++++--- src/DmaH2CPipe.bsv | 52 ++++++++++++----- src/DmaTypes.bsv | 32 ++++------ src/DmaWrapper.bsv | 44 ++++++++++++-- src/PcieAdapter.bsv | 2 +- src/SimpleModeUtils.bsv | 125 +++++++++++----------------------------- src/TestUtils.bsv | 115 ++++++++++++++++++++++++++++++++++++ 11 files changed, 265 insertions(+), 144 deletions(-) create mode 100644 cocotb/.gitignore create mode 100644 src/TestUtils.bsv diff --git a/backend/Makefile b/backend/Makefile index b92fad2..2318490 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -9,7 +9,8 @@ LOGFILE ?= run.log RUNTOPHASE ?= place # synth place route all PARTNAME = xcvu13p-fhgb2104-2-i TARGETFILE ?= ../src/DmaWrapper.bsv -TOPMODULE ?= mkRawSimpleDmaController +#MODULE List: mkRawSimpleDmaController mkRawBypassDmaController mkRawTestDmaController +TOPMODULE ?= mkRawBypassDmaController export TOP = $(TOPMODULE) export RTL = $(VLOGDIR) diff --git a/cocotb/.gitignore b/cocotb/.gitignore new file mode 100644 index 0000000..dc75fb2 --- /dev/null +++ b/cocotb/.gitignore @@ -0,0 +1,4 @@ +__pycache__ +sim_build +*.v +log \ No newline at end of file diff --git a/cocotb/Makefile b/cocotb/Makefile index adb081e..d2c4b44 100644 --- a/cocotb/Makefile +++ b/cocotb/Makefile @@ -5,12 +5,12 @@ include $(ROOT_DIR)/Makefile.base VBUILD_DIR = $(BACKEND_DIR)/build VSRC_DIR = $(BACKEND_DIR)/verilog -TARGET = RawDmaController +TARGET = RawBypassDmaController TOP_MODULE = mk$(TARGET) TOP_FILE = $(TOP_MODULE).v VLOG_FILE = $(TB_DIR)/$(TOP_FILE) -TB_CASE = dma_fullypipeline +TB_CASE = dma_wr_rd TB_FILE = $(TB_CASE)_tb.py DATE = $(shell date "+%Y%m%d") LOG_FILE = $(TB_DIR)/log/$(DATE)_$(TOP_MODULE).log diff --git a/cocotb/dma_wr_rd_tb.py b/cocotb/dma_wr_rd_tb.py index 02160e2..2143e49 100644 --- a/cocotb/dma_wr_rd_tb.py +++ b/cocotb/dma_wr_rd_tb.py @@ -386,7 +386,7 @@ async def random_read_test(dut): def test_dma(): - dut = "mkRawDmaController" + dut = "mkRawBypassDmaController" module = os.path.splitext(os.path.basename(__file__))[0] toplevel = dut diff --git a/src/DmaC2HPipe.bsv b/src/DmaC2HPipe.bsv index 4e4cc8f..3902ce7 100644 --- a/src/DmaC2HPipe.bsv +++ b/src/DmaC2HPipe.bsv @@ -126,7 +126,8 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); CompletionFifo#(SLOT_PER_PATH, DataStream) cBuffer <- mkCompletionFifo(valueOf(MAX_STREAM_NUM_PER_COMPLETION)); RqDescriptorGenerator rqDescGenerator <- mkRqDescriptorGenerator(False); - Reg#(Bool) hasReadOnce <- mkReg(False); + Reg#(Bool) hasReadOnceReg <- mkReg(False); + Reg#(Bool) isStreamValidReg <- mkReg(True); Reg#(DmaReqLen) recvBytesReg <- mkReg(0); Vector#(SLOT_PER_PATH, Reg#(DmaReqLen)) chunkBytesRegs <- replicateM(mkReg(0)); @@ -142,13 +143,13 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); Bool isCompleted = False; if (sdStream.isDoubleFrame) begin PcieTlpCtlIsSopPtr isSopPtr = 0; - if (hasReadOnce) begin + if (hasReadOnceReg) begin tlpInFifo.deq; - hasReadOnce <= False; + hasReadOnceReg <= False; isSopPtr = 1; end else begin - hasReadOnce <= True; + hasReadOnceReg <= True; end stream = DataStream { data : getStraddleData(isSopPtr, sdStream.data), @@ -161,7 +162,7 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); end else begin tlpInFifo.deq; - hasReadOnce <= False; + hasReadOnceReg <= False; stream = DataStream { data : sdStream.data, byteEn : sdStream.byteEn, @@ -172,12 +173,19 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); isCompleted = sdStream.isCompleted[0]; end stream.byteEn = stream.byteEn; - reshapeStrad.streamFifoIn.enq(stream); - // $display($time, "ns SIM INFO @ mkDmaC2HReadCore%d: recv new stream from straddle adapter, tag: %d, isCompleted:%b" , pathIdx, tag, isCompleted, fshow(stream)); + Bool isStreamValid = isStreamValidReg; if (stream.isFirst) begin - tagFifo.enq(tag); - completedFifo.enq(isCompleted); + PcieRequesterCompleteDescriptor desc = unpack(truncate(stream.data)); + isStreamValid = (desc.errorcode == 0); + end + if (isStreamValid) begin + reshapeStrad.streamFifoIn.enq(stream); + if (stream.isFirst) begin + tagFifo.enq(tag); + completedFifo.enq(isCompleted); + end end + isStreamValidReg <= isStreamValid; // $display("parse from straddle", fshow(stream)); endrule diff --git a/src/DmaH2CPipe.bsv b/src/DmaH2CPipe.bsv index 903a64b..0a2aa0a 100644 --- a/src/DmaH2CPipe.bsv +++ b/src/DmaH2CPipe.bsv @@ -15,10 +15,20 @@ typedef 2 IDEA_CC_CSR_DWORD_CNT; typedef 4 IDEA_BYTE_CNT_OF_CSR; typedef 4 IDEA_FIRST_BE_HIGH_VALID_PTR_OF_CSR; +function CsrResponse getEmptyCsrResponse(); + return CsrResponse { + addr : 0, + value : 0 + }; +endfunction + interface DmaH2CPipe; - // User Logic Ifc + // DMA Internal Csr interface FifoOut#(CsrRequest) csrReqFifoOut; interface FifoIn#(CsrResponse) csrRespFifoIn; + // User Ifc + interface FifoOut#(CsrRequest) userReqFifoOut; + interface FifoIn#(CsrResponse) userRespFifoIn; // Pcie Adapter Ifc interface FifoIn#(DataStream) tlpDataFifoIn; interface FifoOut#(DataStream) tlpDataFifoOut; @@ -34,6 +44,9 @@ module mkDmaH2CPipe(DmaH2CPipe); FIFOF#(CsrRequest) reqOutFifo <- mkFIFOF; FIFOF#(CsrResponse) respInFifo <- mkFIFOF; + FIFOF#(CsrRequest) userOutFifo <- mkFIFOF; + FIFOF#(CsrResponse) userInFifo <- mkFIFOF; + FIFOF#(Tuple2#(CsrRequest, PcieCompleterRequestDescriptor)) pendingFifo <- mkSizedFIFOF(valueOf(CMPL_NPREQ_INFLIGHT_NUM)); function PcieCompleterRequestDescriptor getDescriptorFromFirstBeat(DataStream stream); @@ -54,13 +67,6 @@ module mkDmaH2CPipe(DmaH2CPipe); function DmaCsrAddr getCsrAddrFromCqDescriptor(PcieCompleterRequestDescriptor descriptor); // Only care about low bits, because the offset is allocated. let addr = getAddrLowBits(zeroExtend(descriptor.address), descriptor.barAperture); - // Only support one BAR now, no operation - if (descriptor.barId == 0) begin - addr = addr; - end - else begin - addr = 0; - end return truncate(addr); endfunction @@ -83,7 +89,12 @@ module mkDmaH2CPipe(DmaH2CPipe); value : wrValue, isWrite : True }; - reqOutFifo.enq(req); + if (descriptor.barId == 0) begin + reqOutFifo.enq(req); + end + else if (descriptor.barId == 1) begin + userOutFifo.enq(req); + end end else begin $display($time, "ns SIM INFO @ mkDmaH2CPipe: Invalid wrReq with Addr %h, data %h", wrAddr << valueOf(TLog#(DWORD_BYTES)), wrValue); @@ -99,7 +110,12 @@ module mkDmaH2CPipe(DmaH2CPipe); isWrite : False }; $display($time, "ns SIM INFO @ mkDmaH2CPipe: Valid rdReq with Addr %h", rdAddr << valueOf(TLog#(DWORD_BYTES))); - reqOutFifo.enq(req); + if (descriptor.barId == 0) begin + reqOutFifo.enq(req); + end + else if (descriptor.barId == 1) begin + userOutFifo.enq(req); + end pendingFifo.enq(tuple2(req, descriptor)); end default: illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; @@ -108,10 +124,17 @@ module mkDmaH2CPipe(DmaH2CPipe); endrule rule genTlp; - let resp = respInFifo.first; + CsrResponse resp = getEmptyCsrResponse; + if (respInFifo.notEmpty) begin + resp = respInFifo.first; + respInFifo.deq; + end + else if (userInFifo.notEmpty) begin + resp = userInFifo.first; + userInFifo.deq; + end let addr = resp.addr; let value = resp.value; - respInFifo.deq; let {req, cqDescriptor} = pendingFifo.first; if (addr == req.addr) begin pendingFifo.deq; @@ -150,9 +173,12 @@ module mkDmaH2CPipe(DmaH2CPipe); end endrule - // User Logic Ifc + // DMA Csr Ifc interface csrReqFifoOut = convertFifoToFifoOut(reqOutFifo); interface csrRespFifoIn = convertFifoToFifoIn(respInFifo); + // User Ifc + interface userReqFifoOut = convertFifoToFifoOut(userOutFifo); + interface userRespFifoIn = convertFifoToFifoIn(userInFifo); // Pcie Adapter Ifc interface tlpDataFifoIn = convertFifoToFifoIn(tlpInFifo); interface tlpDataFifoOut = convertFifoToFifoOut(tlpOutFifo); diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index 0db6ed8..5c9a802 100755 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -209,34 +209,22 @@ typedef Bit#(TLog#(DMA_INTERNAL_REG_BLOCK_NUM)) DmaRegBlockIdx; typedef TMul#(DMA_INTERNAL_REG_BLOCK, 1) DMA_PA_TABLE0_OFFSET; typedef TMul#(DMA_INTERNAL_REG_BLOCK, 3) DMA_PA_TABLE1_OFFSET; -typedef TMul#(DMA_INTERNAL_REG_BLOCK, 5) DMA_EX_REG_OFFSET; // Control Reg offset of Block 0 typedef Bit#(TLog#(DMA_INTERNAL_REG_BLOCK)) DmaRegIndex; typedef 16 DMA_USING_REG_LEN; -typedef 0 REG_DESC_ADDR_LO_0; -typedef 1 REG_DESC_ADDR_HI_0; -typedef 2 REG_DESC_LEN_0; -typedef 3 REG_DESC_CNTL_0; // Doorbell 0 +// Engine's Registers +typedef 1 REG_REQ_VA_LO_OFFSET; // DmaRequest.startAddr +typedef 2 REG_REQ_VA_HI_OFFSET; +typedef 3 REG_REQ_BYTES_OFFSET; // DmaRequest.length +typedef 4 REG_RESULT_VA_LO_OFFSET; // Done flag write back address +typedef 5 REG_RESULT_VA_HI_OFFSET; -typedef 4 REG_DESC_ADDR_LO_1; -typedef 5 REG_DESC_ADDR_HI_1; -typedef 6 REG_DESC_LEN_1; -typedef 7 REG_DESC_CNTL_1; // Doorbell 1 +typedef 0 REG_ENGINE_0_OFFSET; // Doorbell, indicates +typedef 6 REG_ENGINE_1_OFFSET; -typedef 8 REG_FLAG_ADDR_LO_0; // request status write back address -typedef 9 REG_FLAG_ADDR_HI_0; - -typedef 10 REG_FLAG_ADDR_LO_1; // request status write back address -typedef 11 REG_FLAG_ADDR_HI_1; - -typedef 12 REG_VA_HEADER_LO_0; -typedef 13 REG_VA_HEADER_HI_0; -typedef 14 REG_VA_HEADER_LO_1; -typedef 15 REG_VA_HEADER_HI_1; - -// VA-PA Table, allow 512 VA-PA Page Elements, i.e. 2M(4K page) or 1G(2M huge page, recommend configuration) +// VA-PA Table, allow 512 VA-PA Page Elements, i.e. 2M(4K page, default) or 1G(2M huge page, recommend configuration) typedef DMA_INTERNAL_REG_BLOCK PA_NUM; typedef TMul#(PA_NUM, 2) DMA_PHY_ADDR_REG_LEN; @@ -244,6 +232,8 @@ typedef Bit#(TLog#(PA_NUM)) PaBramAddr; typedef 2 PA_TABLE0_BLOCK_OFFSET; typedef 4 PA_TABLE1_BLOCK_OFFSET; +typedef 1 IS_HUGE_PAGE; + typedef 4096 PAGE_SIZE; typedef TLog#(PAGE_SIZE) PAGE_SIZE_WIDTH; diff --git a/src/DmaWrapper.bsv b/src/DmaWrapper.bsv index 6aaaa68..c5292f5 100755 --- a/src/DmaWrapper.bsv +++ b/src/DmaWrapper.bsv @@ -18,6 +18,7 @@ import DmaUtils::*; import DmaC2HPipe::*; import DmaH2CPipe::*; import SimpleModeUtils::*; +import TestUtils::*; // For Bsv User // Native Blue-DMA Interface, the addrs in the req should be pa @@ -29,6 +30,9 @@ interface DmaController; interface FifoIn#(CsrResponse) h2cRespFifoIn; interface FifoOut#(CsrRequest) h2cReqFifoOut; + + interface FifoIn#(CsrResponse) innerRespFifoIn; + interface FifoOut#(CsrRequest) innerReqFifoOut; // Raw PCIe interfaces, connected to the Xilinx PCIe IP (* prefix = "" *)interface RawXilinxPcieIp rawPcie; @@ -89,8 +93,10 @@ module mkDmaController(DmaController); interface c2hDataFifoIn = c2hDataInIfc; interface c2hDataFifoOut = c2hDataOutIfc; interface c2hReqFifoIn = c2hReqInIfc; - interface h2cRespFifoIn = h2cPipe.csrRespFifoIn; - interface h2cReqFifoOut = h2cPipe.csrReqFifoOut; + interface h2cRespFifoIn = h2cPipe.userRespFifoIn; + interface h2cReqFifoOut = h2cPipe.userReqFifoOut; + interface innerRespFifoIn = h2cPipe.csrRespFifoIn; + interface innerReqFifoOut = h2cPipe.csrReqFifoOut; // Raw PCIe Ifc interface RawXilinxPcieIp rawPcie; @@ -254,6 +260,7 @@ endinterface (* synthesize *) module mkRawBypassDmaController(RawBypassDmaController); DmaController dmac <- mkDmaController; + GenericCsr dummyCsr <- mkDummyCsr; let dmaWrData0Ifc <- mkFifoInToRawDmaDataSlave(dmac.c2hDataFifoIn[0]); let dmaDesc0Ifc <- mkFifoInToRawDmaReqSlave(dmac.c2hReqFifoIn[0]); @@ -266,6 +273,9 @@ module mkRawBypassDmaController(RawBypassDmaController); let csrRespIfc <- mkFifoInToRawCsrClient(dmac.h2cRespFifoIn); let csrReqIfc <- mkFifoOutToRawCsrMaster(dmac.h2cReqFifoOut); + mkConnection(dmac.innerReqFifoOut, dummyCsr.reqFifoIn); + mkConnection(dummyCsr.respFifoOut, dmac.innerRespFifoIn); + interface dmaWrData0 = dmaWrData0Ifc; interface dmaDesc0 = dmaDesc0Ifc; interface dmaRdData0 = dmaRdData0Ifc; @@ -284,7 +294,9 @@ interface RawSimpleDmaController; // Raw PCIe interfaces, connected to the Xilinx PCIe IP (* prefix = "" *) interface RawXilinxPcieIp rawPcie; endinterface + // Simple Mode For Read-Write Loop Testing, which has no external ports +(* synthesize *) module mkRawSimpleDmaController(RawSimpleDmaController); DmaController dmac <- mkDmaController; DmaSimpleCore simpleCore <- mkDmaSimpleCore; @@ -297,11 +309,31 @@ module mkRawSimpleDmaController(RawSimpleDmaController); mkConnection(dmac.c2hReqFifoIn[pathIdx], simpleCore.c2hReqFifoOut[pathIdx]); end - mkConnection(dmac.h2cReqFifoOut, simpleCore.reqFifoIn); - mkConnection(dmac.h2cRespFifoIn, simpleCore.respFifoOut); + mkConnection(dmac.innerReqFifoOut, simpleCore.reqFifoIn); + mkConnection(dmac.innerRespFifoIn, simpleCore.respFifoOut); + + mkConnection(dmac.h2cReqFifoOut, dummyCsr.reqFifoIn); + mkConnection(dmac.h2cRespFifoIn, dummyCsr.respFifoOut); + + interface rawPcie = dmac.rawPcie; +endmodule + +(* synthesize *) +module mkRawTestDmaController(RawSimpleDmaController); + DmaController dmac <- mkDmaController; + TestModule tm <- mkTestModule; + GenericCsr dummyCsr <- mkDummyCsr; + + for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1 ) begin + mkConnection(tm.c2hDataFifoOut[pathIdx], dmac.c2hDataFifoIn[pathIdx]); + mkConnection(tm.c2hReqFifoOut[pathIdx], dmac.c2hReqFifoIn[pathIdx]); + mkConnection(dmac.c2hDataFifoOut[pathIdx], tm.c2hDataFifoIn[pathIdx]); + end - mkConnection(simpleCore.externalReqFifoOut, dummyCsr.reqFifoIn); - mkConnection(simpleCore.externalRespFifoIn, dummyCsr.respFifoOut); + mkConnection(dmac.innerReqFifoOut, dummyCsr.reqFifoIn); + mkConnection(dummyCsr.respFifoOut, dmac.innerRespFifoIn); + mkConnection(dmac.h2cReqFifoOut, tm.h2cReqFifoIn); + mkConnection(dmac.h2cRespFifoIn, tm.h2cRespFifoOut); interface rawPcie = dmac.rawPcie; endmodule diff --git a/src/PcieAdapter.bsv b/src/PcieAdapter.bsv index 8b5c73f..914bb3c 100644 --- a/src/PcieAdapter.bsv +++ b/src/PcieAdapter.bsv @@ -407,7 +407,7 @@ module mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); let axiStream = ReqReqAxiStream { tData : sendingStream.data | pendingStream.data, tKeep : -1, - tLast : False, + tLast : True, tUser : pack(sideBand) }; axiStreamOutFifo.enq(axiStream); diff --git a/src/SimpleModeUtils.bsv b/src/SimpleModeUtils.bsv index 4664147..156352c 100644 --- a/src/SimpleModeUtils.bsv +++ b/src/SimpleModeUtils.bsv @@ -16,20 +16,14 @@ interface DmaSimpleCore; // from H2C user Ifc, where the addr is already aligned to DWord interface FifoIn#(CsrRequest) reqFifoIn; interface FifoOut#(CsrResponse) respFifoOut; - // to external peripherals (connect to dummy reg in test) - interface FifoIn#(CsrResponse) externalRespFifoIn; - interface FifoOut#(CsrRequest) externalReqFifoOut; // new dma descriptor (conncet to H2C user Ifc) interface Vector#(DMA_PATH_NUM, FifoOut#(DmaRequest)) c2hReqFifoOut; endinterface +(* synthesize *) module mkDmaSimpleCore(DmaSimpleCore); - FIFOF#(CsrRequest) internalReqFifo <- mkFIFOF; - FIFOF#(CsrRequest) externalReqFifo <- mkFIFOF; - - FIFOF#(CsrResponse) internalRespFifo <- mkFIFOF; - FIFOF#(CsrResponse) externalRespFifo <- mkFIFOF; - FIFOF#(CsrResponse) tempRespFifo <- mkFIFOF; + FIFOF#(CsrRequest) reqFifo <- mkFIFOF; + FIFOF#(CsrResponse) respFifo <- mkFIFOF; RegFile#(DmaRegIndex, DmaCsrValue) controlRegFile <- mkRegFileFull; @@ -46,21 +40,18 @@ module mkDmaSimpleCore(DmaSimpleCore); endfunction rule map; - let req = internalReqFifo.first; - internalReqFifo.deq; + let req = reqFifo.first; + reqFifo.deq; let blockIdx = getRegBlockIdx(req.addr); DmaRegIndex regIdx = truncate(req.addr); // Write Request if (req.isWrite) begin // Block 0 : DMA Inner Ctrl Regs if (blockIdx == 0) begin - if (regIdx == fromInteger(valueOf(REG_DESC_CNTL_0))) begin - let addrLo = controlRegFile.sub(fromInteger(valueOf(REG_DESC_ADDR_LO_0))); - let addrHi = controlRegFile.sub(fromInteger(valueOf(REG_DESC_ADDR_HI_0))); - let length = controlRegFile.sub(fromInteger(valueOf(REG_DESC_LEN_0))); - let vaHeaderLo = controlRegFile.sub(fromInteger(valueOf(REG_VA_HEADER_LO_0))); - let vaHeaderHi = controlRegFile.sub(fromInteger(valueOf(REG_VA_HEADER_HI_0))); - let vaHeader = doubleExtend(vaHeaderLo, vaHeaderHi); + if (regIdx == fromInteger(valueOf(REG_ENGINE_0_OFFSET))) begin + let addrLo = controlRegFile.sub(fromInteger(valueOf(TAdd#(REG_ENGINE_0_OFFSET, REG_REQ_VA_LO_OFFSET)))); + let addrHi = controlRegFile.sub(fromInteger(valueOf(TAdd#(REG_ENGINE_0_OFFSET, REG_REQ_VA_HI_OFFSET)))); + let length = controlRegFile.sub(fromInteger(valueOf(TAdd#(REG_ENGINE_0_OFFSET, REG_REQ_BYTES_OFFSET)))); Bool isWrite = unpack(truncate(req.value)); let desc = DmaRequest { startAddr: doubleExtend(addrLo, addrHi), @@ -69,16 +60,12 @@ module mkDmaSimpleCore(DmaSimpleCore); }; if (desc.startAddr > 0) begin paTableBram[0].vaReqFifoIn.enq(desc); - paTableBram[0].vaHeader.put(vaHeader); end end - else if (regIdx == fromInteger(valueOf(REG_DESC_CNTL_1))) begin - let addrLo = controlRegFile.sub(fromInteger(valueOf(REG_DESC_ADDR_LO_1))); - let addrHi = controlRegFile.sub(fromInteger(valueOf(REG_DESC_ADDR_HI_1))); - let length = controlRegFile.sub(fromInteger(valueOf(REG_DESC_LEN_1))); - let vaHeaderLo = controlRegFile.sub(fromInteger(valueOf(REG_VA_HEADER_LO_1))); - let vaHeaderHi = controlRegFile.sub(fromInteger(valueOf(REG_VA_HEADER_HI_1))); - let vaHeader = doubleExtend(vaHeaderLo, vaHeaderHi); + else if (regIdx == fromInteger(valueOf(REG_ENGINE_1_OFFSET))) begin + let addrLo = controlRegFile.sub(fromInteger(valueOf(TAdd#(REG_ENGINE_1_OFFSET, REG_REQ_VA_LO_OFFSET)))); + let addrHi = controlRegFile.sub(fromInteger(valueOf(TAdd#(REG_ENGINE_1_OFFSET, REG_REQ_VA_HI_OFFSET)))); + let length = controlRegFile.sub(fromInteger(valueOf(TAdd#(REG_ENGINE_1_OFFSET, REG_REQ_BYTES_OFFSET)))); Bool isWrite = unpack(truncate(req.value)); let desc = DmaRequest { startAddr: doubleExtend(addrLo, addrHi), @@ -87,7 +74,6 @@ module mkDmaSimpleCore(DmaSimpleCore); }; if (desc.startAddr > 0) begin paTableBram[1].vaReqFifoIn.enq(desc); - paTableBram[1].vaHeader.put(vaHeader); end end // if not doorbell, write the register @@ -105,7 +91,7 @@ module mkDmaSimpleCore(DmaSimpleCore); paTableBram[0].paSetFifoIn.enq(vaReq); end // Block 3~4 : Channel 1 Va-Pa Table - else if (blockIdx <= fromInteger(valueOf(PA_TABLE1_BLOCK_OFFSET))) begin + else begin let vaReq = CsrRequest { addr : req.addr - fromInteger(valueOf(DMA_PA_TABLE1_OFFSET)), value : req.value, @@ -113,64 +99,29 @@ module mkDmaSimpleCore(DmaSimpleCore); }; paTableBram[1].paSetFifoIn.enq(vaReq); end - // Block 5~N : External Peripherals Regs - else begin - req.addr = req.addr - fromInteger(valueOf(DMA_EX_REG_OFFSET)); - externalReqFifo.enq(req); - end end // Read Request else begin - if (blockIdx == 0) begin - if (regIdx <= fromInteger(valueOf(DMA_USING_REG_LEN))) begin - let value = controlRegFile.sub(regIdx); - let resp = CsrResponse { - addr : req.addr, - value : value - }; - tempRespFifo.enq(resp); - end - else begin - let resp = CsrResponse { - addr : req.addr, - value : 0 - }; - tempRespFifo.enq(resp); - end - end - else if (blockIdx > fromInteger(valueOf(PA_TABLE1_BLOCK_OFFSET))) begin - req.addr = req.addr - fromInteger(valueOf(DMA_EX_REG_OFFSET)); - externalReqFifo.enq(req); + if (blockIdx == 0 && regIdx <= fromInteger(valueOf(DMA_USING_REG_LEN))) begin + let value = controlRegFile.sub(regIdx); + let resp = CsrResponse { + addr : req.addr, + value : value + }; + respFifo.enq(resp); end else begin let resp = CsrResponse { addr : req.addr, value : 0 }; - tempRespFifo.enq(resp); + respFifo.enq(resp); end end endrule - rule muxResp; - if (tempRespFifo.notEmpty) begin - tempRespFifo.deq; - internalRespFifo.enq(tempRespFifo.first); - end - else if (externalRespFifo.notEmpty) begin - externalRespFifo.deq; - let extResp = externalRespFifo.first; - extResp.addr = extResp.addr + fromInteger(valueOf(DMA_EX_REG_OFFSET)); - internalRespFifo.enq(extResp); - end - endrule - - interface reqFifoIn = convertFifoToFifoIn(internalReqFifo); - interface respFifoOut = convertFifoToFifoOut(internalRespFifo); - - interface externalReqFifoOut = convertFifoToFifoOut(externalReqFifo); - interface externalRespFifoIn = convertFifoToFifoIn(externalRespFifo); - + interface reqFifoIn = convertFifoToFifoIn(reqFifo); + interface respFifoOut = convertFifoToFifoOut(respFifo); interface c2hReqFifoOut = c2hReqFifoOutIfc; endmodule @@ -182,18 +133,16 @@ interface PhyAddrBram; interface FifoOut#(DmaRequest) paReqFifoOut; // va-pa table set interface FifoIn#(CsrRequest) paSetFifoIn; - interface Put#(DmaMemAddr) vaHeader; endinterface // This module does not check if the request address is valid(in MR) +(* synthesize *) module mkPhyAddrBram(PhyAddrBram); FIFOF#(DmaRequest) vaReqFifo <- mkFIFOF; FIFOF#(DmaRequest) paReqFifo <- mkFIFOF; FIFOF#(CsrRequest) paSetFifo <- mkFIFOF; FIFOF#(DmaRequest) pendingFifo <- mkSizedFIFOF(valueOf(BRAM_LATENCY)); - Reg#(DmaMemAddr) vaHeaderReg <- mkReg(0); - BRAM1Port#(PaBramAddr, DmaCsrValue) phyAddrLoBram <- mkBRAM1Server(defaultValue); BRAM1Port#(PaBramAddr, DmaCsrValue) phyAddrHiBram <- mkBRAM1Server(defaultValue); @@ -201,13 +150,20 @@ module mkPhyAddrBram(PhyAddrBram); return unpack(addr[0]); endfunction + // The Csr Address map to Bram Address. As 0:pa_lo[0], 1:pa_hi[0], 2:pa_lo[1], 3:pa_hi[1],..., csrAddr:pa_lo[csrAddr/2]. function PaBramAddr convertCsrAddrToBramAddr(DmaCsrAddr csrAddr); let addr = csrAddr >> 1; return truncate(addr); endfunction function PaBramAddr convertDmaAddrToBramAddr(DmaMemAddr dmaAddr); - let pageIdx = (dmaAddr - vaHeaderReg) >> valueOf(HUGE_PAGE_SIZE_WIDTH); + DmaMemAddr pageIdx = 0; + if (valueOf(IS_HUGE_PAGE) > 0) begin + pageIdx = (dmaAddr) >> valueOf(HUGE_PAGE_SIZE_WIDTH); + end + else begin + pageIdx = (dmaAddr) >> valueOf(PAGE_SIZE_WIDTH); + end return truncate(pageIdx); endfunction @@ -216,22 +172,16 @@ module mkPhyAddrBram(PhyAddrBram); if (paSetFifo.notEmpty) begin let paSet = paSetFifo.first; let bramAddr = convertCsrAddrToBramAddr(paSet.addr); - if (isLoAddr(paSet.addr)) begin - let bramReq = BRAMRequest { + let bramReq = BRAMRequest { write : True, responseOnWrite : False, address : bramAddr, datain : paSet.value }; + if (isLoAddr(paSet.addr)) begin phyAddrLoBram.portA.request.put(bramReq); end else begin - let bramReq = BRAMRequest { - write : True, - responseOnWrite : False, - address : bramAddr, - datain : paSet.value - }; phyAddrHiBram.portA.request.put(bramReq); end end @@ -263,11 +213,6 @@ module mkPhyAddrBram(PhyAddrBram); interface vaReqFifoIn = convertFifoToFifoIn(vaReqFifo); interface paReqFifoOut = convertFifoToFifoOut(paReqFifo); interface paSetFifoIn = convertFifoToFifoIn(paSetFifo); - interface Put vaHeader; - method Action put(DmaMemAddr vaHeadAddr); - vaHeaderReg <= vaHeadAddr; - endmethod - endinterface endmodule typedef 12 DUMMY_ADDR_WIDTH; diff --git a/src/TestUtils.bsv b/src/TestUtils.bsv new file mode 100644 index 0000000..68e50e2 --- /dev/null +++ b/src/TestUtils.bsv @@ -0,0 +1,115 @@ +import Vector::*; +import FIFOF::*; + +import SemiFifo::*; +import DmaTypes::*; + +typedef 'hAB PSEUDO_DATA; +typedef 8 PSEUDO_DATA_WIDTH; + +function Data getPseudoData(); + Data pseudoData = fromInteger(valueOf(PSEUDO_DATA)); + for (Integer idx = 0; idx < valueOf(TDiv#(DATA_WIDTH, PSEUDO_DATA_WIDTH)); idx = idx + 1) begin + pseudoData = pseudoData | (pseudoData << idx*valueOf(PSEUDO_DATA_WIDTH)); + end + return pseudoData; +endfunction + +function DataStream getPsuedoStream (Bool isFirst, Bool isLast); + return DataStream{ + data: getPseudoData, + byteEn: -1, + isFirst: isFirst, + isLast: isLast + }; +endfunction + +interface TestModule; + interface Vector#(DMA_PATH_NUM, FifoIn#(DataStream)) c2hDataFifoIn; + interface Vector#(DMA_PATH_NUM, FifoOut#(DataStream)) c2hDataFifoOut; + interface Vector#(DMA_PATH_NUM, FifoOut#(DmaRequest)) c2hReqFifoOut; + + interface FifoIn#(CsrRequest) h2cReqFifoIn; + interface FifoOut#(CsrResponse) h2cRespFifoOut; +endinterface + +typedef 250000 ONE_SECOND_COUNTER; +// typedef 250 ONE_SECOND_COUNTER; +typedef 'hfff0 TEST_BASE_ADDR; + +typedef Bit#(2) TestState; +typedef 0 IDLE; +typedef 1 WRITING; +typedef 2 READING; + +module mkTestModule(TestModule); + Vector#(DMA_PATH_NUM, FIFOF#(DataStream)) dataInFifo <- replicateM(mkFIFOF); + Vector#(DMA_PATH_NUM, FIFOF#(DataStream)) dataOutFifo <- replicateM(mkFIFOF); + Vector#(DMA_PATH_NUM, FIFOF#(DmaRequest)) reqOutFifo <- replicateM(mkFIFOF); + FIFOF#(CsrRequest) csrReqFifo <- mkFIFOF; + FIFOF#(CsrResponse) csrRespFifo <- mkFIFOF; + + Reg#(UInt#(32)) cntReg <- mkReg(0); + Reg#(UInt#(4)) iterReg <- mkReg(0); + Reg#(TestState) stateReg <- mkReg(fromInteger(valueOf(IDLE))); + + Vector#(DMA_PATH_NUM, FifoIn#(DataStream)) c2hDataFifoInIfc = newVector; + Vector#(DMA_PATH_NUM, FifoOut#(DataStream)) c2hDataFifoOutIfc = newVector; + Vector#(DMA_PATH_NUM, FifoOut#(DmaRequest)) c2hReqFifoOutIfc = newVector; + + for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1) begin + c2hDataFifoInIfc[pathIdx] = convertFifoToFifoIn(dataInFifo[pathIdx]); + c2hDataFifoOutIfc[pathIdx] = convertFifoToFifoOut(dataOutFifo[pathIdx]); + c2hReqFifoOutIfc[pathIdx] = convertFifoToFifoOut(reqOutFifo[pathIdx]); + end + + rule counter; + if (cntReg < fromInteger(valueOf(ONE_SECOND_COUNTER))) begin + cntReg <= cntReg + 1; + end + else begin + cntReg <= 0; + end + endrule + + rule generator; + case (stateReg) + fromInteger(valueOf(IDLE)): begin + if (cntReg == fromInteger(valueOf(ONE_SECOND_COUNTER)-1)) begin + stateReg <= fromInteger(valueOf(WRITING)); + iterReg <= iterReg + 1; + let stream = getPsuedoStream(True, False); + let req = DmaRequest { + startAddr : (zeroExtend(pack(iterReg))) + fromInteger(valueOf(TEST_BASE_ADDR)), + length : 128, + isWrite : True + }; + dataOutFifo[0].enq(stream); + reqOutFifo[0].enq(req); + end + end + fromInteger(valueOf(WRITING)): begin + stateReg <= fromInteger(valueOf(READING)); + let stream = getPsuedoStream(False, True); + dataOutFifo[0].enq(stream); + end + fromInteger(valueOf(READING)): begin + stateReg <= fromInteger(valueOf(IDLE)); + let req = DmaRequest { + startAddr : (zeroExtend(pack(iterReg))) + fromInteger(valueOf(TEST_BASE_ADDR)), + length : 128, + isWrite : False + }; + reqOutFifo[0].enq(req); + end + default: stateReg <= fromInteger(valueOf(IDLE)); + endcase + endrule + + interface c2hDataFifoIn = c2hDataFifoInIfc; + interface c2hDataFifoOut = c2hDataFifoOutIfc; + interface c2hReqFifoOut = c2hReqFifoOutIfc; + + interface h2cReqFifoIn = convertFifoToFifoIn(csrReqFifo); + interface h2cRespFifoOut = convertFifoToFifoOut(csrRespFifo); +endmodule \ No newline at end of file From f9e46faa52b7921c9d7737882d2bdca816f70543 Mon Sep 17 00:00:00 2001 From: BIGWJZ Date: Sat, 12 Oct 2024 08:46:09 +0800 Subject: [PATCH 42/53] solve h2c r&w --- backend/Makefile | 2 +- src/DmaH2CPipe.bsv | 16 +++-- src/PcieAdapter.bsv | 1 + src/PcieDescriptorTypes.bsv | 1 + src/SimpleModeUtils.bsv | 71 ++++++++++++------- test/Makefile | 4 +- test/TestSimpleUtils.bsv | 138 ++++++++++++++++++++++++++++++++++++ 7 files changed, 196 insertions(+), 37 deletions(-) create mode 100644 test/TestSimpleUtils.bsv diff --git a/backend/Makefile b/backend/Makefile index 2318490..4baf722 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -10,7 +10,7 @@ RUNTOPHASE ?= place # synth place route all PARTNAME = xcvu13p-fhgb2104-2-i TARGETFILE ?= ../src/DmaWrapper.bsv #MODULE List: mkRawSimpleDmaController mkRawBypassDmaController mkRawTestDmaController -TOPMODULE ?= mkRawBypassDmaController +TOPMODULE ?= mkRawSimpleDmaController export TOP = $(TOPMODULE) export RTL = $(VLOGDIR) diff --git a/src/DmaH2CPipe.bsv b/src/DmaH2CPipe.bsv index 0a2aa0a..8c1000c 100644 --- a/src/DmaH2CPipe.bsv +++ b/src/DmaH2CPipe.bsv @@ -11,8 +11,8 @@ import PcieAdapter::*; import DmaTypes::*; typedef 1 IDEA_CQ_CSR_DWORD_CNT; -typedef 2 IDEA_CC_CSR_DWORD_CNT; -typedef 4 IDEA_BYTE_CNT_OF_CSR; +typedef 1 IDEA_CC_CSR_DWORD_CNT; +typedef 4 IDEA_CC_CSR_BYTE_CNT; typedef 4 IDEA_FIRST_BE_HIGH_VALID_PTR_OF_CSR; function CsrResponse getEmptyCsrResponse(); @@ -60,7 +60,7 @@ module mkDmaH2CPipe(DmaH2CPipe); Reg#(Bool) isInPacket <- mkReg(False); Reg#(UInt#(32)) illegalPcieReqCntReg <- mkReg(0); - DataBytePtr csrBytes = fromInteger(valueOf(TDiv#(DMA_CSR_DATA_WIDTH, BYTE_WIDTH))); + DataBytePtr csrCmplBytes = fromInteger(valueOf(TDiv#(TAdd#(DES_CC_DESCRIPTOR_WIDTH ,DMA_CSR_DATA_WIDTH), BYTE_WIDTH))); // This function returns DW addr pointing to inner registers, where byteAddr = DWordAddr << 2 // The registers in the hw are all of 32bit DW type @@ -106,7 +106,7 @@ module mkDmaH2CPipe(DmaH2CPipe); let rdAddr = getCsrAddrFromCqDescriptor(descriptor); let req = CsrRequest{ addr : rdAddr, - value : zeroExtend(csrBytes), + value : 0, isWrite : False }; $display($time, "ns SIM INFO @ mkDmaH2CPipe: Valid rdReq with Addr %h", rdAddr << valueOf(TLog#(DWORD_BYTES))); @@ -153,20 +153,22 @@ module mkDmaH2CPipe(DmaH2CPipe); dwordCnt : fromInteger(valueOf(IDEA_CC_CSR_DWORD_CNT)), reserve2 : 0, isLockedReadCmpl: False, - byteCnt : fromInteger(valueOf(IDEA_BYTE_CNT_OF_CSR)), + byteCnt : fromInteger(valueOf(IDEA_CC_CSR_BYTE_CNT)), reserve3 : 0, addrType : cqDescriptor.addrType, - lowerAddr : truncate(addr) + reserve4 : 0, + lowerAddr : truncate(addr << valueOf(TLog#(DWORD_BYTES))) // Suppose all cq/cc requests are 32 bit aligned }; Data data = zeroExtend(pack(ccDescriptor)); data = data | (zeroExtend(value) << valueOf(DES_CC_DESCRIPTOR_WIDTH)); let stream = DataStream { data : data, - byteEn : convertBytePtr2ByteEn(csrBytes), + byteEn : convertBytePtr2ByteEn(csrCmplBytes), isFirst : True, isLast : True }; tlpOutFifo.enq(stream); + // $display($time, "ns SIM INFO @ mkDmaH2CPipe: output a cmpl tlp", fshow(stream)); end else begin $display($time, "ns SIM ERROR @ mkDmaH2CPipe: InValid rdResp with Addr %h, data %h and Expect Addr %h", addr, value, req.addr); diff --git a/src/PcieAdapter.bsv b/src/PcieAdapter.bsv index 914bb3c..7d3bab3 100644 --- a/src/PcieAdapter.bsv +++ b/src/PcieAdapter.bsv @@ -116,6 +116,7 @@ module mkCompleterAxiStreamAdapter(CompleterAxiStreamAdapter); isEopPtrs : replicate(0), isEop : 1 }; + isEop.isEopPtrs[0] = truncate(convertByteEn2DwordPtr(stream.byteEn)); // Do not enable parity check in the core let sideBand = PcieCompleterCompleteSideBandFrame { parity : 0, diff --git a/src/PcieDescriptorTypes.bsv b/src/PcieDescriptorTypes.bsv index dcf26c9..d1e1748 100644 --- a/src/PcieDescriptorTypes.bsv +++ b/src/PcieDescriptorTypes.bsv @@ -86,6 +86,7 @@ typedef struct { CmplByteCnt byteCnt; ReserveBit6 reserve3; AddrType addrType; + ReserveBit1 reserve4; CCLowerAddr lowerAddr; } PcieCompleterCompleteDescriptor deriving(Bits, Eq, Bounded, FShow); diff --git a/src/SimpleModeUtils.bsv b/src/SimpleModeUtils.bsv index 156352c..966204a 100644 --- a/src/SimpleModeUtils.bsv +++ b/src/SimpleModeUtils.bsv @@ -9,7 +9,7 @@ import DmaTypes::*; import StreamUtils::*; function Bit#(TMul#(2,n)) doubleExtend(Bit#(n) lo, Bit#(n) hi) provisos(Add#(1, _a, n), Add#(_b, n, TMul#(2, n))); - return zeroExtend(lo) & (zeroExtend(hi) << valueOf(n)); + return zeroExtend(lo) | (zeroExtend(hi) << valueOf(n)); endfunction interface DmaSimpleCore; @@ -39,6 +39,35 @@ module mkDmaSimpleCore(DmaSimpleCore); return idx; endfunction + function Tuple3#(DmaRegIndex, DmaRegIndex, DmaRegIndex) getDescRegIdxs(DmaPathNo pathIdx); + DmaRegIndex baseRegIdx = 0; + Tuple3#(DmaRegIndex, DmaRegIndex, DmaRegIndex) result = tuple3(0, 0, 0); + if (pathIdx == 0) + result = tuple3(fromInteger(valueOf(TAdd#(REG_ENGINE_0_OFFSET, REG_REQ_VA_LO_OFFSET))), + fromInteger(valueOf(TAdd#(REG_ENGINE_0_OFFSET, REG_REQ_VA_HI_OFFSET))), + fromInteger(valueOf(TAdd#(REG_ENGINE_0_OFFSET, REG_REQ_BYTES_OFFSET)))); + else if (pathIdx == 1) + result = tuple3(fromInteger(valueOf(TAdd#(REG_ENGINE_1_OFFSET, REG_REQ_VA_LO_OFFSET))), + fromInteger(valueOf(TAdd#(REG_ENGINE_1_OFFSET, REG_REQ_VA_HI_OFFSET))), + fromInteger(valueOf(TAdd#(REG_ENGINE_1_OFFSET, REG_REQ_BYTES_OFFSET)))); + return result; + endfunction + + function ActionValue#(DmaRequest) genVaReq(RegFile#(DmaRegIndex, DmaCsrValue) regFile, DmaPathNo pathIdx, Bool isWrite); + actionvalue + let {addrLoIdx, addrHiIdx, lenIdx} = getDescRegIdxs(pathIdx); + let addrLo = regFile.sub(addrLoIdx); + let addrHi = regFile.sub(addrHiIdx); + let length = regFile.sub(lenIdx); + let desc = DmaRequest { + startAddr : doubleExtend(addrLo, addrHi), + length : length, + isWrite : isWrite + }; + return desc; + endactionvalue + endfunction + rule map; let req = reqFifo.first; reqFifo.deq; @@ -48,37 +77,18 @@ module mkDmaSimpleCore(DmaSimpleCore); if (req.isWrite) begin // Block 0 : DMA Inner Ctrl Regs if (blockIdx == 0) begin - if (regIdx == fromInteger(valueOf(REG_ENGINE_0_OFFSET))) begin - let addrLo = controlRegFile.sub(fromInteger(valueOf(TAdd#(REG_ENGINE_0_OFFSET, REG_REQ_VA_LO_OFFSET)))); - let addrHi = controlRegFile.sub(fromInteger(valueOf(TAdd#(REG_ENGINE_0_OFFSET, REG_REQ_VA_HI_OFFSET)))); - let length = controlRegFile.sub(fromInteger(valueOf(TAdd#(REG_ENGINE_0_OFFSET, REG_REQ_BYTES_OFFSET)))); - Bool isWrite = unpack(truncate(req.value)); - let desc = DmaRequest { - startAddr: doubleExtend(addrLo, addrHi), - length : length, - isWrite : isWrite - }; + if (regIdx == fromInteger(valueOf(REG_ENGINE_0_OFFSET)) || regIdx == fromInteger(valueOf(REG_ENGINE_1_OFFSET))) begin + DmaPathNo pathIdx = (regIdx == fromInteger(valueOf(REG_ENGINE_0_OFFSET))) ? 0 : 1; + let desc <- genVaReq(controlRegFile, pathIdx, unpack(truncate(req.value))); + $display($time, "ns SIM INFO @ mkDmaSimpleCore: doorbell%d triggerd, va:%h bytes:%d", pathIdx, desc.startAddr, desc.length); if (desc.startAddr > 0) begin - paTableBram[0].vaReqFifoIn.enq(desc); - end - end - else if (regIdx == fromInteger(valueOf(REG_ENGINE_1_OFFSET))) begin - let addrLo = controlRegFile.sub(fromInteger(valueOf(TAdd#(REG_ENGINE_1_OFFSET, REG_REQ_VA_LO_OFFSET)))); - let addrHi = controlRegFile.sub(fromInteger(valueOf(TAdd#(REG_ENGINE_1_OFFSET, REG_REQ_VA_HI_OFFSET)))); - let length = controlRegFile.sub(fromInteger(valueOf(TAdd#(REG_ENGINE_1_OFFSET, REG_REQ_BYTES_OFFSET)))); - Bool isWrite = unpack(truncate(req.value)); - let desc = DmaRequest { - startAddr: doubleExtend(addrLo, addrHi), - length : length, - isWrite : isWrite - }; - if (desc.startAddr > 0) begin - paTableBram[1].vaReqFifoIn.enq(desc); + paTableBram[pathIdx].vaReqFifoIn.enq(desc); end end // if not doorbell, write the register else begin controlRegFile.upd(regIdx, req.value); + $display($time, "ns SIM INFO @ mkDmaSimpleCore: register writing regIdx:%d value:%d", regIdx, req.value); end end // Block 1~2 : Channel 0 Va-Pa Table @@ -180,14 +190,18 @@ module mkPhyAddrBram(PhyAddrBram); }; if (isLoAddr(paSet.addr)) begin phyAddrLoBram.portA.request.put(bramReq); + $display($time, "ns SIM INFO @ mkPhyAddrBram: pa writing, va offset:%h mapping pa low:%h", bramAddr, bramReq.datain ); end else begin phyAddrHiBram.portA.request.put(bramReq); + $display($time, "ns SIM INFO @ mkPhyAddrBram: pa writing, va offset:%h mapping pa high:%h", bramAddr, bramReq.datain ); end + end // if is getting phy address else begin let vaReq = vaReqFifo.first; + vaReqFifo.deq; let bramReq = BRAMRequest { write : False, responseOnWrite : False, @@ -197,15 +211,17 @@ module mkPhyAddrBram(PhyAddrBram); phyAddrLoBram.portA.request.put(bramReq); phyAddrHiBram.portA.request.put(bramReq); pendingFifo.enq(vaReq); + $display($time, "ns SIM INFO @ mkPhyAddrBram: receive pa mapping request, va:%h", vaReq.startAddr); end endrule rule getPaReq; let pa_lo <- phyAddrLoBram.portA.response.get; let pa_hi <- phyAddrHiBram.portA.response.get; - let pa = zeroExtend(pa_hi << valueOf(DMA_CSR_ADDR_WIDTH)) & zeroExtend(pa_lo); + DmaMemAddr pa = doubleExtend(pa_lo, pa_hi); let oriReq = pendingFifo.first; pendingFifo.deq; + $display($time, "ns SIM INFO @ mkPhyAddrBram: got a pa mapping, va:%h pa:%h", oriReq.startAddr, pa); oriReq.startAddr = pa; paReqFifo.enq(oriReq); endrule @@ -223,6 +239,7 @@ interface GenericCsr; interface FifoOut#(CsrResponse) respFifoOut; endinterface +(* synthesize *) module mkDummyCsr(GenericCsr); FIFOF#(CsrRequest) reqFifo <- mkFIFOF; FIFOF#(CsrResponse) respFifo <- mkFIFOF; diff --git a/test/Makefile b/test/Makefile index 4667929..10975d9 100755 --- a/test/Makefile +++ b/test/Makefile @@ -1,7 +1,7 @@ include ../Makefile.base -TESTFILE ?= TestDmaCore.bsv -TOPMODULE ?= mkChunkComputerTb +TESTFILE ?= TestSimpleUtils.bsv +TOPMODULE ?= mkTestSimpleH2CCore SIMSCRIPT = $(BUILDDIR)/$(TOPMODULE).sh diff --git a/test/TestSimpleUtils.bsv b/test/TestSimpleUtils.bsv new file mode 100644 index 0000000..8ae323e --- /dev/null +++ b/test/TestSimpleUtils.bsv @@ -0,0 +1,138 @@ +import Vector::*; +import RegFile::*; +import GetPut::*; +import SemiFifo::*; +import FIFOF::*; +import BRAM::*; +import Connectable :: *; + +import DmaTypes::*; +import StreamUtils::*; +import SimpleModeUtils::*; +import PcieDescriptorTypes::*; +import PcieAxiStreamTypes::*; +import PcieTypes::*; +import PcieAdapter::*; +import DmaH2CPipe::*; + +typedef 2'b10 TRANSLATED_ADDR_TYPE; + +module mkTestSimpleCore(Empty); + DmaSimpleCore core <- mkDmaSimpleCore; + Reg#(UInt#(32)) testRoundReg <- mkReg(0); + + rule test if (testRoundReg < 50); + testRoundReg <= testRoundReg + 1; + case (testRoundReg) + 0: begin + core.reqFifoIn.enq(CsrRequest { + addr : 1, + value : 'h1234, + isWrite: True + }); + end + 1: begin + core.reqFifoIn.enq(CsrRequest { + addr : 2, + value : 'h1234, + isWrite: True + }); + end + 2: begin + core.reqFifoIn.enq(CsrRequest { + addr : 3, + value : 100, + isWrite: True + }); + end + 4: begin + core.reqFifoIn.enq(CsrRequest { + addr : 0, + value : 1, + isWrite: True + }); + end + 5: begin + core.reqFifoIn.enq(CsrRequest { + addr : 1, + value : 0, + isWrite: False + }); + end + endcase + if (core.respFifoOut.notEmpty) begin + let resp = core.respFifoOut.first; + core.respFifoOut.deq; + $display($time, "ns SIM INFO @ mkTestSimpleCore: recv response from dut, address:%h value:%d", resp.addr, resp.value); + end + if (core.c2hReqFifoOut[0].notEmpty) begin + let c2hReq = core.c2hReqFifoOut[0].first; + core.c2hReqFifoOut[0].deq; + $display($time, "ns SIM INFO @ mkTestSimpleCore: recv c2hReq from dut, startAddr:%h length:%d isWrite:%d", c2hReq.startAddr, c2hReq.length, c2hReq.isWrite); + end + endrule +endmodule + +module mkTestSimpleH2CCore(Empty); + DmaH2CPipe pipe <- mkDmaH2CPipe; + DmaSimpleCore sCore <- mkDmaSimpleCore; + + mkConnection(pipe.csrReqFifoOut, sCore.reqFifoIn); + mkConnection(pipe.csrRespFifoIn, sCore.respFifoOut); + + Reg#(Bool) testInitReg <- mkReg(False); + Reg#(Bool) simuDoneReg <- mkReg(False); + + function DataStream genCsrReqTlp(CsrRequest req); + let pcieDesc = PcieCompleterRequestDescriptor { + reserve0 : 0, + attributes : 0, + trafficClass : 0, + barAperture : 12, + barId : 0, + targetFunction: 0, + tag : 0, + requesterId : 'hABCD, + reserve1 : 0, + reqType : req.isWrite ? fromInteger(valueOf(MEM_WRITE_REQ)) :fromInteger(valueOf(MEM_READ_REQ)) , + dwordCnt : 1, + address : zeroExtend(req.addr >> 2), + addrType : fromInteger(valueOf(TRANSLATED_ADDR_TYPE)) + }; + let tlpData = DataStream { + data : zeroExtend(pack(pcieDesc)) | (zeroExtend(req.value) << valueOf(TDiv#(DES_CQ_DESCRIPTOR_WIDTH, BYTE_WIDTH))), + byteEn : 'hFFF, + isFirst: True, + isLast : True + }; + return tlpData; + endfunction + + rule testInit if (!testInitReg); + testInitReg <= True; + endrule + + + rule testRead if (testInitReg); + let tlpData = genCsrReqTlp(CsrRequest { + addr : 1, + value : 0, + isWrite : False + }); + pipe.tlpDataFifoIn.enq(tlpData); + simuDoneReg <= True; + $display($time, "ns SIM INFO @ mkTestSimpleH2CCore: send a test read req"); + endrule + + rule testResult if (simuDoneReg); + let tlp = pipe.tlpDataFifoOut.first; + pipe.tlpDataFifoOut.deq; + let desc = truncate(tlp.data); + DmaCsrValue value = truncate(tlp.data >> valueOf(DES_CQ_DESCRIPTOR_WIDTH)); + $display($time, "ns SIM INFO @ mkTestSimpleH2CCore: received h2c path value:%d, whole cc tlp:%h", value, tlp.data); + $finish; + endrule +endmodule + + + From 875df73e79a634d1bcb9bad18bdfd5beb01271f7 Mon Sep 17 00:00:00 2001 From: BIGWJZ Date: Tue, 15 Oct 2024 15:49:57 +0800 Subject: [PATCH 43/53] add cocotb bar tb --- cocotb/Makefile | 6 +- cocotb/{dma_wr_rd_tb.py => bdmatb.py} | 149 +++---- .../dma_straddle_tb.py | 0 cocotb/bypass_write_read/dma_wr_rd_tb.py | 94 +++++ cocotb/dma_loop_tb.py | 382 ------------------ .../simple_write_read/dma_simple_wr_rd_tb.py | 60 +++ 6 files changed, 207 insertions(+), 484 deletions(-) rename cocotb/{dma_wr_rd_tb.py => bdmatb.py} (84%) rename cocotb/{ => bypass_write_read}/dma_straddle_tb.py (100%) create mode 100644 cocotb/bypass_write_read/dma_wr_rd_tb.py delete mode 100644 cocotb/dma_loop_tb.py create mode 100644 cocotb/simple_write_read/dma_simple_wr_rd_tb.py diff --git a/cocotb/Makefile b/cocotb/Makefile index d2c4b44..0f23a2f 100644 --- a/cocotb/Makefile +++ b/cocotb/Makefile @@ -5,18 +5,18 @@ include $(ROOT_DIR)/Makefile.base VBUILD_DIR = $(BACKEND_DIR)/build VSRC_DIR = $(BACKEND_DIR)/verilog -TARGET = RawBypassDmaController +TARGET = RawSimpleDmaController TOP_MODULE = mk$(TARGET) TOP_FILE = $(TOP_MODULE).v VLOG_FILE = $(TB_DIR)/$(TOP_FILE) -TB_CASE = dma_wr_rd +TB_CASE = dma_bar TB_FILE = $(TB_CASE)_tb.py DATE = $(shell date "+%Y%m%d") LOG_FILE = $(TB_DIR)/log/$(DATE)_$(TOP_MODULE).log verilog: - cd $(BACKEND_DIR) && make verilog + cd $(BACKEND_DIR) && make clean && make verilog TOPMODULE=$(TOP_MODULE) prepare: rm -rf $(VLOG_FILE) diff --git a/cocotb/dma_wr_rd_tb.py b/cocotb/bdmatb.py similarity index 84% rename from cocotb/dma_wr_rd_tb.py rename to cocotb/bdmatb.py index 2143e49..0edcbbf 100644 --- a/cocotb/dma_wr_rd_tb.py +++ b/cocotb/bdmatb.py @@ -1,12 +1,6 @@ -#!/usr/bin/env python -import itertools import logging import os import random -import queue - -import cocotb_test.simulator -import pytest import cocotb from cocotb.triggers import RisingEdge, FallingEdge, Timer @@ -27,8 +21,11 @@ signals=["start_addr", "byte_cnt", "is_write", "valid", "ready"] ) -class TB(object): +class BdmaTb(object): def __init__(self, dut, msix=False): + self._pcie_init(dut, msix) + + def _pcie_init(self, dut, msix=False): self.dut = dut self.log = logging.getLogger("cocotb.tb") @@ -46,7 +43,7 @@ def __init__(self, dut, msix=False): cq_straddle = False cc_straddle = False rq_straddle = True - rc_straddle = False + rc_straddle = True rc_4tlp_straddle = False self.client_tag = bool(int(os.getenv("CLIENT_TAG", "1"))) @@ -268,20 +265,11 @@ def __init__(self, dut, msix=False): dut.cfg_ds_bus_number.setimmediatevalue(0) dut.cfg_ds_device_number.setimmediatevalue(0) - self.rc.make_port().connect(self.dev) - - # DMA - self.c2h_write_source_0 = AxiStreamSource(AxiStreamBus.from_prefix(dut, "s_axis_c2h_0"), self.clock, self.resetn, False) - self.c2h_desc_source_0 = DescSource(DescBus.from_prefix(dut, "s_desc_c2h_0"),self.clock, self.resetn, False) - self.c2h_read_sink_0 = AxiStreamSink(AxiStreamBus.from_prefix(dut, "m_axis_c2h_0"), self.clock, self.resetn, False) - self.c2h_write_source_1 = AxiStreamSource(AxiStreamBus.from_prefix(dut, "s_axis_c2h_1"), self.clock, self.resetn, False) - self.c2h_desc_source_1 = DescSource(DescBus.from_prefix(dut, "s_desc_c2h_1"), self.clock, self.resetn, False) - self.c2h_read_sink_1 = AxiStreamSink(AxiStreamBus.from_prefix(dut, "m_axis_c2h_1"), self.clock, self.resetn, False) + self.dev.functions[0].configure_bar(0, 16*1024*1024) + self.dev.functions[0].configure_bar(1, 16*1024, io=True) - #monitor - self.rq_monitor = AxiStreamMonitor(AxiStreamBus.from_prefix(dut, "m_axis_rq"), self.clock, self.resetn, False) + self.rc.make_port().connect(self.dev) - #Do not use user_rst but gen rstn for bsv async def gen_reset(self): self.resetn.value = 0 await RisingEdge(self.clock) @@ -292,6 +280,33 @@ async def gen_reset(self): await RisingEdge(self.clock) await RisingEdge(self.clock) self.log.info("Generated DMA RST_N") + + def gen_random_req(self, channel): + low_boundry = channel * 8192 + high_boundry = (channel + 1) * 8192 + idxs = random.sample(range(low_boundry, high_boundry), 2) + lo_idx, hi_idx = idxs[0], idxs[1] + if (hi_idx < lo_idx): + temp = hi_idx + hi_idx = lo_idx + lo_idx = temp + length = hi_idx - lo_idx + 1 + return (lo_idx, length) + +class BdmaBypassTb(BdmaTb): + def __init__(self, dut, msix=False): + super().__init__(dut, msix) + + # DMA + self.c2h_write_source_0 = AxiStreamSource(AxiStreamBus.from_prefix(dut, "s_axis_c2h_0"), self.clock, self.resetn, False) + self.c2h_desc_source_0 = DescSource(DescBus.from_prefix(dut, "s_desc_c2h_0"),self.clock, self.resetn, False) + self.c2h_read_sink_0 = AxiStreamSink(AxiStreamBus.from_prefix(dut, "m_axis_c2h_0"), self.clock, self.resetn, False) + self.c2h_write_source_1 = AxiStreamSource(AxiStreamBus.from_prefix(dut, "s_axis_c2h_1"), self.clock, self.resetn, False) + self.c2h_desc_source_1 = DescSource(DescBus.from_prefix(dut, "s_desc_c2h_1"), self.clock, self.resetn, False) + self.c2h_read_sink_1 = AxiStreamSink(AxiStreamBus.from_prefix(dut, "m_axis_c2h_1"), self.clock, self.resetn, False) + + #monitor + self.rq_monitor = AxiStreamMonitor(AxiStreamBus.from_prefix(dut, "m_axis_rq"), self.clock, self.resetn, False) async def send_desc(self, channel, startAddr, length, isWrite): desc = DescTransaction() @@ -319,91 +334,27 @@ async def recv_data(self, channel): async def run_single_write_once(self, channel, addr, data): length = len(data) - self.log.info("Conduct DMA single write: addr %d, length %d, char %c", addr, length, data[0]) + self.log.info("Conduct DMA single write: channel %d addr %d, length %d, char %c", channel, addr, length, data[0]) await self.send_desc(channel, addr, length, True) await self.send_data(channel, data) async def run_single_read_once(self, channel, addr, length): - self.log.info("Conduct DMA single read: addr %d, length %d", addr, length) + self.log.info("Conduct DMA single read: channel %d addr %d, length %d", channel, addr, length) await self.send_desc(channel, addr, length, False) data = await self.recv_data(channel) self.log.info("Read data from RootComplex successfully, recv length %d, req length %d", len(data), length) return data -@cocotb.test(timeout_time=100000000, timeout_unit="ns") -async def random_write_test(dut): - - tb = TB(dut) - await tb.gen_reset() - - await tb.rc.enumerate() - dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) - - await dev.enable_device() - await dev.set_master() - - mem = tb.rc.mem_pool.alloc_region(1024*1024) - mem_base = mem.get_absolute_address(0) - - dma_channel = 1 - for _ in range(10): - addr_offset = random.randint(0, 8192) - length = random.randint(0, 8192) - char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") - addr = addr_offset + mem_base - data = char * length - await tb.run_single_write_once(dma_channel, addr, data) - await Timer(100+length, units='ns') - assert mem[addr:addr+length] == char * length - await RisingEdge(tb.clock) - -@cocotb.test(timeout_time=10000000, timeout_unit="ns") -async def random_read_test(dut): - tb = TB(dut) - await tb.gen_reset() - - await tb.rc.enumerate() - dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) - - await dev.enable_device() - await dev.set_master() - - mem = tb.rc.mem_pool.alloc_region(1024*1024) - mem_base = mem.get_absolute_address(0) - - dma_channel = 0 - for _ in range(100): - addr_offset = random.randint(0, 8192) - addr = addr_offset + mem_base - length = random.randint(0, 8192) - char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") - mem[addr:addr+length] = char * length - data = await tb.run_single_read_once(dma_channel, addr, length) - assert data == char * length - -tests_dir = os.path.dirname(__file__) -rtl_dir = tests_dir - - -def test_dma(): - dut = "mkRawBypassDmaController" - module = os.path.splitext(os.path.basename(__file__))[0] - toplevel = dut - - verilog_sources = [ - os.path.join(rtl_dir, f"{dut}.v") - ] - - sim_build = os.path.join(tests_dir, "sim_build", dut) - - cocotb_test.simulator.run( - python_search=[tests_dir], - verilog_sources=verilog_sources, - toplevel=toplevel, - module=module, - timescale="1ns/1ps", - sim_build=sim_build - ) - -if __name__ == "__main__": - test_dma() \ No newline at end of file + +class BdmaSimpleTb(BdmaTb): + def conbine_bar(self, bar): + self.ep_bar = bar + + async def submit_transfer(self, channel, addr, length, isWrite=True): + addrLo = addr & 0xFFFFFFFF + addrHi = (addr >> 32) & 0xFFFFFFFF + base_addr = channel * 6 + await self.ep_bar.write(base_addr + 1, addrLo.to_bytes(4, byteorder='big', signed=False)) + await self.ep_bar.write(base_addr + 2, addrHi.to_bytes(4, byteorder='big', signed=False)) + await self.ep_bar.write(base_addr + 3, length.to_bytes(4, byteorder='big', signed=False)) + await self.ep_bar.write(base_addr, int(isWrite).to_bytes(4, byteorder='big', signed=False)) \ No newline at end of file diff --git a/cocotb/dma_straddle_tb.py b/cocotb/bypass_write_read/dma_straddle_tb.py similarity index 100% rename from cocotb/dma_straddle_tb.py rename to cocotb/bypass_write_read/dma_straddle_tb.py diff --git a/cocotb/bypass_write_read/dma_wr_rd_tb.py b/cocotb/bypass_write_read/dma_wr_rd_tb.py new file mode 100644 index 0000000..54552fe --- /dev/null +++ b/cocotb/bypass_write_read/dma_wr_rd_tb.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python +import logging +import os +import random + +import cocotb +from cocotb.triggers import RisingEdge, Timer +from cocotb.clock import Clock + +import cocotb_test.simulator + +from bdmatb import BdmaBypassTb +# class TB architecture +# -------------- ------------- ----------- +# | Root Complex | <-> | End Pointer | <-> | Dut(DMAC) | +# -------------- ------------- ----------- + +async def single_path_random_write_test(pcie_tb, dma_channel, mem): + for _ in range(100): + addr, length = pcie_tb.gen_random_req(dma_channel) + addr = mem.get_absolute_address(addr) + char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") + data = char * length + await pcie_tb.run_single_write_once(dma_channel, addr, data) + await Timer(100+length, units='ns') + assert mem[addr:addr+length] == data + + +async def single_path_random_read_test(pcie_tb, dma_channel, mem): + for _ in range(100): + addr, length = pcie_tb.gen_random_req(dma_channel) + addr = mem.get_absolute_address(addr) + char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") + mem[addr:addr+length] = char * length + data = await pcie_tb.run_single_read_once(dma_channel, addr, length) + assert data == char * length + +@cocotb.test(timeout_time=100000000, timeout_unit="ns") +async def step_random_write_test(dut): + + tb = BdmaBypassTb(dut) + await tb.gen_reset() + + await tb.rc.enumerate() + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + + await dev.enable_device() + await dev.set_master() + + mem = tb.rc.mem_pool.alloc_region(1024*1024) + + await single_path_random_write_test(tb, 0, mem) + +@cocotb.test(timeout_time=10000000, timeout_unit="ns") +async def step_random_read_test(dut): + tb = BdmaBypassTb(dut) + await tb.gen_reset() + + await tb.rc.enumerate() + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + + await dev.enable_device() + await dev.set_master() + + mem = tb.rc.mem_pool.alloc_region(1024*1024) + + await single_path_random_read_test(tb, 0, mem) + +tests_dir = os.path.dirname(__file__) +rtl_dir = tests_dir + + +def test_dma(): + dut = "mkRawBypassDmaController" + module = os.path.splitext(os.path.basename(__file__))[0] + toplevel = dut + + verilog_sources = [ + os.path.join(rtl_dir, f"{dut}.v") + ] + + sim_build = os.path.join(tests_dir, "sim_build", dut) + + cocotb_test.simulator.run( + python_search=[tests_dir], + verilog_sources=verilog_sources, + toplevel=toplevel, + module=module, + timescale="1ns/1ps", + sim_build=sim_build + ) + +if __name__ == "__main__": + test_dma() \ No newline at end of file diff --git a/cocotb/dma_loop_tb.py b/cocotb/dma_loop_tb.py deleted file mode 100644 index 666db6c..0000000 --- a/cocotb/dma_loop_tb.py +++ /dev/null @@ -1,382 +0,0 @@ -#!/usr/bin/env python -import itertools -import logging -import os -import random -import queue - -import cocotb_test.simulator -import pytest - -import cocotb -from cocotb.triggers import RisingEdge, FallingEdge, Timer -from cocotb.regression import TestFactory -from cocotb.clock import Clock - -from cocotbext.pcie.core import RootComplex -from cocotbext.pcie.xilinx.us import UltraScalePlusPcieDevice -from cocotbext.axi.stream import define_stream -from cocotbext.axi import (AxiStreamBus, AxiStreamSource, AxiStreamSink, AxiStreamMonitor, AxiStreamFrame) - -# class TB architecture -# -------------- ------------- ----------- -# | Root Complex | <-> | End Pointer | <-> | Dut(DMAC) | -# -------------- ------------- ----------- -# Loop TB only runs for Simple Mode - -DescBus, DescTransaction, DescSource, DescSink, DescMonitor = define_stream("Desc", - signals=["start_addr", "byte_cnt", "is_write", "valid", "ready"] -) -class h2cHelper(object): - def __init__(pcie_tb, dut, self): - self._desc_addr_lo_0 = 0 - self._desc_addr_hi_0 = 1 - self._desc_len_0 = 2 - self._desc_cntl_0 = 3 - self._desc_addr_lo_1 = 4 - self._desc_addr_hi_1 = 5 - self._desc_len_1 = 6 - self._desc_cntl_1 = 7 - self._flag_addr_lo_0 = 8 - self._flag_addr_hi_0 = 9 - self._flag_addr_lo_1 = 10 - self._flag_addr_hi_1 = 11 - self._va_head_lo_0 = 12 - self._va_head_hi_0 = 13 - self._va_head_lo_1 = 12 - self._va_head_hi_1 = 13 - - self._bar_offset = 0 - self._reg_block_size = 512 - self._phy_addr_table_size = 512 - - self.tb = TB(dut) - self.mem = self.tb.rc.mem_pool.alloc_region(1024*1024) - - async def init(self): - await self.tb.gen_reset() - - await self.tb.rc.enumerate() - dev = self.tb.rc.find_device(self.tb.dev.functions[0].pcie_id) - - await dev.enable_device() - await dev.set_master() - - async def ioWrite32(self, address, value): - raise NotImplementedError - - async def send_desc(self, channel, address, length, isWrite): - if (channel == 0): - addrLo, addrHi = self.splitDword2Word(address) - await self.ioWrite32(self._desc_addr_lo_0, addrLo.to_bytes()) - await self.ioWrite32(self._desc_addr_hi_0, addrHi.to_bytes()) - await self.ioWrite32(self._desc_len_0, length.to_bytes()) - - - - def set_bar_offset(self, offset): - self._bar_offset = offset - - def splitDword2Word(dword): - lo = dword & 0xFFFF - hi = (dword >> 32) & 0xFFFF - return (lo, hi) - - -class TB(object): - def __init__(self, dut, msix=False): - self.dut = dut - - self.log = logging.getLogger("cocotb.tb") - self.log.setLevel(logging.DEBUG) - - self.clock = dut.CLK - self.resetn = dut.RST_N - - self._bus_width = 512 - self._bus_bytes = 64 - - # PCIe - self.rc = RootComplex() - - cq_straddle = False - cc_straddle = False - rq_straddle = True - rc_straddle = True - rc_4tlp_straddle = False - - self.client_tag = bool(int(os.getenv("CLIENT_TAG", "1"))) - - self.dev = UltraScalePlusPcieDevice( - # configuration options - pcie_generation=3, - # pcie_link_width=2, - # user_clk_frequency=250e6, - alignment="dword", - cq_straddle=cq_straddle, - cc_straddle=cc_straddle, - rq_straddle=rq_straddle, - rc_straddle=rc_straddle, - rc_4tlp_straddle=rc_4tlp_straddle, - pf_count=1, - max_payload_size=1024, - enable_client_tag=self.client_tag, - enable_extended_tag=False, - enable_parity=False, - enable_rx_msg_interface=False, - enable_sriov=False, - enable_extended_configuration=False, - - pf0_msi_enable=True, - pf0_msi_count=32, - pf1_msi_enable=False, - pf1_msi_count=1, - pf2_msi_enable=False, - pf2_msi_count=1, - pf3_msi_enable=False, - pf3_msi_count=1, - pf0_msix_enable=msix, - pf0_msix_table_size=63, - pf0_msix_table_bir=4, - pf0_msix_table_offset=0x00000000, - pf0_msix_pba_bir=4, - pf0_msix_pba_offset=0x00008000, - pf1_msix_enable=False, - pf1_msix_table_size=0, - pf1_msix_table_bir=0, - pf1_msix_table_offset=0x00000000, - pf1_msix_pba_bir=0, - pf1_msix_pba_offset=0x00000000, - pf2_msix_enable=False, - pf2_msix_table_size=0, - pf2_msix_table_bir=0, - pf2_msix_table_offset=0x00000000, - pf2_msix_pba_bir=0, - pf2_msix_pba_offset=0x00000000, - pf3_msix_enable=False, - pf3_msix_table_size=0, - pf3_msix_table_bir=0, - pf3_msix_table_offset=0x00000000, - pf3_msix_pba_bir=0, - pf3_msix_pba_offset=0x00000000, - - # signals - user_clk=self.clock, - # user_reset=~self.resetn, - user_lnk_up=dut.user_lnk_up, - # sys_clk=dut.sys_clk, - # sys_clk_gt=dut.sys_clk_gt, - # sys_reset=dut.sys_reset, - # phy_rdy_out=dut.phy_rdy_out, - - rq_bus=AxiStreamBus.from_prefix(dut, "m_axis_rq"), - pcie_rq_seq_num0=dut.pcie_rq_seq_num0, - pcie_rq_seq_num_vld0=dut.pcie_rq_seq_num_vld0, - pcie_rq_seq_num1=dut.pcie_rq_seq_num1, - pcie_rq_seq_num_vld1=dut.pcie_rq_seq_num_vld1, - pcie_rq_tag0=dut.pcie_rq_tag0, - pcie_rq_tag1=dut.pcie_rq_tag1, - # pcie_rq_tag_av=dut.pcie_rq_tag_av, - pcie_rq_tag_vld0=dut.pcie_rq_tag_vld0, - pcie_rq_tag_vld1=dut.pcie_rq_tag_vld1, - - rc_bus=AxiStreamBus.from_prefix(dut, "s_axis_rc"), - - cq_bus=AxiStreamBus.from_prefix(dut, "s_axis_cq"), - pcie_cq_np_req=dut.pcie_cq_np_req, - pcie_cq_np_req_count=dut.pcie_cq_np_req_count, - - cc_bus=AxiStreamBus.from_prefix(dut, "m_axis_cc"), - - pcie_tfc_nph_av=dut.pcie_tfc_nph_av, - pcie_tfc_npd_av=dut.pcie_tfc_npd_av, - cfg_phy_link_down=dut.cfg_phy_link_down, - cfg_phy_link_status=dut.cfg_phy_link_status, - cfg_negotiated_width=dut.cfg_negotiated_width, - cfg_current_speed=dut.cfg_current_speed, - cfg_max_payload=dut.cfg_max_payload, - cfg_max_read_req=dut.cfg_max_read_req, - cfg_function_status=dut.cfg_function_status, - cfg_function_power_state=dut.cfg_function_power_state, - cfg_vf_status=dut.cfg_vf_status, - cfg_vf_power_state=dut.cfg_vf_power_state, - cfg_link_power_state=dut.cfg_link_power_state, - cfg_mgmt_addr=dut.cfg_mgmt_addr, - cfg_mgmt_function_number=dut.cfg_mgmt_function_number, - cfg_mgmt_write=dut.cfg_mgmt_write, - cfg_mgmt_write_data=dut.cfg_mgmt_write_data, - cfg_mgmt_byte_enable=dut.cfg_mgmt_byte_enable, - cfg_mgmt_read=dut.cfg_mgmt_read, - cfg_mgmt_read_data=dut.cfg_mgmt_read_data, - cfg_mgmt_read_write_done=dut.cfg_mgmt_read_write_done, - cfg_mgmt_debug_access=dut.cfg_mgmt_debug_access, - cfg_err_cor_out=dut.cfg_err_cor_out, - cfg_err_nonfatal_out=dut.cfg_err_nonfatal_out, - cfg_err_fatal_out=dut.cfg_err_fatal_out, - cfg_local_error_valid=dut.cfg_local_error_valid, - cfg_local_error_out=dut.cfg_local_error_out, - cfg_ltssm_state=dut.cfg_ltssm_state, - cfg_rx_pm_state=dut.cfg_rx_pm_state, - cfg_tx_pm_state=dut.cfg_tx_pm_state, - cfg_rcb_status=dut.cfg_rcb_status, - cfg_obff_enable=dut.cfg_obff_enable, - # cfg_pl_status_change=dut.cfg_pl_status_change, - # cfg_tph_requester_enable=dut.cfg_tph_requester_enable, - # cfg_tph_st_mode=dut.cfg_tph_st_mode, - # cfg_vf_tph_requester_enable=dut.cfg_vf_tph_requester_enable, - # cfg_vf_tph_st_mode=dut.cfg_vf_tph_st_mode, - cfg_msg_received=dut.cfg_msg_received, - cfg_msg_received_data=dut.cfg_msg_received_data, - cfg_msg_received_type=dut.cfg_msg_received_type, - cfg_msg_transmit=dut.cfg_msg_transmit, - cfg_msg_transmit_type=dut.cfg_msg_transmit_type, - cfg_msg_transmit_data=dut.cfg_msg_transmit_data, - cfg_msg_transmit_done=dut.cfg_msg_transmit_done, - cfg_fc_ph=dut.cfg_fc_ph, - cfg_fc_pd=dut.cfg_fc_pd, - cfg_fc_nph=dut.cfg_fc_nph, - cfg_fc_npd=dut.cfg_fc_npd, - cfg_fc_cplh=dut.cfg_fc_cplh, - cfg_fc_cpld=dut.cfg_fc_cpld, - cfg_fc_sel=dut.cfg_fc_sel, - cfg_dsn=dut.cfg_dsn, - cfg_bus_number=dut.cfg_bus_number, - cfg_power_state_change_ack=dut.cfg_power_state_change_ack, - cfg_power_state_change_interrupt=dut.cfg_power_state_change_interrupt, - cfg_err_cor_in=dut.cfg_err_cor_in, - cfg_err_uncor_in=dut.cfg_err_uncor_in, - cfg_flr_in_process=dut.cfg_flr_in_process, - cfg_flr_done=dut.cfg_flr_done, - cfg_vf_flr_in_process=dut.cfg_vf_flr_in_process, - cfg_vf_flr_func_num=dut.cfg_vf_flr_func_num, - cfg_vf_flr_done=dut.cfg_vf_flr_done, - cfg_link_training_enable=dut.cfg_link_training_enable, - cfg_interrupt_int=dut.cfg_interrupt_int, - cfg_interrupt_pending=dut.cfg_interrupt_pending, - cfg_interrupt_sent=dut.cfg_interrupt_sent, - cfg_interrupt_msi_enable=dut.cfg_interrupt_msi_enable, - cfg_interrupt_msi_mmenable=dut.cfg_interrupt_msi_mmenable, - cfg_interrupt_msi_mask_update=dut.cfg_interrupt_msi_mask_update, - cfg_interrupt_msi_data=dut.cfg_interrupt_msi_data, - cfg_interrupt_msi_select=dut.cfg_interrupt_msi_select, - cfg_interrupt_msi_int=dut.cfg_interrupt_msi_int, - cfg_interrupt_msi_pending_status=dut.cfg_interrupt_msi_pending_status, - cfg_interrupt_msi_pending_status_data_enable=dut.cfg_interrupt_msi_pending_status_data_enable, - cfg_interrupt_msi_pending_status_function_num=dut.cfg_interrupt_msi_pending_status_function_num, - cfg_interrupt_msi_sent=dut.cfg_interrupt_msi_sent, - cfg_interrupt_msi_fail=dut.cfg_interrupt_msi_fail, - cfg_interrupt_msi_attr=dut.cfg_interrupt_msi_attr, - cfg_interrupt_msi_tph_present=dut.cfg_interrupt_msi_tph_present, - cfg_interrupt_msi_tph_type=dut.cfg_interrupt_msi_tph_type, - cfg_interrupt_msi_tph_st_tag=dut.cfg_interrupt_msi_tph_st_tag, - cfg_interrupt_msi_function_number=dut.cfg_interrupt_msi_function_number, - cfg_pm_aspm_l1_entry_reject=dut.cfg_pm_aspm_l1_entry_reject, - cfg_pm_aspm_tx_l0s_entry_disable=dut.cfg_pm_aspm_tx_l0s_entry_disable, - cfg_hot_reset_out=dut.cfg_hot_reset_out, - cfg_config_space_enable=dut.cfg_config_space_enable, - cfg_req_pm_transition_l23_ready=dut.cfg_req_pm_transition_l23_ready, - cfg_hot_reset_in=dut.cfg_hot_reset_in, - cfg_ds_port_number=dut.cfg_ds_port_number, - cfg_ds_bus_number=dut.cfg_ds_bus_number, - cfg_ds_device_number=dut.cfg_ds_device_number, - ) - - self.dev.log.setLevel(logging.INFO) - - dut.pcie_cq_np_req.setimmediatevalue(1) - dut.cfg_mgmt_addr.setimmediatevalue(0) - dut.cfg_mgmt_function_number.setimmediatevalue(0) - dut.cfg_mgmt_write.setimmediatevalue(0) - dut.cfg_mgmt_write_data.setimmediatevalue(0) - dut.cfg_mgmt_byte_enable.setimmediatevalue(0) - dut.cfg_mgmt_read.setimmediatevalue(0) - dut.cfg_mgmt_debug_access.setimmediatevalue(0) - dut.cfg_msg_transmit.setimmediatevalue(0) - dut.cfg_msg_transmit_type.setimmediatevalue(0) - dut.cfg_msg_transmit_data.setimmediatevalue(0) - dut.cfg_fc_sel.setimmediatevalue(0) - dut.cfg_dsn.setimmediatevalue(0) - dut.cfg_power_state_change_ack.setimmediatevalue(0) - dut.cfg_err_cor_in.setimmediatevalue(0) - dut.cfg_err_uncor_in.setimmediatevalue(0) - dut.cfg_flr_done.setimmediatevalue(0) - dut.cfg_vf_flr_func_num.setimmediatevalue(0) - dut.cfg_vf_flr_done.setimmediatevalue(0) - dut.cfg_link_training_enable.setimmediatevalue(1) - dut.cfg_interrupt_int.setimmediatevalue(0) - dut.cfg_interrupt_pending.setimmediatevalue(0) - dut.cfg_interrupt_msi_select.setimmediatevalue(0) - dut.cfg_interrupt_msi_int.setimmediatevalue(0) - dut.cfg_interrupt_msi_pending_status.setimmediatevalue(0) - dut.cfg_interrupt_msi_pending_status_data_enable.setimmediatevalue(0) - dut.cfg_interrupt_msi_pending_status_function_num.setimmediatevalue(0) - dut.cfg_interrupt_msi_attr.setimmediatevalue(0) - dut.cfg_interrupt_msi_tph_present.setimmediatevalue(0) - dut.cfg_interrupt_msi_tph_type.setimmediatevalue(0) - dut.cfg_interrupt_msi_tph_st_tag.setimmediatevalue(0) - dut.cfg_interrupt_msi_function_number.setimmediatevalue(0) - dut.cfg_pm_aspm_l1_entry_reject.setimmediatevalue(0) - dut.cfg_pm_aspm_tx_l0s_entry_disable.setimmediatevalue(0) - dut.cfg_config_space_enable.setimmediatevalue(1) - dut.cfg_req_pm_transition_l23_ready.setimmediatevalue(0) - dut.cfg_hot_reset_in.setimmediatevalue(0) - dut.cfg_ds_port_number.setimmediatevalue(0) - dut.cfg_ds_bus_number.setimmediatevalue(0) - dut.cfg_ds_device_number.setimmediatevalue(0) - - self.rc.make_port().connect(self.dev) - - #Do not use user_rst but gen rstn for bsv - async def gen_reset(self): - self.resetn.value = 0 - await RisingEdge(self.clock) - await RisingEdge(self.clock) - await RisingEdge(self.clock) - self.resetn.value = 1 - await RisingEdge(self.clock) - await RisingEdge(self.clock) - await RisingEdge(self.clock) - self.log.info("Generated DMA RST_N") - -@cocotb.test(timeout_time=100000000, timeout_unit="ns") -async def small_desc_fp_test(dut): - - tb = TB(dut) - await tb.gen_reset() - - await tb.rc.enumerate() - dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) - - await dev.enable_device() - await dev.set_master() - - mem = tb.rc.mem_pool.alloc_region(1024*1024) - - await Timer(1000, units='ns') - -tests_dir = os.path.dirname(__file__) -rtl_dir = tests_dir - - -def test_dma(): - dut = "mkRawDmaController" - module = os.path.splitext(os.path.basename(__file__))[0] - toplevel = dut - - verilog_sources = [ - os.path.join(rtl_dir, f"{dut}.v") - ] - - sim_build = os.path.join(tests_dir, "sim_build", dut) - - cocotb_test.simulator.run( - python_search=[tests_dir], - verilog_sources=verilog_sources, - toplevel=toplevel, - module=module, - timescale="1ns/1ps", - sim_build=sim_build - ) - -if __name__ == "__main__": - test_dma() \ No newline at end of file diff --git a/cocotb/simple_write_read/dma_simple_wr_rd_tb.py b/cocotb/simple_write_read/dma_simple_wr_rd_tb.py new file mode 100644 index 0000000..355ca86 --- /dev/null +++ b/cocotb/simple_write_read/dma_simple_wr_rd_tb.py @@ -0,0 +1,60 @@ +import os +import random + +import cocotb +from cocotb.triggers import RisingEdge, Timer +from cocotb.clock import Clock + +import cocotb_test.simulator + +from bdmatb import BdmaSimpleTb + +tests_dir = os.path.dirname(__file__) +rtl_dir = tests_dir + +@cocotb.test(timeout_time=10000000, timeout_unit="ns") +async def bar_test(dut): + tb = BdmaSimpleTb(dut) + await tb.gen_reset() + + await tb.rc.enumerate() + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + + await dev.enable_device() + await dev.set_master() + + dev_bar0 = dev.bar_window[0] + addr = 0x12345678 + length = 0xffff + isWrite = True + addrLo = addr & 0xFFFFFFFF + addrHi = (addr >> 32) & 0xFFFFFFFF + base_addr = 0 + await dev_bar0.write(base_addr + 1, addrLo.to_bytes(4, byteorder='big', signed=False)) + await dev_bar0.write(base_addr + 2, addrHi.to_bytes(4, byteorder='big', signed=False)) + await dev_bar0.write(base_addr + 3, length.to_bytes(4, byteorder='big', signed=False)) + await dev_bar0.write(base_addr, int(isWrite).to_bytes(4, byteorder='big', signed=False)) + + await Timer(500, units='ns') +def test_dma(): + dut = "mkRawSimpleDmaController" + module = os.path.splitext(os.path.basename(__file__))[0] + toplevel = dut + + verilog_sources = [ + os.path.join(rtl_dir, f"{dut}.v") + ] + + sim_build = os.path.join(tests_dir, "sim_build", dut) + + cocotb_test.simulator.run( + python_search=[tests_dir], + verilog_sources=verilog_sources, + toplevel=toplevel, + module=module, + timescale="1ns/1ps", + sim_build=sim_build + ) + +if __name__ == "__main__": + test_dma() \ No newline at end of file From d4d5a4caa7e52fa4122df81f2cbda06bd14e92fb Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Wed, 16 Oct 2024 18:22:22 +0800 Subject: [PATCH 44/53] pass simple mode tb --- cocotb/bdmatb.py | 74 +++- cocotb/bypass_stress/Makefile | 37 ++ cocotb/bypass_stress/dma_stress_tb.py | 87 ++++ cocotb/bypass_write_read/Makefile | 37 ++ cocotb/bypass_write_read/dma_straddle_tb.py | 339 +--------------- cocotb/bypass_write_read/dma_wr_rd_tb.py | 4 + cocotb/dma_fullypipeline_tb.py | 384 ------------------ cocotb/{ => simple_write_read}/Makefile | 9 +- .../simple_write_read/dma_simple_wr_rd_tb.py | 28 +- src/DmaC2HPipe.bsv | 2 +- src/DmaH2CPipe.bsv | 8 +- src/DmaUtils.bsv | 5 +- src/DmaWrapper.bsv | 59 ++- src/SimpleModeUtils.bsv | 15 +- 14 files changed, 323 insertions(+), 765 deletions(-) create mode 100644 cocotb/bypass_stress/Makefile create mode 100644 cocotb/bypass_stress/dma_stress_tb.py create mode 100644 cocotb/bypass_write_read/Makefile delete mode 100644 cocotb/dma_fullypipeline_tb.py rename cocotb/{ => simple_write_read}/Makefile (91%) diff --git a/cocotb/bdmatb.py b/cocotb/bdmatb.py index 0edcbbf..18383bb 100644 --- a/cocotb/bdmatb.py +++ b/cocotb/bdmatb.py @@ -293,6 +293,12 @@ def gen_random_req(self, channel): length = hi_idx - lo_idx + 1 return (lo_idx, length) + def gen_random_len(self): + return random.randint(1, 8192) + + def gen_random_aligned_len(self): + return random.randint(1, 2048) * 4 + class BdmaBypassTb(BdmaTb): def __init__(self, dut, msix=False): super().__init__(dut, msix) @@ -347,14 +353,74 @@ async def run_single_read_once(self, channel, addr, length): class BdmaSimpleTb(BdmaTb): + def __init__(self, dut, msix=False): + super().__init__(dut, msix) + + # DMA + self.c2h_write_source_0 = AxiStreamSource(AxiStreamBus.from_prefix(dut, "s_axis_c2h_0"), self.clock, self.resetn, False) + self.c2h_read_sink_0 = AxiStreamSink(AxiStreamBus.from_prefix(dut, "m_axis_c2h_0"), self.clock, self.resetn, False) + self.c2h_write_source_1 = AxiStreamSource(AxiStreamBus.from_prefix(dut, "s_axis_c2h_1"), self.clock, self.resetn, False) + self.c2h_read_sink_1 = AxiStreamSink(AxiStreamBus.from_prefix(dut, "m_axis_c2h_1"), self.clock, self.resetn, False) + + async def send_data(self, channel, data): + if channel == 0: + await self.c2h_write_source_0.send(data) + else: + await self.c2h_write_source_1.send(data) + + async def recv_data(self, channel): + if channel == 0 : + data = await self.c2h_read_sink_0.read() + else: + data = await self.c2h_read_sink_1.read() + data = bytes(''.join([chr(item) for item in data]), encoding='UTF-8') + return data + def conbine_bar(self, bar): self.ep_bar = bar + + async def write_register(self, addr:int, x:int): + x = x & 0xFFFFFFFF + self.log.debug("BdmaTb: write register at %d, value %d" % (addr, x)) + await self.ep_bar.write(addr * 4, x.to_bytes(4, byteorder='little', signed=False)) + + async def write_pa_table(self, channel, page_offset, pa): + base_addr = 512 + channel * 1024 + page_offset = page_offset & 0x1FF + paLo = pa & 0xFFFFFFFF + paHi = (pa >> 32) & 0xFFFFFFFF + await self.write_register(base_addr + 2*page_offset, paLo) + await self.write_register(base_addr + 2*page_offset + 1, paHi) + + + async def memory_map(self): + self.log.info("BdmaTb: Starting memory map...") + await self.write_pa_table(0, 1, 123456) + await self.write_pa_table(1, 2, 1) + for i in range(512): + await self.write_pa_table(0, i, 4096*i) + await self.write_pa_table(1, i, 4096*i) + await Timer(4 * 512 * 2 * 2, units='ns') async def submit_transfer(self, channel, addr, length, isWrite=True): addrLo = addr & 0xFFFFFFFF addrHi = (addr >> 32) & 0xFFFFFFFF base_addr = channel * 6 - await self.ep_bar.write(base_addr + 1, addrLo.to_bytes(4, byteorder='big', signed=False)) - await self.ep_bar.write(base_addr + 2, addrHi.to_bytes(4, byteorder='big', signed=False)) - await self.ep_bar.write(base_addr + 3, length.to_bytes(4, byteorder='big', signed=False)) - await self.ep_bar.write(base_addr, int(isWrite).to_bytes(4, byteorder='big', signed=False)) \ No newline at end of file + await self.write_register(base_addr + 1, addrLo) + await self.write_register(base_addr + 2, addrHi) + await self.write_register(base_addr + 3, length) + await self.write_register(base_addr, int(isWrite)) + + async def run_single_write_once(self, channel, addr, data): + length = len(data) + self.log.info("Conduct DMA single write: channel %d addr %d, length %d, char %c", channel, addr, length, data[0]) + await self.submit_transfer(channel, addr, length, True) + await self.send_data(channel, data) + + async def run_single_read_once(self, channel, addr, length): + self.log.info("Conduct DMA single read: channel %d addr %d, length %d", channel, addr, length) + await self.submit_transfer(channel, addr, length, False) + data = await self.recv_data(channel) + self.log.info("Read data from RootComplex successfully, recv length %d, req length %d", len(data), length) + return data + \ No newline at end of file diff --git a/cocotb/bypass_stress/Makefile b/cocotb/bypass_stress/Makefile new file mode 100644 index 0000000..d27d674 --- /dev/null +++ b/cocotb/bypass_stress/Makefile @@ -0,0 +1,37 @@ +ROOT_DIR = $(abspath ../../) +BACKEND_DIR = $(ROOT_DIR)/backend +COCOTB_DIR = $(abspath ../) +TB_DIR = $(abspath ./) +include $(ROOT_DIR)/Makefile.base +VBUILD_DIR = $(BACKEND_DIR)/build +VSRC_DIR = $(BACKEND_DIR)/verilog + +TARGET = RawBypassDmaController +TOP_MODULE = mk$(TARGET) +TOP_FILE = $(TOP_MODULE).v +VLOG_FILE = $(TB_DIR)/$(TOP_FILE) + +TB_CASE = dma_stress +TB_FILE = $(TB_CASE)_tb.py +DATE = $(shell date "+%Y%m%d") +LOG_FILE = $(TB_DIR)/log/$(DATE)_$(TOP_MODULE).log + +cocotb:clean verilog prepare run + +verilog: + cd $(BACKEND_DIR) && make clean && make verilog TOPMODULE=$(TOP_MODULE) + +prepare: + rm -rf $(VLOG_FILE) + bluetcl $(BACKEND_DIR)/listVlogFiles.tcl -bdir $(VBUILD_DIR) -vdir $(VSRC_DIR) $(TOP_MODULE) $(TOP_MODULE) | grep -i '\.v' | xargs -I {} cat {} >> $(VLOG_FILE) + sed -i '1i `timescale 1ns/1ps' $(VLOG_FILE) + +run: + cd $(TB_DIR) + mkdir -p log + python3 $(TB_FILE) 2>&1 | tee $(LOG_FILE) + +clean: + cd $(BACKEND_DIR) && make clean + cd $(TB_DIR) && rm -rf $(VLOG_FILE) __pycache__ .pytest_cache sim_build *.log + \ No newline at end of file diff --git a/cocotb/bypass_stress/dma_stress_tb.py b/cocotb/bypass_stress/dma_stress_tb.py new file mode 100644 index 0000000..0fd3ad1 --- /dev/null +++ b/cocotb/bypass_stress/dma_stress_tb.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python +import os +import random + +import cocotb +from cocotb.triggers import RisingEdge, Timer +from cocotb.clock import Clock + +import cocotb_test.simulator + +import sys +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) +from bdmatb import BdmaBypassTb + +# class TB architecture +# -------------- ------------- ----------- +# | Root Complex | <-> | End Pointer | <-> | Dut(DMAC) | +# -------------- ------------- ----------- + +def gen_pseudo_data(addr, length): + start = int(addr / 4) + data = start.to_bytes(4, byteorder='little', signed=False) + for i in range(1, int(length/4)): + data = data + (start + i).to_bytes(4, byteorder='little', signed=False) + return data + +async def stress_random_write_test(pcie_tb, dma_channel, mem, n): + addr = 0 + length = 0 + for _ in range(n): + length = pcie_tb.gen_random_aligned_len() + data = gen_pseudo_data(addr, length) + await pcie_tb.run_single_write_once(dma_channel, addr, data) + addr = int((addr + length)/4) * 4 + return addr + +async def run_stress_write(pcie_tb, mem): + n = 10 + end = await stress_random_write_test(pcie_tb, 0, mem, n) + await Timer(2048 * 2048) + for i in range(int(end/4)): + assert i == int.from_bytes(mem[i*4:(i+1)*4], byteorder='little', signed=False) + +@cocotb.test(timeout_time=100000000, timeout_unit="ns") +async def step_random_write_test(dut): + + tb = BdmaBypassTb(dut) + await tb.gen_reset() + + await tb.rc.enumerate() + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + + await dev.enable_device() + await dev.set_master() + + mem = tb.rc.mem_pool.alloc_region(1024*1024) + + await run_stress_write(tb, mem) + + +tests_dir = os.path.dirname(__file__) +rtl_dir = tests_dir +bdmatb_dir = os.path.dirname(tests_dir) + + +def test_dma(): + dut = "mkRawBypassDmaController" + module = os.path.splitext(os.path.basename(__file__))[0] + toplevel = dut + + verilog_sources = [ + os.path.join(rtl_dir, f"{dut}.v") + ] + + sim_build = os.path.join(tests_dir, "sim_build", dut) + + cocotb_test.simulator.run( + python_search=[tests_dir], + verilog_sources=verilog_sources, + toplevel=toplevel, + module=module, + timescale="1ns/1ps", + sim_build=sim_build + ) + +if __name__ == "__main__": + test_dma() \ No newline at end of file diff --git a/cocotb/bypass_write_read/Makefile b/cocotb/bypass_write_read/Makefile new file mode 100644 index 0000000..13e1213 --- /dev/null +++ b/cocotb/bypass_write_read/Makefile @@ -0,0 +1,37 @@ +ROOT_DIR = $(abspath ../../) +BACKEND_DIR = $(ROOT_DIR)/backend +COCOTB_DIR = $(abspath ../) +TB_DIR = $(abspath ./) +include $(ROOT_DIR)/Makefile.base +VBUILD_DIR = $(BACKEND_DIR)/build +VSRC_DIR = $(BACKEND_DIR)/verilog + +TARGET = RawBypassDmaController +TOP_MODULE = mk$(TARGET) +TOP_FILE = $(TOP_MODULE).v +VLOG_FILE = $(TB_DIR)/$(TOP_FILE) + +TB_CASE = dma_straddle +TB_FILE = $(TB_CASE)_tb.py +DATE = $(shell date "+%Y%m%d") +LOG_FILE = $(TB_DIR)/log/$(DATE)_$(TOP_MODULE).log + +cocotb:clean verilog prepare run + +verilog: + cd $(BACKEND_DIR) && make clean && make verilog TOPMODULE=$(TOP_MODULE) + +prepare: + rm -rf $(VLOG_FILE) + bluetcl $(BACKEND_DIR)/listVlogFiles.tcl -bdir $(VBUILD_DIR) -vdir $(VSRC_DIR) $(TOP_MODULE) $(TOP_MODULE) | grep -i '\.v' | xargs -I {} cat {} >> $(VLOG_FILE) + sed -i '1i `timescale 1ns/1ps' $(VLOG_FILE) + +run: + cd $(TB_DIR) + mkdir -p log + python3 $(TB_FILE) 2>&1 | tee $(LOG_FILE) + +clean: + cd $(BACKEND_DIR) && make clean + cd $(TB_DIR) && rm -rf $(VLOG_FILE) __pycache__ .pytest_cache sim_build *.log + \ No newline at end of file diff --git a/cocotb/bypass_write_read/dma_straddle_tb.py b/cocotb/bypass_write_read/dma_straddle_tb.py index afe22d5..9276064 100644 --- a/cocotb/bypass_write_read/dma_straddle_tb.py +++ b/cocotb/bypass_write_read/dma_straddle_tb.py @@ -1,346 +1,19 @@ #!/usr/bin/env python -import itertools -import logging import os import random -import queue import cocotb_test.simulator -import pytest - import cocotb from cocotb.triggers import RisingEdge, FallingEdge, Timer -from cocotb.regression import TestFactory -from cocotb.clock import Clock -from cocotbext.pcie.core import RootComplex -from cocotbext.pcie.xilinx.us import UltraScalePlusPcieDevice -from cocotbext.axi.stream import define_stream -from cocotbext.axi import (AxiStreamBus, AxiStreamSource, AxiStreamSink, AxiStreamMonitor, AxiStreamFrame) +import sys +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) +from bdmatb import BdmaBypassTb # class TB architecture # -------------- ------------- ----------- # | Root Complex | <-> | End Pointer | <-> | Dut(DMAC) | # -------------- ------------- ----------- - -DescBus, DescTransaction, DescSource, DescSink, DescMonitor = define_stream("Desc", - signals=["start_addr", "byte_cnt", "is_write", "valid", "ready"] -) - -class TB(object): - def __init__(self, dut, msix=False): - self.dut = dut - - self.log = logging.getLogger("cocotb.tb") - self.log.setLevel(logging.DEBUG) - - self.clock = dut.CLK - self.resetn = dut.RST_N - - self._bus_width = 512 - self._bus_bytes = 64 - - # PCIe - self.rc = RootComplex() - - cq_straddle = False - cc_straddle = False - rq_straddle = True - rc_straddle = True - rc_4tlp_straddle = False - - self.client_tag = bool(int(os.getenv("CLIENT_TAG", "1"))) - - self.dev = UltraScalePlusPcieDevice( - # configuration options - pcie_generation=3, - # pcie_link_width=2, - # user_clk_frequency=250e6, - alignment="dword", - cq_straddle=cq_straddle, - cc_straddle=cc_straddle, - rq_straddle=rq_straddle, - rc_straddle=rc_straddle, - rc_4tlp_straddle=rc_4tlp_straddle, - pf_count=1, - max_payload_size=1024, - enable_client_tag=self.client_tag, - enable_extended_tag=False, - enable_parity=False, - enable_rx_msg_interface=False, - enable_sriov=False, - enable_extended_configuration=False, - - pf0_msi_enable=True, - pf0_msi_count=32, - pf1_msi_enable=False, - pf1_msi_count=1, - pf2_msi_enable=False, - pf2_msi_count=1, - pf3_msi_enable=False, - pf3_msi_count=1, - pf0_msix_enable=msix, - pf0_msix_table_size=63, - pf0_msix_table_bir=4, - pf0_msix_table_offset=0x00000000, - pf0_msix_pba_bir=4, - pf0_msix_pba_offset=0x00008000, - pf1_msix_enable=False, - pf1_msix_table_size=0, - pf1_msix_table_bir=0, - pf1_msix_table_offset=0x00000000, - pf1_msix_pba_bir=0, - pf1_msix_pba_offset=0x00000000, - pf2_msix_enable=False, - pf2_msix_table_size=0, - pf2_msix_table_bir=0, - pf2_msix_table_offset=0x00000000, - pf2_msix_pba_bir=0, - pf2_msix_pba_offset=0x00000000, - pf3_msix_enable=False, - pf3_msix_table_size=0, - pf3_msix_table_bir=0, - pf3_msix_table_offset=0x00000000, - pf3_msix_pba_bir=0, - pf3_msix_pba_offset=0x00000000, - - # signals - user_clk=self.clock, - # user_reset=~self.resetn, - user_lnk_up=dut.user_lnk_up, - # sys_clk=dut.sys_clk, - # sys_clk_gt=dut.sys_clk_gt, - # sys_reset=dut.sys_reset, - # phy_rdy_out=dut.phy_rdy_out, - - rq_bus=AxiStreamBus.from_prefix(dut, "m_axis_rq"), - pcie_rq_seq_num0=dut.pcie_rq_seq_num0, - pcie_rq_seq_num_vld0=dut.pcie_rq_seq_num_vld0, - pcie_rq_seq_num1=dut.pcie_rq_seq_num1, - pcie_rq_seq_num_vld1=dut.pcie_rq_seq_num_vld1, - pcie_rq_tag0=dut.pcie_rq_tag0, - pcie_rq_tag1=dut.pcie_rq_tag1, - # pcie_rq_tag_av=dut.pcie_rq_tag_av, - pcie_rq_tag_vld0=dut.pcie_rq_tag_vld0, - pcie_rq_tag_vld1=dut.pcie_rq_tag_vld1, - - rc_bus=AxiStreamBus.from_prefix(dut, "s_axis_rc"), - - cq_bus=AxiStreamBus.from_prefix(dut, "s_axis_cq"), - pcie_cq_np_req=dut.pcie_cq_np_req, - pcie_cq_np_req_count=dut.pcie_cq_np_req_count, - - cc_bus=AxiStreamBus.from_prefix(dut, "m_axis_cc"), - - pcie_tfc_nph_av=dut.pcie_tfc_nph_av, - pcie_tfc_npd_av=dut.pcie_tfc_npd_av, - cfg_phy_link_down=dut.cfg_phy_link_down, - cfg_phy_link_status=dut.cfg_phy_link_status, - cfg_negotiated_width=dut.cfg_negotiated_width, - cfg_current_speed=dut.cfg_current_speed, - cfg_max_payload=dut.cfg_max_payload, - cfg_max_read_req=dut.cfg_max_read_req, - cfg_function_status=dut.cfg_function_status, - cfg_function_power_state=dut.cfg_function_power_state, - cfg_vf_status=dut.cfg_vf_status, - cfg_vf_power_state=dut.cfg_vf_power_state, - cfg_link_power_state=dut.cfg_link_power_state, - cfg_mgmt_addr=dut.cfg_mgmt_addr, - cfg_mgmt_function_number=dut.cfg_mgmt_function_number, - cfg_mgmt_write=dut.cfg_mgmt_write, - cfg_mgmt_write_data=dut.cfg_mgmt_write_data, - cfg_mgmt_byte_enable=dut.cfg_mgmt_byte_enable, - cfg_mgmt_read=dut.cfg_mgmt_read, - cfg_mgmt_read_data=dut.cfg_mgmt_read_data, - cfg_mgmt_read_write_done=dut.cfg_mgmt_read_write_done, - cfg_mgmt_debug_access=dut.cfg_mgmt_debug_access, - cfg_err_cor_out=dut.cfg_err_cor_out, - cfg_err_nonfatal_out=dut.cfg_err_nonfatal_out, - cfg_err_fatal_out=dut.cfg_err_fatal_out, - cfg_local_error_valid=dut.cfg_local_error_valid, - cfg_local_error_out=dut.cfg_local_error_out, - cfg_ltssm_state=dut.cfg_ltssm_state, - cfg_rx_pm_state=dut.cfg_rx_pm_state, - cfg_tx_pm_state=dut.cfg_tx_pm_state, - cfg_rcb_status=dut.cfg_rcb_status, - cfg_obff_enable=dut.cfg_obff_enable, - # cfg_pl_status_change=dut.cfg_pl_status_change, - # cfg_tph_requester_enable=dut.cfg_tph_requester_enable, - # cfg_tph_st_mode=dut.cfg_tph_st_mode, - # cfg_vf_tph_requester_enable=dut.cfg_vf_tph_requester_enable, - # cfg_vf_tph_st_mode=dut.cfg_vf_tph_st_mode, - cfg_msg_received=dut.cfg_msg_received, - cfg_msg_received_data=dut.cfg_msg_received_data, - cfg_msg_received_type=dut.cfg_msg_received_type, - cfg_msg_transmit=dut.cfg_msg_transmit, - cfg_msg_transmit_type=dut.cfg_msg_transmit_type, - cfg_msg_transmit_data=dut.cfg_msg_transmit_data, - cfg_msg_transmit_done=dut.cfg_msg_transmit_done, - cfg_fc_ph=dut.cfg_fc_ph, - cfg_fc_pd=dut.cfg_fc_pd, - cfg_fc_nph=dut.cfg_fc_nph, - cfg_fc_npd=dut.cfg_fc_npd, - cfg_fc_cplh=dut.cfg_fc_cplh, - cfg_fc_cpld=dut.cfg_fc_cpld, - cfg_fc_sel=dut.cfg_fc_sel, - cfg_dsn=dut.cfg_dsn, - cfg_bus_number=dut.cfg_bus_number, - cfg_power_state_change_ack=dut.cfg_power_state_change_ack, - cfg_power_state_change_interrupt=dut.cfg_power_state_change_interrupt, - cfg_err_cor_in=dut.cfg_err_cor_in, - cfg_err_uncor_in=dut.cfg_err_uncor_in, - cfg_flr_in_process=dut.cfg_flr_in_process, - cfg_flr_done=dut.cfg_flr_done, - cfg_vf_flr_in_process=dut.cfg_vf_flr_in_process, - cfg_vf_flr_func_num=dut.cfg_vf_flr_func_num, - cfg_vf_flr_done=dut.cfg_vf_flr_done, - cfg_link_training_enable=dut.cfg_link_training_enable, - cfg_interrupt_int=dut.cfg_interrupt_int, - cfg_interrupt_pending=dut.cfg_interrupt_pending, - cfg_interrupt_sent=dut.cfg_interrupt_sent, - cfg_interrupt_msi_enable=dut.cfg_interrupt_msi_enable, - cfg_interrupt_msi_mmenable=dut.cfg_interrupt_msi_mmenable, - cfg_interrupt_msi_mask_update=dut.cfg_interrupt_msi_mask_update, - cfg_interrupt_msi_data=dut.cfg_interrupt_msi_data, - cfg_interrupt_msi_select=dut.cfg_interrupt_msi_select, - cfg_interrupt_msi_int=dut.cfg_interrupt_msi_int, - cfg_interrupt_msi_pending_status=dut.cfg_interrupt_msi_pending_status, - cfg_interrupt_msi_pending_status_data_enable=dut.cfg_interrupt_msi_pending_status_data_enable, - cfg_interrupt_msi_pending_status_function_num=dut.cfg_interrupt_msi_pending_status_function_num, - cfg_interrupt_msi_sent=dut.cfg_interrupt_msi_sent, - cfg_interrupt_msi_fail=dut.cfg_interrupt_msi_fail, - cfg_interrupt_msi_attr=dut.cfg_interrupt_msi_attr, - cfg_interrupt_msi_tph_present=dut.cfg_interrupt_msi_tph_present, - cfg_interrupt_msi_tph_type=dut.cfg_interrupt_msi_tph_type, - cfg_interrupt_msi_tph_st_tag=dut.cfg_interrupt_msi_tph_st_tag, - cfg_interrupt_msi_function_number=dut.cfg_interrupt_msi_function_number, - cfg_pm_aspm_l1_entry_reject=dut.cfg_pm_aspm_l1_entry_reject, - cfg_pm_aspm_tx_l0s_entry_disable=dut.cfg_pm_aspm_tx_l0s_entry_disable, - cfg_hot_reset_out=dut.cfg_hot_reset_out, - cfg_config_space_enable=dut.cfg_config_space_enable, - cfg_req_pm_transition_l23_ready=dut.cfg_req_pm_transition_l23_ready, - cfg_hot_reset_in=dut.cfg_hot_reset_in, - cfg_ds_port_number=dut.cfg_ds_port_number, - cfg_ds_bus_number=dut.cfg_ds_bus_number, - cfg_ds_device_number=dut.cfg_ds_device_number, - ) - - self.dev.log.setLevel(logging.INFO) - - dut.pcie_cq_np_req.setimmediatevalue(1) - dut.cfg_mgmt_addr.setimmediatevalue(0) - dut.cfg_mgmt_function_number.setimmediatevalue(0) - dut.cfg_mgmt_write.setimmediatevalue(0) - dut.cfg_mgmt_write_data.setimmediatevalue(0) - dut.cfg_mgmt_byte_enable.setimmediatevalue(0) - dut.cfg_mgmt_read.setimmediatevalue(0) - dut.cfg_mgmt_debug_access.setimmediatevalue(0) - dut.cfg_msg_transmit.setimmediatevalue(0) - dut.cfg_msg_transmit_type.setimmediatevalue(0) - dut.cfg_msg_transmit_data.setimmediatevalue(0) - dut.cfg_fc_sel.setimmediatevalue(0) - dut.cfg_dsn.setimmediatevalue(0) - dut.cfg_power_state_change_ack.setimmediatevalue(0) - dut.cfg_err_cor_in.setimmediatevalue(0) - dut.cfg_err_uncor_in.setimmediatevalue(0) - dut.cfg_flr_done.setimmediatevalue(0) - dut.cfg_vf_flr_func_num.setimmediatevalue(0) - dut.cfg_vf_flr_done.setimmediatevalue(0) - dut.cfg_link_training_enable.setimmediatevalue(1) - dut.cfg_interrupt_int.setimmediatevalue(0) - dut.cfg_interrupt_pending.setimmediatevalue(0) - dut.cfg_interrupt_msi_select.setimmediatevalue(0) - dut.cfg_interrupt_msi_int.setimmediatevalue(0) - dut.cfg_interrupt_msi_pending_status.setimmediatevalue(0) - dut.cfg_interrupt_msi_pending_status_data_enable.setimmediatevalue(0) - dut.cfg_interrupt_msi_pending_status_function_num.setimmediatevalue(0) - dut.cfg_interrupt_msi_attr.setimmediatevalue(0) - dut.cfg_interrupt_msi_tph_present.setimmediatevalue(0) - dut.cfg_interrupt_msi_tph_type.setimmediatevalue(0) - dut.cfg_interrupt_msi_tph_st_tag.setimmediatevalue(0) - dut.cfg_interrupt_msi_function_number.setimmediatevalue(0) - dut.cfg_pm_aspm_l1_entry_reject.setimmediatevalue(0) - dut.cfg_pm_aspm_tx_l0s_entry_disable.setimmediatevalue(0) - dut.cfg_config_space_enable.setimmediatevalue(1) - dut.cfg_req_pm_transition_l23_ready.setimmediatevalue(0) - dut.cfg_hot_reset_in.setimmediatevalue(0) - dut.cfg_ds_port_number.setimmediatevalue(0) - dut.cfg_ds_bus_number.setimmediatevalue(0) - dut.cfg_ds_device_number.setimmediatevalue(0) - - self.rc.make_port().connect(self.dev) - - # DMA - self.c2h_write_source_0 = AxiStreamSource(AxiStreamBus.from_prefix(dut, "s_axis_c2h_0"), self.clock, self.resetn, False) - self.c2h_desc_source_0 = DescSource(DescBus.from_prefix(dut, "s_desc_c2h_0"),self.clock, self.resetn, False) - self.c2h_read_sink_0 = AxiStreamSink(AxiStreamBus.from_prefix(dut, "m_axis_c2h_0"), self.clock, self.resetn, False) - self.c2h_write_source_1 = AxiStreamSource(AxiStreamBus.from_prefix(dut, "s_axis_c2h_1"), self.clock, self.resetn, False) - self.c2h_desc_source_1 = DescSource(DescBus.from_prefix(dut, "s_desc_c2h_1"), self.clock, self.resetn, False) - self.c2h_read_sink_1 = AxiStreamSink(AxiStreamBus.from_prefix(dut, "m_axis_c2h_1"), self.clock, self.resetn, False) - - #monitor - self.rq_monitor = AxiStreamMonitor(AxiStreamBus.from_prefix(dut, "m_axis_rq"), self.clock, self.resetn, False) - - def gen_random_req(self, channel): - low_boundry = channel * 8192 - high_boundry = (channel + 1) * 8192 - idxs = random.sample(range(low_boundry, high_boundry), 2) - lo_idx, hi_idx = idxs[0], idxs[1] - if (hi_idx < lo_idx): - temp = hi_idx - hi_idx = lo_idx - lo_idx = temp - length = hi_idx - lo_idx + 1 - return (lo_idx, length) - - #Do not use user_rst but gen rstn for bsv - async def gen_reset(self): - self.resetn.value = 0 - await RisingEdge(self.clock) - await RisingEdge(self.clock) - await RisingEdge(self.clock) - self.resetn.value = 1 - await RisingEdge(self.clock) - await RisingEdge(self.clock) - await RisingEdge(self.clock) - self.log.info("Generated DMA RST_N") - - async def send_desc(self, channel, startAddr, length, isWrite): - desc = DescTransaction() - desc.start_addr = startAddr - desc.byte_cnt = length - desc.is_write = isWrite - if channel == 0: - await self.c2h_desc_source_0.send(desc) - else: - await self.c2h_desc_source_1.send(desc) - - async def send_data(self, channel, data): - if channel == 0: - await self.c2h_write_source_0.send(data) - else: - await self.c2h_write_source_1.send(data) - - async def recv_data(self, channel): - if channel == 0 : - data = await self.c2h_read_sink_0.read() - else: - data = await self.c2h_read_sink_1.read() - data = bytes(''.join([chr(item) for item in data]), encoding='UTF-8') - return data - - async def run_single_write_once(self, channel, addr, data): - length = len(data) - self.log.info("Conduct DMA single write: channel %d addr %d, length %d, char %c", channel, addr, length, data[0]) - await self.send_desc(channel, addr, length, True) - await self.send_data(channel, data) - - async def run_single_read_once(self, channel, addr, length): - self.log.info("Conduct DMA single read: channel %d addr %d, length %d", channel, addr, length) - await self.send_desc(channel, addr, length, False) - data = await self.recv_data(channel) - self.log.info("Read data from RootComplex successfully, recv length %d, req length %d", len(data), length) - return data async def single_path_random_write_test(pcie_tb, dma_channel, mem): for _ in range(100): @@ -366,7 +39,7 @@ async def single_path_random_read_test(pcie_tb, dma_channel, mem): @cocotb.test(timeout_time=100000000, timeout_unit="ns") async def straddle_write_test(dut): - tb = TB(dut) + tb = BdmaBypassTb(dut) await tb.gen_reset() await tb.rc.enumerate() @@ -389,7 +62,7 @@ async def straddle_write_test(dut): @cocotb.test(timeout_time=10000000, timeout_unit="ns") async def random_read_test(dut): - tb = TB(dut) + tb = BdmaBypassTb(dut) await tb.gen_reset() await tb.rc.enumerate() @@ -415,7 +88,7 @@ async def random_read_test(dut): def test_dma(): - dut = "mkRawDmaController" + dut = "mkRawBypassDmaController" module = os.path.splitext(os.path.basename(__file__))[0] toplevel = dut diff --git a/cocotb/bypass_write_read/dma_wr_rd_tb.py b/cocotb/bypass_write_read/dma_wr_rd_tb.py index 54552fe..7964fbc 100644 --- a/cocotb/bypass_write_read/dma_wr_rd_tb.py +++ b/cocotb/bypass_write_read/dma_wr_rd_tb.py @@ -9,7 +9,10 @@ import cocotb_test.simulator +import sys +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) from bdmatb import BdmaBypassTb + # class TB architecture # -------------- ------------- ----------- # | Root Complex | <-> | End Pointer | <-> | Dut(DMAC) | @@ -68,6 +71,7 @@ async def step_random_read_test(dut): tests_dir = os.path.dirname(__file__) rtl_dir = tests_dir +bdmatb_dir = os.path.dirname(tests_dir) def test_dma(): diff --git a/cocotb/dma_fullypipeline_tb.py b/cocotb/dma_fullypipeline_tb.py deleted file mode 100644 index 79589a5..0000000 --- a/cocotb/dma_fullypipeline_tb.py +++ /dev/null @@ -1,384 +0,0 @@ -#!/usr/bin/env python -import itertools -import logging -import os -import random -import queue - -import cocotb_test.simulator -import pytest - -import cocotb -from cocotb.triggers import RisingEdge, FallingEdge, Timer -from cocotb.regression import TestFactory -from cocotb.clock import Clock - -from cocotbext.pcie.core import RootComplex -from cocotbext.pcie.xilinx.us import UltraScalePlusPcieDevice -from cocotbext.axi.stream import define_stream -from cocotbext.axi import (AxiStreamBus, AxiStreamSource, AxiStreamSink, AxiStreamMonitor, AxiStreamFrame) - -# class TB architecture -# -------------- ------------- ----------- -# | Root Complex | <-> | End Pointer | <-> | Dut(DMAC) | -# -------------- ------------- ----------- - -DescBus, DescTransaction, DescSource, DescSink, DescMonitor = define_stream("Desc", - signals=["start_addr", "byte_cnt", "is_write", "valid", "ready"] -) - -class TB(object): - def __init__(self, dut, msix=False): - self.dut = dut - - self.log = logging.getLogger("cocotb.tb") - self.log.setLevel(logging.DEBUG) - - self.clock = dut.CLK - self.resetn = dut.RST_N - - self._bus_width = 512 - self._bus_bytes = 64 - - # PCIe - self.rc = RootComplex() - - cq_straddle = False - cc_straddle = False - rq_straddle = True - rc_straddle = True - rc_4tlp_straddle = False - - self.client_tag = bool(int(os.getenv("CLIENT_TAG", "1"))) - - self.dev = UltraScalePlusPcieDevice( - # configuration options - pcie_generation=3, - # pcie_link_width=2, - # user_clk_frequency=250e6, - alignment="dword", - cq_straddle=cq_straddle, - cc_straddle=cc_straddle, - rq_straddle=rq_straddle, - rc_straddle=rc_straddle, - rc_4tlp_straddle=rc_4tlp_straddle, - pf_count=1, - max_payload_size=1024, - enable_client_tag=self.client_tag, - enable_extended_tag=False, - enable_parity=False, - enable_rx_msg_interface=False, - enable_sriov=False, - enable_extended_configuration=False, - - pf0_msi_enable=True, - pf0_msi_count=32, - pf1_msi_enable=False, - pf1_msi_count=1, - pf2_msi_enable=False, - pf2_msi_count=1, - pf3_msi_enable=False, - pf3_msi_count=1, - pf0_msix_enable=msix, - pf0_msix_table_size=63, - pf0_msix_table_bir=4, - pf0_msix_table_offset=0x00000000, - pf0_msix_pba_bir=4, - pf0_msix_pba_offset=0x00008000, - pf1_msix_enable=False, - pf1_msix_table_size=0, - pf1_msix_table_bir=0, - pf1_msix_table_offset=0x00000000, - pf1_msix_pba_bir=0, - pf1_msix_pba_offset=0x00000000, - pf2_msix_enable=False, - pf2_msix_table_size=0, - pf2_msix_table_bir=0, - pf2_msix_table_offset=0x00000000, - pf2_msix_pba_bir=0, - pf2_msix_pba_offset=0x00000000, - pf3_msix_enable=False, - pf3_msix_table_size=0, - pf3_msix_table_bir=0, - pf3_msix_table_offset=0x00000000, - pf3_msix_pba_bir=0, - pf3_msix_pba_offset=0x00000000, - - # signals - user_clk=self.clock, - # user_reset=~self.resetn, - user_lnk_up=dut.user_lnk_up, - # sys_clk=dut.sys_clk, - # sys_clk_gt=dut.sys_clk_gt, - # sys_reset=dut.sys_reset, - # phy_rdy_out=dut.phy_rdy_out, - - rq_bus=AxiStreamBus.from_prefix(dut, "m_axis_rq"), - pcie_rq_seq_num0=dut.pcie_rq_seq_num0, - pcie_rq_seq_num_vld0=dut.pcie_rq_seq_num_vld0, - pcie_rq_seq_num1=dut.pcie_rq_seq_num1, - pcie_rq_seq_num_vld1=dut.pcie_rq_seq_num_vld1, - pcie_rq_tag0=dut.pcie_rq_tag0, - pcie_rq_tag1=dut.pcie_rq_tag1, - # pcie_rq_tag_av=dut.pcie_rq_tag_av, - pcie_rq_tag_vld0=dut.pcie_rq_tag_vld0, - pcie_rq_tag_vld1=dut.pcie_rq_tag_vld1, - - rc_bus=AxiStreamBus.from_prefix(dut, "s_axis_rc"), - - cq_bus=AxiStreamBus.from_prefix(dut, "s_axis_cq"), - pcie_cq_np_req=dut.pcie_cq_np_req, - pcie_cq_np_req_count=dut.pcie_cq_np_req_count, - - cc_bus=AxiStreamBus.from_prefix(dut, "m_axis_cc"), - - pcie_tfc_nph_av=dut.pcie_tfc_nph_av, - pcie_tfc_npd_av=dut.pcie_tfc_npd_av, - cfg_phy_link_down=dut.cfg_phy_link_down, - cfg_phy_link_status=dut.cfg_phy_link_status, - cfg_negotiated_width=dut.cfg_negotiated_width, - cfg_current_speed=dut.cfg_current_speed, - cfg_max_payload=dut.cfg_max_payload, - cfg_max_read_req=dut.cfg_max_read_req, - cfg_function_status=dut.cfg_function_status, - cfg_function_power_state=dut.cfg_function_power_state, - cfg_vf_status=dut.cfg_vf_status, - cfg_vf_power_state=dut.cfg_vf_power_state, - cfg_link_power_state=dut.cfg_link_power_state, - cfg_mgmt_addr=dut.cfg_mgmt_addr, - cfg_mgmt_function_number=dut.cfg_mgmt_function_number, - cfg_mgmt_write=dut.cfg_mgmt_write, - cfg_mgmt_write_data=dut.cfg_mgmt_write_data, - cfg_mgmt_byte_enable=dut.cfg_mgmt_byte_enable, - cfg_mgmt_read=dut.cfg_mgmt_read, - cfg_mgmt_read_data=dut.cfg_mgmt_read_data, - cfg_mgmt_read_write_done=dut.cfg_mgmt_read_write_done, - cfg_mgmt_debug_access=dut.cfg_mgmt_debug_access, - cfg_err_cor_out=dut.cfg_err_cor_out, - cfg_err_nonfatal_out=dut.cfg_err_nonfatal_out, - cfg_err_fatal_out=dut.cfg_err_fatal_out, - cfg_local_error_valid=dut.cfg_local_error_valid, - cfg_local_error_out=dut.cfg_local_error_out, - cfg_ltssm_state=dut.cfg_ltssm_state, - cfg_rx_pm_state=dut.cfg_rx_pm_state, - cfg_tx_pm_state=dut.cfg_tx_pm_state, - cfg_rcb_status=dut.cfg_rcb_status, - cfg_obff_enable=dut.cfg_obff_enable, - # cfg_pl_status_change=dut.cfg_pl_status_change, - # cfg_tph_requester_enable=dut.cfg_tph_requester_enable, - # cfg_tph_st_mode=dut.cfg_tph_st_mode, - # cfg_vf_tph_requester_enable=dut.cfg_vf_tph_requester_enable, - # cfg_vf_tph_st_mode=dut.cfg_vf_tph_st_mode, - cfg_msg_received=dut.cfg_msg_received, - cfg_msg_received_data=dut.cfg_msg_received_data, - cfg_msg_received_type=dut.cfg_msg_received_type, - cfg_msg_transmit=dut.cfg_msg_transmit, - cfg_msg_transmit_type=dut.cfg_msg_transmit_type, - cfg_msg_transmit_data=dut.cfg_msg_transmit_data, - cfg_msg_transmit_done=dut.cfg_msg_transmit_done, - cfg_fc_ph=dut.cfg_fc_ph, - cfg_fc_pd=dut.cfg_fc_pd, - cfg_fc_nph=dut.cfg_fc_nph, - cfg_fc_npd=dut.cfg_fc_npd, - cfg_fc_cplh=dut.cfg_fc_cplh, - cfg_fc_cpld=dut.cfg_fc_cpld, - cfg_fc_sel=dut.cfg_fc_sel, - cfg_dsn=dut.cfg_dsn, - cfg_bus_number=dut.cfg_bus_number, - cfg_power_state_change_ack=dut.cfg_power_state_change_ack, - cfg_power_state_change_interrupt=dut.cfg_power_state_change_interrupt, - cfg_err_cor_in=dut.cfg_err_cor_in, - cfg_err_uncor_in=dut.cfg_err_uncor_in, - cfg_flr_in_process=dut.cfg_flr_in_process, - cfg_flr_done=dut.cfg_flr_done, - cfg_vf_flr_in_process=dut.cfg_vf_flr_in_process, - cfg_vf_flr_func_num=dut.cfg_vf_flr_func_num, - cfg_vf_flr_done=dut.cfg_vf_flr_done, - cfg_link_training_enable=dut.cfg_link_training_enable, - cfg_interrupt_int=dut.cfg_interrupt_int, - cfg_interrupt_pending=dut.cfg_interrupt_pending, - cfg_interrupt_sent=dut.cfg_interrupt_sent, - cfg_interrupt_msi_enable=dut.cfg_interrupt_msi_enable, - cfg_interrupt_msi_mmenable=dut.cfg_interrupt_msi_mmenable, - cfg_interrupt_msi_mask_update=dut.cfg_interrupt_msi_mask_update, - cfg_interrupt_msi_data=dut.cfg_interrupt_msi_data, - cfg_interrupt_msi_select=dut.cfg_interrupt_msi_select, - cfg_interrupt_msi_int=dut.cfg_interrupt_msi_int, - cfg_interrupt_msi_pending_status=dut.cfg_interrupt_msi_pending_status, - cfg_interrupt_msi_pending_status_data_enable=dut.cfg_interrupt_msi_pending_status_data_enable, - cfg_interrupt_msi_pending_status_function_num=dut.cfg_interrupt_msi_pending_status_function_num, - cfg_interrupt_msi_sent=dut.cfg_interrupt_msi_sent, - cfg_interrupt_msi_fail=dut.cfg_interrupt_msi_fail, - cfg_interrupt_msi_attr=dut.cfg_interrupt_msi_attr, - cfg_interrupt_msi_tph_present=dut.cfg_interrupt_msi_tph_present, - cfg_interrupt_msi_tph_type=dut.cfg_interrupt_msi_tph_type, - cfg_interrupt_msi_tph_st_tag=dut.cfg_interrupt_msi_tph_st_tag, - cfg_interrupt_msi_function_number=dut.cfg_interrupt_msi_function_number, - cfg_pm_aspm_l1_entry_reject=dut.cfg_pm_aspm_l1_entry_reject, - cfg_pm_aspm_tx_l0s_entry_disable=dut.cfg_pm_aspm_tx_l0s_entry_disable, - cfg_hot_reset_out=dut.cfg_hot_reset_out, - cfg_config_space_enable=dut.cfg_config_space_enable, - cfg_req_pm_transition_l23_ready=dut.cfg_req_pm_transition_l23_ready, - cfg_hot_reset_in=dut.cfg_hot_reset_in, - cfg_ds_port_number=dut.cfg_ds_port_number, - cfg_ds_bus_number=dut.cfg_ds_bus_number, - cfg_ds_device_number=dut.cfg_ds_device_number, - ) - - self.dev.log.setLevel(logging.INFO) - - dut.pcie_cq_np_req.setimmediatevalue(1) - dut.cfg_mgmt_addr.setimmediatevalue(0) - dut.cfg_mgmt_function_number.setimmediatevalue(0) - dut.cfg_mgmt_write.setimmediatevalue(0) - dut.cfg_mgmt_write_data.setimmediatevalue(0) - dut.cfg_mgmt_byte_enable.setimmediatevalue(0) - dut.cfg_mgmt_read.setimmediatevalue(0) - dut.cfg_mgmt_debug_access.setimmediatevalue(0) - dut.cfg_msg_transmit.setimmediatevalue(0) - dut.cfg_msg_transmit_type.setimmediatevalue(0) - dut.cfg_msg_transmit_data.setimmediatevalue(0) - dut.cfg_fc_sel.setimmediatevalue(0) - dut.cfg_dsn.setimmediatevalue(0) - dut.cfg_power_state_change_ack.setimmediatevalue(0) - dut.cfg_err_cor_in.setimmediatevalue(0) - dut.cfg_err_uncor_in.setimmediatevalue(0) - dut.cfg_flr_done.setimmediatevalue(0) - dut.cfg_vf_flr_func_num.setimmediatevalue(0) - dut.cfg_vf_flr_done.setimmediatevalue(0) - dut.cfg_link_training_enable.setimmediatevalue(1) - dut.cfg_interrupt_int.setimmediatevalue(0) - dut.cfg_interrupt_pending.setimmediatevalue(0) - dut.cfg_interrupt_msi_select.setimmediatevalue(0) - dut.cfg_interrupt_msi_int.setimmediatevalue(0) - dut.cfg_interrupt_msi_pending_status.setimmediatevalue(0) - dut.cfg_interrupt_msi_pending_status_data_enable.setimmediatevalue(0) - dut.cfg_interrupt_msi_pending_status_function_num.setimmediatevalue(0) - dut.cfg_interrupt_msi_attr.setimmediatevalue(0) - dut.cfg_interrupt_msi_tph_present.setimmediatevalue(0) - dut.cfg_interrupt_msi_tph_type.setimmediatevalue(0) - dut.cfg_interrupt_msi_tph_st_tag.setimmediatevalue(0) - dut.cfg_interrupt_msi_function_number.setimmediatevalue(0) - dut.cfg_pm_aspm_l1_entry_reject.setimmediatevalue(0) - dut.cfg_pm_aspm_tx_l0s_entry_disable.setimmediatevalue(0) - dut.cfg_config_space_enable.setimmediatevalue(1) - dut.cfg_req_pm_transition_l23_ready.setimmediatevalue(0) - dut.cfg_hot_reset_in.setimmediatevalue(0) - dut.cfg_ds_port_number.setimmediatevalue(0) - dut.cfg_ds_bus_number.setimmediatevalue(0) - dut.cfg_ds_device_number.setimmediatevalue(0) - - self.rc.make_port().connect(self.dev) - - # DMA - self.c2h_write_source_0 = AxiStreamSource(AxiStreamBus.from_prefix(dut, "s_axis_c2h_0"), self.clock, self.resetn, False) - self.c2h_desc_source_0 = DescSource(DescBus.from_prefix(dut, "s_desc_c2h_0"),self.clock, self.resetn, False) - self.c2h_read_sink_0 = AxiStreamSink(AxiStreamBus.from_prefix(dut, "m_axis_c2h_0"), self.clock, self.resetn, False) - self.c2h_write_source_1 = AxiStreamSource(AxiStreamBus.from_prefix(dut, "s_axis_c2h_1"), self.clock, self.resetn, False) - self.c2h_desc_source_1 = DescSource(DescBus.from_prefix(dut, "s_desc_c2h_1"), self.clock, self.resetn, False) - self.c2h_read_sink_1 = AxiStreamSink(AxiStreamBus.from_prefix(dut, "m_axis_c2h_1"), self.clock, self.resetn, False) - - #monitor - self.rq_monitor = AxiStreamMonitor(AxiStreamBus.from_prefix(dut, "m_axis_rq"), self.clock, self.resetn, False) - - #Do not use user_rst but gen rstn for bsv - async def gen_reset(self): - self.resetn.value = 0 - await RisingEdge(self.clock) - await RisingEdge(self.clock) - await RisingEdge(self.clock) - self.resetn.value = 1 - await RisingEdge(self.clock) - await RisingEdge(self.clock) - await RisingEdge(self.clock) - self.log.info("Generated DMA RST_N") - - async def send_desc(self, channel, startAddr, length, isWrite): - desc = DescTransaction() - desc.start_addr = startAddr - desc.byte_cnt = length - desc.is_write = isWrite - if channel == 0: - await self.c2h_desc_source_0.send(desc) - else: - await self.c2h_desc_source_1.send(desc) - - async def send_data(self, channel, data): - if channel == 0: - await self.c2h_write_source_0.send(data) - else: - await self.c2h_write_source_1.send(data) - - async def recv_data(self, channel): - if channel == 0 : - data = await self.c2h_read_sink_0.read() - else: - data = await self.c2h_read_sink_1.read() - data = bytes(''.join([chr(item) for item in data]), encoding='UTF-8') - return data - - async def run_single_write_once(self, channel, addr, data): - length = len(data) - self.log.info("Conduct DMA single write: channel %d addr %d, length %d, char %c", channel, addr, length, data[0]) - await self.send_desc(channel, addr, length, True) - await self.send_data(channel, data) - - async def run_single_read_once(self, channel, addr, length): - self.log.info("Conduct DMA single read: channel %d addr %d, length %d", channel, addr, length) - await self.send_desc(channel, addr, length, False) - data = await self.recv_data(channel) - self.log.info("Read data from RootComplex successfully, recv length %d, req length %d", len(data), length) - return data - -async def small_write_drive(pcie_tb, dma_channel, mem): - for _ in range(100): - addr= random.randint(0, 8192) - addr = mem.get_absolute_address(addr) - length = random.randint(1, 64) - char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") - data = char * length - await pcie_tb.run_single_write_once(dma_channel, addr, data) - -@cocotb.test(timeout_time=100000000, timeout_unit="ns") -async def small_desc_fp_test(dut): - - tb = TB(dut) - await tb.gen_reset() - - await tb.rc.enumerate() - dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) - - await dev.enable_device() - await dev.set_master() - - mem = tb.rc.mem_pool.alloc_region(1024*1024) - - await small_write_drive(tb, 0, mem) - await Timer(1000, units='ns') - -tests_dir = os.path.dirname(__file__) -rtl_dir = tests_dir - - -def test_dma(): - dut = "mkRawDmaController" - module = os.path.splitext(os.path.basename(__file__))[0] - toplevel = dut - - verilog_sources = [ - os.path.join(rtl_dir, f"{dut}.v") - ] - - sim_build = os.path.join(tests_dir, "sim_build", dut) - - cocotb_test.simulator.run( - python_search=[tests_dir], - verilog_sources=verilog_sources, - toplevel=toplevel, - module=module, - timescale="1ns/1ps", - sim_build=sim_build - ) - -if __name__ == "__main__": - test_dma() \ No newline at end of file diff --git a/cocotb/Makefile b/cocotb/simple_write_read/Makefile similarity index 91% rename from cocotb/Makefile rename to cocotb/simple_write_read/Makefile index 0f23a2f..411444b 100644 --- a/cocotb/Makefile +++ b/cocotb/simple_write_read/Makefile @@ -1,5 +1,6 @@ -ROOT_DIR = $(abspath ../) +ROOT_DIR = $(abspath ../../) BACKEND_DIR = $(ROOT_DIR)/backend +COCOTB_DIR = $(abspath ../) TB_DIR = $(abspath ./) include $(ROOT_DIR)/Makefile.base VBUILD_DIR = $(BACKEND_DIR)/build @@ -10,11 +11,13 @@ TOP_MODULE = mk$(TARGET) TOP_FILE = $(TOP_MODULE).v VLOG_FILE = $(TB_DIR)/$(TOP_FILE) -TB_CASE = dma_bar +TB_CASE = dma_simple_wr_rd TB_FILE = $(TB_CASE)_tb.py DATE = $(shell date "+%Y%m%d") LOG_FILE = $(TB_DIR)/log/$(DATE)_$(TOP_MODULE).log +cocotb:clean verilog prepare run + verilog: cd $(BACKEND_DIR) && make clean && make verilog TOPMODULE=$(TOP_MODULE) @@ -28,8 +31,6 @@ run: mkdir -p log python3 $(TB_FILE) 2>&1 | tee $(LOG_FILE) -cocotb:clean verilog prepare run - clean: cd $(BACKEND_DIR) && make clean cd $(TB_DIR) && rm -rf $(VLOG_FILE) __pycache__ .pytest_cache sim_build *.log diff --git a/cocotb/simple_write_read/dma_simple_wr_rd_tb.py b/cocotb/simple_write_read/dma_simple_wr_rd_tb.py index 355ca86..054ae2b 100644 --- a/cocotb/simple_write_read/dma_simple_wr_rd_tb.py +++ b/cocotb/simple_write_read/dma_simple_wr_rd_tb.py @@ -7,11 +7,23 @@ import cocotb_test.simulator +import sys +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) from bdmatb import BdmaSimpleTb tests_dir = os.path.dirname(__file__) rtl_dir = tests_dir +async def single_path_random_write_test(pcie_tb, dma_channel, mem): + for _ in range(100): + addr, length = pcie_tb.gen_random_req(dma_channel) + addr = mem.get_absolute_address(addr) + char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") + data = char * length + await pcie_tb.run_single_write_once(dma_channel, addr, data) + await Timer(200+length, units='ns') + assert mem[addr:addr+length] == data + @cocotb.test(timeout_time=10000000, timeout_unit="ns") async def bar_test(dut): tb = BdmaSimpleTb(dut) @@ -24,18 +36,12 @@ async def bar_test(dut): await dev.set_master() dev_bar0 = dev.bar_window[0] - addr = 0x12345678 - length = 0xffff - isWrite = True - addrLo = addr & 0xFFFFFFFF - addrHi = (addr >> 32) & 0xFFFFFFFF - base_addr = 0 - await dev_bar0.write(base_addr + 1, addrLo.to_bytes(4, byteorder='big', signed=False)) - await dev_bar0.write(base_addr + 2, addrHi.to_bytes(4, byteorder='big', signed=False)) - await dev_bar0.write(base_addr + 3, length.to_bytes(4, byteorder='big', signed=False)) - await dev_bar0.write(base_addr, int(isWrite).to_bytes(4, byteorder='big', signed=False)) + tb.conbine_bar(dev_bar0) + await tb.memory_map() + + mem = tb.rc.mem_pool.alloc_region(1024*1024) + await single_path_random_write_test(tb, 0, mem) - await Timer(500, units='ns') def test_dma(): dut = "mkRawSimpleDmaController" module = os.path.splitext(os.path.basename(__file__))[0] diff --git a/src/DmaC2HPipe.bsv b/src/DmaC2HPipe.bsv index 3902ce7..d6a90fd 100644 --- a/src/DmaC2HPipe.bsv +++ b/src/DmaC2HPipe.bsv @@ -246,7 +246,7 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); end else begin stream.isLast = False; - $display($time, "ns SIM DEBUG @ mkDmaC2HReadCore%d, expect bytes %drecv bytes %d", pathIdx, reqInflightFifo.first.length, recvBytesCnt); + // $display($time, "ns SIM DEBUG @ mkDmaC2HReadCore%d, expect bytes %drecv bytes %d", pathIdx, reqInflightFifo.first.length, recvBytesCnt); end end // recvTlpCntReg <= recvTlpCnt; diff --git a/src/DmaH2CPipe.bsv b/src/DmaH2CPipe.bsv index 8c1000c..c638ae8 100644 --- a/src/DmaH2CPipe.bsv +++ b/src/DmaH2CPipe.bsv @@ -78,12 +78,12 @@ module mkDmaH2CPipe(DmaH2CPipe); let descriptor = getDescriptorFromFirstBeat(stream); case (descriptor.reqType) fromInteger(valueOf(MEM_WRITE_REQ)): begin - $display($time, "ns SIM INFO @ mkDmaH2CPipe: MemWrite Detect!"); + // $display($time, "ns SIM INFO @ mkDmaH2CPipe: MemWrite Detect!"); let firstData = getDataFromFirstBeat(stream); DmaCsrValue wrValue = truncate(firstData); let wrAddr = getCsrAddrFromCqDescriptor(descriptor); if (descriptor.dwordCnt == fromInteger(valueOf(IDEA_CQ_CSR_DWORD_CNT))) begin - $display($time, "ns SIM INFO @ mkDmaH2CPipe: Valid wrReq with Addr %h, data %h", wrAddr << valueOf(TLog#(DWORD_BYTES)), wrValue); + // $display($time, "ns SIM INFO @ mkDmaH2CPipe: Valid wrReq with Addr %d, data %h", wrAddr, wrValue); let req = CsrRequest { addr : wrAddr, value : wrValue, @@ -97,12 +97,12 @@ module mkDmaH2CPipe(DmaH2CPipe); end end else begin - $display($time, "ns SIM INFO @ mkDmaH2CPipe: Invalid wrReq with Addr %h, data %h", wrAddr << valueOf(TLog#(DWORD_BYTES)), wrValue); + $display($time, "ns SIM INFO @ mkDmaH2CPipe: Invalid wrReq with Addr %d, data %h", wrAddr, wrValue); illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; end end fromInteger(valueOf(MEM_READ_REQ)): begin - $display($time, "ns SIM INFO @ mkDmaH2CPipe: MemRead Detect!"); + // $display($time, "ns SIM INFO @ mkDmaH2CPipe: MemRead Detect!"); let rdAddr = getCsrAddrFromCqDescriptor(descriptor); let req = CsrRequest{ addr : rdAddr, diff --git a/src/DmaUtils.bsv b/src/DmaUtils.bsv index 9a1525f..f5390ff 100644 --- a/src/DmaUtils.bsv +++ b/src/DmaUtils.bsv @@ -196,7 +196,7 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); else begin firstChunkLen = request.length; end - $display($time, "ns SIM INFO @ mkChunkSplit: get first chunkLen, offset %d, remainder %d", offset, TlpPayloadSize'(request.startAddr[tlpMaxSizeWidthReg-1:0])); + // $display($time, "ns SIM INFO @ mkChunkSplit: get first chunkLen, offset %d, remainder %d", offset, TlpPayloadSize'(request.startAddr[tlpMaxSizeWidthReg-1:0])); firstChunkSplitor.splitLocationFifoIn.enq(unpack(truncate(firstChunkLen))); let firstReq = DmaRequest { startAddr : request.startAddr, @@ -275,7 +275,6 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); reqOutFifo.enq(chunkReq); end end - $display($time, "ns SIM INFO @ mkChunkSplit: debug, next addr %d, remainBytesLen %d", nextStartAddr, remainLen); nextStartAddrReg <= nextStartAddr; remainLenReg <= remainLen; isInSplitReg <= (remainLen != 0); @@ -349,7 +348,7 @@ module mkRqDescriptorGenerator#(Bool isWrite)(RqDescriptorGenerator); lastByteEn = 0; end byteEnOutFifo.enq(tuple2(firstByteEn, lastByteEn)); - $display($time, "ns SIM INFO @ mkRqDescriptorGenerator: generate desc, tag %d, dwcnt %d, start:%d, end:%d, byteCnt:%d ", exReq.tag, dwCnt, exReq.startAddr, exReq.endAddr, exReq.length); + // $display($time, "ns SIM INFO @ mkRqDescriptorGenerator: generate desc, tag %d, dwcnt %d, start:%d, end:%d, byteCnt:%d ", exReq.tag, dwCnt, exReq.startAddr, exReq.endAddr, exReq.length); endrule interface exReqFifoIn = convertFifoToFifoIn(exReqInFifo); diff --git a/src/DmaWrapper.bsv b/src/DmaWrapper.bsv index c5292f5..229f220 100755 --- a/src/DmaWrapper.bsv +++ b/src/DmaWrapper.bsv @@ -290,6 +290,14 @@ endmodule interface RawSimpleDmaController; // User Logic Ifc + (* prefix = "s_axis_c2h_0" *) interface RawDmaDataSlave dmaWrData0; + (* prefix = "m_axis_c2h_0" *) interface RawDmaDataMaster dmaRdData0; + + (* prefix = "s_axis_c2h_1" *) interface RawDmaDataSlave dmaWrData1; + (* prefix = "m_axis_c2h_1" *) interface RawDmaDataMaster dmaRdData1; + + (* prefix = "s_h2c_csr" *) interface RawDmaCsrSlave dmaCsrResp; + (* prefix = "m_h2c_csr" *) interface RawDmaCsrMaster dmaCsrReq; // Raw PCIe interfaces, connected to the Xilinx PCIe IP (* prefix = "" *) interface RawXilinxPcieIp rawPcie; @@ -300,40 +308,59 @@ endinterface module mkRawSimpleDmaController(RawSimpleDmaController); DmaController dmac <- mkDmaController; DmaSimpleCore simpleCore <- mkDmaSimpleCore; - GenericCsr dummyCsr <- mkDummyCsr; - Vector#(DMA_PATH_NUM, FIFOF#(DataStream)) dataFifo <- replicateM(mkSizedBRAMFIFOF(valueOf(BUS_BOUNDARY))); for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1 ) begin - mkConnection(dataFifo[pathIdx], dmac.c2hDataFifoIn[pathIdx]); - mkConnection(dmac.c2hDataFifoOut[pathIdx], dataFifo[pathIdx]); mkConnection(dmac.c2hReqFifoIn[pathIdx], simpleCore.c2hReqFifoOut[pathIdx]); end + let dmaWrData0Ifc <- mkFifoInToRawDmaDataSlave(dmac.c2hDataFifoIn[0]); + let dmaRdData0Ifc <- mkFifoOutToRawDmaDataMaster(dmac.c2hDataFifoOut[0]); + + let dmaWrData1Ifc <- mkFifoInToRawDmaDataSlave(dmac.c2hDataFifoIn[1]); + let dmaRdData1Ifc <- mkFifoOutToRawDmaDataMaster(dmac.c2hDataFifoOut[1]); + + let csrRespIfc <- mkFifoInToRawCsrClient(dmac.h2cRespFifoIn); + let csrReqIfc <- mkFifoOutToRawCsrMaster(dmac.h2cReqFifoOut); + mkConnection(dmac.innerReqFifoOut, simpleCore.reqFifoIn); mkConnection(dmac.innerRespFifoIn, simpleCore.respFifoOut); - mkConnection(dmac.h2cReqFifoOut, dummyCsr.reqFifoIn); - mkConnection(dmac.h2cRespFifoIn, dummyCsr.respFifoOut); - interface rawPcie = dmac.rawPcie; + + interface dmaWrData0 = dmaWrData0Ifc; + interface dmaRdData0 = dmaRdData0Ifc; + interface dmaWrData1 = dmaWrData1Ifc; + interface dmaRdData1 = dmaRdData1Ifc; + interface dmaCsrResp = csrRespIfc; + interface dmaCsrReq = csrReqIfc; endmodule +interface RawLoopDmaController; + // User Logic Ifc + + // Raw PCIe interfaces, connected to the Xilinx PCIe IP + (* prefix = "" *) interface RawXilinxPcieIp rawPcie; +endinterface + (* synthesize *) -module mkRawTestDmaController(RawSimpleDmaController); +module mkRawTestDmaController(RawLoopDmaController); DmaController dmac <- mkDmaController; - TestModule tm <- mkTestModule; + DmaSimpleCore simpleCore <- mkDmaSimpleCore; GenericCsr dummyCsr <- mkDummyCsr; + Vector#(DMA_PATH_NUM, FIFOF#(DataStream)) dataFifo <- replicateM(mkSizedBRAMFIFOF(valueOf(BUS_BOUNDARY))); for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1 ) begin - mkConnection(tm.c2hDataFifoOut[pathIdx], dmac.c2hDataFifoIn[pathIdx]); - mkConnection(tm.c2hReqFifoOut[pathIdx], dmac.c2hReqFifoIn[pathIdx]); - mkConnection(dmac.c2hDataFifoOut[pathIdx], tm.c2hDataFifoIn[pathIdx]); + mkConnection(dataFifo[pathIdx], dmac.c2hDataFifoIn[pathIdx]); + mkConnection(dmac.c2hDataFifoOut[pathIdx], dataFifo[pathIdx]); + mkConnection(dmac.c2hReqFifoIn[pathIdx], simpleCore.c2hReqFifoOut[pathIdx]); end - mkConnection(dmac.innerReqFifoOut, dummyCsr.reqFifoIn); - mkConnection(dummyCsr.respFifoOut, dmac.innerRespFifoIn); - mkConnection(dmac.h2cReqFifoOut, tm.h2cReqFifoIn); - mkConnection(dmac.h2cRespFifoIn, tm.h2cRespFifoOut); + mkConnection(dmac.innerReqFifoOut, simpleCore.reqFifoIn); + mkConnection(dmac.innerRespFifoIn, simpleCore.respFifoOut); + + mkConnection(dmac.h2cReqFifoOut, dummyCsr.reqFifoIn); + mkConnection(dmac.h2cRespFifoIn, dummyCsr.respFifoOut); interface rawPcie = dmac.rawPcie; + endmodule diff --git a/src/SimpleModeUtils.bsv b/src/SimpleModeUtils.bsv index 966204a..3db24d5 100644 --- a/src/SimpleModeUtils.bsv +++ b/src/SimpleModeUtils.bsv @@ -88,7 +88,7 @@ module mkDmaSimpleCore(DmaSimpleCore); // if not doorbell, write the register else begin controlRegFile.upd(regIdx, req.value); - $display($time, "ns SIM INFO @ mkDmaSimpleCore: register writing regIdx:%d value:%d", regIdx, req.value); + // $display($time, "ns SIM INFO @ mkDmaSimpleCore: register writing regIdx:%d value:%d", regIdx, req.value); end end // Block 1~2 : Channel 0 Va-Pa Table @@ -98,6 +98,7 @@ module mkDmaSimpleCore(DmaSimpleCore); value : req.value, isWrite : True }; + // $display($time, "ns SIM INFO @ mkDmaSimpleCore: paTableBram0 writing addr:%d value:%d", vaReq.addr, req.value); paTableBram[0].paSetFifoIn.enq(vaReq); end // Block 3~4 : Channel 1 Va-Pa Table @@ -108,6 +109,7 @@ module mkDmaSimpleCore(DmaSimpleCore); isWrite : True }; paTableBram[1].paSetFifoIn.enq(vaReq); + // $display($time, "ns SIM INFO @ mkDmaSimpleCore: paTableBram1 writing addr:%d value:%d", vaReq.addr, req.value); end end // Read Request @@ -156,6 +158,8 @@ module mkPhyAddrBram(PhyAddrBram); BRAM1Port#(PaBramAddr, DmaCsrValue) phyAddrLoBram <- mkBRAM1Server(defaultValue); BRAM1Port#(PaBramAddr, DmaCsrValue) phyAddrHiBram <- mkBRAM1Server(defaultValue); + DmaMemAddr pageMask = (valueOf(IS_HUGE_PAGE)>0) ? 'h1FFFFF : 'hFFF; + function Bool isLoAddr(DmaCsrAddr addr); return unpack(addr[0]); endfunction @@ -181,6 +185,7 @@ module mkPhyAddrBram(PhyAddrBram); // if is setting va-pa table if (paSetFifo.notEmpty) begin let paSet = paSetFifo.first; + paSetFifo.deq; let bramAddr = convertCsrAddrToBramAddr(paSet.addr); let bramReq = BRAMRequest { write : True, @@ -190,11 +195,11 @@ module mkPhyAddrBram(PhyAddrBram); }; if (isLoAddr(paSet.addr)) begin phyAddrLoBram.portA.request.put(bramReq); - $display($time, "ns SIM INFO @ mkPhyAddrBram: pa writing, va offset:%h mapping pa low:%h", bramAddr, bramReq.datain ); + // $display($time, "ns SIM INFO @ mkPhyAddrBram: pa writing, va offset:%d, mapping pa low:%h", bramAddr, bramReq.datain ); end else begin phyAddrHiBram.portA.request.put(bramReq); - $display($time, "ns SIM INFO @ mkPhyAddrBram: pa writing, va offset:%h mapping pa high:%h", bramAddr, bramReq.datain ); + // $display($time, "ns SIM INFO @ mkPhyAddrBram: pa writing, va offset:%d, mapping pa low:%h", bramAddr, bramReq.datain); end end @@ -211,7 +216,7 @@ module mkPhyAddrBram(PhyAddrBram); phyAddrLoBram.portA.request.put(bramReq); phyAddrHiBram.portA.request.put(bramReq); pendingFifo.enq(vaReq); - $display($time, "ns SIM INFO @ mkPhyAddrBram: receive pa mapping request, va:%h", vaReq.startAddr); + // $display($time, "ns SIM INFO @ mkPhyAddrBram: receive pa mapping request, va:%h", vaReq.startAddr); end endrule @@ -222,7 +227,7 @@ module mkPhyAddrBram(PhyAddrBram); let oriReq = pendingFifo.first; pendingFifo.deq; $display($time, "ns SIM INFO @ mkPhyAddrBram: got a pa mapping, va:%h pa:%h", oriReq.startAddr, pa); - oriReq.startAddr = pa; + oriReq.startAddr = pa | (oriReq.startAddr & pageMask); paReqFifo.enq(oriReq); endrule From 15ef72923703b585de13790f97691691e18a93b7 Mon Sep 17 00:00:00 2001 From: BIGWJZ <1059537323@qq.com> Date: Wed, 16 Oct 2024 21:09:13 +0800 Subject: [PATCH 45/53] Add read-write loop tb --- cocotb/bdmatb.py | 48 ++++++++++++- cocotb/loop_write_read/Makefile | 37 ++++++++++ cocotb/loop_write_read/dma_loop_tb.py | 71 +++++++++++++++++++ .../simple_write_read/dma_simple_wr_rd_tb.py | 12 +++- src/DmaC2HPipe.bsv | 16 ++--- src/DmaWrapper.bsv | 5 ++ 6 files changed, 175 insertions(+), 14 deletions(-) create mode 100644 cocotb/loop_write_read/Makefile create mode 100644 cocotb/loop_write_read/dma_loop_tb.py diff --git a/cocotb/bdmatb.py b/cocotb/bdmatb.py index 18383bb..5dcd52b 100644 --- a/cocotb/bdmatb.py +++ b/cocotb/bdmatb.py @@ -392,7 +392,6 @@ async def write_pa_table(self, channel, page_offset, pa): await self.write_register(base_addr + 2*page_offset, paLo) await self.write_register(base_addr + 2*page_offset + 1, paHi) - async def memory_map(self): self.log.info("BdmaTb: Starting memory map...") await self.write_pa_table(0, 1, 123456) @@ -423,4 +422,49 @@ async def run_single_read_once(self, channel, addr, length): data = await self.recv_data(channel) self.log.info("Read data from RootComplex successfully, recv length %d, req length %d", len(data), length) return data - \ No newline at end of file + + +class BdmaLoopTb(BdmaTb): + def conbine_bar(self, bar): + self.ep_bar = bar + + async def write_register(self, addr:int, x:int): + x = x & 0xFFFFFFFF + self.log.debug("BdmaTb: write register at %d, value %d" % (addr, x)) + await self.ep_bar.write(addr * 4, x.to_bytes(4, byteorder='little', signed=False)) + + async def write_pa_table(self, channel, page_offset, pa): + base_addr = 512 + channel * 1024 + page_offset = page_offset & 0x1FF + paLo = pa & 0xFFFFFFFF + paHi = (pa >> 32) & 0xFFFFFFFF + await self.write_register(base_addr + 2*page_offset, paLo) + await self.write_register(base_addr + 2*page_offset + 1, paHi) + + async def memory_map(self): + self.log.info("BdmaTb: Starting memory map...") + await self.write_pa_table(0, 1, 123456) + await self.write_pa_table(1, 2, 1) + for i in range(512): + await self.write_pa_table(0, i, 4096*i) + await self.write_pa_table(1, i, 4096*i) + await Timer(4 * 512 * 2 * 2, units='ns') + + async def submit_transfer(self, channel, addr, length, isWrite=True): + addrLo = addr & 0xFFFFFFFF + addrHi = (addr >> 32) & 0xFFFFFFFF + base_addr = channel * 6 + await self.write_register(base_addr + 1, addrLo) + await self.write_register(base_addr + 2, addrHi) + await self.write_register(base_addr + 3, length) + await self.write_register(base_addr, int(isWrite)) + + async def run_single_write_once(self, channel, addr, length): + self.log.info("Conduct DMA single write: channel %d addr %d, length %d", channel, addr, length) + await self.submit_transfer(channel, addr, length, True) + + async def run_single_read_once(self, channel, addr, length): + self.log.info("Conduct DMA single read: channel %d addr %d, length %d", channel, addr, length) + await self.submit_transfer(channel, addr, length, False) + + \ No newline at end of file diff --git a/cocotb/loop_write_read/Makefile b/cocotb/loop_write_read/Makefile new file mode 100644 index 0000000..890cfc9 --- /dev/null +++ b/cocotb/loop_write_read/Makefile @@ -0,0 +1,37 @@ +ROOT_DIR = $(abspath ../../) +BACKEND_DIR = $(ROOT_DIR)/backend +COCOTB_DIR = $(abspath ../) +TB_DIR = $(abspath ./) +include $(ROOT_DIR)/Makefile.base +VBUILD_DIR = $(BACKEND_DIR)/build +VSRC_DIR = $(BACKEND_DIR)/verilog + +TARGET = RawTestDmaController +TOP_MODULE = mk$(TARGET) +TOP_FILE = $(TOP_MODULE).v +VLOG_FILE = $(TB_DIR)/$(TOP_FILE) + +TB_CASE = dma_loop +TB_FILE = $(TB_CASE)_tb.py +DATE = $(shell date "+%Y%m%d") +LOG_FILE = $(TB_DIR)/log/$(DATE)_$(TOP_MODULE).log + +cocotb:clean verilog prepare run + +verilog: + cd $(BACKEND_DIR) && make clean && make verilog TOPMODULE=$(TOP_MODULE) + +prepare: + rm -rf $(VLOG_FILE) + bluetcl $(BACKEND_DIR)/listVlogFiles.tcl -bdir $(VBUILD_DIR) -vdir $(VSRC_DIR) $(TOP_MODULE) $(TOP_MODULE) | grep -i '\.v' | xargs -I {} cat {} >> $(VLOG_FILE) + sed -i '1i `timescale 1ns/1ps' $(VLOG_FILE) + +run: + cd $(TB_DIR) + mkdir -p log + python3 $(TB_FILE) 2>&1 | tee $(LOG_FILE) + +clean: + cd $(BACKEND_DIR) && make clean + cd $(TB_DIR) && rm -rf $(VLOG_FILE) __pycache__ .pytest_cache sim_build *.log + \ No newline at end of file diff --git a/cocotb/loop_write_read/dma_loop_tb.py b/cocotb/loop_write_read/dma_loop_tb.py new file mode 100644 index 0000000..c22ca2a --- /dev/null +++ b/cocotb/loop_write_read/dma_loop_tb.py @@ -0,0 +1,71 @@ +import os +import random + +import cocotb +from cocotb.triggers import RisingEdge, Timer +from cocotb.clock import Clock + +import cocotb_test.simulator + +import sys +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) +from bdmatb import BdmaLoopTb + +tests_dir = os.path.dirname(__file__) +rtl_dir = tests_dir + +async def loop_write_read_once(pcie_tb, mem): + # addr, length = pcie_tb.gen_random_req(0) + addr = 1 + length = 129 + addr = mem.get_absolute_address(addr) + char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") + data = char * length + mem[addr:addr+length] = data + await pcie_tb.run_single_read_once(0, addr, length) + await Timer(length, units='ns') + new_addr = addr + 8192 + await pcie_tb.run_single_write_once(0, new_addr, length) + await Timer(200+4*length, units='ns') + assert mem[new_addr:new_addr+length] == data + +@cocotb.test(timeout_time=10000000, timeout_unit="ns") +async def bar_test(dut): + tb = BdmaLoopTb(dut) + await tb.gen_reset() + + await tb.rc.enumerate() + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + + await dev.enable_device() + await dev.set_master() + + dev_bar0 = dev.bar_window[0] + tb.conbine_bar(dev_bar0) + await tb.memory_map() + + mem = tb.rc.mem_pool.alloc_region(1024*1024) + await loop_write_read_once(tb, mem) + +def test_dma(): + dut = "mkRawTestDmaController" + module = os.path.splitext(os.path.basename(__file__))[0] + toplevel = dut + + verilog_sources = [ + os.path.join(rtl_dir, f"{dut}.v") + ] + + sim_build = os.path.join(tests_dir, "sim_build", dut) + + cocotb_test.simulator.run( + python_search=[tests_dir], + verilog_sources=verilog_sources, + toplevel=toplevel, + module=module, + timescale="1ns/1ps", + sim_build=sim_build + ) + +if __name__ == "__main__": + test_dma() \ No newline at end of file diff --git a/cocotb/simple_write_read/dma_simple_wr_rd_tb.py b/cocotb/simple_write_read/dma_simple_wr_rd_tb.py index 054ae2b..732530c 100644 --- a/cocotb/simple_write_read/dma_simple_wr_rd_tb.py +++ b/cocotb/simple_write_read/dma_simple_wr_rd_tb.py @@ -23,6 +23,16 @@ async def single_path_random_write_test(pcie_tb, dma_channel, mem): await pcie_tb.run_single_write_once(dma_channel, addr, data) await Timer(200+length, units='ns') assert mem[addr:addr+length] == data + +async def single_path_random_read_test(pcie_tb, dma_channel, mem): + for _ in range(100): + addr, length = pcie_tb.gen_random_req(dma_channel) + addr = mem.get_absolute_address(addr) + char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") + mem[addr:addr+length] = char * length + data = await pcie_tb.run_single_read_once(dma_channel, addr, length) + await Timer(200+length, units='ns') + assert data == char * length @cocotb.test(timeout_time=10000000, timeout_unit="ns") async def bar_test(dut): @@ -40,7 +50,7 @@ async def bar_test(dut): await tb.memory_map() mem = tb.rc.mem_pool.alloc_region(1024*1024) - await single_path_random_write_test(tb, 0, mem) + await single_path_random_read_test(tb, 0, mem) def test_dma(): dut = "mkRawSimpleDmaController" diff --git a/src/DmaC2HPipe.bsv b/src/DmaC2HPipe.bsv index d6a90fd..60deb84 100644 --- a/src/DmaC2HPipe.bsv +++ b/src/DmaC2HPipe.bsv @@ -131,7 +131,6 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); Reg#(DmaReqLen) recvBytesReg <- mkReg(0); Vector#(SLOT_PER_PATH, Reg#(DmaReqLen)) chunkBytesRegs <- replicateM(mkReg(0)); - // mkConnection(chunkSplitor.chunkCntFifoOut, expectTlpCntFifo); mkConnection(reshapeStrad.streamFifoOut, descRemove.streamFifoIn); mkConnection(descRemove.streamFifoOut, dwRemove.streamFifoIn); @@ -197,13 +196,15 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); let byteInStream = convertByteEn2BytePtr(stream.byteEn); let isCompleted = completedFifo.first; let tag = tagFifo.first; - let chunkBytes = zeroExtend(byteInStream) + chunkBytesRegs[tag]; + let recvdChunkBytes = chunkBytesRegs[tag]; + let chunkBytes = zeroExtend(byteInStream) + recvdChunkBytes; dwRemove.streamFifoOut.deq; if (stream.isLast) begin completedFifo.deq; tagFifo.deq; end stream.isLast = isCompleted && stream.isLast; + stream.isFirst = stream.isFirst && (recvdChunkBytes == 0); cBuffer.append.enq(tuple2(tag, stream)); if (stream.isLast) begin cBuffer.complete.put(tag); @@ -214,7 +215,7 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); // $display("tag%d", tag, fshow(stream)); endrule - // Pipeline stage 4: there may be a bubble ibetween the first and last DataStream of cBUffer drain output + // Pipeline stage 4: there may be a bubble between the first and last DataStream of cBUffer drain output // Reshape the DataStream from RCB chunks to MRRS chunks rule reshapeRCB; let stream = cBuffer.drain.first; @@ -227,19 +228,12 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); // Reshape the DataStream from MRRS chunks to a whole DataStream rule reshapeMRRS; let stream = reshapeRcb.streamFifoOut.first; + $display($time, "ns SIM INFO @ reshapeMRRS: get stream from reshapeRcb, isFirst %d, isLast %d, data %h", pack(stream.isFirst), pack(stream.isLast), stream.data); let byteInStream = convertByteEn2BytePtr(stream.byteEn); let recvBytesCnt = recvBytesReg + zeroExtend(byteInStream); reshapeRcb.streamFifoOut.deq; - // let recvTlpCnt = recvTlpCntReg; - // if (stream.isFirst) begin - // if (recvTlpCnt > 0) begin - // stream.isFirst = False; - // end - // recvTlpCnt = recvTlpCntReg + 1; - // end if (stream.isLast) begin if (reqInflightFifo.first.length == recvBytesCnt) begin - // recvTlpCnt = 0; reqInflightFifo.deq; $display($time, "ns SIM INFO @ mkDmaC2HReadCore%d: a read request is done, total recvd bytes: %d", pathIdx, recvBytesCnt); recvBytesCnt = 0; diff --git a/src/DmaWrapper.bsv b/src/DmaWrapper.bsv index 229f220..3a544df 100755 --- a/src/DmaWrapper.bsv +++ b/src/DmaWrapper.bsv @@ -361,6 +361,11 @@ module mkRawTestDmaController(RawLoopDmaController); mkConnection(dmac.h2cReqFifoOut, dummyCsr.reqFifoIn); mkConnection(dmac.h2cRespFifoIn, dummyCsr.respFifoOut); + rule logRead; + let stream = dmac.c2hDataFifoOut[0].first; + $display($time, "ns SIM INFO @ mkRawTestDmaController: recv stream, isFirst %d, isLast %d, data %h", pack(stream.isFirst), pack(stream.isLast), stream.data); + endrule + interface rawPcie = dmac.rawPcie; endmodule From 10666a577278b6bb2d7531284f382fd131c2f55f Mon Sep 17 00:00:00 2001 From: BIGWJZ Date: Mon, 28 Oct 2024 12:34:32 +0800 Subject: [PATCH 46/53] optimize for fully-pipeline --- backend/top.v | 630 ++++++++++++++++++ cocotb/bypass_write_read/Makefile | 18 +- cocotb/bypass_write_read/dma_straddle_tb.py | 12 +- cocotb/bypass_write_read/dma_wr_rd_tb.py | 7 +- .../simple_write_read/dma_simple_wr_rd_tb.py | 5 +- src/DmaC2HPipe.bsv | 63 +- src/DmaTypes.bsv | 10 +- src/DmaUtils.bsv | 33 +- src/PcieAdapter.bsv | 454 +++++++++++++ src/PcieDescriptorTypes.bsv | 2 + src/StreamUtils.bsv | 23 +- test/Makefile | 4 +- test/TestDmaCore.bsv | 30 +- 13 files changed, 1206 insertions(+), 85 deletions(-) create mode 100644 backend/top.v diff --git a/backend/top.v b/backend/top.v new file mode 100644 index 0000000..dcff5ef --- /dev/null +++ b/backend/top.v @@ -0,0 +1,630 @@ +`timescale 1ps / 1ps +`define ENABLE_CMAC_RS_FEC + +module top#( + parameter [4:0] PL_LINK_CAP_MAX_LINK_WIDTH = 16, // 1- X1, 2 - X2, 4 - X4, 8 - X8, 16 - X16 + parameter C_DATA_WIDTH = 512, // RX/TX interface data width + parameter AXISTEN_IF_MC_RX_STRADDLE = 1, + parameter PL_LINK_CAP_MAX_LINK_SPEED = 4, // 1- GEN1, 2 - GEN2, 4 - GEN3, 8 - GEN4 + parameter KEEP_WIDTH = C_DATA_WIDTH / 32, + parameter EXT_PIPE_SIM = "FALSE", // This Parameter has effect on selecting Enable External PIPE Interface in GUI. + parameter AXISTEN_IF_CC_ALIGNMENT_MODE = "FALSE", + parameter AXISTEN_IF_CQ_ALIGNMENT_MODE = "FALSE", + parameter AXISTEN_IF_RQ_ALIGNMENT_MODE = "FALSE", + parameter AXISTEN_IF_RC_ALIGNMENT_MODE = "FALSE", + parameter AXI4_CQ_TUSER_WIDTH = 183, + parameter AXI4_CC_TUSER_WIDTH = 81, + parameter AXI4_RQ_TUSER_WIDTH = 137, + parameter AXI4_RC_TUSER_WIDTH = 161, + parameter AXISTEN_IF_ENABLE_CLIENT_TAG = 0, + parameter RQ_AVAIL_TAG_IDX = 8, + parameter RQ_AVAIL_TAG = 256, + parameter AXISTEN_IF_RQ_PARITY_CHECK = 0, + parameter AXISTEN_IF_CC_PARITY_CHECK = 0, + parameter AXISTEN_IF_RC_PARITY_CHECK = 0, + parameter AXISTEN_IF_CQ_PARITY_CHECK = 0, + parameter AXISTEN_IF_ENABLE_RX_MSG_INTFC = "FALSE", + parameter [17:0] AXISTEN_IF_ENABLE_MSG_ROUTE = 18'h2FFFF + +)( + // PCIe and XDMA + output [(PL_LINK_CAP_MAX_LINK_WIDTH - 1) : 0] pci_exp_txp, + output [(PL_LINK_CAP_MAX_LINK_WIDTH - 1) : 0] pci_exp_txn, + input [(PL_LINK_CAP_MAX_LINK_WIDTH - 1) : 0] pci_exp_rxp, + input [(PL_LINK_CAP_MAX_LINK_WIDTH - 1) : 0] pci_exp_rxn, + + input sys_clk_p, + input sys_clk_n, + input sys_rst_n, + + input board_sys_clk_n, + input board_sys_clk_p +); + + + wire user_lnk_up; + + //----------------------------------------------------------------------------------------------------------------// + // AXI Interface // + //----------------------------------------------------------------------------------------------------------------// + + wire user_clk; + wire user_reset; + + (*mark_debug, mark_debug_clock="user_clk" *)wire s_axis_rq_tlast; + (*mark_debug, mark_debug_clock="user_clk" *)wire [C_DATA_WIDTH-1:0] s_axis_rq_tdata; + wire [AXI4_RQ_TUSER_WIDTH-1:0] s_axis_rq_tuser; + wire [KEEP_WIDTH-1:0] s_axis_rq_tkeep; + (*mark_debug, mark_debug_clock="user_clk" *)wire [3:0] s_axis_rq_tready; + (*mark_debug, mark_debug_clock="user_clk" *)wire s_axis_rq_tvalid; + + (*mark_debug, mark_debug_clock="user_clk" *)wire [C_DATA_WIDTH-1:0] m_axis_rc_tdata; + wire [AXI4_RC_TUSER_WIDTH-1:0] m_axis_rc_tuser; + (*mark_debug, mark_debug_clock="user_clk" *)wire m_axis_rc_tlast; + wire [KEEP_WIDTH-1:0] m_axis_rc_tkeep; + (*mark_debug, mark_debug_clock="user_clk" *)wire m_axis_rc_tvalid; + (*mark_debug, mark_debug_clock="user_clk" *)wire m_axis_rc_tready; + + (*mark_debug, mark_debug_clock="user_clk" *)wire [C_DATA_WIDTH-1:0] m_axis_cq_tdata; + wire [AXI4_CQ_TUSER_WIDTH-1:0] m_axis_cq_tuser; + (*mark_debug, mark_debug_clock="user_clk" *)wire m_axis_cq_tlast; + wire [KEEP_WIDTH-1:0] m_axis_cq_tkeep; + (*mark_debug, mark_debug_clock="user_clk" *)wire m_axis_cq_tvalid; + (*mark_debug, mark_debug_clock="user_clk" *)wire m_axis_cq_tready; + + (*mark_debug, mark_debug_clock="user_clk" *)wire [C_DATA_WIDTH-1:0] s_axis_cc_tdata; + wire [AXI4_CC_TUSER_WIDTH-1:0] s_axis_cc_tuser; + (*mark_debug, mark_debug_clock="user_clk" *)wire s_axis_cc_tlast; + wire [KEEP_WIDTH-1:0] s_axis_cc_tkeep; + (*mark_debug, mark_debug_clock="user_clk" *)wire s_axis_cc_tvalid; + wire [3:0] s_axis_cc_tready; + + wire [3:0] pcie_tfc_nph_av; + wire [3:0] pcie_tfc_npd_av; + //----------------------------------------------------------------------------------------------------------------// + // Configuration (CFG) Interface // + //----------------------------------------------------------------------------------------------------------------// + + wire pcie_cq_np_req; + wire [5:0] pcie_cq_np_req_count; + wire [5:0] pcie_rq_seq_num0; + wire pcie_rq_seq_num_vld0; + wire [5:0] pcie_rq_seq_num1; + wire pcie_rq_seq_num_vld1; + + //----------------------------------------------------------------------------------------------------------------// + // EP and RP // + //----------------------------------------------------------------------------------------------------------------// + + wire cfg_phy_link_down; + wire [2:0] cfg_negotiated_width; + wire [1:0] cfg_current_speed; + wire [1:0] cfg_max_payload; + wire [2:0] cfg_max_read_req; + wire [15:0] cfg_function_status; + wire [11:0] cfg_function_power_state; + wire [503:0] cfg_vf_status; + wire [1:0] cfg_link_power_state; + + // Error Reporting Interface + wire cfg_err_cor_out; + wire cfg_err_nonfatal_out; + wire cfg_err_fatal_out; + + wire [5:0] cfg_ltssm_state; + wire [3:0] cfg_rcb_status; + wire [1:0] cfg_obff_enable; + wire cfg_pl_status_change; + + // Management Interface + wire [9:0] cfg_mgmt_addr; + wire cfg_mgmt_write; + wire [31:0] cfg_mgmt_write_data; + wire [3:0] cfg_mgmt_byte_enable; + wire cfg_mgmt_read; + wire [31:0] cfg_mgmt_read_data; + wire cfg_mgmt_read_write_done; + wire cfg_mgmt_type1_cfg_reg_access; + wire cfg_msg_received; + wire [7:0] cfg_msg_received_data; + wire [4:0] cfg_msg_received_type; + wire cfg_msg_transmit; + wire [2:0] cfg_msg_transmit_type; + wire [31:0] cfg_msg_transmit_data; + wire cfg_msg_transmit_done; + wire [7:0] cfg_fc_ph; + wire [11:0] cfg_fc_pd; + wire [7:0] cfg_fc_nph; + wire [11:0] cfg_fc_npd; + wire [7:0] cfg_fc_cplh; + wire [11:0] cfg_fc_cpld; + wire [2:0] cfg_fc_sel; + wire [2:0] cfg_per_func_status_control; + wire [3:0] cfg_per_function_number; + wire cfg_per_function_output_request; + + wire [63:0] cfg_dsn; + wire cfg_power_state_change_interrupt; + wire cfg_power_state_change_ack; + wire cfg_err_cor_in; + wire cfg_err_uncor_in; + + wire [3:0] cfg_flr_in_process; + wire [1:0] cfg_flr_done; + wire [251:0] cfg_vf_flr_in_process; + wire cfg_vf_flr_done; + wire [7:0] cfg_vf_flr_func_num; + + wire cfg_link_training_enable; + + //----------------------------------------------------------------------------------------------------------------// + // EP Only // + //----------------------------------------------------------------------------------------------------------------// + + // Interrupt Interface Signals + wire [3:0] cfg_interrupt_int; + wire [1:0] cfg_interrupt_pending; + wire cfg_interrupt_sent; + + wire [3:0] cfg_interrupt_msi_enable; + wire [11:0] cfg_interrupt_msi_mmenable; + wire cfg_interrupt_msi_mask_update; + wire [31:0] cfg_interrupt_msi_data; + wire [1:0] cfg_interrupt_msi_select; + wire [31:0] cfg_interrupt_msi_int; + wire [63:0] cfg_interrupt_msi_pending_status; + wire cfg_interrupt_msi_sent; + wire cfg_interrupt_msi_fail; + wire [2:0] cfg_interrupt_msi_attr; + wire cfg_interrupt_msi_tph_present; + wire [1:0] cfg_interrupt_msi_tph_type; + wire [7:0] cfg_interrupt_msi_tph_st_tag; + wire [7:0] cfg_interrupt_msi_function_number; + +// EP only + wire cfg_hot_reset_out; + wire cfg_config_space_enable; + wire cfg_req_pm_transition_l23_ready; + +// RP only + wire cfg_hot_reset_in; + + wire [7:0] cfg_ds_port_number; + wire [7:0] cfg_ds_bus_number; + wire [4:0] cfg_ds_device_number; + + //----------------------------------------------------------------------------------------------------------------// + // System(SYS) Interface // + //----------------------------------------------------------------------------------------------------------------// + + wire sys_clk; + wire sys_clk_gt; + wire global_reset_100mhz_clk; + wire sys_rst_n_c; + + // Ref clock buffer + IBUFDS_GTE4 # (.REFCLK_HROW_CK_SEL(2'b00)) refclk_ibuf (.O(sys_clk_gt), .ODIV2(sys_clk), .I(sys_clk_p), .CEB(1'b0), .IB(sys_clk_n)); + // Reset buffer + IBUF sys_reset_n_ibuf (.O(sys_rst_n_c), .I(sys_rst_n)); + + + IBUFDS IBUFDS_inst ( + .O(global_reset_100mhz_clk), // 1-bit output: Buffer output + .I(board_sys_clk_p), // 1-bit input: Diff_p buffer input (connect directly to top-level port) + .IB(board_sys_clk_n) // 1-bit input: Diff_n buffer input (connect directly to top-level port) + ); + + pcie4_uscale_plus_0 pcie4_uscale_plus_0_i ( + //---------------------------------------------------------------------------------------// + // PCI Express (pci_exp) Interface // + //---------------------------------------------------------------------------------------// + + // Tx + .pci_exp_txn ( pci_exp_txn ), + .pci_exp_txp ( pci_exp_txp ), + + // Rx + .pci_exp_rxn ( pci_exp_rxn ), + .pci_exp_rxp ( pci_exp_rxp ), + + //---------------------------------------------------------------------------------------// + // AXI Interface // + //---------------------------------------------------------------------------------------// + + .user_clk ( user_clk ), + .user_reset ( user_reset ), + .user_lnk_up ( user_lnk_up ), + .phy_rdy_out ( phy_rdy_out ), + + .s_axis_rq_tlast ( s_axis_rq_tlast ), + .s_axis_rq_tdata ( s_axis_rq_tdata ), + .s_axis_rq_tuser ( s_axis_rq_tuser ), + .s_axis_rq_tkeep ( s_axis_rq_tkeep ), + .s_axis_rq_tready ( s_axis_rq_tready ), + .s_axis_rq_tvalid ( s_axis_rq_tvalid ), + + .m_axis_rc_tdata ( m_axis_rc_tdata ), + .m_axis_rc_tuser ( m_axis_rc_tuser ), + .m_axis_rc_tlast ( m_axis_rc_tlast ), + .m_axis_rc_tkeep ( m_axis_rc_tkeep ), + .m_axis_rc_tvalid ( m_axis_rc_tvalid ), + .m_axis_rc_tready ( m_axis_rc_tready ), + + .m_axis_cq_tdata ( m_axis_cq_tdata ), + .m_axis_cq_tuser ( m_axis_cq_tuser ), + .m_axis_cq_tlast ( m_axis_cq_tlast ), + .m_axis_cq_tkeep ( m_axis_cq_tkeep ), + .m_axis_cq_tvalid ( m_axis_cq_tvalid ), + .m_axis_cq_tready ( m_axis_cq_tready ), + + .s_axis_cc_tdata ( s_axis_cc_tdata ), + .s_axis_cc_tuser ( s_axis_cc_tuser ), + .s_axis_cc_tlast ( s_axis_cc_tlast ), + .s_axis_cc_tkeep ( s_axis_cc_tkeep ), + .s_axis_cc_tvalid ( s_axis_cc_tvalid ), + .s_axis_cc_tready ( s_axis_cc_tready ), + + + + //---------------------------------------------------------------------------------------// + // Configuration (CFG) Interface // + //---------------------------------------------------------------------------------------// + .pcie_tfc_nph_av ( pcie_tfc_nph_av ), + .pcie_tfc_npd_av ( pcie_tfc_npd_av ), + + .pcie_rq_seq_num0 ( pcie_rq_seq_num0 ) , + .pcie_rq_seq_num_vld0 ( pcie_rq_seq_num_vld0 ) , + .pcie_rq_seq_num1 ( pcie_rq_seq_num1 ) , + .pcie_rq_seq_num_vld1 ( pcie_rq_seq_num_vld1 ) , + .pcie_rq_tag0 ( ) , + .pcie_rq_tag1 ( ) , + .pcie_rq_tag_av ( ) , + .pcie_rq_tag_vld0 ( ) , + .pcie_rq_tag_vld1 ( ) , + .pcie_cq_np_req ( {1'b1,pcie_cq_np_req} ), + .pcie_cq_np_req_count ( pcie_cq_np_req_count ), + .cfg_phy_link_down ( cfg_phy_link_down ), + .cfg_phy_link_status ( ), + .cfg_negotiated_width ( cfg_negotiated_width ), + .cfg_current_speed ( cfg_current_speed ), + .cfg_max_payload ( cfg_max_payload ), + .cfg_max_read_req ( cfg_max_read_req ), + .cfg_function_status ( cfg_function_status ), + .cfg_function_power_state ( cfg_function_power_state ), + .cfg_vf_status ( cfg_vf_status ), + .cfg_vf_power_state ( ), + .cfg_link_power_state ( cfg_link_power_state ), + // Error Reporting Interface + .cfg_err_cor_out ( cfg_err_cor_out ), + .cfg_err_nonfatal_out ( cfg_err_nonfatal_out ), + .cfg_err_fatal_out ( cfg_err_fatal_out ), + + .cfg_local_error_out ( ), + .cfg_local_error_valid ( ), + + .cfg_ltssm_state ( cfg_ltssm_state ), + .cfg_rx_pm_state ( ), + .cfg_tx_pm_state ( ), + .cfg_rcb_status ( cfg_rcb_status ), + + .cfg_obff_enable ( cfg_obff_enable ), + .cfg_pl_status_change ( cfg_pl_status_change ), + + .cfg_tph_requester_enable ( ), + .cfg_tph_st_mode ( ), + .cfg_vf_tph_requester_enable ( ), + .cfg_vf_tph_st_mode ( ), + // Management Interface + .cfg_mgmt_addr ( cfg_mgmt_addr ), + .cfg_mgmt_write ( cfg_mgmt_write ), + .cfg_mgmt_write_data ( cfg_mgmt_write_data ), + .cfg_mgmt_byte_enable ( cfg_mgmt_byte_enable ), + .cfg_mgmt_read ( cfg_mgmt_read ), + .cfg_mgmt_read_data ( cfg_mgmt_read_data ), + .cfg_mgmt_read_write_done ( cfg_mgmt_read_write_done ), + .cfg_mgmt_debug_access (1'b0), + .cfg_mgmt_function_number (8'b0), + .cfg_pm_aspm_l1_entry_reject (1'b0), + .cfg_pm_aspm_tx_l0s_entry_disable (1'b1), + + .cfg_msg_received ( cfg_msg_received ), + .cfg_msg_received_data ( cfg_msg_received_data ), + .cfg_msg_received_type ( cfg_msg_received_type ), + + .cfg_msg_transmit ( cfg_msg_transmit ), + .cfg_msg_transmit_type ( cfg_msg_transmit_type ), + .cfg_msg_transmit_data ( cfg_msg_transmit_data ), + .cfg_msg_transmit_done ( cfg_msg_transmit_done ), + + .cfg_fc_ph ( cfg_fc_ph ), + .cfg_fc_pd ( cfg_fc_pd ), + .cfg_fc_nph ( cfg_fc_nph ), + .cfg_fc_npd ( cfg_fc_npd ), + .cfg_fc_cplh ( cfg_fc_cplh ), + .cfg_fc_cpld ( cfg_fc_cpld ), + .cfg_fc_sel ( cfg_fc_sel ), + + //-------------------------------------------------------------------------------// + // EP and RP // + //-------------------------------------------------------------------------------// + .cfg_bus_number ( ), + .cfg_dsn ( cfg_dsn ), + .cfg_power_state_change_ack ( cfg_power_state_change_ack ), + .cfg_power_state_change_interrupt ( cfg_power_state_change_interrupt ), + .cfg_err_cor_in ( cfg_err_cor_in ), + .cfg_err_uncor_in ( cfg_err_uncor_in ), + + .cfg_flr_in_process ( cfg_flr_in_process ), + .cfg_flr_done ( {2'b0,cfg_flr_done} ), + .cfg_vf_flr_in_process ( cfg_vf_flr_in_process ), + .cfg_vf_flr_done ( cfg_vf_flr_done ), + .cfg_link_training_enable ( cfg_link_training_enable ), + // EP only + .cfg_hot_reset_out ( cfg_hot_reset_out ), + .cfg_config_space_enable ( cfg_config_space_enable ), + .cfg_req_pm_transition_l23_ready ( cfg_req_pm_transition_l23_ready ), + + // RP only + .cfg_hot_reset_in ( cfg_hot_reset_in ), + + .cfg_ds_bus_number ( cfg_ds_bus_number ), + .cfg_ds_device_number ( cfg_ds_device_number ), + .cfg_ds_port_number ( cfg_ds_port_number ), + .cfg_vf_flr_func_num (cfg_vf_flr_func_num), + + //-------------------------------------------------------------------------------// + // EP Only // + //-------------------------------------------------------------------------------// + + // Interrupt Interface Signals + .cfg_interrupt_int ( cfg_interrupt_int ), + .cfg_interrupt_pending ( {2'b0,cfg_interrupt_pending} ), + .cfg_interrupt_sent ( cfg_interrupt_sent ), + + + + // MSI Interface + .cfg_interrupt_msi_enable ( cfg_interrupt_msi_enable ), + .cfg_interrupt_msi_mmenable ( cfg_interrupt_msi_mmenable ), + .cfg_interrupt_msi_mask_update ( cfg_interrupt_msi_mask_update ), + .cfg_interrupt_msi_data ( cfg_interrupt_msi_data ), + .cfg_interrupt_msi_select ( cfg_interrupt_msi_select ), + .cfg_interrupt_msi_int ( cfg_interrupt_msi_int ), + .cfg_interrupt_msi_pending_status ( cfg_interrupt_msi_pending_status [31:0]), + .cfg_interrupt_msi_sent ( cfg_interrupt_msi_sent ), + .cfg_interrupt_msi_fail ( cfg_interrupt_msi_fail ), + .cfg_interrupt_msi_attr ( cfg_interrupt_msi_attr ), + .cfg_interrupt_msi_tph_present ( cfg_interrupt_msi_tph_present ), + .cfg_interrupt_msi_tph_type ( cfg_interrupt_msi_tph_type ), + .cfg_interrupt_msi_tph_st_tag ( cfg_interrupt_msi_tph_st_tag ), + .cfg_interrupt_msi_pending_status_function_num ( 2'b0), + .cfg_interrupt_msi_pending_status_data_enable ( 1'b0), + + .cfg_interrupt_msi_function_number ( cfg_interrupt_msi_function_number ), + + + //--------------------------------------------------------------------------------------// + // System(SYS) Interface // + //--------------------------------------------------------------------------------------// + + .sys_clk ( sys_clk ), + .sys_clk_gt ( sys_clk_gt ), + .sys_reset ( sys_rst_n_c ) + ); + +//------------------------------------------------------------------------------------------------------------------// +// PIO Example Design Top Level // +//------------------------------------------------------------------------------------------------------------------// + mkRawTestDmaController dmac_i ( + .CLK ( user_clk ), + .RST_N ( ~user_reset ), + .user_lnk_up ( user_lnk_up ), + // .sys_rst ( sys_rst_n_c ), + + //-------------------------------------------------------------------------------------// + // AXI Interface // + //-------------------------------------------------------------------------------------// + + .m_axis_rq_tlast ( s_axis_rq_tlast ), + .m_axis_rq_tdata ( s_axis_rq_tdata ), + .m_axis_rq_tuser ( s_axis_rq_tuser ), + .m_axis_rq_tkeep ( s_axis_rq_tkeep ), + .m_axis_rq_tready ( s_axis_rq_tready[0] ), + .m_axis_rq_tvalid ( s_axis_rq_tvalid ), + + .s_axis_rc_tdata ( m_axis_rc_tdata ), + .s_axis_rc_tuser ( m_axis_rc_tuser ), + .s_axis_rc_tlast ( m_axis_rc_tlast ), + .s_axis_rc_tkeep ( m_axis_rc_tkeep ), + .s_axis_rc_tvalid ( m_axis_rc_tvalid ), + .s_axis_rc_tready ( m_axis_rc_tready ), + + .s_axis_cq_tdata ( m_axis_cq_tdata ), + .s_axis_cq_tuser ( m_axis_cq_tuser ), + .s_axis_cq_tlast ( m_axis_cq_tlast ), + .s_axis_cq_tkeep ( m_axis_cq_tkeep ), + .s_axis_cq_tvalid ( m_axis_cq_tvalid ), + .s_axis_cq_tready ( m_axis_cq_tready ), + + .m_axis_cc_tdata ( s_axis_cc_tdata ), + .m_axis_cc_tuser ( s_axis_cc_tuser ), + .m_axis_cc_tlast ( s_axis_cc_tlast ), + .m_axis_cc_tkeep ( s_axis_cc_tkeep ), + .m_axis_cc_tvalid ( s_axis_cc_tvalid ), + .m_axis_cc_tready ( s_axis_cc_tready[0] ), + + + // .pcie_rq_seq_num ( 'h0), + // .pcie_rq_seq_num_vld ( 'h0), + // .pcie_rq_tag ( 'h0), + // .pcie_rq_tag_vld ( 'h0), + .pcie_tfc_nph_av ( pcie_tfc_nph_av[1:0]), + .pcie_tfc_npd_av ( pcie_tfc_npd_av[1:0]), + .pcie_cq_np_req ( pcie_cq_np_req ), + .pcie_cq_np_req_count ( pcie_cq_np_req_count ), + + //--------------------------------------------------------------------------------// + // Configuration (CFG) Interface // + //--------------------------------------------------------------------------------// + + //--------------------------------------------------------------------------------// + // EP and RP // + //--------------------------------------------------------------------------------// + .cfg_phy_link_down ( cfg_phy_link_down ), + .cfg_negotiated_width ( cfg_negotiated_width ), + .cfg_current_speed ( cfg_current_speed ), + .cfg_max_payload ( cfg_max_payload ), + .cfg_max_read_req ( cfg_max_read_req ), + .cfg_function_status ( cfg_function_status [7:0] ), + .cfg_function_power_state ( cfg_function_power_state [5:0] ), + .cfg_vf_status ( cfg_vf_status ), + .cfg_link_power_state ( cfg_link_power_state ), + + // Error Reporting Interface + .cfg_err_cor_out ( cfg_err_cor_out ), + .cfg_err_nonfatal_out ( cfg_err_nonfatal_out ), + .cfg_err_fatal_out ( cfg_err_fatal_out ), +// .cfg_ltr_enable ( 1'b0 ), + .cfg_ltssm_state ( cfg_ltssm_state ), + .cfg_rcb_status ( cfg_rcb_status [1:0]), + .cfg_obff_enable ( cfg_obff_enable ), +// .cfg_pl_status_change ( cfg_pl_status_change ), + + // Management Interface + .cfg_mgmt_addr ( cfg_mgmt_addr ), + .cfg_mgmt_write ( cfg_mgmt_write ), + .cfg_mgmt_write_data ( cfg_mgmt_write_data ), + .cfg_mgmt_byte_enable ( cfg_mgmt_byte_enable ), + .cfg_mgmt_read ( cfg_mgmt_read ), + .cfg_mgmt_read_data ( cfg_mgmt_read_data ), + .cfg_mgmt_read_write_done ( cfg_mgmt_read_write_done ), +// .cfg_mgmt_type1_cfg_reg_access ( cfg_mgmt_type1_cfg_reg_access ), + .cfg_msg_received ( cfg_msg_received ), + .cfg_msg_received_data ( cfg_msg_received_data ), + .cfg_msg_received_type ( cfg_msg_received_type ), + .cfg_msg_transmit ( cfg_msg_transmit ), + .cfg_msg_transmit_type ( cfg_msg_transmit_type ), + .cfg_msg_transmit_data ( cfg_msg_transmit_data ), + .cfg_msg_transmit_done ( cfg_msg_transmit_done ), + + .cfg_fc_ph ( cfg_fc_ph ), + .cfg_fc_pd ( cfg_fc_pd ), + .cfg_fc_nph ( cfg_fc_nph ), + .cfg_fc_npd ( cfg_fc_npd ), + .cfg_fc_cplh ( cfg_fc_cplh ), + .cfg_fc_cpld ( cfg_fc_cpld ), + .cfg_fc_sel ( cfg_fc_sel ), + +// .cfg_per_func_status_control ( cfg_per_func_status_control ), +// .cfg_per_function_number ( cfg_per_function_number ), +// .cfg_per_function_output_request ( cfg_per_function_output_request ), + + .cfg_dsn ( cfg_dsn ), + .cfg_power_state_change_ack ( cfg_power_state_change_ack ), + .cfg_power_state_change_interrupt ( cfg_power_state_change_interrupt ), + .cfg_err_cor_in ( cfg_err_cor_in ), + .cfg_err_uncor_in ( cfg_err_uncor_in ), + + .cfg_flr_in_process ( cfg_flr_in_process [1:0] ), + .cfg_flr_done ( cfg_flr_done ), + .cfg_vf_flr_in_process ( cfg_vf_flr_in_process ), + .cfg_vf_flr_done ( cfg_vf_flr_done ), + .cfg_vf_flr_func_num ( cfg_vf_flr_func_num ), + + .cfg_link_training_enable ( cfg_link_training_enable ), + + .cfg_ds_port_number ( cfg_ds_port_number ), + .cfg_hot_reset_in ( cfg_hot_reset_out ), + .cfg_config_space_enable ( cfg_config_space_enable ), + .cfg_req_pm_transition_l23_ready ( cfg_req_pm_transition_l23_ready ), + + // RP only + .cfg_hot_reset_out ( cfg_hot_reset_in ), + + .cfg_ds_bus_number ( cfg_ds_bus_number ), + .cfg_ds_device_number ( cfg_ds_device_number ), + .cfg_ds_function_number ( ), + + //-------------------------------------------------------------------------------------// + // EP Only // + //-------------------------------------------------------------------------------------// + + .cfg_interrupt_msi_enable ( cfg_interrupt_msi_enable[0] ), + .cfg_interrupt_msi_mmenable ( cfg_interrupt_msi_mmenable[5:0] ), + .cfg_interrupt_msi_mask_update ( cfg_interrupt_msi_mask_update ), + .cfg_interrupt_msi_data ( cfg_interrupt_msi_data ), + .cfg_interrupt_msi_select ( cfg_interrupt_msi_select ), + .cfg_interrupt_msi_int ( cfg_interrupt_msi_int ), + .cfg_interrupt_msi_pending_status ( cfg_interrupt_msi_pending_status ), + .cfg_interrupt_msi_sent ( cfg_interrupt_msi_sent ), + .cfg_interrupt_msi_fail ( cfg_interrupt_msi_fail ), + .cfg_interrupt_msi_attr ( cfg_interrupt_msi_attr ), + .cfg_interrupt_msi_tph_present ( cfg_interrupt_msi_tph_present ), + .cfg_interrupt_msi_tph_type ( cfg_interrupt_msi_tph_type ), + .cfg_interrupt_msi_tph_st_tag ( cfg_interrupt_msi_tph_st_tag ), + .cfg_interrupt_msi_function_number ( cfg_interrupt_msi_function_number ), + + // Interrupt Interface Signals + .cfg_interrupt_int ( cfg_interrupt_int ), + .cfg_interrupt_pending ( cfg_interrupt_pending ), + .cfg_interrupt_sent ( cfg_interrupt_sent ) + + //------------------------------------------------------------------------------------// + // DMA IFC + //------------------------------------------------------------------------------------// +// .s_axis_c2h_0_tvalid (0), +// .s_axis_c2h_0_tdata (0), +// .s_axis_c2h_0_tkeep (0), +// .s_axis_c2h_0_tlast (0), +// .s_axis_c2h_0_tuser (0), +// .s_axis_c2h_0_tready ( ), + +// .s_desc_c2h_0_valid (0), +// .s_desc_c2h_0_start_addr (0), +// .s_desc_c2h_0_byte_cnt (0), +// .s_desc_c2h_0_is_write (0), +// .s_desc_c2h_0_ready ( ), + +// .m_axis_c2h_0_tvalid ( ), +// .m_axis_c2h_0_tdata ( ), +// .m_axis_c2h_0_tkeep ( ), +// .m_axis_c2h_0_tlast ( ), +// .m_axis_c2h_0_tuser ( ), +// .m_axis_c2h_0_tready (0), + +// .s_axis_c2h_1_tvalid (0), +// .s_axis_c2h_1_tdata (0), +// .s_axis_c2h_1_tkeep (0), +// .s_axis_c2h_1_tlast (0), +// .s_axis_c2h_1_tuser (0), +// .s_axis_c2h_1_tready ( ), + +// .s_desc_c2h_1_valid (0), +// .s_desc_c2h_1_start_addr (0), +// .s_desc_c2h_1_byte_cnt (0), +// .s_desc_c2h_1_is_write (0), +// .s_desc_c2h_1_ready ( ), + +// .m_axis_c2h_1_tvalid ( ), +// .m_axis_c2h_1_tdata ( ), +// .m_axis_c2h_1_tkeep ( ), +// .m_axis_c2h_1_tlast ( ), +// .m_axis_c2h_1_tuser ( ), +// .m_axis_c2h_1_tready (0), + +// .s_h2c_value_valid (0), +// .s_h2c_value_data (0), +// .s_h2c_value_ready ( ), + +// .m_h2c_value_address ( ), +// .m_h2c_value_is_write ( ), +// .m_h2c_value_valid ( ), +// .m_h2c_value_ready (0), + +// .m_h2c_desc_data ( ), +// .m_h2c_desc_valid ( ), +// .m_h2c_desc_ready (0) + ); + + +endmodule \ No newline at end of file diff --git a/cocotb/bypass_write_read/Makefile b/cocotb/bypass_write_read/Makefile index 13e1213..978101a 100644 --- a/cocotb/bypass_write_read/Makefile +++ b/cocotb/bypass_write_read/Makefile @@ -1,20 +1,20 @@ -ROOT_DIR = $(abspath ../../) -BACKEND_DIR = $(ROOT_DIR)/backend -COCOTB_DIR = $(abspath ../) -TB_DIR = $(abspath ./) +ROOT_DIR := $(abspath ../../) +BACKEND_DIR := $(ROOT_DIR)/backend +COCOTB_DIR := $(abspath ../) +TB_DIR := $(abspath ./) include $(ROOT_DIR)/Makefile.base -VBUILD_DIR = $(BACKEND_DIR)/build -VSRC_DIR = $(BACKEND_DIR)/verilog +VBUILD_DIR := $(BACKEND_DIR)/build +VSRC_DIR := $(BACKEND_DIR)/verilog -TARGET = RawBypassDmaController +TARGET ?= RawBypassDmaController TOP_MODULE = mk$(TARGET) TOP_FILE = $(TOP_MODULE).v VLOG_FILE = $(TB_DIR)/$(TOP_FILE) -TB_CASE = dma_straddle +TB_CASE ?= dma_wr_rd TB_FILE = $(TB_CASE)_tb.py DATE = $(shell date "+%Y%m%d") -LOG_FILE = $(TB_DIR)/log/$(DATE)_$(TOP_MODULE).log +LOG_FILE = $(TB_DIR)/log/$(DATE)_$(TB_CASE).log cocotb:clean verilog prepare run diff --git a/cocotb/bypass_write_read/dma_straddle_tb.py b/cocotb/bypass_write_read/dma_straddle_tb.py index 9276064..208ef0c 100644 --- a/cocotb/bypass_write_read/dma_straddle_tb.py +++ b/cocotb/bypass_write_read/dma_straddle_tb.py @@ -14,20 +14,20 @@ # -------------- ------------- ----------- # | Root Complex | <-> | End Pointer | <-> | Dut(DMAC) | # -------------- ------------- ----------- - +test_num = 100 async def single_path_random_write_test(pcie_tb, dma_channel, mem): - for _ in range(100): + for _ in range(test_num): addr, length = pcie_tb.gen_random_req(dma_channel) addr = mem.get_absolute_address(addr) char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") data = char * length await pcie_tb.run_single_write_once(dma_channel, addr, data) - await Timer(100+length, units='ns') + await Timer(200+length, units='ns') assert mem[addr:addr+length] == data async def single_path_random_read_test(pcie_tb, dma_channel, mem): - for _ in range(100): + for _ in range(test_num): addr, length = pcie_tb.gen_random_req(dma_channel) addr = mem.get_absolute_address(addr) char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") @@ -61,7 +61,7 @@ async def straddle_write_test(dut): tb.log.info("End write test in straddle mode succesfully!") @cocotb.test(timeout_time=10000000, timeout_unit="ns") -async def random_read_test(dut): +async def straddle_read_test(dut): tb = BdmaBypassTb(dut) await tb.gen_reset() @@ -81,7 +81,7 @@ async def random_read_test(dut): await channel0 await channel1 - tb.log.info("End Read test in straddle mode succesfully!") +# tb.log.info("End Read test in straddle mode succesfully!") tests_dir = os.path.dirname(__file__) rtl_dir = tests_dir diff --git a/cocotb/bypass_write_read/dma_wr_rd_tb.py b/cocotb/bypass_write_read/dma_wr_rd_tb.py index 7964fbc..142913a 100644 --- a/cocotb/bypass_write_read/dma_wr_rd_tb.py +++ b/cocotb/bypass_write_read/dma_wr_rd_tb.py @@ -17,9 +17,10 @@ # -------------- ------------- ----------- # | Root Complex | <-> | End Pointer | <-> | Dut(DMAC) | # -------------- ------------- ----------- +test_num = 100 async def single_path_random_write_test(pcie_tb, dma_channel, mem): - for _ in range(100): + for _ in range(test_num): addr, length = pcie_tb.gen_random_req(dma_channel) addr = mem.get_absolute_address(addr) char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") @@ -30,7 +31,7 @@ async def single_path_random_write_test(pcie_tb, dma_channel, mem): async def single_path_random_read_test(pcie_tb, dma_channel, mem): - for _ in range(100): + for _ in range(test_num): addr, length = pcie_tb.gen_random_req(dma_channel) addr = mem.get_absolute_address(addr) char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") @@ -38,7 +39,7 @@ async def single_path_random_read_test(pcie_tb, dma_channel, mem): data = await pcie_tb.run_single_read_once(dma_channel, addr, length) assert data == char * length -@cocotb.test(timeout_time=100000000, timeout_unit="ns") +# @cocotb.test(timeout_time=100000000, timeout_unit="ns") async def step_random_write_test(dut): tb = BdmaBypassTb(dut) diff --git a/cocotb/simple_write_read/dma_simple_wr_rd_tb.py b/cocotb/simple_write_read/dma_simple_wr_rd_tb.py index 732530c..4dc1484 100644 --- a/cocotb/simple_write_read/dma_simple_wr_rd_tb.py +++ b/cocotb/simple_write_read/dma_simple_wr_rd_tb.py @@ -13,9 +13,10 @@ tests_dir = os.path.dirname(__file__) rtl_dir = tests_dir +test_num = 1 async def single_path_random_write_test(pcie_tb, dma_channel, mem): - for _ in range(100): + for _ in range(test_num): addr, length = pcie_tb.gen_random_req(dma_channel) addr = mem.get_absolute_address(addr) char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") @@ -25,7 +26,7 @@ async def single_path_random_write_test(pcie_tb, dma_channel, mem): assert mem[addr:addr+length] == data async def single_path_random_read_test(pcie_tb, dma_channel, mem): - for _ in range(100): + for _ in range(test_num): addr, length = pcie_tb.gen_random_req(dma_channel) addr = mem.get_absolute_address(addr) char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") diff --git a/src/DmaC2HPipe.bsv b/src/DmaC2HPipe.bsv index 60deb84..9addac0 100644 --- a/src/DmaC2HPipe.bsv +++ b/src/DmaC2HPipe.bsv @@ -13,6 +13,8 @@ import PcieDescriptorTypes::*; import DmaUtils::*; import CompletionFifo::*; +// `define DEBUG_READ + // TODO : change the PCIe Adapter Ifc to TlpData and TlpHeader, // move the module which convert TlpHeader to IP descriptor from dma to adapter interface DmaC2HPipe; @@ -115,7 +117,8 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); FIFOF#(SlotToken) tagFifo <- mkSizedFIFOF(valueOf(TAdd#(1, STREAM_HEADER_REMOVE_LATENCY))); FIFOF#(Bool) completedFifo <- mkSizedFIFOF(valueOf(TAdd#(1, STREAM_HEADER_REMOVE_LATENCY))); - FIFOF#(DmaRequest) reqInflightFifo <- mkSizedFIFOF(valueOf(SLOT_PER_PATH)); + FIFOF#(DmaReadReqCnt) inflightFifo <- mkSizedFIFOF(valueOf(SLOT_PER_PATH)); + StreamPipe descRemove <- mkStreamHeaderRemove(fromInteger(valueOf(TDiv#(DES_RC_DESCRIPTOR_WIDTH, BYTE_WIDTH)))); StreamPipe dwRemove <- mkStreamRemoveFromDW; @@ -128,11 +131,12 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); Reg#(Bool) hasReadOnceReg <- mkReg(False); Reg#(Bool) isStreamValidReg <- mkReg(True); - Reg#(DmaReqLen) recvBytesReg <- mkReg(0); - Vector#(SLOT_PER_PATH, Reg#(DmaReqLen)) chunkBytesRegs <- replicateM(mkReg(0)); + Reg#(DmaReadReqCnt) rcvReqCntReg <- mkReg(1); + Vector#(SLOT_PER_PATH, Reg#(Bool)) chunkFlagRegs <- replicateM(mkReg(False)); mkConnection(reshapeStrad.streamFifoOut, descRemove.streamFifoIn); mkConnection(descRemove.streamFifoOut, dwRemove.streamFifoIn); + mkConnection(chunkSplitor.reqCntFifoOut, inflightFifo); // Pipeline stage 1: convert StraddleStream to DataStream, may cost 2 cycle for one StraddleStream rule convertStraddleToDataStream; @@ -185,7 +189,7 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); end end isStreamValidReg <= isStreamValid; - // $display("parse from straddle", fshow(stream)); + // $display("parse from straddle, tag: %d, cmpl status: %d", tag, pack(isCompleted), fshow(stream)); endrule // Pipeline stage 2: remove the descriptor in the head of each TLP @@ -196,23 +200,22 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); let byteInStream = convertByteEn2BytePtr(stream.byteEn); let isCompleted = completedFifo.first; let tag = tagFifo.first; - let recvdChunkBytes = chunkBytesRegs[tag]; - let chunkBytes = zeroExtend(byteInStream) + recvdChunkBytes; + let rcvdFlag = True; dwRemove.streamFifoOut.deq; + // $display($time, "ns SIM INFO @ mkDmaC2HReadCore%d: from dwRemove to cBuf, tag: %d, cmpl: %d", pathIdx, tag, pack(isCompleted), fshow(stream)); if (stream.isLast) begin completedFifo.deq; tagFifo.deq; end - stream.isLast = isCompleted && stream.isLast; - stream.isFirst = stream.isFirst && (recvdChunkBytes == 0); + stream.isLast = isCompleted && stream.isLast; //Re-define the stream boundary + stream.isFirst = stream.isFirst && (!chunkFlagRegs[tag]); cBuffer.append.enq(tuple2(tag, stream)); if (stream.isLast) begin cBuffer.complete.put(tag); - $display($time, "ns SIM INFO @ mkDmaC2HReadCore%d: a chunk is completed in cBuffer, tag: %d, recv bytes: %d", pathIdx, tag, chunkBytes); - chunkBytes = 0; + // $display($time, "ns SIM INFO @ mkDmaC2HReadCore%d: a chunk is completed in cBuffer, tag: %d", pathIdx, tag); + rcvdFlag = False; end - chunkBytesRegs[tag] <= chunkBytes; - // $display("tag%d", tag, fshow(stream)); + chunkFlagRegs[tag] <= rcvdFlag; endrule // Pipeline stage 4: there may be a bubble between the first and last DataStream of cBUffer drain output @@ -221,30 +224,27 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); let stream = cBuffer.drain.first; cBuffer.drain.deq; reshapeRcb.streamFifoIn.enq(stream); - // $display("cbuf output", fshow(stream)); + $display("cbuf output", fshow(stream)); endrule - // Pipeline stage 4: there may be bubbles in the first and last DataStream of a request because of MRRS chunk compute + // Pipeline stage 4: there may be bubbles in the first and last DataStream of a request because of MRRS split // Reshape the DataStream from MRRS chunks to a whole DataStream rule reshapeMRRS; let stream = reshapeRcb.streamFifoOut.first; - $display($time, "ns SIM INFO @ reshapeMRRS: get stream from reshapeRcb, isFirst %d, isLast %d, data %h", pack(stream.isFirst), pack(stream.isLast), stream.data); - let byteInStream = convertByteEn2BytePtr(stream.byteEn); - let recvBytesCnt = recvBytesReg + zeroExtend(byteInStream); reshapeRcb.streamFifoOut.deq; if (stream.isLast) begin - if (reqInflightFifo.first.length == recvBytesCnt) begin - reqInflightFifo.deq; - $display($time, "ns SIM INFO @ mkDmaC2HReadCore%d: a read request is done, total recvd bytes: %d", pathIdx, recvBytesCnt); - recvBytesCnt = 0; + let rcvReqCnt = rcvReqCntReg; + // $display("DEBUG: get isLast from reshapeRcb, fifo.first:%d, rcvReqCntReg: %d", inflightFifo.first, rcvReqCntReg); + if (inflightFifo.first == rcvReqCnt) begin + rcvReqCnt = 1; + inflightFifo.deq; end else begin + rcvReqCnt = rcvReqCnt + 1; stream.isLast = False; - // $display($time, "ns SIM DEBUG @ mkDmaC2HReadCore%d, expect bytes %drecv bytes %d", pathIdx, reqInflightFifo.first.length, recvBytesCnt); end + rcvReqCntReg <= rcvReqCnt; end - // recvTlpCntReg <= recvTlpCnt; - recvBytesReg <= recvBytesCnt; reshapeMrrs.streamFifoIn.enq(stream); endrule @@ -259,7 +259,6 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); tag : 0 }; chunkSplitor.dmaRequestFifoIn.enq(exReq); - reqInflightFifo.enq(req); endrule // Pipeline stage 2: generate read descriptor @@ -375,12 +374,12 @@ module mkC2HWriteCore#(DmaPathNo pathIdx)(C2HWriteCore); let chunkDataStream = chunkSplit.chunkDataFifoOut.first; chunkSplit.chunkDataFifoOut.deq; streamAlign.dataFifoIn.enq(chunkDataStream); - if (chunkDataStream.isLast && chunkDataStream.isFirst) begin - $display($time, "ns SIM INFO @ mkDmaC2HWriteCore%d: tx write chunk end , tag:%d", pathIdx, convertSlotTokenToTag(tagReg, pathIdx)); - end - else if (chunkDataStream.isLast) begin - $display($time, "ns SIM INFO @ mkDmaC2HWriteCore%d: tx write chunk end , tag:%d", pathIdx, convertSlotTokenToTag(tagReg-1, pathIdx)); - end + // if (chunkDataStream.isLast && chunkDataStream.isFirst) begin + // $display($time, "ns SIM INFO @ mkDmaC2HWriteCore%d: tx write chunk end , tag:%d", pathIdx, convertSlotTokenToTag(tagReg, pathIdx)); + // end + // else if (chunkDataStream.isLast) begin + // $display($time, "ns SIM INFO @ mkDmaC2HWriteCore%d: tx write chunk end , tag:%d", pathIdx, convertSlotTokenToTag(tagReg-1, pathIdx)); + // end end endrule @@ -399,7 +398,7 @@ module mkC2HWriteCore#(DmaPathNo pathIdx)(C2HWriteCore); // $display($time, "ns SIM INFO @ mkDmaC2HWriteCore%d: tx a new tlp, BE:%b/%b", pathIdx, tpl_1(sideBandByteEn), tpl_2(sideBandByteEn)); end dataOutFifo.enq(stream); - // $display($time, "ns SIM INFO @ mkDmaC2HWriteCore: tlp stream", fshow(stream)); + // $display($time, "ns SIM INFO @ mkDmaC2HWriteCore%d: tlp stream", pathIdx, fshow(stream)); endrule // User Logic Ifc diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index 5c9a802..8c3530c 100755 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -30,6 +30,9 @@ typedef TAdd#(1, TLog#(BUS_BOUNDARY)) BUS_BOUNDARY_WIDTH; typedef 128 DEFAULT_TLP_SIZE; typedef TLog#(DEFAULT_TLP_SIZE) DEFAULT_TLP_SIZE_WIDTH; +// Only support max to 512bytes TLP for resouce saving +typedef 512 MAX_TLP_SIZE; +typedef TLog#(MAX_TLP_SIZE) MAX_TLP_SIZE_WIDTH; typedef Bit#(BUS_BOUNDARY_WIDTH) TlpPayloadSize; typedef Bit#(TLog#(BUS_BOUNDARY_WIDTH)) TlpPayloadSizeWidth; @@ -57,6 +60,9 @@ typedef Bit#(TAdd#(1, TLog#(DWORD_BYTES))) DWordBytePtr; typedef Bit#(BYTE_DWORD_SHIFT_WIDTH) ByteModDWord; typedef 2'b11 MaxByteModDword; +typedef TSub#(BUS_BOUNDARY_WIDTH, MAX_TLP_SIZE_WIDTH) READ_REQ_CNT_WIDTH; +typedef Bit#(READ_REQ_CNT_WIDTH) DmaReadReqCnt; + typedef struct { DmaMemAddr startAddr; DmaReqLen length; @@ -189,7 +195,7 @@ typedef Bit#(PCIE_STRADDLE_WIDTH) StraddleNo; typedef TSub#(DES_NONEXTENDED_TAG_WIDTH, 1) SLOT_TOKEN_WIDTH; typedef Bit#(SLOT_TOKEN_WIDTH) SlotToken; typedef 16 SLOT_PER_PATH; -typedef TAdd#(1, TDiv#(BUS_BOUNDARY, BYTE_EN_WIDTH)) MAX_STREAM_NUM_PER_COMPLETION; +typedef TAdd#(1, TDiv#(MAX_TLP_SIZE, BYTE_EN_WIDTH)) MAX_STREAM_NUM_PER_COMPLETION; // Internal Registers /* Block 1 - DMA inner Ctrl Regs @@ -232,7 +238,7 @@ typedef Bit#(TLog#(PA_NUM)) PaBramAddr; typedef 2 PA_TABLE0_BLOCK_OFFSET; typedef 4 PA_TABLE1_BLOCK_OFFSET; -typedef 1 IS_HUGE_PAGE; +typedef 0 IS_HUGE_PAGE; typedef 4096 PAGE_SIZE; typedef TLog#(PAGE_SIZE) PAGE_SIZE_WIDTH; diff --git a/src/DmaUtils.bsv b/src/DmaUtils.bsv index f5390ff..f37a193 100644 --- a/src/DmaUtils.bsv +++ b/src/DmaUtils.bsv @@ -23,7 +23,7 @@ typedef 4 CHUNK_COMPUTE_LATENCY; interface ChunkCompute; interface FifoIn#(DmaExtendRequest) dmaRequestFifoIn; interface FifoOut#(DmaRequest) chunkRequestFifoOut; - // interface FifoOut#(DmaMemAddr) chunkCntFifoOut; + interface FifoOut#(DmaReadReqCnt) reqCntFifoOut; interface Put#(Tuple2#(TlpPayloadSize, TlpPayloadSizeWidth)) maxReadReqSize; endinterface @@ -32,10 +32,14 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); FIFOF#(DmaExtendRequest) inputFifo <- mkFIFOF; FIFOF#(DmaRequest) outputFifo <- mkFIFOF; FIFOF#(Tuple2#(DmaExtendRequest, DmaReqLen)) pipeFifo <- mkFIFOF; + FIFOF#(DmaReadReqCnt) rdReqCntFifo <- mkFIFOF; Reg#(DmaMemAddr) newChunkPtrReg <- mkReg(0); Reg#(DmaReqLen) totalLenRemainReg <- mkReg(0); Reg#(Bool) isSplittingReg <- mkReg(False); + + FIFOF#(Bool) mrrsFlagFifo <- mkFIFOF; + Reg#(DmaReadReqCnt) rdReqCntReg <- mkReg(0); Reg#(DmaReqLen) tlpMaxSizeReg <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE))); Reg#(TlpPayloadSizeWidth) tlpMaxSizeWidthReg <- mkReg(fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH))); @@ -84,6 +88,7 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); isWrite : False }); pipeFifo.deq; + mrrsFlagFifo.enq(True); // this mrrs is done totalLenRemainReg <= 0; end else begin @@ -93,6 +98,7 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); length : tlpMaxSizeReg, isWrite : False }); + mrrsFlagFifo.enq(False); // this mrrs not done newChunkPtrReg <= newChunkPtrReg + zeroExtend(tlpMaxSizeReg); totalLenRemainReg <= totalLenRemainReg - tlpMaxSizeReg; end @@ -109,13 +115,29 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); if (!isSplittingNextCycle) begin pipeFifo.deq; end + mrrsFlagFifo.enq(!isSplittingNextCycle); // this mrrs is done newChunkPtrReg <= request.startAddr + zeroExtend(firstChunkLen); totalLenRemainReg <= remainderLength; end endrule + rule getMrrsReq; + let flag = mrrsFlagFifo.first; + mrrsFlagFifo.deq; + if (flag) begin + rdReqCntFifo.enq(rdReqCntReg + 1); + rdReqCntReg <= 0; + // $display($time, "ns SIM INFO @ mkChunkCompute: split new request to %d MRRS reqs", rdReqCntReg + 1); + end + else begin + rdReqCntReg <= rdReqCntReg + 1; + end + + endrule + interface dmaRequestFifoIn = convertFifoToFifoIn(inputFifo); interface chunkRequestFifoOut = convertFifoToFifoOut(outputFifo); + interface reqCntFifoOut = convertFifoToFifoOut(rdReqCntFifo); interface Put maxReadReqSize; method Action put (Tuple2#(TlpPayloadSize, TlpPayloadSizeWidth) mrrsCfg); @@ -275,11 +297,11 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); reqOutFifo.enq(chunkReq); end end + // $display($time, "ns SIM INFO @ mkChunkSplit: output chunkReq."); nextStartAddrReg <= nextStartAddr; remainLenReg <= remainLen; isInSplitReg <= (remainLen != 0); end - chunkOutFifo.enq(stream); endrule @@ -320,7 +342,7 @@ module mkRqDescriptorGenerator#(Bool isWrite)(RqDescriptorGenerator); DataBytePtr bytePtr = fromInteger(valueOf(TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH))); let descriptor = PcieRequesterRequestDescriptor { forceECRC : False, - attributes : 0, + attributes : fromInteger(valueof(ATTR_NO_SNOOP)), trafficClass : 0, requesterIdEn : False, completerId : 0, @@ -330,7 +352,7 @@ module mkRqDescriptorGenerator#(Bool isWrite)(RqDescriptorGenerator); reqType : isWrite ? fromInteger(valueOf(MEM_WRITE_REQ)) : fromInteger(valueOf(MEM_READ_REQ)), dwordCnt : dwCnt, address : truncate(exReq.startAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)), - addrType : fromInteger(valueOf(TRANSLATED_ADDR)) + addrType : fromInteger(valueOf(UNTRANSLATED_ADDR)) }; let stream = DataStream { data : zeroExtend(pack(descriptor)), @@ -343,7 +365,8 @@ module mkRqDescriptorGenerator#(Bool isWrite)(RqDescriptorGenerator); let endAddrOffset = byteModDWord(exReq.endAddr); let firstByteEn = convertDWordOffset2FirstByteEn(startAddrOffset); let lastByteEn = convertDWordOffset2LastByteEn(endAddrOffset); - if (exReq.length <= fromInteger(valueOf(DWORD_BYTES))) begin + // if startAddr and endAddr are in the same DWord + if ((exReq.startAddr >> valueOf(TLog#(DWORD_BYTES))) == (exReq.endAddr >> valueOf(TLog#(DWORD_BYTES)))) begin firstByteEn = firstByteEn & lastByteEn; lastByteEn = 0; end diff --git a/src/PcieAdapter.bsv b/src/PcieAdapter.bsv index 7d3bab3..15d2943 100644 --- a/src/PcieAdapter.bsv +++ b/src/PcieAdapter.bsv @@ -177,7 +177,461 @@ interface ConvertDataStreamsToStraddleAxis; interface FifoOut#(ReqReqAxiStream) axiStreamFifoOut; endinterface +typedef Bit#(2) StraddleState; +typedef 2'b00 S_IDLE; +typedef 2'b01 S_SINGLE; +typedef 2'b10 S_DOUBLE; + +typedef struct { + Bool valid; + Bool isSd; + DataStream stream; + DmaPathNo id; + DmaPathNo subId; +} ArbitHandle deriving(Bits, Eq, Bounded); + +function ArbitHandle getEmptyArbitHandle(); + return ArbitHandle { + valid : False, + isSd : False, + stream : getEmptyStream, + id : 0, + subId : 0 + }; +endfunction + +function Bool hasStraddleSpace(DataStream stream); + return !unpack(stream.byteEn[valueOf(STRADDLE_THRESH_BYTE_WIDTH)]); +endfunction + module mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); + FIFOF#(DataStream) dataAFifo <- mkFIFOF; + FIFOF#(DataStream) dataBFifo <- mkFIFOF; + FIFOF#(ReqReqAxiStream) axiStreamOutFifo <- mkFIFOF; + + FIFOF#(SideBandByteEn) byteEnAFifo <- mkSizedFIFOF(valueOf(BYTEEN_INFIFO_DEPTH)); + FIFOF#(SideBandByteEn) byteEnBFifo <- mkSizedFIFOF(valueOf(BYTEEN_INFIFO_DEPTH)); + + FIFOF#(ArbitHandle) arbitFifo <- mkFIFOF; + + Reg#(ArbitHandle) cacheReg <- mkReg(getEmptyArbitHandle); + Wire#(ArbitHandle) way0Wire <- mkDWire(getEmptyArbitHandle); + Wire#(ArbitHandle) way1Wire <- mkDWire(getEmptyArbitHandle); + + function Tuple2#(DataStream, DataStream) conductStraddle(DataStream first, DataStream second); + let sum = first; + let carry = second; + sum.data = first.data | (second.data << valueOf(STRADDLE_THRESH_BIT_WIDTH)); + sum.byteEn = first.byteEn | (second.byteEn << valueOf(STRADDLE_THRESH_BYTE_WIDTH)); + carry.data = second.data >> valueOf(STRADDLE_THRESH_BIT_WIDTH); + carry.byteEn = second.byteEn >> valueOf(STRADDLE_THRESH_BYTE_WIDTH); + sum.isLast = isByteEnZero(carry.byteEn); // If carry is empty, than sum is last frame + carry.isFirst = False; + return tuple2(carry, sum); + endfunction + + // generate straddle mode datastream from 2 way seperated datastream + // return : tuple2(cache, result) + // warning: the module should save return wb and input as cache in the next cycle + function Tuple4#(ArbitHandle, ArbitHandle, Bool, Bool) arbitStraddleTwoWay(ArbitHandle cache, ArbitHandle way0, ArbitHandle way1); + let result = getEmptyArbitHandle; + let wb = getEmptyArbitHandle; + Bool way0dq = False; + Bool way1dq = False; + case(tuple3(cache.valid, way0.valid, way1.valid)) + // Only cache , output directly if isLast, or waiting subsequent beats + tuple3(True, False, False): begin + if (cache.stream.isLast) begin + result = cache; + wb.id = cache.id; + wb.stream.isLast = result.stream.isLast; + end + else begin + wb = cache; + end + end + // Combine cache and way0, if cache isLast high, it's straddle combine, or is normal stream combine + tuple3(True, True, False): begin + if (cache.id == 0) begin // Normal inner-stream combine + result = cache; + if (!cache.stream.isLast) begin + let {carry, sum} = conductStraddle(cache.stream, way0.stream); + result.stream = sum; + wb.stream = carry; + wb.valid = !isByteEnZero(wb.stream.byteEn); + wb.id = 0; + way0dq = True; + end + else begin + wb.id = cache.id; + end + end + else begin // bypass or Straddle combine + if (cache.stream.isLast) begin + result = cache; + if (hasStraddleSpace(cache.stream)) begin + let {carry, sum} = conductStraddle(cache.stream, way0.stream); + result.stream = sum; + wb.stream = carry; + wb.valid = !isByteEnZero(wb.stream.byteEn); + wb.id = 0; + result.subId = 0; + result.isSd = True; + way0dq = True; + end + end + else begin + wb = cache; + end + end + wb.stream.isLast = wb.valid ? wb.stream.isLast : result.stream.isLast; + end + // Combine cache and way1, if cache isLast high, it's straddle combine, or is normal stream combine + tuple3(True, False, True): begin + if (cache.id == 1) begin // Normal inner-stream combine + result = cache; + if (!cache.stream.isLast) begin + let {carry, sum} = conductStraddle(cache.stream, way1.stream); + result.stream = sum; + wb.stream = carry; + wb.valid = !isByteEnZero(wb.stream.byteEn); + wb.id = 1; + way1dq = True; + end + else begin + wb.id = cache.id; + end + end + else begin // bypass or Straddle combine + if (cache.stream.isLast) begin + result = cache; + if (hasStraddleSpace(cache.stream)) begin + let {carry, sum} = conductStraddle(cache.stream, way1.stream); + result.stream = sum; + wb.stream = carry; + wb.valid = !isByteEnZero(wb.stream.byteEn); + wb.id = 1; + result.subId = 1; + result.isSd = True; + way1dq = True; + end + end + else begin + wb = cache; + end + end + wb.stream.isLast = wb.valid ? wb.stream.isLast : result.stream.isLast; + end + // Both streams and the cache have data + tuple3(True, True, True): begin + result = cache; + // cache's stream is not over yet, combine cache and way(x) first + if (!cache.stream.isLast) begin + if (cache.id == 0) begin + let {carry, sum} = conductStraddle(cache.stream, way0.stream); + result.stream = sum; + wb.stream = carry; + way0dq = True; + end + else begin + let {carry, sum} = conductStraddle(cache.stream, way1.stream); + result.stream = sum; + wb.stream = carry; + way1dq = True; + end + wb.id = cache.id; + wb.valid = !isByteEnZero(wb.stream.byteEn); + end + // assert whether it isLast and has straddle space, combine the other stream + else begin + if(hasStraddleSpace(cache.stream)) begin + result.isSd = True; + if (cache.id == 0) begin + let {carry, sum} = conductStraddle(cache.stream, way1.stream); + result.stream = sum; + wb.stream = carry; + result.subId = 1; + wb.id = 1; + way1dq = True; + end + else begin + let {carry, sum} = conductStraddle(cache.stream, way0.stream); + result.stream = sum; + wb.stream = carry; + result.subId = 0; + wb.id = 0; + way0dq = True; + end + wb.valid = !isByteEnZero(wb.stream.byteEn); + end + else begin + wb.id = cache.id; + end + end + wb.stream.isLast = wb.valid ? wb.stream.isLast : result.stream.isLast; + end + // Only way0 + tuple3(False, True, False): begin + // Last trans is over + if (cache.id == 0 || cache.stream.isLast) begin + result = way0; + wb.stream.isLast = result.stream.isLast; + wb.id = 0; + way0dq = True; + end + // waiting the other channel + else begin + wb = cache; + end + end + // Only way1 + tuple3(False, False, True): begin + // Last trans is over + if (cache.id == 1 || cache.stream.isLast) begin + result = way1; + wb.stream.isLast = result.stream.isLast; + wb.id = 1; + way1dq = True; + end + // waiting the other channel + else begin + wb = cache; + end + end + // Bypass + tuple3(False, False, False): begin + wb = cache; + end + // Both path have data, arbitrate the stream, and conbine the other if have spaces + tuple3(False, True, True): begin + // If no stream tranferring + if (cache.stream.isLast) begin + if (cache.id == 0) begin + result = way1; + way1dq = True; + end + else begin + result = way0; + way0dq = True; + end + end + // Continue the tranferring one + else begin + if (cache.id == 0) begin + result = way0; + way0dq = True; + end + else begin + result = way1; + way1dq = True; + end + end + wb.id = result.id; + // If the result is the last + if (hasStraddleSpace(result.stream) && result.stream.isLast) begin + result.isSd = True; + if (result.id == 0) begin + let {carry, sum} = conductStraddle(result.stream, way1.stream); + result.stream = sum; + wb.stream = carry; + result.subId = 1; + way1dq = True; + end + else begin + let {carry, sum} = conductStraddle(result.stream, way0.stream); + result.stream = sum; + wb.stream = carry; + result.subId = 0; + way0dq = True; + end + wb.valid = !isByteEnZero(wb.stream.byteEn); + wb.id = result.subId; + end + wb.stream.isLast = wb.valid ? wb.stream.isLast : result.stream.isLast; + end + endcase + return tuple4(wb, result, way0dq, way1dq); + endfunction + + // Generate isSop and isEop from ArbitHandle, byteEnA should be the sideband signal of the lsb straddle frame + function PcieRequesterRequestSideBandFrame genRQSideBand (ArbitHandle hdl, SideBandByteEn byteEnA, SideBandByteEn byteEnB); + // generate isSop and isEop first + let isSop = PcieTlpCtlIsSopCommon { + isSopPtrs : replicate(0), + isSop : 0 + }; + let isEop = PcieTlpCtlIsEopCommon { + isEopPtrs : replicate(0), + isEop : 0 + }; + if (!hdl.isSd) begin + if (hdl.stream.isFirst) begin + isSop.isSop = fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT)); + isSop.isSopPtrs[0] = fromInteger(valueOf(ISSOP_LANE_0)); + end + if (hdl.stream.isLast) begin + isEop.isEop = fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT)); + isEop.isEopPtrs[0] = truncate(convertByteEn2DwordPtr(hdl.stream.byteEn)); + end + end + else if (hdl.isSd) begin + if (hdl.stream.isFirst) begin + isSop.isSop = fromInteger(valueOf(DOUBLE_TLP_IN_THIS_BEAT)); + isSop.isSopPtrs[0] = fromInteger(valueOf(ISSOP_LANE_0)); + isSop.isSopPtrs[1] = fromInteger(valueOf(ISSOP_LANE_32)); + end + else begin + isSop.isSop = fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT)); + isSop.isSopPtrs[0] = fromInteger(valueOf(ISSOP_LANE_32)); + end + Bit#(STRADDLE_THRESH_BYTE_WIDTH) lsbByteEn = truncate(hdl.stream.byteEn); + if (hdl.stream.isLast) begin + isEop.isEop = fromInteger(valueOf(DOUBLE_TLP_IN_THIS_BEAT)); + isEop.isEopPtrs[0] = truncate(convertByteEn2DwordPtr(zeroExtend(lsbByteEn))); + isEop.isEopPtrs[1] = truncate(convertByteEn2DwordPtr(hdl.stream.byteEn)); + end + else begin + isEop.isEop = fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT)); + isEop.isEopPtrs[0] = truncate(convertByteEn2DwordPtr(zeroExtend(lsbByteEn))); + end + end + // generate the full sideband frame + let {firstByteEnA, lastByteEnA} = byteEnA; + let {firstByteEnB, lastByteEnB} = byteEnB; + let sideBand = PcieRequesterRequestSideBandFrame { + // Do not use parity check in the core + parity : 0, + // Do not support progress track + seqNum1 : 0, + seqNum0 : 0, + //TODO: Do not support Transaction Processing Hint now, maybe we need TPH for better performance + tphSteeringTag : 0, + tphIndirectTagEn : 0, + tphType : 0, + tphPresent : 0, + // Do not support discontinue + discontinue : False, + // Indicates end of the tlp + isEop : isEop, + // Indicates starts of a new tlp + isSop : isSop, + // Disable when use DWord-aligned Mode + addrOffset : 0, + // Indicates byte enable in the first/last DWord + lastByteEn : {pack(lastByteEnB), pack(lastByteEnA)}, + firstByteEn : {pack(firstByteEnB), pack(firstByteEnA)} + }; + return sideBand; + endfunction + + rule getHandle; + if (dataAFifo.notEmpty) begin + way0Wire <= ArbitHandle { + valid : True, + isSd : False, + stream: dataAFifo.first, + id : 0, + subId : 0 + }; + end + if (dataBFifo.notEmpty) begin + way1Wire <= ArbitHandle { + valid : True, + isSd : False, + stream: dataBFifo.first, + id : 1, + subId : 0 + }; + end + endrule + + rule arbitrate; + // if (way0Wire.valid) + // $display($time, "ns SIM INFO @ arbit sim: input: id: %d, isFirst: %d, isLast: %d, data %h", way0Wire.id, pack(way0Wire.stream.isFirst), pack(way0Wire.stream.isLast), way0Wire.stream.data); + // if (way1Wire.valid) + // $display($time, "ns SIM INFO @ arbit sim: input: id: %d, isFirst: %d, isLast: %d, data %h", way1Wire.id, pack(way1Wire.stream.isFirst), pack(way1Wire.stream.isLast), way1Wire.stream.data); + let resultHdl = getEmptyArbitHandle; + let writebackHdl = getEmptyArbitHandle; + Bool way0dq = False; + Bool way1dq = False; + {writebackHdl, resultHdl, way0dq, way1dq} = arbitStraddleTwoWay(cacheReg, way0Wire, way1Wire); + cacheReg <= writebackHdl; + if (resultHdl.valid) begin + if (way0dq) begin + dataAFifo.deq; + end + if (way1dq) begin + dataBFifo.deq; + end + arbitFifo.enq(resultHdl); + // $display($time, "ns SIM INFO @ arbit sim: input: cache.valid:%d way0.valid:%d, way1.valid:%d", cacheReg.valid, way0Wire.valid, way1Wire.valid); + // $display($time, "ns SIM INFO @ arbit sim: result: id:%d, isSd:%d, subId:%d, data %h", resultHdl.id, resultHdl.isSd, resultHdl.subId, resultHdl.stream.data); + // if (writebackHdl.valid) $display($time, "ns SIM INFO @ arbit sim: wb: id:%d, isSd:%d, subId:%d, data %h", writebackHdl.id, writebackHdl.isSd, writebackHdl.subId, writebackHdl.stream.data); + end + endrule + + rule genStraddle; + let hdl = arbitFifo.first; + arbitFifo.deq; + let sideBandBE0 = tuple2(0,0); + let sideBandBE1 = tuple2(0,0); + if (hdl.isSd && hdl.stream.isFirst) begin + byteEnAFifo.deq; + byteEnBFifo.deq; + if (hdl.id == 0) begin + sideBandBE0 = byteEnAFifo.first; + sideBandBE1 = byteEnBFifo.first; + end + else begin + sideBandBE0 = byteEnBFifo.first; + sideBandBE1 = byteEnAFifo.first; + end + end + else if (hdl.isSd) begin + if (hdl.subId == 0) begin + sideBandBE0 = byteEnAFifo.first; + byteEnAFifo.deq; + end + else begin + sideBandBE0 = byteEnBFifo.first; + byteEnBFifo.deq; + end + end + else if (!hdl.isSd && hdl.stream.isFirst) begin + if (hdl.id == 0) begin + sideBandBE0 = byteEnAFifo.first; + byteEnAFifo.deq; + end + else begin + sideBandBE0 = byteEnBFifo.first; + byteEnBFifo.deq; + end + end + let sideBand = genRQSideBand(hdl, sideBandBE0, sideBandBE1); + let axiStream = ReqReqAxiStream { + tData : hdl.stream.data, + tKeep : -1, + tLast : True, + tUser : pack(sideBand) + }; + axiStreamOutFifo.enq(axiStream); + $display($time, "ns SIM INFO @ mkDataStreamToAxis: tx a AXIS frame, isSop:%d, isSopPtr:%d/%d, isEop:%d, isEopPtr:%d/%d, BE0:%b/%b, BE1:%b/%b, tData:%h", + sideBand.isSop.isSop, sideBand.isSop.isSopPtrs[0], sideBand.isSop.isSopPtrs[1], sideBand.isEop.isEop, sideBand.isEop.isEopPtrs[0], sideBand.isEop.isEopPtrs[1], + tpl_1(sideBandBE0), tpl_2(sideBandBE0), tpl_1(sideBandBE1), tpl_2(sideBandBE1), axiStream.tData); + endrule + + Vector#(DMA_PATH_NUM, FifoIn#(DataStream)) dataFifoInIfc = newVector; + Vector#(DMA_PATH_NUM, FifoIn#(SideBandByteEn)) byteEnFifoInIfc = newVector; + dataFifoInIfc[0] = convertFifoToFifoIn(dataAFifo); + dataFifoInIfc[1] = convertFifoToFifoIn(dataBFifo); + byteEnFifoInIfc[0] = convertFifoToFifoIn(byteEnAFifo); + byteEnFifoInIfc[1] = convertFifoToFifoIn(byteEnBFifo); + interface dataFifoIn = dataFifoInIfc; + interface byteEnFifoIn = byteEnFifoInIfc; + interface axiStreamFifoOut = convertFifoToFifoOut(axiStreamOutFifo); +endmodule + +module mkOldConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); FIFOF#(SideBandByteEn) byteEnAFifo <- mkSizedFIFOF(valueOf(BYTEEN_INFIFO_DEPTH)); FIFOF#(SideBandByteEn) byteEnBFifo <- mkSizedFIFOF(valueOf(BYTEEN_INFIFO_DEPTH)); diff --git a/src/PcieDescriptorTypes.bsv b/src/PcieDescriptorTypes.bsv index d1e1748..eee4953 100644 --- a/src/PcieDescriptorTypes.bsv +++ b/src/PcieDescriptorTypes.bsv @@ -34,6 +34,8 @@ typedef Bit#(DES_DWORD_COUNT_WIDTH) DwordCount; typedef Bit#(DES_ADDR_WIDTH) Address; typedef Bit#(DES_ADDR_TYPE_WIDTH) AddrType; +typedef 3'b001 ATTR_NO_SNOOP; + // 16bytes Completer Request Descriptor Format for Memory, I/O, and Atomic Options typedef struct { // DW + 3 diff --git a/src/StreamUtils.bsv b/src/StreamUtils.bsv index c6245cd..93d1087 100755 --- a/src/StreamUtils.bsv +++ b/src/StreamUtils.bsv @@ -122,18 +122,19 @@ function Tuple2#(StreamWithPtr, StreamWithPtr) getConcatStream (StreamWithPtr st return tuple2(concatStreamWithPtr, remainStreamWithPtr); endfunction -typedef 3 STREAM_SPLIT_LATENCY; +typedef 5 STREAM_SPLIT_LATENCY; +typedef 3 STREAM_SPLIT_INNER_LATENCY; module mkStreamSplit(StreamSplit ifc); Reg#(StreamSize) streamByteCntReg <- mkReg(0); - FIFOF#(StreamSize) splitLocationFifo <- mkSizedFIFOF(valueOf(STREAM_SPLIT_LATENCY)); + FIFOF#(StreamSize) splitLocationFifo <- mkSizedFIFOF(valueOf(STREAM_SPLIT_INNER_LATENCY)); FIFOF#(DataStream) inputFifo <- mkFIFOF; FIFOF#(DataStream) outputFifo <- mkFIFOF; FIFOF#(StreamWithPtr) prepareFifo <- mkFIFOF; FIFOF#(StreamWithPtr) assertFifo <- mkFIFOF; - FIFOF#(DataBytePtr) splitPtrFifo <- mkSizedFIFOF(valueOf(STREAM_SPLIT_LATENCY)); + FIFOF#(DataBytePtr) splitPtrFifo <- mkSizedFIFOF(valueOf(STREAM_SPLIT_INNER_LATENCY)); Reg#(StreamWithPtr) remainStreamWpReg <- mkRegU; @@ -379,7 +380,7 @@ typedef 2 STREAM_ALIGN_DW_LATENCY; module mkStreamShiftAlignToDw#(DataBytePtr offset)(StreamShiftAlignToDw); FIFOF#(DataStream) dataInFifo <- mkFIFOF; - FIFOF#(DataStream) pipeFifo <- mkFIFOF; + // FIFOF#(DataStream) pipeFifo <- mkFIFOF; FIFOF#(DataStream) dataOutFifo <- mkFIFOF; FIFOF#(AlignDwMode) alignModeFifo <- mkFIFOF; @@ -393,10 +394,10 @@ module mkStreamShiftAlignToDw#(DataBytePtr offset)(StreamShiftAlignToDw); ByteEn byteEnMask2 = 1 << (offset + 1) | byteEnMask1 ; ByteEn byteEnMask3 = 1 << (offset + 2) | byteEnMask2; - rule pipe; - pipeFifo.enq(dataInFifo.first); - dataInFifo.deq; - endrule + // rule pipe; + // pipeFifo.enq(dataInFifo.first); + // dataInFifo.deq; + // endrule rule execShift; if (hasLastRemainReg) begin @@ -405,8 +406,10 @@ module mkStreamShiftAlignToDw#(DataBytePtr offset)(StreamShiftAlignToDw); remainStreamReg <= getEmptyStream; end else begin - let stream = pipeFifo.first; - pipeFifo.deq; + // let stream = pipeFifo.first; + // pipeFifo.deq; + let stream = dataInFifo.first; + dataInFifo.deq; let shiftStream = DataStream { data : stream.data << offsetBits, byteEn : stream.byteEn << offset , diff --git a/test/Makefile b/test/Makefile index 10975d9..d25bc9d 100755 --- a/test/Makefile +++ b/test/Makefile @@ -1,7 +1,7 @@ include ../Makefile.base -TESTFILE ?= TestSimpleUtils.bsv -TOPMODULE ?= mkTestSimpleH2CCore +TESTFILE ?= TestDmaCore.bsv +TOPMODULE ?= mkSimpleConvertDataStreamsToStraddleAxisTb SIMSCRIPT = $(BUILDDIR)/$(TOPMODULE).sh diff --git a/test/TestDmaCore.bsv b/test/TestDmaCore.bsv index 3e4de81..da5fe74 100755 --- a/test/TestDmaCore.bsv +++ b/test/TestDmaCore.bsv @@ -31,13 +31,13 @@ module mkChunkComputerTb(Empty); Reg#(UInt#(32)) testCntReg <- mkReg(0); Reg#(UInt#(32)) epochCntReg <- mkReg(0); - Reg#(DmaMemAddr) lenRemainReg <- mkReg(0); + Reg#(DmaReqLen) lenRemainReg <- mkReg(0); Randomize#(DmaMemAddr) startAddrRandomVal <- mkConstrainedRandomizer(0, fromInteger(valueOf(MAX_ADDRESS)-1)); - Randomize#(DmaMemAddr) lengthRandomVal <- mkConstrainedRandomizer(1, fromInteger(valueOf(MAX_TEST_LENGTH))); + Randomize#(DmaReqLen) lengthRandomVal <- mkConstrainedRandomizer(1, fromInteger(valueOf(MAX_TEST_LENGTH))); function Bool hasBoundary(DmaRequest request); - let highIdx = (request.startAddr + request.length - 1) >> valueOf(BUS_BOUNDARY_WIDTH); + let highIdx = (request.startAddr + zeroExtend(request.length) - 1) >> valueOf(BUS_BOUNDARY_WIDTH); let lowIdx = request.startAddr >> valueOf(BUS_BOUNDARY_WIDTH); return (highIdx > lowIdx); endfunction @@ -46,20 +46,21 @@ module mkChunkComputerTb(Empty); startAddrRandomVal.cntrl.init; lengthRandomVal.cntrl.init; isInitReg <= True; - dut.setTlpMaxSize.put(fromInteger(valueOf(DEFAULT_TLP_SIZE_SETTING))); + dut.maxReadReqSize.put(tuple2(fromInteger(valueOf(DEFAULT_TLP_SIZE)), fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH)))); $display("INFO: Start Test of mkChunkComputerTb"); $display("INFO: Set Max Payload Size to ", valueOf(DEFAULT_TLP_SIZE)); endrule rule testInput if (isInitReg && lenRemainReg == 0); DmaMemAddr testAddr <- startAddrRandomVal.next; - DmaMemAddr testLength <- lengthRandomVal.next; - let testEnd = testAddr + testLength - 1; + DmaReqLen testLength <- lengthRandomVal.next; + let testEnd = testAddr + zeroExtend(testLength) - 1; if (testEnd > testAddr && testEnd <= fromInteger(valueOf(MAX_ADDRESS))) begin - let request = DmaRequest{ + let request = DmaExtendRequest{ startAddr : testAddr, + endAddr : testAddr + zeroExtend(testLength), length : testLength, - isWrite : False + tag : 0 }; lenRemainReg <= testLength; dut.dmaRequestFifoIn.enq(request); @@ -92,9 +93,10 @@ module mkChunkComputerTb(Empty); $finish(); end else begin - PcieTlpSizeSetting newSetting = fromInteger(valueOf(DEFAULT_TLP_SIZE_SETTING)) + truncate(pack(testCntReg)) + 1; - dut.setTlpMaxSize.put(newSetting); - $display("INFO: Set Max Payload Size to ", pack(fromInteger(valueOf(DEFAULT_TLP_SIZE)) << newSetting)); + TlpPayloadSizeWidth mpsWidth = fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH)) + truncate(pack(testCntReg)); + TlpPayloadSize mps = 1 << mpsWidth; + dut.maxReadReqSize.put(tuple2(mps, mpsWidth)); + $display("INFO: Set Max Payload Size to %d", mps); end end end @@ -104,11 +106,11 @@ endmodule // Do not use any simple tests, run cocotb for whole verification -typedef 60 SIMPLE_TEST_BYTELEN; +typedef 25 SIMPLE_TEST_BYTELEN; typedef 'hABCDEF SIMPLE_TEST_ADDR; module mkSimpleC2HWriteCoreTb(Empty); - C2HWriteCore dut <- mkC2HWriteCore; + C2HWriteCore dut <- mkC2HWriteCore(0); Reg#(UInt#(32)) testCntReg <- mkReg(0); rule testInput if (testCntReg < 1); @@ -275,7 +277,7 @@ module mkSimpleC2HReadCoreTb(Empty); endmodule module simpleWritePathTb(Empty); - C2HWriteCore c2hWriteCore <- mkC2HWriteCore; + C2HWriteCore c2hWriteCore <- mkC2HWriteCore(0); ConvertDataStreamsToStraddleAxis adapter <- mkConvertDataStreamsToStraddleAxis; mkConnection(c2hWriteCore.tlpFifoOut, adapter.dataFifoIn[0]); mkConnection(c2hWriteCore.tlpSideBandFifoOut, adapter.byteEnFifoIn[0]); From 35c503df274db6c2c8227e00415fd57ff53b6326 Mon Sep 17 00:00:00 2001 From: BIGWJZ Date: Thu, 7 Nov 2024 18:06:18 +0800 Subject: [PATCH 47/53] Add blue-rdma style interface --- cocotb/bypass_write_read/dma_wr_rd_tb.py | 2 +- src/CompletionFifo.bsv | 94 +++++++++++++++++++++--- src/DmaC2HPipe.bsv | 88 +++++++++++++++++++++- src/DmaH2CPipe.bsv | 67 ++++++++++++++++- src/DmaTypes.bsv | 36 +++++++++ src/DmaUtils.bsv | 13 +++- src/DmaWrapper.bsv | 80 ++++++++++++++++++++ test/TestCompletionFifo.bsv | 2 +- 8 files changed, 362 insertions(+), 20 deletions(-) diff --git a/cocotb/bypass_write_read/dma_wr_rd_tb.py b/cocotb/bypass_write_read/dma_wr_rd_tb.py index 142913a..569b19d 100644 --- a/cocotb/bypass_write_read/dma_wr_rd_tb.py +++ b/cocotb/bypass_write_read/dma_wr_rd_tb.py @@ -39,7 +39,7 @@ async def single_path_random_read_test(pcie_tb, dma_channel, mem): data = await pcie_tb.run_single_read_once(dma_channel, addr, length) assert data == char * length -# @cocotb.test(timeout_time=100000000, timeout_unit="ns") +@cocotb.test(timeout_time=100000000, timeout_unit="ns") async def step_random_write_test(dut): tb = BdmaBypassTb(dut) diff --git a/src/CompletionFifo.bsv b/src/CompletionFifo.bsv index 442d339..a33cc32 100644 --- a/src/CompletionFifo.bsv +++ b/src/CompletionFifo.bsv @@ -4,6 +4,7 @@ import FIFOF::*; import BRAMFIFO::*; import Vector::*; import DReg::*; +import Connectable::*; import SemiFifo::*; @@ -20,8 +21,8 @@ import SemiFifo::*; // get chunks in order: CRam.drain.first; CRam.drain.deq; // Parameters: -// nSlot : slot numbers -// nChunk: chunk numbers per slot +// nSlot : slot numbers, should be less than 16 in current version +// nChunk: chunk numbers per slot, a large value may cause bad timing // tChunk: chunk data types interface CompletionFifo#(numeric type nSlot, type tChunk); interface Get#(SlotNum#(nSlot)) reserve; @@ -38,10 +39,10 @@ function Bool isPowerOf2(Integer n); endfunction module mkCompletionFifo#(Integer nChunk)(CompletionFifo#(nSlot, tChunk)) - provisos (Bits#(tChunk, szChunk), Log#(nSlot, ln), Add#(1, ln, ln1), Add#(1, _a, szChunk)); + provisos (Bits#(tChunk, szChunk), Add#(1, _a, szChunk), Add#(_b, TLog#(nSlot), 4)); let maxSlotIdx = fromInteger(valueOf(nSlot) - 1); - function Action incrSlotIdx(Reg#(Bit#(ln)) idxReg); + function Action incrSlotIdx(Reg#(Bit#(TLog#(nSlot))) idxReg); action if (isPowerOf2(valueOf(nSlot))) idxReg <= idxReg + 1; // counter wraps automagically @@ -51,23 +52,31 @@ module mkCompletionFifo#(Integer nChunk)(CompletionFifo#(nSlot, tChunk)) endfunction FIFOF#(Tuple2#(SlotNum#(nSlot), tChunk)) appendFifo <- mkFIFOF; + Demux1To16#(tChunk) demuxer <- mkDemux1To16; FIFOF#(tChunk) drainFifo <- mkFIFOF; Vector#(nSlot, FIFOF#(tChunk)) bufferFifos <- replicateM(mkSizedBRAMFIFOF(nChunk)); + Vector#(nSlot, FIFOF#(Maybe#(tChunk))) fanoutFifos <- replicateM(mkFIFOF); Reg#(SlotNum#(nSlot)) inIdxReg <- mkReg(0); // input index, return this value when `reserve` is called Reg#(SlotNum#(nSlot)) outIdxReg <- mkReg(0); // output index, pipeout Fifos[outIdxReg] - Counter#(ln1) counter <- mkCounter(0); // number of filled slots + Counter#(TAdd#(1, TLog#(nSlot))) counter <- mkCounter(0); // number of filled slots Reg#(Vector#(nSlot, Bool)) flagsReg <- mkReg(replicate(False)); - Reg#(Maybe#(SlotNum#(nSlot))) cmplSlotReg <- mkDReg(tagged Invalid); + Vector#(TAdd#(DEMUX16_LATENCY,1), Reg#(Maybe#(SlotNum#(nSlot)))) cmplSlotRegs <- replicateM(mkDReg(tagged Invalid)); RWire#(SlotNum#(nSlot)) rstSlot <- mkRWire; + Integer fIdx = 0; rule writeBuffer; let {slot, data} = appendFifo.first; appendFifo.deq; - bufferFifos[slot].enq(data); + demuxer.fin.enq(data); + demuxer.sin.enq(zeroExtend(slot)); endrule + for (fIdx = 0; fIdx < valueOf(nSlot); fIdx = fIdx + 1) begin + mkConnection(demuxer.fouts[fIdx], bufferFifos[fIdx]); + end + rule readBuffer; if (!bufferFifos[outIdxReg].notEmpty && flagsReg[outIdxReg]) begin // complete assert and the buffer is empty incrSlotIdx(outIdxReg); @@ -82,7 +91,7 @@ module mkCompletionFifo#(Integer nChunk)(CompletionFifo#(nSlot, tChunk)) endrule rule setFlags; - let cmplMaybe = cmplSlotReg; + let cmplMaybe = cmplSlotRegs[valueOf(DEMUX16_LATENCY)]; let rstMaybe = rstSlot.wget; let flags = flagsReg; if (isValid(cmplMaybe)) begin @@ -94,6 +103,13 @@ module mkCompletionFifo#(Integer nChunk)(CompletionFifo#(nSlot, tChunk)) flagsReg <= flags; endrule + rule cmpl; + for (Integer rIdx = 0; rIdx < valueOf(DEMUX16_LATENCY); rIdx = rIdx + 1) begin + if (isValid(cmplSlotRegs[rIdx])) + cmplSlotRegs[rIdx+1] <= cmplSlotRegs[rIdx]; + end + endrule + interface Get reserve; method ActionValue#(SlotNum#(nSlot)) get() if (counter.value <= maxSlotIdx); incrSlotIdx(inIdxReg); @@ -108,7 +124,7 @@ module mkCompletionFifo#(Integer nChunk)(CompletionFifo#(nSlot, tChunk)) interface Put complete; method Action put(SlotNum#(nSlot) slot); - cmplSlotReg <= tagged Valid slot; + cmplSlotRegs[0] <= tagged Valid slot; endmethod endinterface @@ -116,3 +132,63 @@ module mkCompletionFifo#(Integer nChunk)(CompletionFifo#(nSlot, tChunk)) interface drain = convertFifoToFifoOut(drainFifo); endmodule + +function Action demux1To4(FIFOF#(tData) inFifo, Vector#(4, FIFOF#(tData)) outFifos, Bit#(2) s) + provisos (Bits#(tData, szData)); + action + let data = inFifo.first; + inFifo.deq; + case(s) + 0: outFifos[0].enq(data); + 1: outFifos[1].enq(data); + 2: outFifos[2].enq(data); + 3: outFifos[3].enq(data); + default: begin end + endcase + endaction +endfunction + +typedef 3 DEMUX16_LATENCY; + +interface Demux1To16#(type tData); + interface FifoIn#(tData) fin; + interface Vector#(16, FifoOut#(tData)) fouts; + interface FifoIn#(Bit#(4)) sin; +endinterface + +module mkDemux1To16(Demux1To16#(tData)) provisos(Bits#(tData, szData)); + FIFOF#(tData) inFifo <- mkFIFOF; + Vector#(4, FIFOF#(tData)) midFifos <- replicateM(mkFIFOF); + Vector#(4, Vector#(4, FIFOF#(tData))) outFifos <- replicateM(replicateM(mkFIFOF)); + Vector#(2, FIFOF#(Bit#(4))) sFifo <- replicateM(mkFIFOF); + + Vector#(16, FifoOut#(tData)) outIfc = newVector; + + rule l1Demux; + let l1Set = truncate(sFifo[0].first >> 2); + demux1To4(inFifo, midFifos, l1Set); + sFifo[0].deq; + sFifo[1].enq(sFifo[0].first); + endrule + + for(Integer idx = 0; idx < 4; idx = idx + 1) begin + rule l2Demux; + let l2set = truncate(sFifo[1].first); + demux1To4(midFifos[idx], outFifos[idx], l2set); + endrule + end + + rule sDeq; + sFifo[1].deq; + endrule + + for (Integer idx = 0; idx < 4; idx = idx + 1) begin + for (Integer subIdx = 0; subIdx < 4; subIdx = subIdx + 1) begin + outIfc[idx*4 + subIdx] = convertFifoToFifoOut(outFifos[idx][subIdx]); + end + end + + interface fin = convertFifoToFifoIn(inFifo); + interface sin = convertFifoToFifoIn(sFifo[0]); + interface fouts = outIfc; +endmodule diff --git a/src/DmaC2HPipe.bsv b/src/DmaC2HPipe.bsv index 9addac0..5deb7fe 100644 --- a/src/DmaC2HPipe.bsv +++ b/src/DmaC2HPipe.bsv @@ -2,6 +2,7 @@ import FIFOF::*; import GetPut::*; import Vector::*; import Connectable::*; +import ClientServer::*; import SemiFifo::*; import PrimUtils::*; @@ -13,7 +14,88 @@ import PcieDescriptorTypes::*; import DmaUtils::*; import CompletionFifo::*; -// `define DEBUG_READ +// Wrapper between original dma pipe and blue-rdma style interface +interface BdmaC2HPipe; + // User Logic Ifc + interface Server#(BdmaUserC2hWrReq, BdmaUserC2hWrResp) writeSrv; + interface Server#(BdmaUserC2hRdReq, BdmaUserC2hRdResp) readSrv; + + // Pcie Adapter Ifc + interface FifoOut#(DataStream) tlpDataFifoOut; + interface FifoOut#(SideBandByteEn) tlpSideBandFifoOut; + interface FifoIn#(StraddleStream) tlpDataFifoIn; + // TODO: CSR Ifc + interface Put#(TlpSizeCfg) tlpSizeCfg; + // interface Client#(DmaCsrValue, DmaCsrValue) statusReg; +endinterface + +module mkBdmaC2HPipe#(DmaPathNo pathIdx)(BdmaC2HPipe); + C2HReadCore readCore <- mkC2HReadCore(pathIdx); + C2HWriteCore writeCore <- mkC2HWriteCore(pathIdx); + + Reg#(Bool) isInitDoneReg <- mkReg(False); + + FIFOF#(BdmaUserC2hWrReq) wrReqInFifo <- mkFIFOF; + FIFOF#(BdmaUserC2hWrResp) wrRespOutFifo <- mkFIFOF; + FIFOF#(BdmaUserC2hRdReq) rdReqInFifo <- mkFIFOF; + FIFOF#(BdmaUserC2hRdResp) rdRespOutFifo <- mkFIFOF; + + FIFOF#(DataStream) tlpOutFifo <- mkFIFOF; + FIFOF#(SideBandByteEn) tlpSideBandFifo <- mkFIFOF; + + rule forwardWrReq; + let req = wrReqInFifo.first; + wrReqInFifo.deq; + writeCore.dataFifoIn.enq(req.dataStream); + writeCore.wrReqFifoIn.enq(DmaRequest { + startAddr: req.addr, + length : req.len, + isWrite : True + }); + endrule + + rule forwardWrResp; + let rv = writeCore.doneFifoOut.first; + writeCore.doneFifoOut.deq; + wrRespOutFifo.enq(BdmaUserC2hWrResp{ }); + endrule + + rule forwardRdReq; + let req = rdReqInFifo.first; + rdReqInFifo.deq; + readCore.rdReqFifoIn.enq(DmaRequest { + startAddr: req.addr, + length : req.len, + isWrite : False + }); + endrule + + rule forwardRdResp; + let stream = readCore.dataFifoOut.first; + readCore.dataFifoOut.deq; + rdRespOutFifo.enq(BdmaUserC2hRdResp{ + dataStream: stream + }); + endrule + + // User Ifc + interface readSrv = toGPServer(rdReqInFifo, rdRespOutFifo); + interface writeSrv = toGPServer(wrReqInFifo, wrRespOutFifo); + + // Pcie Adapter Ifc + interface tlpDataFifoOut = convertFifoToFifoOut(tlpOutFifo); + interface tlpSideBandFifoOut = convertFifoToFifoOut(tlpSideBandFifo); + interface tlpDataFifoIn = readCore.tlpFifoIn; + // TODO: CSR Ifc + interface Put tlpSizeCfg; + method Action put(sizeCfg); + writeCore.maxPayloadSize.put(tuple2(sizeCfg.mps, sizeCfg.mpsWidth)); + readCore.maxReadReqSize.put(tuple2(sizeCfg.mrrs, sizeCfg.mrrsWidth)); + isInitDoneReg <= True; + endmethod + endinterface + +endmodule // TODO : change the PCIe Adapter Ifc to TlpData and TlpHeader, // move the module which convert TlpHeader to IP descriptor from dma to adapter @@ -22,6 +104,7 @@ interface DmaC2HPipe; interface FifoIn#(DataStream) wrDataFifoIn; interface FifoIn#(DmaRequest) reqFifoIn; interface FifoOut#(DataStream) rdDataFifoOut; + interface FifoOut#(Bool) doneFifoOut; // Pcie Adapter Ifc interface FifoOut#(DataStream) tlpDataFifoOut; interface FifoOut#(SideBandByteEn) tlpSideBandFifoOut; @@ -81,6 +164,7 @@ module mkDmaC2HPipe#(DmaPathNo pathIdx)(DmaC2HPipe); interface wrDataFifoIn = convertFifoToFifoIn(dataInFifo); interface reqFifoIn = convertFifoToFifoIn(reqInFifo); interface rdDataFifoOut = readCore.dataFifoOut; + interface doneFifoOut = writeCore.doneFifoOut; // Pcie Adapter Ifc interface tlpDataFifoOut = convertFifoToFifoOut(tlpOutFifo); interface tlpSideBandFifoOut = convertFifoToFifoOut(tlpSideBandFifo); @@ -310,6 +394,7 @@ interface C2HWriteCore; // User Logic Ifc interface FifoIn#(DataStream) dataFifoIn; interface FifoIn#(DmaRequest) wrReqFifoIn; + interface FifoOut#(Bool) doneFifoOut; // PCIe IP Ifc interface FifoOut#(DataStream) tlpFifoOut; interface FifoOut#(SideBandByteEn) tlpSideBandFifoOut; @@ -404,6 +489,7 @@ module mkC2HWriteCore#(DmaPathNo pathIdx)(C2HWriteCore); // User Logic Ifc interface dataFifoIn = convertFifoToFifoIn(dataInFifo); interface wrReqFifoIn = convertFifoToFifoIn(wrReqInFifo); + interface doneFifoOut = chunkSplit.doneFifoOut; // PCIe Adapter Ifc interface tlpFifoOut = convertFifoToFifoOut(dataOutFifo); interface tlpSideBandFifoOut = convertFifoToFifoOut(byteEnOutFifo); diff --git a/src/DmaH2CPipe.bsv b/src/DmaH2CPipe.bsv index c638ae8..4110560 100644 --- a/src/DmaH2CPipe.bsv +++ b/src/DmaH2CPipe.bsv @@ -1,6 +1,7 @@ import FIFOF::*; import Vector::*; import RegFile::*; +import ClientServer::*; import SemiFifo::*; import PrimUtils::*; @@ -15,6 +16,60 @@ typedef 1 IDEA_CC_CSR_DWORD_CNT; typedef 4 IDEA_CC_CSR_BYTE_CNT; typedef 4 IDEA_FIRST_BE_HIGH_VALID_PTR_OF_CSR; +// Wrapper between original dma pipe and blue-rdma style interface +interface BdmaH2CPipe; + // User Ifc + interface Client#(BdmaUserH2cWrReq, BdmaUserH2cWrResp) writeClt; + interface Client#(BdmaUserH2cRdReq, BdmaUserH2cRdResp) readClt; + + // Pcie Adapter Ifc + interface FifoIn#(DataStream) tlpDataFifoIn; + interface FifoOut#(DataStream) tlpDataFifoOut; +endinterface + +module mkBdmaH2CPipe(BdmaH2CPipe); + DmaH2CPipe pipe <- mkDmaH2CPipe; + FIFOF#(BdmaUserH2cWrReq) wrReqQ <- mkFIFOF; + FIFOF#(BdmaUserH2cWrResp) wrRespQ <- mkFIFOF; + FIFOF#(BdmaUserH2cRdReq) rdReqQ <- mkFIFOF; + FIFOF#(BdmaUserH2cRdResp) rdRespQ <- mkFIFOF; + + rule forwardReq; + let h2cReq = pipe.userReqFifoOut.first; + pipe.userReqFifoOut.deq; + if (h2cReq.isWrite) begin + wrReqQ.enq(BdmaUserH2cWrReq { + addr: h2cReq.addr, + data: h2cReq.value + }); + end + else begin + rdReqQ.enq(BdmaUserH2cRdReq { + addr: h2cReq.addr + }); + end + endrule + + rule handleWrResp; + wrRespQ.deq; + endrule + + rule handleRdResp; + let value = rdRespQ.first.data; + rdRespQ.deq; + pipe.userRespFifoIn.enq(CsrResponse{ + addr : 0, + value: value + }); + endrule + + interface writeClt = toGPClient(wrReqQ, wrRespQ); + interface readClt = toGPClient(rdReqQ, rdRespQ); + interface tlpDataFifoIn = pipe.tlpDataFifoIn; + interface tlpDataFifoOut = pipe.tlpDataFifoOut; +endmodule + + function CsrResponse getEmptyCsrResponse(); return CsrResponse { addr : 0, @@ -28,7 +83,7 @@ interface DmaH2CPipe; interface FifoIn#(CsrResponse) csrRespFifoIn; // User Ifc interface FifoOut#(CsrRequest) userReqFifoOut; - interface FifoIn#(CsrResponse) userRespFifoIn; + interface FifoIn#(CsrResponse) userRespFifoIn; // Pcie Adapter Ifc interface FifoIn#(DataStream) tlpDataFifoIn; interface FifoOut#(DataStream) tlpDataFifoOut; @@ -136,9 +191,11 @@ module mkDmaH2CPipe(DmaH2CPipe); let addr = resp.addr; let value = resp.value; let {req, cqDescriptor} = pendingFifo.first; + `ifdef H2C_DEBUG if (addr == req.addr) begin - pendingFifo.deq; $display($time, "ns SIM INFO @ mkDmaH2CPipe: Valid rdResp with Addr %h, data %h", addr, value); + `endif + pendingFifo.deq; let ccDescriptor = PcieCompleterCompleteDescriptor { reserve0 : 0, attributes : cqDescriptor.attributes, @@ -157,7 +214,7 @@ module mkDmaH2CPipe(DmaH2CPipe); reserve3 : 0, addrType : cqDescriptor.addrType, reserve4 : 0, - lowerAddr : truncate(addr << valueOf(TLog#(DWORD_BYTES))) // Suppose all cq/cc requests are 32 bit aligned + lowerAddr : truncate(req.addr << valueOf(TLog#(DWORD_BYTES))) // Suppose all cq/cc requests are 32 bit aligned }; Data data = zeroExtend(pack(ccDescriptor)); data = data | (zeroExtend(value) << valueOf(DES_CC_DESCRIPTOR_WIDTH)); @@ -168,11 +225,13 @@ module mkDmaH2CPipe(DmaH2CPipe); isLast : True }; tlpOutFifo.enq(stream); - // $display($time, "ns SIM INFO @ mkDmaH2CPipe: output a cmpl tlp", fshow(stream)); + `ifdef H2C_DEBUG + $display($time, "ns SIM INFO @ mkDmaH2CPipe: output a cmpl tlp", fshow(stream)); end else begin $display($time, "ns SIM ERROR @ mkDmaH2CPipe: InValid rdResp with Addr %h, data %h and Expect Addr %h", addr, value, req.addr); end + `endif endrule // DMA Csr Ifc diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index 8c3530c..2cbdf2d 100755 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -120,6 +120,42 @@ instance FShow#(DataStream); endfunction endinstance +// Bdma User + +typedef struct { + DmaMemAddr addr; + DmaReqLen len; +} BdmaUserC2hRdReq deriving(Bits, FShow); + +typedef struct { + DataStream dataStream; +} BdmaUserC2hRdResp deriving(Bits, FShow); + +typedef struct { + DmaMemAddr addr; + DmaReqLen len; + DataStream dataStream; +} BdmaUserC2hWrReq deriving(Bits, FShow); + +typedef struct { +} BdmaUserC2hWrResp deriving(Bits, FShow); + +typedef struct { + DmaCsrAddr addr; + DmaCsrValue data; +} BdmaUserH2cWrReq deriving(Bits, FShow); + +typedef struct { +} BdmaUserH2cWrResp deriving(Bits, FShow); + +typedef struct { + DmaCsrAddr addr; +} BdmaUserH2cRdReq deriving(Bits, FShow); + +typedef struct { + DmaCsrValue data; +} BdmaUserH2cRdResp deriving(Bits, FShow); + // Straddle Parameters typedef TDiv#(DATA_WIDTH, PCIE_STRADDLE_NUM) STRADDLE_THRESH_BIT_WIDTH; diff --git a/src/DmaUtils.bsv b/src/DmaUtils.bsv index f37a193..071b010 100644 --- a/src/DmaUtils.bsv +++ b/src/DmaUtils.bsv @@ -135,9 +135,9 @@ module mkChunkComputer (TRXDirection direction, ChunkCompute ifc); endrule - interface dmaRequestFifoIn = convertFifoToFifoIn(inputFifo); + interface dmaRequestFifoIn = convertFifoToFifoIn(inputFifo); interface chunkRequestFifoOut = convertFifoToFifoOut(outputFifo); - interface reqCntFifoOut = convertFifoToFifoOut(rdReqCntFifo); + interface reqCntFifoOut = convertFifoToFifoOut(rdReqCntFifo); interface Put maxReadReqSize; method Action put (Tuple2#(TlpPayloadSize, TlpPayloadSizeWidth) mrrsCfg); @@ -156,6 +156,7 @@ endmodule interface ChunkSplit; interface FifoIn#(DataStream) dataFifoIn; interface FifoIn#(DmaExtendRequest) reqFifoIn; + interface FifoOut#(Bool) doneFifoOut; interface FifoOut#(DataStream) chunkDataFifoOut; interface FifoOut#(DmaRequest) chunkReqFifoOut; interface Put#(Tuple2#(TlpPayloadSize, TlpPayloadSizeWidth)) maxPayloadSize; @@ -165,6 +166,7 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); FIFOF#(DataStream) dataInFifo <- mkFIFOF; FIFOF#(DataStream) chunkOutFifo <- mkFIFOF; FIFOF#(DmaRequest) reqOutFifo <- mkFIFOF; + FIFOF#(Bool) doneFifo <- mkFIFOF; FIFOF#(DmaRequest) firstReqPipeFifo <- mkSizedFIFOF(valueOf(STREAM_SPLIT_LATENCY)); FIFOF#(DmaExtendRequest) reqInFifo <- mkFIFOF; @@ -268,6 +270,7 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); if (chunkReq.length == oriReq.length) begin nextStartAddr = 0; remainLen = 0; + doneFifo.enq(True); end else begin nextStartAddr = oriReq.startAddr + zeroExtend(chunkReq.length); @@ -290,6 +293,7 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); reqOutFifo.enq(chunkReq); nextStartAddr = 0; remainLen = 0; + doneFifo.enq(True); end else begin nextStartAddr = nextStartAddr + zeroExtend(tlpMaxSizeReg); @@ -305,8 +309,9 @@ module mkChunkSplit(TRXDirection direction, ChunkSplit ifc); chunkOutFifo.enq(stream); endrule - interface dataFifoIn = convertFifoToFifoIn(dataInFifo); - interface reqFifoIn = convertFifoToFifoIn(reqInFifo); + interface dataFifoIn = convertFifoToFifoIn(dataInFifo); + interface reqFifoIn = convertFifoToFifoIn(reqInFifo); + interface doneFifoOut = convertFifoToFifoOut(doneFifo); interface chunkDataFifoOut = convertFifoToFifoOut(chunkOutFifo); interface chunkReqFifoOut = convertFifoToFifoOut(reqOutFifo); diff --git a/src/DmaWrapper.bsv b/src/DmaWrapper.bsv index 3a544df..76f68dd 100755 --- a/src/DmaWrapper.bsv +++ b/src/DmaWrapper.bsv @@ -5,6 +5,7 @@ import Connectable :: *; import DReg::*; import GetPut::*; import BRAMFIFO::*; +import ClientServer::*; import SemiFifo::*; import BusConversion::*; @@ -21,6 +22,82 @@ import SimpleModeUtils::*; import TestUtils::*; // For Bsv User +interface BdmaControllerBypassWrapper; + // User Logic Ifc + interface Server#(BdmaUserC2hWrReq, BdmaUserC2hWrResp) c2hWrSrvA; + interface Server#(BdmaUserC2hRdReq, BdmaUserC2hRdResp) c2hRdSrvA; + interface Server#(BdmaUserC2hWrReq, BdmaUserC2hWrResp) c2hWrSrvB; + interface Server#(BdmaUserC2hRdReq, BdmaUserC2hRdResp) c2hRdSrvB; + // User Csr Ifc + interface Client#(BdmaUserH2cWrReq, BdmaUserH2cWrResp) csrWrClt; + interface Client#(BdmaUserH2cRdReq, BdmaUserH2cRdResp) csrRdClt; + + // Raw PCIe interfaces, connected to the Xilinx PCIe IP + (* prefix = "" *)interface RawXilinxPcieIp rawPcie; +endinterface + +(* synthesize *) +module mkBdmaControllerBypassWrapper(BdmaControllerBypassWrapper); + Wire#(Bool) linkUpWire <- mkWire; + Reg#(Bool) linkUpReg <- mkReg(False); + Reg#(Bool) cfgFlagReg <- mkDReg(False); + + BdmaC2HPipe c2hPipeA <- mkBdmaC2HPipe(0); + BdmaC2HPipe c2hPipeB <- mkBdmaC2HPipe(1); + BdmaH2CPipe h2cPipe <- mkBdmaH2CPipe; + + RequesterAxiStreamAdapter reqAdapter <- mkRequesterAxiStreamAdapter; + CompleterAxiStreamAdapter cmplAdapter <- mkCompleterAxiStreamAdapter; + + PcieConfigurator configurator <- mkPcieConfigurator; + + mkConnection(c2hPipeA.tlpDataFifoOut, reqAdapter.dmaDataFifoIn[0]); + mkConnection(c2hPipeA.tlpSideBandFifoOut, reqAdapter.dmaSideBandFifoIn[0]); + mkConnection(reqAdapter.dmaDataFifoOut[0], c2hPipeA.tlpDataFifoIn); + + mkConnection(c2hPipeB.tlpDataFifoOut, reqAdapter.dmaDataFifoIn[1]); + mkConnection(c2hPipeB.tlpSideBandFifoOut, reqAdapter.dmaSideBandFifoIn[1]); + mkConnection(reqAdapter.dmaDataFifoOut[1], c2hPipeB.tlpDataFifoIn); + + mkConnection(cmplAdapter.dmaDataFifoOut, h2cPipe.tlpDataFifoIn); + mkConnection(h2cPipe.tlpDataFifoOut, cmplAdapter.dmaDataFifoIn); + + rule detectLink if (linkUpWire && !linkUpReg); + configurator.initCfg; + cfgFlagReg <= True; + linkUpReg <= True; + $display($time, "ns SIM INFO @ BLUE-DMAC: PCIe link is up!"); + endrule + + rule setCfg if (cfgFlagReg); + let tlpSizeCfg <- configurator.tlpSizeCfg.get; + c2hPipeA.tlpSizeCfg.put(tlpSizeCfg); + c2hPipeB.tlpSizeCfg.put(tlpSizeCfg); + $display($time, "ns SIM INFO @ BLUE-DMAC: Get PCIe configurations, mps:%d, mrrs:%d", tlpSizeCfg.mps, tlpSizeCfg.mrrs); + endrule + + // User Logic Ifc + interface c2hWrSrvA = c2hPipeA.writeSrv; + interface c2hRdSrvA = c2hPipeA.readSrv; + interface c2hWrSrvB = c2hPipeB.writeSrv; + interface c2hRdSrvB = c2hPipeB.readSrv; + interface csrWrClt = h2cPipe.writeClt; + interface csrRdClt = h2cPipe.readClt; + + // Raw PCIe Ifc + interface RawXilinxPcieIp rawPcie; + interface requesterRequest = reqAdapter.rawRequesterRequest; + interface requesterComplete = reqAdapter.rawRequesterComplete; + interface completerRequest = cmplAdapter.rawCompleterRequest; + interface completerComplete = cmplAdapter.rawCompleterComplete; + interface configuration = configurator.rawConfiguration; + method Action linkUp(Bool isLinkUp); + linkUpWire <= isLinkUp; + endmethod + endinterface +endmodule + + // Native Blue-DMA Interface, the addrs in the req should be pa interface DmaController; // User Logic Ifc @@ -69,6 +146,9 @@ module mkDmaController(DmaController); mkConnection(c2hPipes[pathIdx].tlpDataFifoOut, reqAdapter.dmaDataFifoIn[pathIdx]); mkConnection(c2hPipes[pathIdx].tlpSideBandFifoOut, reqAdapter.dmaSideBandFifoIn[pathIdx]); mkConnection(reqAdapter.dmaDataFifoOut[pathIdx], c2hPipes[pathIdx].tlpDataFifoIn); + rule doneFlag; //TODO: let verilog interface has done signal + c2hPipes[pathIdx].doneFifoOut.deq; + endrule end mkConnection(cmplAdapter.dmaDataFifoOut, h2cPipe.tlpDataFifoIn); diff --git a/test/TestCompletionFifo.bsv b/test/TestCompletionFifo.bsv index fbb01d4..f2cbdf7 100644 --- a/test/TestCompletionFifo.bsv +++ b/test/TestCompletionFifo.bsv @@ -58,7 +58,7 @@ module mkCompletionFifoTb(Empty); rule getResponse if (initReg); outPtrReg <= outPtrReg == fromInteger(valueOf(TEST_SLOT_NUM)-1) ? 0 : outPtrReg + 1; if (!doneFlags[outPtrReg]) begin - if (reqDones[outPtrReg] <= reqs[outPtrReg]) begin + if (reqDones[outPtrReg] < reqs[outPtrReg]) begin reqDones[outPtrReg] <= reqDones[outPtrReg] + 1; dut.append.enq(tuple2(outPtrReg, zeroExtend(outPtrReg) << valueOf(TLog#(TEST_SLOT_NUM)) | zeroExtend(reqDones[outPtrReg]))); end From cb50bdc09f2bcaa1d75c99389eba428873683bd3 Mon Sep 17 00:00:00 2001 From: BIGWJZ Date: Fri, 8 Nov 2024 02:31:20 +0800 Subject: [PATCH 48/53] Add blue-rdma style interface --- src/DmaH2CPipe.bsv | 38 ++++++++++++++++++++++---------------- src/DmaTypes.bsv | 14 +++++++------- src/DmaWrapper.bsv | 15 +++++++++------ 3 files changed, 38 insertions(+), 29 deletions(-) diff --git a/src/DmaH2CPipe.bsv b/src/DmaH2CPipe.bsv index 4110560..126cfe9 100644 --- a/src/DmaH2CPipe.bsv +++ b/src/DmaH2CPipe.bsv @@ -17,39 +17,45 @@ typedef 4 IDEA_CC_CSR_BYTE_CNT; typedef 4 IDEA_FIRST_BE_HIGH_VALID_PTR_OF_CSR; // Wrapper between original dma pipe and blue-rdma style interface -interface BdmaH2CPipe; +interface BdmaH2CPipe#(numeric type sz_csr_addr, numeric type sz_csr_data); // User Ifc - interface Client#(BdmaUserH2cWrReq, BdmaUserH2cWrResp) writeClt; - interface Client#(BdmaUserH2cRdReq, BdmaUserH2cRdResp) readClt; + interface Client#(BdmaUserH2cWrReq#(sz_csr_addr, sz_csr_data), BdmaUserH2cWrResp) writeClt; + interface Client#(BdmaUserH2cRdReq#(sz_csr_addr), BdmaUserH2cRdResp#(sz_csr_data)) readClt; // Pcie Adapter Ifc interface FifoIn#(DataStream) tlpDataFifoIn; interface FifoOut#(DataStream) tlpDataFifoOut; endinterface -module mkBdmaH2CPipe(BdmaH2CPipe); +module mkBdmaH2CPipe(BdmaH2CPipe#(sz_csr_addr, sz_csr_data)) + provisos( + Add#(_a, sz_csr_addr, DMA_CSR_ADDR_WIDTH), + Add#(_b, sz_csr_data, DMA_CSR_DATA_WIDTH) + ); DmaH2CPipe pipe <- mkDmaH2CPipe; - FIFOF#(BdmaUserH2cWrReq) wrReqQ <- mkFIFOF; + FIFOF#(BdmaUserH2cWrReq#(sz_csr_addr, sz_csr_data)) wrReqQ <- mkFIFOF; FIFOF#(BdmaUserH2cWrResp) wrRespQ <- mkFIFOF; - FIFOF#(BdmaUserH2cRdReq) rdReqQ <- mkFIFOF; - FIFOF#(BdmaUserH2cRdResp) rdRespQ <- mkFIFOF; + FIFOF#(BdmaUserH2cRdReq#(sz_csr_addr)) rdReqQ <- mkFIFOF; + FIFOF#(BdmaUserH2cRdResp#(sz_csr_data)) rdRespQ <- mkFIFOF; rule forwardReq; let h2cReq = pipe.userReqFifoOut.first; pipe.userReqFifoOut.deq; if (h2cReq.isWrite) begin - wrReqQ.enq(BdmaUserH2cWrReq { - addr: h2cReq.addr, - data: h2cReq.value - }); + BdmaUserH2cWrReq#(sz_csr_addr, sz_csr_data) wrReq = BdmaUserH2cWrReq { + addr: truncate(h2cReq.addr), + data: truncate(h2cReq.value) + }; + wrReqQ.enq(wrReq); end else begin - rdReqQ.enq(BdmaUserH2cRdReq { - addr: h2cReq.addr - }); + BdmaUserH2cRdReq#(sz_csr_addr) rdReq = BdmaUserH2cRdReq { + addr: truncate(h2cReq.addr) + }; + rdReqQ.enq(rdReq); end endrule - + rule handleWrResp; wrRespQ.deq; endrule @@ -59,7 +65,7 @@ module mkBdmaH2CPipe(BdmaH2CPipe); rdRespQ.deq; pipe.userRespFifoIn.enq(CsrResponse{ addr : 0, - value: value + value: zeroExtend(value) }); endrule diff --git a/src/DmaTypes.bsv b/src/DmaTypes.bsv index 2cbdf2d..badb849 100755 --- a/src/DmaTypes.bsv +++ b/src/DmaTypes.bsv @@ -141,20 +141,20 @@ typedef struct { } BdmaUserC2hWrResp deriving(Bits, FShow); typedef struct { - DmaCsrAddr addr; - DmaCsrValue data; -} BdmaUserH2cWrReq deriving(Bits, FShow); + Bit#(sz_csr_addr) addr; + Bit#(sz_csr_data) data; +} BdmaUserH2cWrReq#(numeric type sz_csr_addr, numeric type sz_csr_data) deriving(Bits, FShow); typedef struct { } BdmaUserH2cWrResp deriving(Bits, FShow); typedef struct { - DmaCsrAddr addr; -} BdmaUserH2cRdReq deriving(Bits, FShow); + Bit#(sz_csr_addr) addr; +} BdmaUserH2cRdReq#(numeric type sz_csr_addr) deriving(Bits, FShow); typedef struct { - DmaCsrValue data; -} BdmaUserH2cRdResp deriving(Bits, FShow); + Bit#(sz_csr_data) data; +} BdmaUserH2cRdResp#(numeric type sz_csr_data) deriving(Bits, FShow); // Straddle Parameters diff --git a/src/DmaWrapper.bsv b/src/DmaWrapper.bsv index 76f68dd..97154ef 100755 --- a/src/DmaWrapper.bsv +++ b/src/DmaWrapper.bsv @@ -22,29 +22,32 @@ import SimpleModeUtils::*; import TestUtils::*; // For Bsv User -interface BdmaControllerBypassWrapper; +interface BdmaControllerBypassWrapper#(numeric type sz_csr_addr, numeric type sz_csr_data); // User Logic Ifc interface Server#(BdmaUserC2hWrReq, BdmaUserC2hWrResp) c2hWrSrvA; interface Server#(BdmaUserC2hRdReq, BdmaUserC2hRdResp) c2hRdSrvA; interface Server#(BdmaUserC2hWrReq, BdmaUserC2hWrResp) c2hWrSrvB; interface Server#(BdmaUserC2hRdReq, BdmaUserC2hRdResp) c2hRdSrvB; // User Csr Ifc - interface Client#(BdmaUserH2cWrReq, BdmaUserH2cWrResp) csrWrClt; - interface Client#(BdmaUserH2cRdReq, BdmaUserH2cRdResp) csrRdClt; + interface Client#(BdmaUserH2cWrReq#(sz_csr_addr, sz_csr_data), BdmaUserH2cWrResp) csrWrClt; + interface Client#(BdmaUserH2cRdReq#(sz_csr_addr), BdmaUserH2cRdResp#(sz_csr_data)) csrRdClt; // Raw PCIe interfaces, connected to the Xilinx PCIe IP (* prefix = "" *)interface RawXilinxPcieIp rawPcie; endinterface -(* synthesize *) -module mkBdmaControllerBypassWrapper(BdmaControllerBypassWrapper); +module mkBdmaControllerBypassWrapper(BdmaControllerBypassWrapper#(sz_csr_addr, sz_csr_data)) + provisos( + Add#(_a, sz_csr_addr, DMA_CSR_ADDR_WIDTH), + Add#(_b, sz_csr_data, DMA_CSR_DATA_WIDTH) + ); Wire#(Bool) linkUpWire <- mkWire; Reg#(Bool) linkUpReg <- mkReg(False); Reg#(Bool) cfgFlagReg <- mkDReg(False); BdmaC2HPipe c2hPipeA <- mkBdmaC2HPipe(0); BdmaC2HPipe c2hPipeB <- mkBdmaC2HPipe(1); - BdmaH2CPipe h2cPipe <- mkBdmaH2CPipe; + BdmaH2CPipe#(sz_csr_addr, sz_csr_data) h2cPipe <- mkBdmaH2CPipe; RequesterAxiStreamAdapter reqAdapter <- mkRequesterAxiStreamAdapter; CompleterAxiStreamAdapter cmplAdapter <- mkCompleterAxiStreamAdapter; From e146e69d81fdbe7a489ad5ce76749feb994b110f Mon Sep 17 00:00:00 2001 From: BIGWJZ Date: Fri, 8 Nov 2024 02:50:33 +0800 Subject: [PATCH 49/53] avoid same name file in blue-rdma --- src/{PrimUtils.bsv => BdmaPrimUtils.bsv} | 0 src/DmaC2HPipe.bsv | 2 +- src/DmaH2CPipe.bsv | 2 +- src/DmaUtils.bsv | 2 +- src/PcieAdapter.bsv | 2 +- src/StreamUtils.bsv | 2 +- test/TestCompletionFifo.bsv | 2 +- test/TestDmaCompleter.bsv | 2 +- test/TestDmaCore.bsv | 2 +- test/TestStreamUtils.bsv | 2 +- 10 files changed, 9 insertions(+), 9 deletions(-) rename src/{PrimUtils.bsv => BdmaPrimUtils.bsv} (100%) diff --git a/src/PrimUtils.bsv b/src/BdmaPrimUtils.bsv similarity index 100% rename from src/PrimUtils.bsv rename to src/BdmaPrimUtils.bsv diff --git a/src/DmaC2HPipe.bsv b/src/DmaC2HPipe.bsv index 5deb7fe..2da40ca 100644 --- a/src/DmaC2HPipe.bsv +++ b/src/DmaC2HPipe.bsv @@ -5,7 +5,7 @@ import Connectable::*; import ClientServer::*; import SemiFifo::*; -import PrimUtils::*; +import BdmaPrimUtils::*; import StreamUtils::*; import PcieTypes::*; import DmaTypes::*; diff --git a/src/DmaH2CPipe.bsv b/src/DmaH2CPipe.bsv index 126cfe9..a1274e5 100644 --- a/src/DmaH2CPipe.bsv +++ b/src/DmaH2CPipe.bsv @@ -4,7 +4,7 @@ import RegFile::*; import ClientServer::*; import SemiFifo::*; -import PrimUtils::*; +import BdmaPrimUtils::*; import PcieAxiStreamTypes::*; import PcieTypes::*; import PcieDescriptorTypes::*; diff --git a/src/DmaUtils.bsv b/src/DmaUtils.bsv index 071b010..3322e13 100644 --- a/src/DmaUtils.bsv +++ b/src/DmaUtils.bsv @@ -6,7 +6,7 @@ import SemiFifo::*; import PcieTypes::*; import DmaTypes::*; import PcieAxiStreamTypes::*; -import PrimUtils::*; +import BdmaPrimUtils::*; import StreamUtils::*; import PcieDescriptorTypes::*; diff --git a/src/PcieAdapter.bsv b/src/PcieAdapter.bsv index 15d2943..5595a55 100644 --- a/src/PcieAdapter.bsv +++ b/src/PcieAdapter.bsv @@ -6,7 +6,7 @@ import SemiFifo::*; import PcieTypes::*; import DmaTypes::*; import PcieAxiStreamTypes::*; -import PrimUtils::*; +import BdmaPrimUtils::*; import StreamUtils::*; import PcieDescriptorTypes::*; import CompletionFifo::*; diff --git a/src/StreamUtils.bsv b/src/StreamUtils.bsv index 93d1087..a839e40 100755 --- a/src/StreamUtils.bsv +++ b/src/StreamUtils.bsv @@ -4,7 +4,7 @@ import GetPut::*; import Connectable::*; import SemiFifo::*; -import PrimUtils::*; +import BdmaPrimUtils::*; import DmaTypes::*; import PcieAxiStreamTypes::*; diff --git a/test/TestCompletionFifo.bsv b/test/TestCompletionFifo.bsv index f2cbdf7..76e48f4 100644 --- a/test/TestCompletionFifo.bsv +++ b/test/TestCompletionFifo.bsv @@ -7,7 +7,7 @@ import Vector::*; import SemiFifo::*; import CompletionFifo::*; -import PrimUtils::*; +import BdmaPrimUtils::*; import PcieAxiStreamTypes::*; import DmaTypes::*; diff --git a/test/TestDmaCompleter.bsv b/test/TestDmaCompleter.bsv index 03d03d3..05b9d5d 100644 --- a/test/TestDmaCompleter.bsv +++ b/test/TestDmaCompleter.bsv @@ -4,7 +4,7 @@ import Vector::*; import FShow::*; import SemiFifo::*; -import PrimUtils::*; +import BdmaPrimUtils::*; import PcieAxiStreamTypes::*; import PcieTypes::*; import PcieDescriptorTypes::*; diff --git a/test/TestDmaCore.bsv b/test/TestDmaCore.bsv index da5fe74..7582eeb 100755 --- a/test/TestDmaCore.bsv +++ b/test/TestDmaCore.bsv @@ -6,7 +6,7 @@ import Connectable::*; import SemiFifo::*; import PcieAxiStreamTypes::*; import DmaTypes::*; -import PrimUtils::*; +import BdmaPrimUtils::*; import PcieTypes::*; import PcieDescriptorTypes::*; import StreamUtils::*; diff --git a/test/TestStreamUtils.bsv b/test/TestStreamUtils.bsv index d5996ca..f7a42a3 100755 --- a/test/TestStreamUtils.bsv +++ b/test/TestStreamUtils.bsv @@ -3,7 +3,7 @@ import SemiFifo::*; import LFSR::*; import Vector::*; -import PrimUtils::*; +import BdmaPrimUtils::*; import DmaTypes::*; import StreamUtils::*; From cb2073ca9680b57923fe68fc9fb26253071227cb Mon Sep 17 00:00:00 2001 From: BIGWJZ Date: Fri, 29 Nov 2024 14:56:54 +0800 Subject: [PATCH 50/53] Fix User Bar Problems --- src/DmaC2HPipe.bsv | 43 ++++++++++--- src/DmaH2CPipe.bsv | 153 ++++++++++++++++++++++----------------------- 2 files changed, 111 insertions(+), 85 deletions(-) diff --git a/src/DmaC2HPipe.bsv b/src/DmaC2HPipe.bsv index 2da40ca..58c57e3 100644 --- a/src/DmaC2HPipe.bsv +++ b/src/DmaC2HPipe.bsv @@ -34,6 +34,7 @@ module mkBdmaC2HPipe#(DmaPathNo pathIdx)(BdmaC2HPipe); C2HWriteCore writeCore <- mkC2HWriteCore(pathIdx); Reg#(Bool) isInitDoneReg <- mkReg(False); + Reg#(Bool) isInWriteCoreOutputReg <- mkReg(False); FIFOF#(BdmaUserC2hWrReq) wrReqInFifo <- mkFIFOF; FIFOF#(BdmaUserC2hWrResp) wrRespOutFifo <- mkFIFOF; @@ -43,7 +44,7 @@ module mkBdmaC2HPipe#(DmaPathNo pathIdx)(BdmaC2HPipe); FIFOF#(DataStream) tlpOutFifo <- mkFIFOF; FIFOF#(SideBandByteEn) tlpSideBandFifo <- mkFIFOF; - rule forwardWrReq; + rule forwardWrReq if (isInitDoneReg); let req = wrReqInFifo.first; wrReqInFifo.deq; writeCore.dataFifoIn.enq(req.dataStream); @@ -52,15 +53,17 @@ module mkBdmaC2HPipe#(DmaPathNo pathIdx)(BdmaC2HPipe); length : req.len, isWrite : True }); + $display($time, "ns SIM INFO @ mkBdmaC2HPipe%d: recv new request, startAddr:%d length:%d isWrite:%b", + pathIdx, req.addr, req.len, 1); endrule - rule forwardWrResp; + rule forwardWrResp if (isInitDoneReg); let rv = writeCore.doneFifoOut.first; writeCore.doneFifoOut.deq; wrRespOutFifo.enq(BdmaUserC2hWrResp{ }); endrule - rule forwardRdReq; + rule forwardRdReq if (isInitDoneReg); let req = rdReqInFifo.first; rdReqInFifo.deq; readCore.rdReqFifoIn.enq(DmaRequest { @@ -68,9 +71,11 @@ module mkBdmaC2HPipe#(DmaPathNo pathIdx)(BdmaC2HPipe); length : req.len, isWrite : False }); + $display($time, "ns SIM INFO @ mkBdmaC2HPipe%d: recv new request, startAddr:%d length:%d isWrite:%b", + pathIdx, req.addr, req.len, 0); endrule - rule forwardRdResp; + rule forwardRdResp if (isInitDoneReg); let stream = readCore.dataFifoOut.first; readCore.dataFifoOut.deq; rdRespOutFifo.enq(BdmaUserC2hRdResp{ @@ -78,6 +83,30 @@ module mkBdmaC2HPipe#(DmaPathNo pathIdx)(BdmaC2HPipe); }); endrule + rule muxTlpOut; + if (isInWriteCoreOutputReg) begin + let tlpStream = writeCore.tlpFifoOut.first; + tlpOutFifo.enq(tlpStream); + writeCore.tlpFifoOut.deq; + isInWriteCoreOutputReg <= !tlpStream.isLast; + end + else begin + if (readCore.tlpFifoOut.notEmpty) begin + tlpOutFifo.enq(readCore.tlpFifoOut.first); + tlpSideBandFifo.enq(readCore.tlpSideBandFifoOut.first); + readCore.tlpFifoOut.deq; + readCore.tlpSideBandFifoOut.deq; + end + else begin + tlpOutFifo.enq(writeCore.tlpFifoOut.first); + tlpSideBandFifo.enq(writeCore.tlpSideBandFifoOut.first); + writeCore.tlpFifoOut.deq; + writeCore.tlpSideBandFifoOut.deq; + isInWriteCoreOutputReg <= !writeCore.tlpFifoOut.first.isLast; + end + end + endrule + // User Ifc interface readSrv = toGPServer(rdReqInFifo, rdRespOutFifo); interface writeSrv = toGPServer(wrReqInFifo, wrRespOutFifo); @@ -308,7 +337,7 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); let stream = cBuffer.drain.first; cBuffer.drain.deq; reshapeRcb.streamFifoIn.enq(stream); - $display("cbuf output", fshow(stream)); + // $display("cbuf output", fshow(stream)); endrule // Pipeline stage 4: there may be bubbles in the first and last DataStream of a request because of MRRS split @@ -357,7 +386,7 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); tag : convertSlotTokenToTag(token, pathIdx) }; rqDescGenerator.exReqFifoIn.enq(exReq); - $display($time, "ns SIM INFO @ mkDmaC2HReadCore%d: tx a new read chunk, tag:%d, addr:%d, length:%d", pathIdx, exReq.tag, req.startAddr, req.length); + // $display($time, "ns SIM INFO @ mkDmaC2HReadCore%d: tx a new read chunk, tag:%d, addr:%d, length:%d", pathIdx, exReq.tag, req.startAddr, req.length); endrule // Pipeline stage 3: generate Tlp to PCIe Adapter @@ -370,7 +399,7 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); stream.isLast = True; tlpOutFifo.enq(stream); tlpByteEnFifo.enq(sideBandByteEn); - // $display($time, "ns SIM INFO @ mkDmaC2HReadCore: output new tlp, BE:%h/%h", tpl_1(sideBandByteEn), tpl_2(sideBandByteEn)); + // $display($time, "ns SIM INFO @ mkDmaC2HReadCore%d: output new tlp, BE:%h/%h", pathIdx, tpl_1(sideBandByteEn), tpl_2(sideBandByteEn)); endrule // User Logic Ifc diff --git a/src/DmaH2CPipe.bsv b/src/DmaH2CPipe.bsv index a1274e5..748a47c 100644 --- a/src/DmaH2CPipe.bsv +++ b/src/DmaH2CPipe.bsv @@ -63,6 +63,7 @@ module mkBdmaH2CPipe(BdmaH2CPipe#(sz_csr_addr, sz_csr_data)) rule handleRdResp; let value = rdRespQ.first.data; rdRespQ.deq; + $display("Get Blue-Rdma Register, value:%d\n", value); pipe.userRespFifoIn.enq(CsrResponse{ addr : 0, value: zeroExtend(value) @@ -123,11 +124,10 @@ module mkDmaH2CPipe(DmaH2CPipe); DataBytePtr csrCmplBytes = fromInteger(valueOf(TDiv#(TAdd#(DES_CC_DESCRIPTOR_WIDTH ,DMA_CSR_DATA_WIDTH), BYTE_WIDTH))); - // This function returns DW addr pointing to inner registers, where byteAddr = DWordAddr << 2 - // The registers in the hw are all of 32bit DW type - function DmaCsrAddr getCsrAddrFromCqDescriptor(PcieCompleterRequestDescriptor descriptor); - // Only care about low bits, because the offset is allocated. - let addr = getAddrLowBits(zeroExtend(descriptor.address), descriptor.barAperture); + // The return address of this function is aligned to BYTE + function DmaCsrAddr getBarAddrFromCqDescriptor(PcieCompleterRequestDescriptor descriptor); + // Only care about low bits, because the offset is pre-assigned and not important. + let addr = getAddrLowBits(zeroExtend(descriptor.address) << valueOf(TLog#(DWORD_BYTES)), descriptor.barAperture); return truncate(addr); endfunction @@ -137,50 +137,49 @@ module mkDmaH2CPipe(DmaH2CPipe); isInPacket <= !stream.isLast; if (!isInPacket) begin let descriptor = getDescriptorFromFirstBeat(stream); - case (descriptor.reqType) - fromInteger(valueOf(MEM_WRITE_REQ)): begin - // $display($time, "ns SIM INFO @ mkDmaH2CPipe: MemWrite Detect!"); - let firstData = getDataFromFirstBeat(stream); - DmaCsrValue wrValue = truncate(firstData); - let wrAddr = getCsrAddrFromCqDescriptor(descriptor); - if (descriptor.dwordCnt == fromInteger(valueOf(IDEA_CQ_CSR_DWORD_CNT))) begin - // $display($time, "ns SIM INFO @ mkDmaH2CPipe: Valid wrReq with Addr %d, data %h", wrAddr, wrValue); + if (descriptor.dwordCnt == fromInteger(valueOf(IDEA_CQ_CSR_DWORD_CNT))) begin + case (descriptor.reqType) + fromInteger(valueOf(MEM_WRITE_REQ)): begin + let firstData = getDataFromFirstBeat(stream); + DmaCsrValue wrValue = truncate(firstData); + let wrAddr = getBarAddrFromCqDescriptor(descriptor); let req = CsrRequest { addr : wrAddr, value : wrValue, isWrite : True }; if (descriptor.barId == 0) begin + req.addr = req.addr >> valueOf(TLog#(DWORD_BYTES)); reqOutFifo.enq(req); end else if (descriptor.barId == 1) begin userOutFifo.enq(req); end end - else begin - $display($time, "ns SIM INFO @ mkDmaH2CPipe: Invalid wrReq with Addr %d, data %h", wrAddr, wrValue); - illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; - end - end - fromInteger(valueOf(MEM_READ_REQ)): begin - // $display($time, "ns SIM INFO @ mkDmaH2CPipe: MemRead Detect!"); - let rdAddr = getCsrAddrFromCqDescriptor(descriptor); - let req = CsrRequest{ - addr : rdAddr, - value : 0, - isWrite : False - }; - $display($time, "ns SIM INFO @ mkDmaH2CPipe: Valid rdReq with Addr %h", rdAddr << valueOf(TLog#(DWORD_BYTES))); - if (descriptor.barId == 0) begin - reqOutFifo.enq(req); - end - else if (descriptor.barId == 1) begin - userOutFifo.enq(req); + fromInteger(valueOf(MEM_READ_REQ)): begin + let rdAddr = getBarAddrFromCqDescriptor(descriptor); + let req = CsrRequest{ + addr : rdAddr, + value : 0, + isWrite : False + }; + if (descriptor.barId == 0) begin + req.addr = req.addr >> valueOf(TLog#(DWORD_BYTES)); + reqOutFifo.enq(req); + end + else if (descriptor.barId == 1) begin + userOutFifo.enq(req); + $display($time, "ns SIM INFO @ mkDmaH2CPipe: Valid User Bar rdReq, addr %h", getBarAddrFromCqDescriptor(descriptor)); + end + pendingFifo.enq(tuple2(req, descriptor)); end - pendingFifo.enq(tuple2(req, descriptor)); - end - default: illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; - endcase + default: illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; + endcase + end + else begin + $display($time, "ns SIM INFO @ mkDmaH2CPipe: Invalid req with Addr %d, dwCnt %d", getBarAddrFromCqDescriptor(descriptor), descriptor.dwordCnt); + illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; + end end endrule @@ -188,56 +187,54 @@ module mkDmaH2CPipe(DmaH2CPipe); CsrResponse resp = getEmptyCsrResponse; if (respInFifo.notEmpty) begin resp = respInFifo.first; + resp.addr = resp.addr << valueOf(TLog#(DWORD_BYTES)); respInFifo.deq; end - else if (userInFifo.notEmpty) begin + else begin resp = userInFifo.first; userInFifo.deq; end let addr = resp.addr; let value = resp.value; let {req, cqDescriptor} = pendingFifo.first; - `ifdef H2C_DEBUG - if (addr == req.addr) begin - $display($time, "ns SIM INFO @ mkDmaH2CPipe: Valid rdResp with Addr %h, data %h", addr, value); - `endif - pendingFifo.deq; - let ccDescriptor = PcieCompleterCompleteDescriptor { - reserve0 : 0, - attributes : cqDescriptor.attributes, - trafficClass : cqDescriptor.trafficClass, - completerIdEn : False, - completerId : 0, - tag : cqDescriptor.tag, - requesterId : cqDescriptor.requesterId, - reserve1 : 0, - isPoisoned : False, - status : fromInteger(valueOf(DES_CC_STAUS_SUCCESS)), - dwordCnt : fromInteger(valueOf(IDEA_CC_CSR_DWORD_CNT)), - reserve2 : 0, - isLockedReadCmpl: False, - byteCnt : fromInteger(valueOf(IDEA_CC_CSR_BYTE_CNT)), - reserve3 : 0, - addrType : cqDescriptor.addrType, - reserve4 : 0, - lowerAddr : truncate(req.addr << valueOf(TLog#(DWORD_BYTES))) // Suppose all cq/cc requests are 32 bit aligned - }; - Data data = zeroExtend(pack(ccDescriptor)); - data = data | (zeroExtend(value) << valueOf(DES_CC_DESCRIPTOR_WIDTH)); - let stream = DataStream { - data : data, - byteEn : convertBytePtr2ByteEn(csrCmplBytes), - isFirst : True, - isLast : True - }; - tlpOutFifo.enq(stream); - `ifdef H2C_DEBUG - $display($time, "ns SIM INFO @ mkDmaH2CPipe: output a cmpl tlp", fshow(stream)); - end - else begin - $display($time, "ns SIM ERROR @ mkDmaH2CPipe: InValid rdResp with Addr %h, data %h and Expect Addr %h", addr, value, req.addr); - end - `endif + + // if (addr == req.addr || addr == 0) begin + // $display($time, "ns SIM INFO @ mkDmaH2CPipe: Valid rdResp with Addr %d, value %d", req.addr, value); + // end + // else begin + // $display($time, "ns SIM ERROR @ mkDmaH2CPipe: InValid rdResp with Addr %d, value %d and Expect Addr %d", addr, value, req.addr); + // end + + pendingFifo.deq; + let ccDescriptor = PcieCompleterCompleteDescriptor { + reserve0 : 0, + attributes : cqDescriptor.attributes, + trafficClass : cqDescriptor.trafficClass, + completerIdEn : False, + completerId : 0, + tag : cqDescriptor.tag, + requesterId : cqDescriptor.requesterId, + reserve1 : 0, + isPoisoned : False, + status : fromInteger(valueOf(DES_CC_STAUS_SUCCESS)), + dwordCnt : fromInteger(valueOf(IDEA_CC_CSR_DWORD_CNT)), + reserve2 : 0, + isLockedReadCmpl: False, + byteCnt : fromInteger(valueOf(IDEA_CC_CSR_BYTE_CNT)), + reserve3 : 0, + addrType : cqDescriptor.addrType, + reserve4 : 0, + lowerAddr : truncate(req.addr) + }; + Data data = zeroExtend(pack(ccDescriptor)); + data = data | (zeroExtend(value) << valueOf(DES_CC_DESCRIPTOR_WIDTH)); + let stream = DataStream { + data : data, + byteEn : convertBytePtr2ByteEn(csrCmplBytes), + isFirst : True, + isLast : True + }; + tlpOutFifo.enq(stream); endrule // DMA Csr Ifc From 30f7649461733b0bb103f2c4def15c603f86c9ad Mon Sep 17 00:00:00 2001 From: BIGWJZ Date: Tue, 3 Dec 2024 22:45:14 +0800 Subject: [PATCH 51/53] Fix C2HPipe.reshapeMRRS and cocotb.loop --- cocotb/bdmatb.py | 8 ++++---- cocotb/loop_write_read/dma_loop_tb.py | 3 +-- src/DmaC2HPipe.bsv | 2 +- src/DmaH2CPipe.bsv | 3 +-- src/SimpleModeUtils.bsv | 8 ++++---- 5 files changed, 11 insertions(+), 13 deletions(-) diff --git a/cocotb/bdmatb.py b/cocotb/bdmatb.py index 5dcd52b..3ef5198 100644 --- a/cocotb/bdmatb.py +++ b/cocotb/bdmatb.py @@ -389,8 +389,8 @@ async def write_pa_table(self, channel, page_offset, pa): page_offset = page_offset & 0x1FF paLo = pa & 0xFFFFFFFF paHi = (pa >> 32) & 0xFFFFFFFF - await self.write_register(base_addr + 2*page_offset, paLo) - await self.write_register(base_addr + 2*page_offset + 1, paHi) + await self.write_register(base_addr + 2*page_offset + 1, paLo) + await self.write_register(base_addr + 2*page_offset, paHi) async def memory_map(self): self.log.info("BdmaTb: Starting memory map...") @@ -438,8 +438,8 @@ async def write_pa_table(self, channel, page_offset, pa): page_offset = page_offset & 0x1FF paLo = pa & 0xFFFFFFFF paHi = (pa >> 32) & 0xFFFFFFFF - await self.write_register(base_addr + 2*page_offset, paLo) - await self.write_register(base_addr + 2*page_offset + 1, paHi) + await self.write_register(base_addr + 2*page_offset + 1, paLo) + await self.write_register(base_addr + 2*page_offset, paHi) async def memory_map(self): self.log.info("BdmaTb: Starting memory map...") diff --git a/cocotb/loop_write_read/dma_loop_tb.py b/cocotb/loop_write_read/dma_loop_tb.py index c22ca2a..a2dfc4b 100644 --- a/cocotb/loop_write_read/dma_loop_tb.py +++ b/cocotb/loop_write_read/dma_loop_tb.py @@ -17,13 +17,12 @@ async def loop_write_read_once(pcie_tb, mem): # addr, length = pcie_tb.gen_random_req(0) addr = 1 - length = 129 + length = 2378 addr = mem.get_absolute_address(addr) char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") data = char * length mem[addr:addr+length] = data await pcie_tb.run_single_read_once(0, addr, length) - await Timer(length, units='ns') new_addr = addr + 8192 await pcie_tb.run_single_write_once(0, new_addr, length) await Timer(200+4*length, units='ns') diff --git a/src/DmaC2HPipe.bsv b/src/DmaC2HPipe.bsv index 58c57e3..360616f 100644 --- a/src/DmaC2HPipe.bsv +++ b/src/DmaC2HPipe.bsv @@ -167,7 +167,6 @@ module mkDmaC2HPipe#(DmaPathNo pathIdx)(DmaC2HPipe); else begin readCore.rdReqFifoIn.enq(req); end - $display(" "); $display($time, "ns SIM INFO @ mkDmaC2HPipe%d: recv new request, startAddr:%d length:%d isWrite:%b", pathIdx, req.startAddr, req.length, pack(req.isWrite)); endrule @@ -358,6 +357,7 @@ module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); end rcvReqCntReg <= rcvReqCnt; end + stream.isFirst = stream.isFirst && (rcvReqCntReg == 1); reshapeMrrs.streamFifoIn.enq(stream); endrule diff --git a/src/DmaH2CPipe.bsv b/src/DmaH2CPipe.bsv index 748a47c..1dbfe55 100644 --- a/src/DmaH2CPipe.bsv +++ b/src/DmaH2CPipe.bsv @@ -63,7 +63,6 @@ module mkBdmaH2CPipe(BdmaH2CPipe#(sz_csr_addr, sz_csr_data)) rule handleRdResp; let value = rdRespQ.first.data; rdRespQ.deq; - $display("Get Blue-Rdma Register, value:%d\n", value); pipe.userRespFifoIn.enq(CsrResponse{ addr : 0, value: zeroExtend(value) @@ -169,7 +168,7 @@ module mkDmaH2CPipe(DmaH2CPipe); end else if (descriptor.barId == 1) begin userOutFifo.enq(req); - $display($time, "ns SIM INFO @ mkDmaH2CPipe: Valid User Bar rdReq, addr %h", getBarAddrFromCqDescriptor(descriptor)); + // $display($time, "ns SIM INFO @ mkDmaH2CPipe: Valid User Bar rdReq, addr %h", getBarAddrFromCqDescriptor(descriptor)); end pendingFifo.enq(tuple2(req, descriptor)); end diff --git a/src/SimpleModeUtils.bsv b/src/SimpleModeUtils.bsv index 3db24d5..439a971 100644 --- a/src/SimpleModeUtils.bsv +++ b/src/SimpleModeUtils.bsv @@ -195,11 +195,11 @@ module mkPhyAddrBram(PhyAddrBram); }; if (isLoAddr(paSet.addr)) begin phyAddrLoBram.portA.request.put(bramReq); - // $display($time, "ns SIM INFO @ mkPhyAddrBram: pa writing, va offset:%d, mapping pa low:%h", bramAddr, bramReq.datain ); + $display($time, "ns SIM INFO @ mkPhyAddrBram: pa writing, va offset:%d, mapping pa low:%h", bramAddr, bramReq.datain ); end else begin phyAddrHiBram.portA.request.put(bramReq); - // $display($time, "ns SIM INFO @ mkPhyAddrBram: pa writing, va offset:%d, mapping pa low:%h", bramAddr, bramReq.datain); + $display($time, "ns SIM INFO @ mkPhyAddrBram: pa writing, va offset:%d, mapping pa high:%h", bramAddr, bramReq.datain); end end @@ -216,7 +216,7 @@ module mkPhyAddrBram(PhyAddrBram); phyAddrLoBram.portA.request.put(bramReq); phyAddrHiBram.portA.request.put(bramReq); pendingFifo.enq(vaReq); - // $display($time, "ns SIM INFO @ mkPhyAddrBram: receive pa mapping request, va:%h", vaReq.startAddr); + $display($time, "ns SIM INFO @ mkPhyAddrBram: receive pa mapping request, va:%h, bramAddr:%d", vaReq.startAddr, bramReq.address); end endrule @@ -226,7 +226,7 @@ module mkPhyAddrBram(PhyAddrBram); DmaMemAddr pa = doubleExtend(pa_lo, pa_hi); let oriReq = pendingFifo.first; pendingFifo.deq; - $display($time, "ns SIM INFO @ mkPhyAddrBram: got a pa mapping, va:%h pa:%h", oriReq.startAddr, pa); + $display($time, "ns SIM INFO @ mkPhyAddrBram: got a pa mapping, va:%h pa:%h pa_lo:%h pa_hi:%h", oriReq.startAddr, pa, pa_lo, pa_hi); oriReq.startAddr = pa | (oriReq.startAddr & pageMask); paReqFifo.enq(oriReq); endrule From 0c5ac0dde795f58b493e71c562100ba2141b9b20 Mon Sep 17 00:00:00 2001 From: BIGWJZ Date: Tue, 3 Dec 2024 22:48:14 +0800 Subject: [PATCH 52/53] Fix TlpOutMux --- .gitignore | 4 +- backend/Makefile | 45 +- backend/build_top.tcl | 328 ++++++++ backend/ips/pcie/pcie.tcl | 38 + .../ooc_tcl_and_xdc/bsv_ooc_module_common.tcl | 18 + .../ooc_tcl_and_xdc/bsv_ooc_module_common.xdc | 1 + backend/u200/top.v | 705 ++++++++++++++++++ backend/u200/xdc/u200_pcie.xdc | 162 ++++ backend/{ => vu13p}/top.v | 0 cocotb/cocotb.yaml | 41 + src/DmaC2HPipe.bsv | 43 +- 11 files changed, 1362 insertions(+), 23 deletions(-) create mode 100644 backend/build_top.tcl create mode 100644 backend/ips/pcie/pcie.tcl create mode 100644 backend/ooc_tcl_and_xdc/bsv_ooc_module_common.tcl create mode 100644 backend/ooc_tcl_and_xdc/bsv_ooc_module_common.xdc create mode 100644 backend/u200/top.v create mode 100644 backend/u200/xdc/u200_pcie.xdc rename backend/{ => vu13p}/top.v (100%) create mode 100644 cocotb/cocotb.yaml diff --git a/.gitignore b/.gitignore index 0190e22..3245ab7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ **/build/** **/verilog/** **/*.log -img/*.drawio \ No newline at end of file +img/*.drawio +**/output/** +**/.Xil/** \ No newline at end of file diff --git a/backend/Makefile b/backend/Makefile index 4baf722..58a559a 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -5,22 +5,35 @@ CLK ?= rdma_clock OOC ?= 1 VLOGDIR ?= verilog OUTPUTDIR ?= output -LOGFILE ?= run.log -RUNTOPHASE ?= place # synth place route all -PARTNAME = xcvu13p-fhgb2104-2-i +LOGFILE ?= run_vivado.log +RUNTOPHASE ?= synth # synth place route all +# vu13p, u200 +TARGETPLATFORM ?= u200 +# xcvu13p-fhgb2104-2-i, xcu200-fsgd2104-2-e +PARTNAME = xcu200-fsgd2104-2-e TARGETFILE ?= ../src/DmaWrapper.bsv #MODULE List: mkRawSimpleDmaController mkRawBypassDmaController mkRawTestDmaController -TOPMODULE ?= mkRawSimpleDmaController - -export TOP = $(TOPMODULE) -export RTL = $(VLOGDIR) -export XDC = $(TCLDIR) -export IPS = $(SRCDIR)/ip/$(PARTNAME) -export CLOCKS = $(CLK) -export OUTPUT = $(OUTPUTDIR) +TOPMODULE ?= mkRawTestDmaController + +BACKENDDIR ?= . + +VERILOG_TOPMODULE ?= top +MAX_NET_PATH_NUM ?= 100000 + +export DIR_RTL = $(VLOGDIR) +export DIR_BOARD = $(TARGETPLATFORM) +export DIR_XDC = $(TARGETPLATFORM)/$(TCLDIR) +export DIR_OOC_SCRIPTS = $(BACKENDDIR)/ooc_tcl_and_xdc +export DIR_IPS = $(BACKENDDIR)/ips +export DIR_IP_GENERATED = $(BUILDDIR)/ips +export DIR_BSV_GENERATED = $(BACKENDDIR)/verilog +export VERILOG_TOPMODULE +export TARGET_CLOCKS = $(CLK) +export DIR_OUTPUT = $(OUTPUTDIR) export OOCSYNTH = $(OOC) export RUNTO = $(RUNTOPHASE) export PART = $(PARTNAME) +export MAX_NET_PATH_NUM compile: mkdir -p $(BUILDDIR) @@ -31,8 +44,14 @@ verilog: compile bsc $(VERILOGFLAGS) $(DIRFLAGS) $(MISCFLAGS) $(RECOMPILEFLAGS) $(RUNTIMEFLAGS) $(TRANSFLAGS) -g $(TOPMODULE) $(TARGETFILE) bluetcl listVlogFiles.tcl -bdir $(BUILDDIR) -vdir $(BUILDDIR) $(TOPMODULE) $(TOPMODULE) | grep -i '\.v' | xargs -I {} cp {} $(VLOGDIR) -# vivado: verilog -# vivado -mode tcl -nolog -nojournal -source ./non_project_build.tcl 2>&1 | tee $(LOGFILE) +vivado: + vivado -mode tcl -nolog -nojournal -source ./build_top.tcl 2>&1 | tee $(LOGFILE) + +vivado_synth: + vivado -mode tcl -nolog -nojournal -source ./build_top.tcl -tclargs synth 2>&1 | tee $(LOGFILE) + +vivado_prw: + vivado -mode tcl -nolog -nojournal -source ./build_top.tcl -tclargs prw 2>&1 | tee $(LOGFILE) clean: rm -rf $(BUILDDIR) $(OUTPUTDIR) $(VLOGDIR) .Xil *.jou *.log diff --git a/backend/build_top.tcl b/backend/build_top.tcl new file mode 100644 index 0000000..8afe2e5 --- /dev/null +++ b/backend/build_top.tcl @@ -0,0 +1,328 @@ +set dir_output $::env(DIR_OUTPUT) +set dir_rtl $::env(DIR_RTL) +set dir_xdc $::env(DIR_XDC) +set dir_ooc_scripts $::env(DIR_OOC_SCRIPTS) +set dir_ips $::env(DIR_IPS) +set dir_board $::env(DIR_BOARD) +set dir_ip_gen $::env(DIR_IP_GENERATED) +set dir_bsv_gen $::env(DIR_BSV_GENERATED) +set part $::env(PART) +set top_module $::env(VERILOG_TOPMODULE) +set target_clks $::env(TARGET_CLOCKS) +set max_net_path_num $::env(MAX_NET_PATH_NUM) + +set current_time [clock format [clock seconds] -format "%Y-%m-%d-%H-%M-%S"] + +set_param general.maxthreads 16 +#set device [get_parts $part]; # xcvu13p-fhgb2104-2-i; #xcu200-fsgd2104-2-e +#set_part $device + +create_project -in_memory +set device [get_parts $part] +#set_part $device +set_property board_part xilinx.com:au200:1.3 [current_project] + +set ooc_module_names { \ + mkRawTestDmaController \ +} + +proc runGenerateIP {args} { + global dir_output part device dir_ips dir_xdc device dir_ip_gen + + file mkdir $dir_output + + # read_xdc [ glob $dir_xdc/*.xdc ] + + foreach file [ glob $dir_ips/**/*.tcl ] { + source $file + } + + report_property $device -file $dir_output/pre_synth_dev_prop.rpt + reset_target all [ get_ips * ] + generate_target all [ get_ips * ] + +} + +proc runSynthIP {args} { + global dir_output top_module dir_ip_gen dir_xdc + + # read_xdc [ glob $dir_xdc/*.xdc ] + + read_ip [glob $dir_ip_gen/**/*.xci] + # The following line will generate a .dcp checkpoint file, so no need to create by ourselves + synth_ip [ get_ips * ] -quiet +} + + +proc runSynthOOC {args} { + global dir_output part dir_bsv_gen dir_ooc_scripts dir_ooc_scripts max_net_path_num + global ooc_module_names + + foreach ooc_top $ooc_module_names { + source ooc_tcl_and_xdc/bsv_ooc_module_common.tcl + } +} + + +proc addExtFiles {args} { + global dir_output part device dir_rtl dir_xdc dir_board dir_ip_gen dir_bsv_gen + global ooc_module_names + + read_ip [glob $dir_ip_gen/**/*.xci] + read_verilog [ glob $dir_rtl/*.v ] + read_verilog [ glob $dir_bsv_gen/*.v ] + read_verilog [ glob $dir_board/*.v ] + + read_xdc [ glob $dir_xdc/*.xdc ] +} + + +proc runSynthDesign {args} { + global dir_output top_module max_net_path_num + + synth_design -top $top_module -flatten_hierarchy none + + write_checkpoint -force $dir_output/post_synth_design.dcp + write_xdc -force -exclude_physical $dir_output/post_synth.xdc +} + + +proc runPostSynthReport {args} { + global dir_output target_clks max_net_path_num + + if {[dict get $args -open_checkpoint] == true} { + open_checkpoint $dir_output/post_synth_design.dcp + } + + xilinx::designutils::report_failfast -max_paths 10000 -detailed_reports synth -file $dir_output/post_synth_failfast.rpt + + # Check 1) slack, 2) requirement, 3) src and dst clocks, 4) datapath delay, 5) logic level, 6) skew and uncertainty. + report_timing_summary -report_unconstrained -warn_on_violation -file $dir_output/post_synth_timing_summary.rpt + # report_timing -of_objects [get_timing_paths -setup -to [get_clocks $target_clks] -max_paths $max_net_path_num -filter { LOGIC_LEVELS >= 4 && LOGIC_LEVELS <= 40 }] -file $dir_output/post_synth_long_paths.rpt + # Check 1) endpoints without clock, 2) combo loop and 3) latch. + check_timing -override_defaults no_clock -file $dir_output/post_synth_check_timing.rpt + report_clock_networks -file $dir_output/post_synth_clock_networks.rpt; # Show unconstrained clocks + report_clock_interaction -delay_type min_max -significant_digits 3 -file $dir_output/post_synth_clock_interaction.rpt; # Pay attention to Clock pair Classification, Inter-CLock Constraints, Path Requirement (WNS) + report_high_fanout_nets -timing -load_type -max_nets $max_net_path_num -file $dir_output/post_synth_fanout.rpt + report_exceptions -ignored -file $dir_output/post_synth_exceptions.rpt; # -ignored -ignored_objects -write_valid_exceptions -write_merged_exceptions + + # 1 LUT + 1 net have delay 0.5ns, if cycle period is Tns, logic level is 2T at most + # report_design_analysis -timing -max_paths $max_net_path_num -file $dir_output/post_synth_design_timing.rpt + report_design_analysis -setup -max_paths $max_net_path_num -file $dir_output/post_synth_design_setup_timing.rpt + # report_design_analysis -logic_level_dist_paths $max_net_path_num -min_level $MIN_LOGIC_LEVEL -max_level $MAX_LOGIC_LEVEL -file $dir_output/post_synth_design_logic_level.rpt + report_design_analysis -logic_level_dist_paths $max_net_path_num -logic_level_distribution -file $dir_output/post_synth_design_logic_level_dist.rpt + + report_datasheet -file $dir_output/post_synth_datasheet.rpt + + + report_drc -file $dir_output/post_synth_drc.rpt + report_drc -ruledeck methodology_checks -file $dir_output/post_synth_drc_methodology.rpt + report_drc -ruledeck timing_checks -file $dir_output/post_synth_drc_timing.rpt + + # intra-clock skew < 300ps, inter-clock skew < 500ps + + # Check 1) LUT on clock tree (TIMING-14), 2) hold constraints for multicycle path constraints (XDCH-1). + report_methodology -file $dir_output/post_synth_methodology.rpt + report_timing -max $max_net_path_num -slack_less_than 0 -file $dir_output/post_synth_timing.rpt + + report_compile_order -constraints -file $dir_output/post_synth_constraints.rpt; # Verify IP constraints included + report_utilization -file $dir_output/post_synth_util.rpt; # -cells -pblocks + report_cdc -file $dir_output/post_synth_cdc.rpt + report_clocks -file $dir_output/post_synth_clocks.rpt; # Verify clock settings + + # Use IS_SEQUENTIAL for -from/-to + # Instantiate XPM_CDC modules + # write_xdc -force -exclude_physical -exclude_timing -constraints INVALID + + report_qor_assessment -report_all_suggestions -csv_output_dir $dir_output -file $dir_output/post_synth_qor_assess.rpt +} + + +proc runPlacement {args} { + global dir_output top_module current_time max_net_path_num + + if {[dict get $args -open_checkpoint] == true} { + open_checkpoint $dir_output/post_synth_design.dcp + } + + #source ./pblock.tcl + + opt_design -remap -verbose + + if {[dict exist $args -directive]} { + set directive [dict get $args -directive] + place_design -verbose -directive ${directive} + } else { + set directive "" + place_design -verbose + } +} + + +proc runPostPlacementReport {args} { + global dir_output target_clks max_net_path_num + + if {[dict get $args -open_checkpoint] == true} { + open_checkpoint $dir_output/post_place.dcp + } + + xilinx::designutils::report_failfast -by_slr -detailed_reports impl -file $dir_output/post_place_failfast.rpt + set slr_nets [xilinx::designutils::get_inter_slr_nets] + set slr_nets_exclude_clock [filter $slr_nets "TYPE != GLOBAL_CLOCK"] + set slr_net_exclude_clock_num [llength $slr_nets_exclude_clock] + if {$slr_net_exclude_clock_num > 0} { + report_timing -through $slr_nets_exclude_clock -nworst 1 -max $slr_net_exclude_clock_num -unique_pins -file $dir_output/post_place_slr_nets.rpt + } +} + + +proc runRoute {args} { + global dir_output top_module + + if {[dict get $args -open_checkpoint] == true} { + open_checkpoint $dir_output/post_place.dcp + } + + route_design + + proc runPPO { {num_iters 1} {enable_phys_opt 1} } { + for {set idx 0} {$idx < $num_iters} {incr idx} { + place_design -post_place_opt; # Better to run after route + if {$enable_phys_opt != 0} { + phys_opt_design + } + route_design + if {[get_property SLACK [get_timing_paths ]] >= 0} { + break; # Stop if timing closure + } + } + } + + # runPPO 4 1; # num_iters=4, enable_phys_opt=1 + + write_checkpoint -force $dir_output/post_route.dcp + write_xdc -force -exclude_physical $dir_output/post_route.xdc + + write_verilog -force $dir_output/post_impl_netlist.v -mode timesim -sdf_anno true +} + + +proc runPostRouteReport {args} { + global dir_output target_clks max_net_path_num + + if {[dict get $args -open_checkpoint] == true} { + open_checkpoint $dir_output/post_route.dcp + } + + report_timing_summary -report_unconstrained -warn_on_violation -file $dir_output/post_route_timing_summary.rpt + # report_timing -of_objects [get_timing_paths -hold -to [get_clocks $target_clks] -max_paths $max_net_path_num -filter { LOGIC_LEVELS >= 4 && LOGIC_LEVELS <= 40 }] -file $dir_output/post_route_long_paths.rpt + report_methodology -file $dir_output/post_route_methodology.rpt + report_timing -max $max_net_path_num -slack_less_than 0 -file $dir_output/post_route_timing.rpt + + report_route_status -file $dir_output/post_route_status.rpt + report_drc -file $dir_output/post_route_drc.rpt + report_drc -ruledeck methodology_checks -file $dir_output/post_route_drc_methodology.rpt + report_drc -ruledeck timing_checks -file $dir_output/post_route_drc_timing.rpt + # Check unique control sets < 7.5% of total slices, at most 15% + report_control_sets -verbose -file $dir_output/post_route_control_sets.rpt + + report_power -file $dir_output/post_route_power.rpt + report_power_opt -file $dir_output/post_route_power_opt.rpt + report_utilization -file $dir_output/post_route_util.rpt + report_ram_utilization -detail -file $dir_output/post_route_ram_utils.rpt + # Check fanout < 25K + report_high_fanout_nets -file $dir_output/post_route_fanout.rpt + + report_design_analysis -hold -max_paths $max_net_path_num -file $dir_output/post_route_design_hold_timing.rpt + # Check initial estimated router congestion level no more than 5, type (global, long, short) and top cells + report_design_analysis -congestion -file $dir_output/post_route_congestion.rpt + # Check difficult modules (>15K cells) with high Rent Exponent (complex logic cone) >= 0.65 and/or Avg. Fanout >= 4 + report_design_analysis -complexity -file $dir_output/post_route_complexity.rpt; # -hierarchical_depth + # If congested, check problematic cells using report_utilization -cells + # If congested, try NetDelay* for UltraScale+, or try SpredLogic* for UltraScale in implementation strategy + + xilinx::designutils::report_failfast -detailed_reports impl -file $dir_output/post_route_failfast.rpt + # xilinx::ultrafast::report_io_reg -file $dir_output/post_route_io_reg.rpt + report_io -file $dir_output/post_route_io.rpt + report_pipeline_analysis -file $dir_output/post_route_pipeline.rpt + report_qor_assessment -report_all_suggestions -csv_output_dir $dir_output -file $dir_output/post_route_qor_assess.rpt + report_qor_suggestions -report_all_suggestions -csv_output_dir $dir_output -file $dir_output/post_route_qor_suggest.rpt +} + +proc runWriteBitStream {args} { + global dir_output top_module + + if {[dict get $args -open_checkpoint] == true} { + open_checkpoint $dir_output/post_route.dcp + } + + set_property CONFIG_MODE SPIx4 [current_design] + set_property BITSTREAM.CONFIG.SPI_BUSWIDTH 4 [current_design] + + write_bitstream -force $dir_output/top.bit +} + +proc runProgramDevice {args} { + global dir_output top_module + + open_hw_manager + connect_hw_server -allow_non_jtag + open_hw_target + current_hw_device [get_hw_devices xcvu13p_0] + refresh_hw_device -update_hw_probes false [lindex [get_hw_devices xcvu13p_0] 0] + + # set_property PROBES.FILE {/home/mingheng/xdma_0_ex/xdma_0_ex.runs/impl_1/top.ltx} [get_hw_devices xcvu13p_0] + # set_property FULL_PROBES.FILE {/home/mingheng/xdma_0_ex/xdma_0_ex.runs/impl_1/top.ltx} [get_hw_devices xcvu13p_0] + + set_property PROGRAM.FILE $dir_output/top.bit [get_hw_devices xcvu13p_0] + program_hw_devices [get_hw_devices xcvu13p_0] +} + +if {$argc == 0} { + set synth 1 + set prw 1 + set directive ExtraNetDelay_high + set redirect 0 +} elseif {$argc > 0} { + set op [lindex $argv 0] + + if {$op eq "synth"} { + set synth 1 + set prw 0 + } elseif {$op eq "prw"} { + set synth 0 + set prw 1 + if {$argc == 1} { + set directive ExtraNetDelay_high + set redirect 0 + } elseif {$argc == 2} { + set directive [lindex $argv 1] + set redirect 1 + } + } +} +if {$synth} { + runGenerateIP -open_checkpoint false + runSynthIP -open_checkpoint false + runSynthOOC + addExtFiles -open_checkpoint false + runSynthDesign -open_checkpoint false + runPostSynthReport -open_checkpoint false +} + +if {$prw} { + if {!$synth} { + read_xdc $dir_output/post_synth.xdc + open_checkpoint $dir_output/post_synth_design.dcp + } + if {$redirect} { + set dir_output "$dir_output/$directive" + } + runPlacement -open_checkpoint -false -directive $directive + + # runPostPlacementReport -open_checkpoint false + runRoute -open_checkpoint false + # runPostRouteReport -open_checkpoint false + runWriteBitStream -open_checkpoint false + # runProgramDevice -open_checkpoint false +} diff --git a/backend/ips/pcie/pcie.tcl b/backend/ips/pcie/pcie.tcl new file mode 100644 index 0000000..f93b1be --- /dev/null +++ b/backend/ips/pcie/pcie.tcl @@ -0,0 +1,38 @@ +create_ip -name pcie4_uscale_plus -vendor xilinx.com -library ip -version 1.3 \ + -module_name pcie4_uscale_plus_0 -dir $dir_ip_gen -force + +set_property -dict [list CONFIG.PL_LINK_CAP_MAX_LINK_SPEED {8.0_GT/s} \ + CONFIG.PL_LINK_CAP_MAX_LINK_WIDTH {X16} \ + CONFIG.AXISTEN_IF_EXT_512_RQ_STRADDLE {true} \ + CONFIG.AXISTEN_IF_EXT_512_RC_4TLP_STRADDLE {false} \ + CONFIG.axisten_if_enable_client_tag {true} \ + CONFIG.PF0_DEVICE_ID {903F} \ + CONFIG.PF2_DEVICE_ID {943F} \ + CONFIG.PF3_DEVICE_ID {963F} \ + CONFIG.pf0_bar0_size {4} \ + CONFIG.pf0_bar1_enabled {true} \ + CONFIG.pf0_bar1_type {Memory} \ + CONFIG.pf0_bar1_scale {Megabytes} \ + CONFIG.pf0_bar1_size {2} \ + CONFIG.pf0_dev_cap_max_payload {512_bytes} \ + CONFIG.extended_tag_field {false} \ + CONFIG.pf1_bar0_size {4} \ + CONFIG.pf1_bar1_enabled {true} \ + CONFIG.pf1_bar1_type {Memory} \ + CONFIG.pf1_bar1_scale {Megabytes} \ + CONFIG.pf1_bar1_size {2} \ + CONFIG.axisten_if_width {512_bit} \ + CONFIG.pf2_bar0_size {4} \ + CONFIG.pf2_bar1_enabled {true} \ + CONFIG.pf2_bar1_type {Memory} \ + CONFIG.pf1_bar1_scale {Megabytes} \ + CONFIG.pf1_bar1_size {2} \ + CONFIG.pf3_bar0_size {4} \ + CONFIG.pf3_bar1_enabled {true} \ + CONFIG.pf3_bar1_type {Memory} \ + CONFIG.pf3_bar1_scale {Megabytes} \ + CONFIG.pf3_bar1_size {2} \ + CONFIG.mode_selection {Advanced} \ + CONFIG.coreclk_freq {500} \ + CONFIG.plltype {QPLL1} \ + CONFIG.axisten_freq {250}] [get_ips pcie4_uscale_plus_0] \ No newline at end of file diff --git a/backend/ooc_tcl_and_xdc/bsv_ooc_module_common.tcl b/backend/ooc_tcl_and_xdc/bsv_ooc_module_common.tcl new file mode 100644 index 0000000..2143d31 --- /dev/null +++ b/backend/ooc_tcl_and_xdc/bsv_ooc_module_common.tcl @@ -0,0 +1,18 @@ +set dir_ooc_out ${dir_output}/ooc/${ooc_top} +file mkdir $dir_ooc_out + +read_verilog [ glob $dir_bsv_gen/*.v ] +read_xdc ${dir_ooc_scripts}/bsv_ooc_module_common.xdc -mode out_of_context +synth_design -top $ooc_top -mode out_of_context -flatten_hierarchy none +write_checkpoint -force ${dir_ooc_out}/${ooc_top}.dcp + + + +report_timing_summary -report_unconstrained -warn_on_violation -file $dir_ooc_out/post_synth_timing_summary.rpt +check_timing -override_defaults no_clock -file $dir_ooc_out/post_synth_check_timing.rpt +report_design_analysis -logic_level_dist_paths $max_net_path_num -logic_level_distribution -file $dir_ooc_out/post_synth_design_logic_level_dist.rpt +xilinx::designutils::report_failfast -max_paths $max_net_path_num -detailed_reports synth -file $dir_ooc_out/post_synth_failfast.rpt +report_drc -file $dir_ooc_out/post_synth_drc.rpt +report_methodology -file $dir_ooc_out/post_synth_methodology.rpt +report_timing -max $max_net_path_num -slack_less_than 0 -file $dir_ooc_out/post_synth_timing.rpt +report_utilization -file $dir_ooc_out/post_synth_util.rpt; # -cells -pblocks \ No newline at end of file diff --git a/backend/ooc_tcl_and_xdc/bsv_ooc_module_common.xdc b/backend/ooc_tcl_and_xdc/bsv_ooc_module_common.xdc new file mode 100644 index 0000000..a8e37d7 --- /dev/null +++ b/backend/ooc_tcl_and_xdc/bsv_ooc_module_common.xdc @@ -0,0 +1 @@ +create_clock -name bsv_clk -period 4 [get_ports CLK] \ No newline at end of file diff --git a/backend/u200/top.v b/backend/u200/top.v new file mode 100644 index 0000000..42d39fa --- /dev/null +++ b/backend/u200/top.v @@ -0,0 +1,705 @@ +//----------------------------------------------------------------------------- +// +// (c) Copyright 2012-2012 Xilinx, Inc. All rights reserved. +// +// This file contains confidential and proprietary information +// of Xilinx, Inc. and is protected under U.S. and +// international copyright and other intellectual property +// laws. +// +// DISCLAIMER +// This disclaimer is not a license and does not grant any +// rights to the materials distributed herewith. Except as +// otherwise provided in a valid license issued to you by +// Xilinx, and to the maximum extent permitted by applicable +// law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND +// WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES +// AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING +// BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON- +// INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and +// (2) Xilinx shall not be liable (whether in contract or tort, +// including negligence, or under any other theory of +// liability) for any loss or damage of any kind or nature +// related to, arising under or in connection with these +// materials, including for any direct, or any indirect, +// special, incidental, or consequential loss or damage +// (including loss of data, profits, goodwill, or any type of +// loss or damage suffered as a result of any action brought +// by a third party) even if such damage or loss was +// reasonably foreseeable or Xilinx had been advised of the +// possibility of the same. +// +// CRITICAL APPLICATIONS +// Xilinx products are not designed or intended to be fail- +// safe, or for use in any application requiring fail-safe +// performance, such as life-support or safety devices or +// systems, Class III medical devices, nuclear facilities, +// applications related to the deployment of airbags, or any +// other applications that could lead to death, personal +// injury, or severe property or environmental damage +// (individually and collectively, "Critical +// Applications"). Customer assumes the sole risk and +// liability of any use of Xilinx products in Critical +// Applications, subject only to applicable laws and +// regulations governing limitations on product liability. +// +// THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS +// PART OF THIS FILE AT ALL TIMES. +// +//----------------------------------------------------------------------------- +// +// Project : UltraScale+ FPGA PCI Express v4.0 Integrated Block +// File : xilinx_pcie4_uscale_ep.v +// Version : 1.3 +//----------------------------------------------------------------------------- +//-- +//-- Description: PCI Express Endpoint example FPGA design +//-- +//------------------------------------------------------------------------------ +`define PCIE4_NEW_PINS 1 +`timescale 1ps / 1ps + +(* DowngradeIPIdentifiedWarnings = "yes" *) +module top # ( +/* +*/ + parameter [4:0] PL_LINK_CAP_MAX_LINK_WIDTH = 16, // 1- X1, 2 - X2, 4 - X4, 8 - X8, 16 - X16 + parameter C_DATA_WIDTH = 512, // RX/TX interface data width + parameter AXISTEN_IF_MC_RX_STRADDLE = 1, + parameter PL_LINK_CAP_MAX_LINK_SPEED = 4, // 1- GEN1, 2 - GEN2, 4 - GEN3, 8 - GEN4 + parameter KEEP_WIDTH = C_DATA_WIDTH / 32, + parameter EXT_PIPE_SIM = "FALSE", // This Parameter has effect on selecting Enable External PIPE Interface in GUI. + parameter AXISTEN_IF_CC_ALIGNMENT_MODE = "FALSE", + parameter AXISTEN_IF_CQ_ALIGNMENT_MODE = "FALSE", + parameter AXISTEN_IF_RQ_ALIGNMENT_MODE = "FALSE", + parameter AXISTEN_IF_RC_ALIGNMENT_MODE = "FALSE", + parameter AXI4_CQ_TUSER_WIDTH = 183, + parameter AXI4_CC_TUSER_WIDTH = 81, + parameter AXI4_RQ_TUSER_WIDTH = 137, + parameter AXI4_RC_TUSER_WIDTH = 161, + parameter AXISTEN_IF_ENABLE_CLIENT_TAG = 1, + parameter RQ_AVAIL_TAG_IDX = 8, + parameter RQ_AVAIL_TAG = 256, + parameter AXISTEN_IF_RQ_PARITY_CHECK = 0, + parameter AXISTEN_IF_CC_PARITY_CHECK = 0, + parameter AXISTEN_IF_RC_PARITY_CHECK = 0, + parameter AXISTEN_IF_CQ_PARITY_CHECK = 0, + parameter AXISTEN_IF_ENABLE_RX_MSG_INTFC = "FALSE", + parameter [17:0] AXISTEN_IF_ENABLE_MSG_ROUTE = 18'h2FFFF +) ( + output [(PL_LINK_CAP_MAX_LINK_WIDTH - 1) : 0] pci_exp_txp, + output [(PL_LINK_CAP_MAX_LINK_WIDTH - 1) : 0] pci_exp_txn, + input [(PL_LINK_CAP_MAX_LINK_WIDTH - 1) : 0] pci_exp_rxp, + input [(PL_LINK_CAP_MAX_LINK_WIDTH - 1) : 0] pci_exp_rxn, + + + + + output led_0, + output led_1, + output led_2, + + // Board LED Logic IO + // 300 MHz clock for the board + input wire clk_300MHz_p, + input wire clk_300MHz_n, + + input sys_clk_p, + input sys_clk_n, + + input sys_rst_n +); + + // Local Parameters derived from user selection + localparam TCQ = 1; + + wire user_lnk_up; + wire phy_rdy_out; + + + //----------------------------------------------------------------------------------------------------------------// + // AXI Interface // + //----------------------------------------------------------------------------------------------------------------// + + wire user_clk; + wire user_reset; + + wire s_axis_rq_tlast; + wire [C_DATA_WIDTH-1:0] s_axis_rq_tdata; + wire [AXI4_RQ_TUSER_WIDTH-1:0] s_axis_rq_tuser; + wire [KEEP_WIDTH-1:0] s_axis_rq_tkeep; + wire [3:0] s_axis_rq_tready; + wire s_axis_rq_tvalid; + + wire [C_DATA_WIDTH-1:0] m_axis_rc_tdata; + wire [AXI4_RC_TUSER_WIDTH-1:0] m_axis_rc_tuser; + wire m_axis_rc_tlast; + wire [KEEP_WIDTH-1:0] m_axis_rc_tkeep; + wire m_axis_rc_tvalid; + wire m_axis_rc_tready; + + wire [C_DATA_WIDTH-1:0] m_axis_cq_tdata; + wire [AXI4_CQ_TUSER_WIDTH-1:0] m_axis_cq_tuser; + wire m_axis_cq_tlast; + wire [KEEP_WIDTH-1:0] m_axis_cq_tkeep; + wire m_axis_cq_tvalid; + wire m_axis_cq_tready; + + wire [C_DATA_WIDTH-1:0] s_axis_cc_tdata; + wire [AXI4_CC_TUSER_WIDTH-1:0] s_axis_cc_tuser; + wire s_axis_cc_tlast; + wire [KEEP_WIDTH-1:0] s_axis_cc_tkeep; + wire s_axis_cc_tvalid; + wire [3:0] s_axis_cc_tready; + + wire [3:0] pcie_tfc_nph_av; + wire [3:0] pcie_tfc_npd_av; + //----------------------------------------------------------------------------------------------------------------// + // Configuration (CFG) Interface // + //----------------------------------------------------------------------------------------------------------------// + + wire pcie_cq_np_req; + wire [5:0] pcie_cq_np_req_count; + wire [5:0] pcie_rq_seq_num0; + wire pcie_rq_seq_num_vld0; + wire [5:0] pcie_rq_seq_num1; + wire pcie_rq_seq_num_vld1; + + //----------------------------------------------------------------------------------------------------------------// + // EP and RP // + //----------------------------------------------------------------------------------------------------------------// + + wire cfg_phy_link_down; + wire [2:0] cfg_negotiated_width; + wire [1:0] cfg_current_speed; + wire [1:0] cfg_max_payload; + wire [2:0] cfg_max_read_req; + wire [15:0] cfg_function_status; + wire [11:0] cfg_function_power_state; + wire [503:0] cfg_vf_status; + wire [1:0] cfg_link_power_state; + + // Error Reporting Interface + wire cfg_err_cor_out; + wire cfg_err_nonfatal_out; + wire cfg_err_fatal_out; + + wire [5:0] cfg_ltssm_state; + wire [3:0] cfg_rcb_status; + wire [1:0] cfg_obff_enable; + wire cfg_pl_status_change; + + // Management Interface + wire [9:0] cfg_mgmt_addr; + wire cfg_mgmt_write; + wire [31:0] cfg_mgmt_write_data; + wire [3:0] cfg_mgmt_byte_enable; + wire cfg_mgmt_read; + wire [31:0] cfg_mgmt_read_data; + wire cfg_mgmt_read_write_done; + wire cfg_mgmt_type1_cfg_reg_access; + wire cfg_msg_received; + wire [7:0] cfg_msg_received_data; + wire [4:0] cfg_msg_received_type; + wire cfg_msg_transmit; + wire [2:0] cfg_msg_transmit_type; + wire [31:0] cfg_msg_transmit_data; + wire cfg_msg_transmit_done; + wire [7:0] cfg_fc_ph; + wire [11:0] cfg_fc_pd; + wire [7:0] cfg_fc_nph; + wire [11:0] cfg_fc_npd; + wire [7:0] cfg_fc_cplh; + wire [11:0] cfg_fc_cpld; + wire [2:0] cfg_fc_sel; + wire [2:0] cfg_per_func_status_control; + wire [3:0] cfg_per_function_number; + wire cfg_per_function_output_request; + + wire [63:0] cfg_dsn; + wire cfg_power_state_change_interrupt; + wire cfg_power_state_change_ack; + wire cfg_err_cor_in; + wire cfg_err_uncor_in; + + wire [3:0] cfg_flr_in_process; + wire [1:0] cfg_flr_done; + wire [251:0] cfg_vf_flr_in_process; + wire cfg_vf_flr_done; + wire [7:0] cfg_vf_flr_func_num; + + wire cfg_link_training_enable; + + //----------------------------------------------------------------------------------------------------------------// + // EP Only // + //----------------------------------------------------------------------------------------------------------------// + + // Interrupt Interface Signals + wire [3:0] cfg_interrupt_int; + wire [1:0] cfg_interrupt_pending; + wire cfg_interrupt_sent; + + wire [3:0] cfg_interrupt_msi_enable; + wire [11:0] cfg_interrupt_msi_mmenable; + wire cfg_interrupt_msi_mask_update; + wire [31:0] cfg_interrupt_msi_data; + wire [1:0] cfg_interrupt_msi_select; + wire [31:0] cfg_interrupt_msi_int; + wire [63:0] cfg_interrupt_msi_pending_status; + wire cfg_interrupt_msi_sent; + wire cfg_interrupt_msi_fail; + wire [2:0] cfg_interrupt_msi_attr; + wire cfg_interrupt_msi_tph_present; + wire [1:0] cfg_interrupt_msi_tph_type; + wire [7:0] cfg_interrupt_msi_tph_st_tag; + wire [7:0] cfg_interrupt_msi_function_number; + +// EP only + wire cfg_hot_reset_out; + wire cfg_config_space_enable; + wire cfg_req_pm_transition_l23_ready; + +// RP only + wire cfg_hot_reset_in; + + wire [7:0] cfg_ds_port_number; + wire [7:0] cfg_ds_bus_number; + wire [4:0] cfg_ds_device_number; + + //----------------------------------------------------------------------------------------------------------------// + // System(SYS) Interface // + //----------------------------------------------------------------------------------------------------------------// + + wire sys_clk; + wire sys_clk_gt; + wire sys_rst_n_c; + //----------------------------------------------------------------------------------------------------------------------- + + IBUF sys_reset_n_ibuf (.O(sys_rst_n_c), .I(sys_rst_n)); + + IBUFDS_GTE4 refclk_ibuf (.O(sys_clk_gt), .ODIV2(sys_clk), .I(sys_clk_p), .CEB(1'b0), .IB(sys_clk_n)); + + + + // assign LED outputs + assign led_0 = 0; + assign led_1 = 0; + assign led_2 = 0; + +//------------------------------------------------------------------------------------------------------------------// +// PCIe Core Top Level Wrapper // +//------------------------------------------------------------------------------------------------------------------// +// Core Top Level Wrapper + pcie4_uscale_plus_0 pcie4_uscale_plus_0_i ( + //---------------------------------------------------------------------------------------// + // PCI Express (pci_exp) Interface // + //---------------------------------------------------------------------------------------// + + // Tx + .pci_exp_txn ( pci_exp_txn ), + .pci_exp_txp ( pci_exp_txp ), + + // Rx + .pci_exp_rxn ( pci_exp_rxn ), + .pci_exp_rxp ( pci_exp_rxp ), + + //---------------------------------------------------------------------------------------// + // AXI Interface // + //---------------------------------------------------------------------------------------// + + .user_clk ( user_clk ), + .user_reset ( user_reset ), + .user_lnk_up ( user_lnk_up ), + .phy_rdy_out ( phy_rdy_out ), + + .s_axis_rq_tlast ( s_axis_rq_tlast ), + .s_axis_rq_tdata ( s_axis_rq_tdata ), + .s_axis_rq_tuser ( s_axis_rq_tuser ), + .s_axis_rq_tkeep ( s_axis_rq_tkeep ), + .s_axis_rq_tready ( s_axis_rq_tready ), + .s_axis_rq_tvalid ( s_axis_rq_tvalid ), + + .m_axis_rc_tdata ( m_axis_rc_tdata ), + .m_axis_rc_tuser ( m_axis_rc_tuser ), + .m_axis_rc_tlast ( m_axis_rc_tlast ), + .m_axis_rc_tkeep ( m_axis_rc_tkeep ), + .m_axis_rc_tvalid ( m_axis_rc_tvalid ), + .m_axis_rc_tready ( m_axis_rc_tready ), + + .m_axis_cq_tdata ( m_axis_cq_tdata ), + .m_axis_cq_tuser ( m_axis_cq_tuser ), + .m_axis_cq_tlast ( m_axis_cq_tlast ), + .m_axis_cq_tkeep ( m_axis_cq_tkeep ), + .m_axis_cq_tvalid ( m_axis_cq_tvalid ), + .m_axis_cq_tready ( m_axis_cq_tready ), + + .s_axis_cc_tdata ( s_axis_cc_tdata ), + .s_axis_cc_tuser ( s_axis_cc_tuser ), + .s_axis_cc_tlast ( s_axis_cc_tlast ), + .s_axis_cc_tkeep ( s_axis_cc_tkeep ), + .s_axis_cc_tvalid ( s_axis_cc_tvalid ), + .s_axis_cc_tready ( s_axis_cc_tready ), + + + + //---------------------------------------------------------------------------------------// + // Configuration (CFG) Interface // + //---------------------------------------------------------------------------------------// + .pcie_tfc_nph_av ( pcie_tfc_nph_av ), + .pcie_tfc_npd_av ( pcie_tfc_npd_av ), + + .pcie_rq_seq_num0 ( pcie_rq_seq_num0 ) , + .pcie_rq_seq_num_vld0 ( pcie_rq_seq_num_vld0 ) , + .pcie_rq_seq_num1 ( pcie_rq_seq_num1 ) , + .pcie_rq_seq_num_vld1 ( pcie_rq_seq_num_vld1 ) , + .pcie_rq_tag0 ( ) , + .pcie_rq_tag1 ( ) , + .pcie_rq_tag_av ( ) , + .pcie_rq_tag_vld0 ( ) , + .pcie_rq_tag_vld1 ( ) , + .pcie_cq_np_req ( {1'b1,pcie_cq_np_req} ), + .pcie_cq_np_req_count ( pcie_cq_np_req_count ), + .cfg_phy_link_down ( cfg_phy_link_down ), + .cfg_phy_link_status ( ), + .cfg_negotiated_width ( cfg_negotiated_width ), + .cfg_current_speed ( cfg_current_speed ), + .cfg_max_payload ( cfg_max_payload ), + .cfg_max_read_req ( cfg_max_read_req ), + .cfg_function_status ( cfg_function_status ), + .cfg_function_power_state ( cfg_function_power_state ), + .cfg_vf_status ( cfg_vf_status ), + .cfg_vf_power_state ( ), + .cfg_link_power_state ( cfg_link_power_state ), + // Error Reporting Interface + .cfg_err_cor_out ( cfg_err_cor_out ), + .cfg_err_nonfatal_out ( cfg_err_nonfatal_out ), + .cfg_err_fatal_out ( cfg_err_fatal_out ), + + .cfg_local_error_out ( ), + .cfg_local_error_valid ( ), + + .cfg_ltssm_state ( cfg_ltssm_state ), + .cfg_rx_pm_state ( ), + .cfg_tx_pm_state ( ), + .cfg_rcb_status ( cfg_rcb_status ), + + .cfg_obff_enable ( cfg_obff_enable ), + .cfg_pl_status_change ( cfg_pl_status_change ), + + .cfg_tph_requester_enable ( ), + .cfg_tph_st_mode ( ), + .cfg_vf_tph_requester_enable ( ), + .cfg_vf_tph_st_mode ( ), + // Management Interface + .cfg_mgmt_addr ( cfg_mgmt_addr ), + .cfg_mgmt_write ( cfg_mgmt_write ), + .cfg_mgmt_write_data ( cfg_mgmt_write_data ), + .cfg_mgmt_byte_enable ( cfg_mgmt_byte_enable ), + .cfg_mgmt_read ( cfg_mgmt_read ), + .cfg_mgmt_read_data ( cfg_mgmt_read_data ), + .cfg_mgmt_read_write_done ( cfg_mgmt_read_write_done ), + .cfg_mgmt_debug_access (1'b0), + .cfg_mgmt_function_number (8'b0), + .cfg_pm_aspm_l1_entry_reject (1'b0), + .cfg_pm_aspm_tx_l0s_entry_disable (1'b1), + + .cfg_msg_received ( cfg_msg_received ), + .cfg_msg_received_data ( cfg_msg_received_data ), + .cfg_msg_received_type ( cfg_msg_received_type ), + + .cfg_msg_transmit ( cfg_msg_transmit ), + .cfg_msg_transmit_type ( cfg_msg_transmit_type ), + .cfg_msg_transmit_data ( cfg_msg_transmit_data ), + .cfg_msg_transmit_done ( cfg_msg_transmit_done ), + + .cfg_fc_ph ( cfg_fc_ph ), + .cfg_fc_pd ( cfg_fc_pd ), + .cfg_fc_nph ( cfg_fc_nph ), + .cfg_fc_npd ( cfg_fc_npd ), + .cfg_fc_cplh ( cfg_fc_cplh ), + .cfg_fc_cpld ( cfg_fc_cpld ), + .cfg_fc_sel ( cfg_fc_sel ), + + //-------------------------------------------------------------------------------// + // EP and RP // + //-------------------------------------------------------------------------------// + .cfg_bus_number ( ), + .cfg_dsn ( cfg_dsn ), + .cfg_power_state_change_ack ( cfg_power_state_change_ack ), + .cfg_power_state_change_interrupt ( cfg_power_state_change_interrupt ), + .cfg_err_cor_in ( cfg_err_cor_in ), + .cfg_err_uncor_in ( cfg_err_uncor_in ), + + .cfg_flr_in_process ( cfg_flr_in_process ), + .cfg_flr_done ( {2'b0,cfg_flr_done} ), + .cfg_vf_flr_in_process ( cfg_vf_flr_in_process ), + .cfg_vf_flr_done ( cfg_vf_flr_done ), + .cfg_link_training_enable ( cfg_link_training_enable ), + // EP only + .cfg_hot_reset_out ( cfg_hot_reset_out ), + .cfg_config_space_enable ( cfg_config_space_enable ), + .cfg_req_pm_transition_l23_ready ( cfg_req_pm_transition_l23_ready ), + + // RP only + .cfg_hot_reset_in ( cfg_hot_reset_in ), + + .cfg_ds_bus_number ( cfg_ds_bus_number ), + .cfg_ds_device_number ( cfg_ds_device_number ), + .cfg_ds_port_number ( cfg_ds_port_number ), + .cfg_vf_flr_func_num (cfg_vf_flr_func_num), + + //-------------------------------------------------------------------------------// + // EP Only // + //-------------------------------------------------------------------------------// + + // Interrupt Interface Signals + .cfg_interrupt_int ( cfg_interrupt_int ), + .cfg_interrupt_pending ( {2'b0,cfg_interrupt_pending} ), + .cfg_interrupt_sent ( cfg_interrupt_sent ), + + + + // MSI Interface + .cfg_interrupt_msi_enable ( cfg_interrupt_msi_enable ), + .cfg_interrupt_msi_mmenable ( cfg_interrupt_msi_mmenable ), + .cfg_interrupt_msi_mask_update ( cfg_interrupt_msi_mask_update ), + .cfg_interrupt_msi_data ( cfg_interrupt_msi_data ), + .cfg_interrupt_msi_select ( cfg_interrupt_msi_select ), + .cfg_interrupt_msi_int ( cfg_interrupt_msi_int ), + .cfg_interrupt_msi_pending_status ( cfg_interrupt_msi_pending_status [31:0]), + .cfg_interrupt_msi_sent ( cfg_interrupt_msi_sent ), + .cfg_interrupt_msi_fail ( cfg_interrupt_msi_fail ), + .cfg_interrupt_msi_attr ( cfg_interrupt_msi_attr ), + .cfg_interrupt_msi_tph_present ( cfg_interrupt_msi_tph_present ), + .cfg_interrupt_msi_tph_type ( cfg_interrupt_msi_tph_type ), + .cfg_interrupt_msi_tph_st_tag ( cfg_interrupt_msi_tph_st_tag ), + .cfg_interrupt_msi_pending_status_function_num ( 2'b0), + .cfg_interrupt_msi_pending_status_data_enable ( 1'b0), + + .cfg_interrupt_msi_function_number ( cfg_interrupt_msi_function_number ), + + + //--------------------------------------------------------------------------------------// + // System(SYS) Interface // + //--------------------------------------------------------------------------------------// + + .sys_clk ( sys_clk ), + .sys_clk_gt ( sys_clk_gt ), + .sys_reset ( sys_rst_n_c ) + ); + +//------------------------------------------------------------------------------------------------------------------// +// PIO Example Design Top Level // +//------------------------------------------------------------------------------------------------------------------// + mkRawTestDmaController dmac_i ( + .CLK ( user_clk ), + .RST_N ( ~user_reset ), + .user_lnk_up ( user_lnk_up ), + // .sys_rst ( sys_rst_n_c ), + + //-------------------------------------------------------------------------------------// + // AXI Interface // + //-------------------------------------------------------------------------------------// + + .m_axis_rq_tlast ( s_axis_rq_tlast ), + .m_axis_rq_tdata ( s_axis_rq_tdata ), + .m_axis_rq_tuser ( s_axis_rq_tuser ), + .m_axis_rq_tkeep ( s_axis_rq_tkeep ), + .m_axis_rq_tready ( s_axis_rq_tready[0] ), + .m_axis_rq_tvalid ( s_axis_rq_tvalid ), + + .s_axis_rc_tdata ( m_axis_rc_tdata ), + .s_axis_rc_tuser ( m_axis_rc_tuser ), + .s_axis_rc_tlast ( m_axis_rc_tlast ), + .s_axis_rc_tkeep ( m_axis_rc_tkeep ), + .s_axis_rc_tvalid ( m_axis_rc_tvalid ), + .s_axis_rc_tready ( m_axis_rc_tready ), + + .s_axis_cq_tdata ( m_axis_cq_tdata ), + .s_axis_cq_tuser ( m_axis_cq_tuser ), + .s_axis_cq_tlast ( m_axis_cq_tlast ), + .s_axis_cq_tkeep ( m_axis_cq_tkeep ), + .s_axis_cq_tvalid ( m_axis_cq_tvalid ), + .s_axis_cq_tready ( m_axis_cq_tready ), + + .m_axis_cc_tdata ( s_axis_cc_tdata ), + .m_axis_cc_tuser ( s_axis_cc_tuser ), + .m_axis_cc_tlast ( s_axis_cc_tlast ), + .m_axis_cc_tkeep ( s_axis_cc_tkeep ), + .m_axis_cc_tvalid ( s_axis_cc_tvalid ), + .m_axis_cc_tready ( s_axis_cc_tready[0] ), + + + // .pcie_rq_seq_num ( 'h0), + // .pcie_rq_seq_num_vld ( 'h0), + // .pcie_rq_tag ( 'h0), + // .pcie_rq_tag_vld ( 'h0), + .pcie_tfc_nph_av ( pcie_tfc_nph_av[1:0]), + .pcie_tfc_npd_av ( pcie_tfc_npd_av[1:0]), + .pcie_cq_np_req ( pcie_cq_np_req ), + .pcie_cq_np_req_count ( pcie_cq_np_req_count ), + + //--------------------------------------------------------------------------------// + // Configuration (CFG) Interface // + //--------------------------------------------------------------------------------// + + //--------------------------------------------------------------------------------// + // EP and RP // + //--------------------------------------------------------------------------------// + .cfg_phy_link_down ( cfg_phy_link_down ), + .cfg_negotiated_width ( cfg_negotiated_width ), + .cfg_current_speed ( cfg_current_speed ), + .cfg_max_payload ( cfg_max_payload ), + .cfg_max_read_req ( cfg_max_read_req ), + .cfg_function_status ( cfg_function_status [7:0] ), + .cfg_function_power_state ( cfg_function_power_state [5:0] ), + .cfg_vf_status ( cfg_vf_status ), + .cfg_link_power_state ( cfg_link_power_state ), + + // Error Reporting Interface + .cfg_err_cor_out ( cfg_err_cor_out ), + .cfg_err_nonfatal_out ( cfg_err_nonfatal_out ), + .cfg_err_fatal_out ( cfg_err_fatal_out ), +// .cfg_ltr_enable ( 1'b0 ), + .cfg_ltssm_state ( cfg_ltssm_state ), + .cfg_rcb_status ( cfg_rcb_status [1:0]), + .cfg_obff_enable ( cfg_obff_enable ), +// .cfg_pl_status_change ( cfg_pl_status_change ), + + // Management Interface + .cfg_mgmt_addr ( cfg_mgmt_addr ), + .cfg_mgmt_write ( cfg_mgmt_write ), + .cfg_mgmt_write_data ( cfg_mgmt_write_data ), + .cfg_mgmt_byte_enable ( cfg_mgmt_byte_enable ), + .cfg_mgmt_read ( cfg_mgmt_read ), + .cfg_mgmt_read_data ( cfg_mgmt_read_data ), + .cfg_mgmt_read_write_done ( cfg_mgmt_read_write_done ), +// .cfg_mgmt_type1_cfg_reg_access ( cfg_mgmt_type1_cfg_reg_access ), + .cfg_msg_received ( cfg_msg_received ), + .cfg_msg_received_data ( cfg_msg_received_data ), + .cfg_msg_received_type ( cfg_msg_received_type ), + .cfg_msg_transmit ( cfg_msg_transmit ), + .cfg_msg_transmit_type ( cfg_msg_transmit_type ), + .cfg_msg_transmit_data ( cfg_msg_transmit_data ), + .cfg_msg_transmit_done ( cfg_msg_transmit_done ), + + .cfg_fc_ph ( cfg_fc_ph ), + .cfg_fc_pd ( cfg_fc_pd ), + .cfg_fc_nph ( cfg_fc_nph ), + .cfg_fc_npd ( cfg_fc_npd ), + .cfg_fc_cplh ( cfg_fc_cplh ), + .cfg_fc_cpld ( cfg_fc_cpld ), + .cfg_fc_sel ( cfg_fc_sel ), + +// .cfg_per_func_status_control ( cfg_per_func_status_control ), +// .cfg_per_function_number ( cfg_per_function_number ), +// .cfg_per_function_output_request ( cfg_per_function_output_request ), + + .cfg_dsn ( cfg_dsn ), + .cfg_power_state_change_ack ( cfg_power_state_change_ack ), + .cfg_power_state_change_interrupt ( cfg_power_state_change_interrupt ), + .cfg_err_cor_in ( cfg_err_cor_in ), + .cfg_err_uncor_in ( cfg_err_uncor_in ), + + .cfg_flr_in_process ( cfg_flr_in_process [1:0] ), + .cfg_flr_done ( cfg_flr_done ), + .cfg_vf_flr_in_process ( cfg_vf_flr_in_process ), + .cfg_vf_flr_done ( cfg_vf_flr_done ), + .cfg_vf_flr_func_num ( cfg_vf_flr_func_num ), + + .cfg_link_training_enable ( cfg_link_training_enable ), + + .cfg_ds_port_number ( cfg_ds_port_number ), + .cfg_hot_reset_in ( cfg_hot_reset_out ), + .cfg_config_space_enable ( cfg_config_space_enable ), + .cfg_req_pm_transition_l23_ready ( cfg_req_pm_transition_l23_ready ), + + // RP only + .cfg_hot_reset_out ( cfg_hot_reset_in ), + + .cfg_ds_bus_number ( cfg_ds_bus_number ), + .cfg_ds_device_number ( cfg_ds_device_number ), + .cfg_ds_function_number ( ), + + //-------------------------------------------------------------------------------------// + // EP Only // + //-------------------------------------------------------------------------------------// + + .cfg_interrupt_msi_enable ( cfg_interrupt_msi_enable[0] ), + .cfg_interrupt_msi_mmenable ( cfg_interrupt_msi_mmenable[5:0] ), + .cfg_interrupt_msi_mask_update ( cfg_interrupt_msi_mask_update ), + .cfg_interrupt_msi_data ( cfg_interrupt_msi_data ), + .cfg_interrupt_msi_select ( cfg_interrupt_msi_select ), + .cfg_interrupt_msi_int ( cfg_interrupt_msi_int ), + .cfg_interrupt_msi_pending_status ( cfg_interrupt_msi_pending_status ), + .cfg_interrupt_msi_sent ( cfg_interrupt_msi_sent ), + .cfg_interrupt_msi_fail ( cfg_interrupt_msi_fail ), + .cfg_interrupt_msi_attr ( cfg_interrupt_msi_attr ), + .cfg_interrupt_msi_tph_present ( cfg_interrupt_msi_tph_present ), + .cfg_interrupt_msi_tph_type ( cfg_interrupt_msi_tph_type ), + .cfg_interrupt_msi_tph_st_tag ( cfg_interrupt_msi_tph_st_tag ), + .cfg_interrupt_msi_function_number ( cfg_interrupt_msi_function_number ), + + // Interrupt Interface Signals + .cfg_interrupt_int ( cfg_interrupt_int ), + .cfg_interrupt_pending ( cfg_interrupt_pending ), + .cfg_interrupt_sent ( cfg_interrupt_sent ) + + //------------------------------------------------------------------------------------// + // DMA IFC + //------------------------------------------------------------------------------------// +// .s_axis_c2h_0_tvalid (0), +// .s_axis_c2h_0_tdata (0), +// .s_axis_c2h_0_tkeep (0), +// .s_axis_c2h_0_tlast (0), +// .s_axis_c2h_0_tuser (0), +// .s_axis_c2h_0_tready ( ), + +// .s_desc_c2h_0_valid (0), +// .s_desc_c2h_0_start_addr (0), +// .s_desc_c2h_0_byte_cnt (0), +// .s_desc_c2h_0_is_write (0), +// .s_desc_c2h_0_ready ( ), + +// .m_axis_c2h_0_tvalid ( ), +// .m_axis_c2h_0_tdata ( ), +// .m_axis_c2h_0_tkeep ( ), +// .m_axis_c2h_0_tlast ( ), +// .m_axis_c2h_0_tuser ( ), +// .m_axis_c2h_0_tready (0), + +// .s_axis_c2h_1_tvalid (0), +// .s_axis_c2h_1_tdata (0), +// .s_axis_c2h_1_tkeep (0), +// .s_axis_c2h_1_tlast (0), +// .s_axis_c2h_1_tuser (0), +// .s_axis_c2h_1_tready ( ), + +// .s_desc_c2h_1_valid (0), +// .s_desc_c2h_1_start_addr (0), +// .s_desc_c2h_1_byte_cnt (0), +// .s_desc_c2h_1_is_write (0), +// .s_desc_c2h_1_ready ( ), + +// .m_axis_c2h_1_tvalid ( ), +// .m_axis_c2h_1_tdata ( ), +// .m_axis_c2h_1_tkeep ( ), +// .m_axis_c2h_1_tlast ( ), +// .m_axis_c2h_1_tuser ( ), +// .m_axis_c2h_1_tready (0), + +// .s_h2c_value_valid (0), +// .s_h2c_value_data (0), +// .s_h2c_value_ready ( ), + +// .m_h2c_value_address ( ), +// .m_h2c_value_is_write ( ), +// .m_h2c_value_valid ( ), +// .m_h2c_value_ready (0), + +// .m_h2c_desc_data ( ), +// .m_h2c_desc_valid ( ), +// .m_h2c_desc_ready (0) + ); + +endmodule diff --git a/backend/u200/xdc/u200_pcie.xdc b/backend/u200/xdc/u200_pcie.xdc new file mode 100644 index 0000000..cbe0c05 --- /dev/null +++ b/backend/u200/xdc/u200_pcie.xdc @@ -0,0 +1,162 @@ +##----------------------------------------------------------------------------- +## +## (c) Copyright 2012-2012 Xilinx, Inc. All rights reserved. +## +## This file contains confidential and proprietary information +## of Xilinx, Inc. and is protected under U.S. and +## international copyright and other intellectual property +## laws. +## +## DISCLAIMER +## This disclaimer is not a license and does not grant any +## rights to the materials distributed herewith. Except as +## otherwise provided in a valid license issued to you by +## Xilinx, and to the maximum extent permitted by applicable +## law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND +## WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES +## AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING +## BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON- +## INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and +## (2) Xilinx shall not be liable (whether in contract or tort, +## including negligence, or under any other theory of +## liability) for any loss or damage of any kind or nature +## related to, arising under or in connection with these +## materials, including for any direct, or any indirect, +## special, incidental, or consequential loss or damage +## (including loss of data, profits, goodwill, or any type of +## loss or damage suffered as a result of any action brought +## by a third party) even if such damage or loss was +## reasonably foreseeable or Xilinx had been advised of the +## possibility of the same. +## +## CRITICAL APPLICATIONS +## Xilinx products are not designed or intended to be fail- +## safe, or for use in any application requiring fail-safe +## performance, such as life-support or safety devices or +## systems, Class III medical devices, nuclear facilities, +## applications related to the deployment of airbags, or any +## other applications that could lead to death, personal +## injury, or severe property or environmental damage +## (individually and collectively, "Critical +## Applications"). Customer assumes the sole risk and +## liability of any use of Xilinx products in Critical +## Applications, subject only to applicable laws and +## regulations governing limitations on product liability. +## +## THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS +## PART OF THIS FILE AT ALL TIMES. +## +##----------------------------------------------------------------------------- +## +## Project : UltraScale+ FPGA PCI Express v4.0 Integrated Block +## File : xilinx_pcie4_uscale_plus_x1y2.xdc +## Version : 1.3 +##----------------------------------------------------------------------------- +# +############################################################################### +# Vivado - PCIe GUI / User Configuration +############################################################################### +# +# Family # virtexuplus +# Part # xcu200 +# Package # fsgd2104 +# Speed grade # -2 +# PCIe Block # X1Y2 +# Xilinx BNo # 15 +# +# Link Speed # Gen3 - 8.0 Gb/s +# Link Width # X16 +# AXIST Width # 512-bit +# AXIST Frequ # 250 MHz = User Clock +# Core Clock # 500 MHz +# Pipe Clock # 125 MHz (Gen1) : 250 MHz (Gen2/Gen3/Gen4) +# PLL TYPE # QPLL1 +# MSI-X TYPE # HARD +# +# master_gt_quad_inx # 3 +# master_gt_container # 32 +# gt_type # gtye4 +# +# Xilinx Reference Board is AU200 +# +############################################################################### +# User Time Names / User Time Groups / Time Specs +############################################################################### +create_clock -name sys_clk -period 10 [get_ports sys_clk_p] +# +#set_false_path -from [get_ports sys_rst_n] +#set_property PULLUP true [get_ports sys_rst_n] + +set_property IOSTANDARD POD12 [get_ports sys_rst_n] + +set_property PACKAGE_PIN BD21 [get_ports sys_rst_n] + +# +set_property PACKAGE_PIN AM10 [get_ports sys_clk_n] +set_property PACKAGE_PIN AM11 [get_ports sys_clk_p] +# + +# LEDs for ZCU117 +set_property PACKAGE_PIN BC21 [get_ports led_0] +# sys_reset +set_property PACKAGE_PIN BB21 [get_ports led_1] +# user_link_up +set_property PACKAGE_PIN BA20 [get_ports led_2] +# + +set_property IOSTANDARD LVCMOS12 [get_ports led_0] +set_property IOSTANDARD LVCMOS12 [get_ports led_1] +set_property IOSTANDARD LVCMOS12 [get_ports led_2] +# +set_property DRIVE 8 [get_ports led_0] +set_property DRIVE 8 [get_ports led_1] +set_property DRIVE 8 [get_ports led_2] + +# +# +# Clock for the 300 MHz clock is already created in the Clock Wizard IP. +# 300 MHz clock pin constraints. +set_property IOSTANDARD DIFF_SSTL12 [get_ports clk_300MHz_p] +set_property IOSTANDARD DIFF_SSTL12 [get_ports clk_300MHz_n] +set_property PACKAGE_PIN AY37 [get_ports clk_300MHz_p] +set_property PACKAGE_PIN AY38 [get_ports clk_300MHz_n] +# +# +# CLOCK_ROOT LOCKing to Reduce CLOCK SKEW +# Add/Edit Clock Routing Option to improve clock path skew +# +# BITFILE/BITSTREAM compress options +# ############################################################################## +# Flash Programming Example Settings: These should be modified to match the target board. +# ############################################################################## +# +# +# sys_clk vs TXOUTCLK +set_clock_groups -name async18 -asynchronous -group [get_clocks {sys_clk}] -group [get_clocks -of_objects [get_pins -hierarchical -filter {NAME =~ *gen_channel_container[32].*gen_gtye4_channel_inst[3].GTYE4_CHANNEL_PRIM_INST/TXOUTCLK}]] +set_clock_groups -name async19 -asynchronous -group [get_clocks -of_objects [get_pins -hierarchical -filter {NAME =~ *gen_channel_container[32].*gen_gtye4_channel_inst[3].GTYE4_CHANNEL_PRIM_INST/TXOUTCLK}]] -group [get_clocks {sys_clk}] +# +# +# +# +# +# +# ASYNC CLOCK GROUPINGS +# sys_clk vs user_clk +set_clock_groups -name async5 -asynchronous -group [get_clocks {sys_clk}] -group [get_clocks -of_objects [get_pins pcie4_uscale_plus_0_i/inst/pcie4_uscale_plus_0_gt_top_i/diablo_gt.diablo_gt_phy_wrapper/phy_clk_i/bufg_gt_userclk/O]] +set_clock_groups -name async6 -asynchronous -group [get_clocks -of_objects [get_pins pcie4_uscale_plus_0_i/inst/pcie4_uscale_plus_0_gt_top_i/diablo_gt.diablo_gt_phy_wrapper/phy_clk_i/bufg_gt_userclk/O]] -group [get_clocks {sys_clk}] +# sys_clk vs pclk +set_clock_groups -name async1 -asynchronous -group [get_clocks {sys_clk}] -group [get_clocks -of_objects [get_pins pcie4_uscale_plus_0_i/inst/pcie4_uscale_plus_0_gt_top_i/diablo_gt.diablo_gt_phy_wrapper/phy_clk_i/bufg_gt_pclk/O]] +set_clock_groups -name async2 -asynchronous -group [get_clocks -of_objects [get_pins pcie4_uscale_plus_0_i/inst/pcie4_uscale_plus_0_gt_top_i/diablo_gt.diablo_gt_phy_wrapper/phy_clk_i/bufg_gt_pclk/O]] -group [get_clocks {sys_clk}] +# +# +# +# Add/Edit Pblock slice constraints for 512b soft logic to improve timing +#create_pblock soft_512b; add_cells_to_pblock [get_pblocks soft_512b] [get_cells {pcie4_uscale_plus_0_i/inst/pcie4_uscale_plus_0_pcie_4_0_pipe_inst/pcie_4_0_init_ctrl_inst pcie4_uscale_plus_0_i/inst/pcie4_uscale_plus_0_pcie_4_0_pipe_inst/pcie4_0_512b_intfc_mod}] +# Keep This Logic Left/Right Side Of The PCIe Block (Whichever is near to the FPGA Boundary) +#resize_pblock [get_pblocks soft_512b] -add {SLICE_X157Y300:SLICE_X168Y370} +#set_property EXCLUDE_PLACEMENT 1 [get_pblocks soft_512b] +# +set_clock_groups -name async24 -asynchronous -group [get_clocks -of_objects [get_pins pcie4_uscale_plus_0_i/inst/pcie4_uscale_plus_0_gt_top_i/diablo_gt.diablo_gt_phy_wrapper/phy_clk_i/bufg_gt_intclk/O]] -group [get_clocks {sys_clk}] +# +#create_waiver -type METHODOLOGY -id {LUTAR-1} -user "pcie4_uscale_plus" -desc "user link up is synchroized in the user clk so it is safe to ignore" -internal -scoped -tags 1024539 -objects [get_cells { pcie_app_uscale_i/PIO_i/len_i[5]_i_4 }] -objects [get_pins { pcie4_uscale_plus_0_i/inst/user_lnk_up_cdc/arststages_ff_reg[0]/CLR pcie4_uscale_plus_0_i/inst/user_lnk_up_cdc/arststages_ff_reg[1]/CLR }] + diff --git a/backend/top.v b/backend/vu13p/top.v similarity index 100% rename from backend/top.v rename to backend/vu13p/top.v diff --git a/cocotb/cocotb.yaml b/cocotb/cocotb.yaml new file mode 100644 index 0000000..0e73cb3 --- /dev/null +++ b/cocotb/cocotb.yaml @@ -0,0 +1,41 @@ +name: cocotb +channels: + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - bzip2=1.0.8=h5eee18b_6 + - ca-certificates=2024.9.24=h06a4308_0 + - expat=2.6.3=h6a678d5_0 + - ld_impl_linux-64=2.40=h12ee557_0 + - libffi=3.4.4=h6a678d5_1 + - libgcc-ng=11.2.0=h1234567_1 + - libgomp=11.2.0=h1234567_1 + - libmpdec=4.0.0=h5eee18b_0 + - libstdcxx-ng=11.2.0=h1234567_1 + - libuuid=1.41.5=h5eee18b_0 + - ncurses=6.4=h6a678d5_0 + - openssl=3.0.15=h5eee18b_0 + - pip=24.2=py313h06a4308_0 + - python=3.13.0=hf623796_100_cp313 + - python_abi=3.13=0_cp313 + - readline=8.2=h5eee18b_0 + - setuptools=75.1.0=py313h06a4308_0 + - sqlite=3.45.3=h5eee18b_0 + - tk=8.6.14=h39e8969_0 + - tzdata=2024b=h04d1e81_0 + - wheel=0.44.0=py313h06a4308_0 + - xz=5.4.6=h5eee18b_1 + - zlib=1.2.13=h5eee18b_1 + - pip: + - cocotb==1.9.1 + - cocotb-bus==0.2.1 + - cocotb-test==0.2.5 + - cocotbext-axi==0.1.24 + - cocotbext-pcie==0.2.14 + - find-libpython==0.4.0 + - iniconfig==2.0.0 + - packaging==24.1 + - pluggy==1.5.0 + - pytest==8.3.3 +prefix: /home/wjz/miniconda3/envs/cocotb \ No newline at end of file diff --git a/src/DmaC2HPipe.bsv b/src/DmaC2HPipe.bsv index 360616f..fa468c5 100644 --- a/src/DmaC2HPipe.bsv +++ b/src/DmaC2HPipe.bsv @@ -150,6 +150,7 @@ module mkDmaC2HPipe#(DmaPathNo pathIdx)(DmaC2HPipe); C2HWriteCore writeCore <- mkC2HWriteCore(pathIdx); Reg#(Bool) isInitDoneReg <- mkReg(False); + Reg#(Bool) isInWriteCoreOutputReg <- mkReg(False); FIFOF#(DataStream) dataInFifo <- mkFIFOF; FIFOF#(DmaRequest) reqInFifo <- mkFIFOF; @@ -171,20 +172,44 @@ module mkDmaC2HPipe#(DmaPathNo pathIdx)(DmaC2HPipe); pathIdx, req.startAddr, req.length, pack(req.isWrite)); endrule - rule tlpOutMux; - if (readCore.tlpFifoOut.notEmpty) begin - tlpOutFifo.enq(readCore.tlpFifoOut.first); - tlpSideBandFifo.enq(readCore.tlpSideBandFifoOut.first); - readCore.tlpSideBandFifoOut.deq; - readCore.tlpFifoOut.deq; + // rule tlpOutMux; + // if (readCore.tlpFifoOut.notEmpty) begin + // tlpOutFifo.enq(readCore.tlpFifoOut.first); + // tlpSideBandFifo.enq(readCore.tlpSideBandFifoOut.first); + // readCore.tlpSideBandFifoOut.deq; + // readCore.tlpFifoOut.deq; + // end + // else begin + // if (writeCore.tlpSideBandFifoOut.notEmpty) begin + // tlpSideBandFifo.enq(writeCore.tlpSideBandFifoOut.first); + // writeCore.tlpSideBandFifoOut.deq; + // end + // tlpOutFifo.enq(writeCore.tlpFifoOut.first); + // writeCore.tlpFifoOut.deq; + // end + // endrule + + rule muxTlpOut; + if (isInWriteCoreOutputReg) begin + let tlpStream = writeCore.tlpFifoOut.first; + tlpOutFifo.enq(tlpStream); + writeCore.tlpFifoOut.deq; + isInWriteCoreOutputReg <= !tlpStream.isLast; end else begin - if (writeCore.tlpSideBandFifoOut.notEmpty) begin + if (readCore.tlpFifoOut.notEmpty) begin + tlpOutFifo.enq(readCore.tlpFifoOut.first); + tlpSideBandFifo.enq(readCore.tlpSideBandFifoOut.first); + readCore.tlpFifoOut.deq; + readCore.tlpSideBandFifoOut.deq; + end + else begin + tlpOutFifo.enq(writeCore.tlpFifoOut.first); tlpSideBandFifo.enq(writeCore.tlpSideBandFifoOut.first); + writeCore.tlpFifoOut.deq; writeCore.tlpSideBandFifoOut.deq; + isInWriteCoreOutputReg <= !writeCore.tlpFifoOut.first.isLast; end - tlpOutFifo.enq(writeCore.tlpFifoOut.first); - writeCore.tlpFifoOut.deq; end endrule From 3e9e8f6323b3c7bb14a9b2994a6bbfaa09eb78ce Mon Sep 17 00:00:00 2001 From: BIGWJZ Date: Fri, 6 Dec 2024 23:32:43 +0800 Subject: [PATCH 53/53] Add DummyCsr of Bypass Mode --- src/DmaC2HPipe.bsv | 17 ----------------- src/DmaH2CPipe.bsv | 7 +++++++ src/SimpleModeUtils.bsv | 1 + 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/src/DmaC2HPipe.bsv b/src/DmaC2HPipe.bsv index fa468c5..2c9487a 100644 --- a/src/DmaC2HPipe.bsv +++ b/src/DmaC2HPipe.bsv @@ -172,22 +172,6 @@ module mkDmaC2HPipe#(DmaPathNo pathIdx)(DmaC2HPipe); pathIdx, req.startAddr, req.length, pack(req.isWrite)); endrule - // rule tlpOutMux; - // if (readCore.tlpFifoOut.notEmpty) begin - // tlpOutFifo.enq(readCore.tlpFifoOut.first); - // tlpSideBandFifo.enq(readCore.tlpSideBandFifoOut.first); - // readCore.tlpSideBandFifoOut.deq; - // readCore.tlpFifoOut.deq; - // end - // else begin - // if (writeCore.tlpSideBandFifoOut.notEmpty) begin - // tlpSideBandFifo.enq(writeCore.tlpSideBandFifoOut.first); - // writeCore.tlpSideBandFifoOut.deq; - // end - // tlpOutFifo.enq(writeCore.tlpFifoOut.first); - // writeCore.tlpFifoOut.deq; - // end - // endrule rule muxTlpOut; if (isInWriteCoreOutputReg) begin @@ -200,7 +184,6 @@ module mkDmaC2HPipe#(DmaPathNo pathIdx)(DmaC2HPipe); if (readCore.tlpFifoOut.notEmpty) begin tlpOutFifo.enq(readCore.tlpFifoOut.first); tlpSideBandFifo.enq(readCore.tlpSideBandFifoOut.first); - readCore.tlpFifoOut.deq; readCore.tlpSideBandFifoOut.deq; end else begin diff --git a/src/DmaH2CPipe.bsv b/src/DmaH2CPipe.bsv index 1dbfe55..7b4e111 100644 --- a/src/DmaH2CPipe.bsv +++ b/src/DmaH2CPipe.bsv @@ -2,6 +2,7 @@ import FIFOF::*; import Vector::*; import RegFile::*; import ClientServer::*; +import Connectable :: *; import SemiFifo::*; import BdmaPrimUtils::*; @@ -10,6 +11,7 @@ import PcieTypes::*; import PcieDescriptorTypes::*; import PcieAdapter::*; import DmaTypes::*; +import SimpleModeUtils::*; typedef 1 IDEA_CQ_CSR_DWORD_CNT; typedef 1 IDEA_CC_CSR_DWORD_CNT; @@ -37,6 +39,10 @@ module mkBdmaH2CPipe(BdmaH2CPipe#(sz_csr_addr, sz_csr_data)) FIFOF#(BdmaUserH2cWrResp) wrRespQ <- mkFIFOF; FIFOF#(BdmaUserH2cRdReq#(sz_csr_addr)) rdReqQ <- mkFIFOF; FIFOF#(BdmaUserH2cRdResp#(sz_csr_data)) rdRespQ <- mkFIFOF; + let dummyCsr <- mkDummyCsr; + + mkConnection(pipe.csrReqFifoOut, dummyCsr.reqFifoIn); + mkConnection(dummyCsr.respFifoOut, pipe.csrRespFifoIn); rule forwardReq; let h2cReq = pipe.userReqFifoOut.first; @@ -137,6 +143,7 @@ module mkDmaH2CPipe(DmaH2CPipe); if (!isInPacket) begin let descriptor = getDescriptorFromFirstBeat(stream); if (descriptor.dwordCnt == fromInteger(valueOf(IDEA_CQ_CSR_DWORD_CNT))) begin + // $display($time, "ns SIM INFO @ mkDmaH2CPipe: recv CQ, address: %h\n", descriptor.address); case (descriptor.reqType) fromInteger(valueOf(MEM_WRITE_REQ)): begin let firstData = getDataFromFirstBeat(stream); diff --git a/src/SimpleModeUtils.bsv b/src/SimpleModeUtils.bsv index 439a971..a1fcabc 100644 --- a/src/SimpleModeUtils.bsv +++ b/src/SimpleModeUtils.bsv @@ -254,6 +254,7 @@ module mkDummyCsr(GenericCsr); rule request; let req = reqFifo.first; reqFifo.deq; + pendingFifo.enq(req.addr); let bramReq = BRAMRequest { write : req.isWrite, responseOnWrite : False,