Skip to content

Commit 91807b0

Browse files
authored
Implement the DSP primitive. (#239)
* Implement the DSP primitive.

  For chips that have these capabilities, a DSP implementation has been added in the form of all the primitives described in the Gowin documentation (UG287-1.3.3E_Gowin Digital Signal Processing (DSP) User Guide), namely:

  - PADD9
  - PADD18
  - MULT9X9
  - MULT18X18
  - MULT36X36
  - MULTALU18X18
  - MULTALU36X18
  - MULTADDALU18X18
  - ALU54D

  The most complex but also the most useful is the MULTADDALU18X18 primitive: it makes it easy to build a typical FIR filter, and all connections between these primitives in a chain are implemented by direct fixed wires with minimal delay.

  MULT36X36 primitives are not combined into chains, but they serve a different purpose: this primitive can be found in Linux SoCs.

  Added examples (in the examples/himbaechel directory) based on a tiny RISC-V core that demonstrate the calculations over UART. Only the TXD pin is used (it can be found in the specific .CST file for each board), so on the host computer side only GND and RXD are needed. Port settings: 115200 baud, no parity, 8 data bits, 1 stop bit, linefeed only. Picocom launch example:

  ``` shell
  picocom -l --imap lfcrlf -b 115200 /dev/ttyU0
  ```

  The source code for the RISC-V test programs is provided along with the assembly instructions, but the programs are not built during the compilation of the examples because that would require additional compilers.

  Implemented the combination of primitives into chains using the CASO-CASI and SO(A, B)-SI(A, B) wires, as well as SBO-SBI for PADD.

  Signed-off-by: YRabbit <[email protected]>

* I forgot to remove the debugging part. Fixed.

  Signed-off-by: YRabbit <[email protected]>

---------

Signed-off-by: YRabbit <[email protected]>
1 parent 1ff94b7 commit 91807b0

37 files changed

+8885
-72
lines changed

apycula/attrids.py

+479
Large diffs are not rendered by default.

apycula/chipdb.py

+833-49
Large diffs are not rendered by default.

apycula/gowin_pack.py

+1,357-3
Large diffs are not rendered by default.

apycula/gowin_unpack.py

+32-2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@
2222
'GW2A-18C' : 'PBGA256S'
2323
}
2424

25+
def print_sorted_dict(start, d):
    """Print dictionary d on one line with its keys in sorted order.

    The output is start immediately followed by '{', then a 'key:value, '
    entry for every key of d in sorted order, then '}' and a newline.
    """
    # Emit the prefix first so it appears even if sorting the keys fails.
    print(start, end='{')
    entries = ''.join(f'{key}:{d[key]}, ' for key in sorted(d))
    print(entries, end='')
    print('}')
30+
2531
# bank iostandards
2632
# XXX default io standard may be board-dependent!
2733
_banks = {'0': "LVCMOS18", '1': "LVCMOS18", '2': "LVCMOS18", '3': "LVCMOS18"}
@@ -59,6 +65,7 @@ def get_attr_name(attrname_table, code):
5965
for name, cod in attrname_table.items():
6066
if cod == code:
6167
return name
68+
print(f'Unknown attr name for {code}/0x{code:x}.')
6269
return ''
6370

6471
# fix names and types of the PLL attributes
@@ -293,7 +300,7 @@ def get_pll_A(db, row, col, typ):
293300
'IDDRX8': 'IDES16',
294301
}
295302

296-
# BSRAM have 3 cells: BSRAM, BSRAM0 and BSRAM1
303+
# BSRAM has 3 cells: BSRAM, BSRAM0 and BSRAM1
297304
# { (row, col) : idx }
298305
_bsram_cells = {}
299306
def get_bsram_main_cell(db, row, col, typ):
@@ -303,6 +310,12 @@ def get_bsram_main_cell(db, row, col, typ):
303310
col -= 2
304311
return row, col
305312

313+
# The DSP has 9 cells: the main one and a group of auxiliary ones.
314+
def get_dsp_main_cell(db, row, col, typ):
    """Return the (row, col) of the main DSP cell for the cell named typ.

    When characters [-6:-2] of typ are '_AUX' (for example 'DSP_AUX01'),
    the column is recomputed as 1 + (col - 1) // 9. Any other name gets its
    coordinates back unchanged. db is not used by this function.
    """
    # The name test must look at the `typ` argument. The previous code
    # subscripted the builtin `type`, so the cell name was never inspected
    # and the auxiliary-cell branch was dead.
    if typ[-6:-2] == '_AUX':
        # NOTE(review): this yields the quotient (col - 1) // 9 plus one, not
        # a multiple of 9 -- confirm it is the intended main-cell column.
        col = 1 + (col - 1) // 9
    return row, col
318+
306319
# noiostd --- this is the case when the function is called
307320
# with iostd by default, e.g. from the clock fuzzer
308321
# With normal gowin_unpack io standard is determined first and it is known.
@@ -361,6 +374,23 @@ def parse_tile_(db, row, col, tile, default=True, noalias=False, noiostd = True)
361374
#print(row, col, name, idx, tiledata.ttyp, attrvals)
362375
bels[f'{name}'] = {}
363376
continue
377+
if name.startswith("ALU54D"):
378+
continue
379+
if name.startswith("DSP") or name.startswith("DSP_AUX"):
380+
modes = set()
381+
idx = name[-1]
382+
#print(row, col, name, idx, tiledata.ttyp)
383+
if name.startswith("DSP_AUX"):
384+
row, col = get_dsp_main_cell(db, row, col, name)
385+
386+
if f'DSP{idx}' in db.shortval[tiledata.ttyp]:
387+
attrvals = parse_attrvals(tile, db.logicinfo['DSP'], db.shortval[tiledata.ttyp][f'DSP{idx}'], attrids.dsp_attrids)
388+
#print_sorted_dict(f'{row}, {col}, {name}, {idx}, {tiledata.ttyp} - ', attrvals)
389+
for attrval in attrvals:
390+
modes.add(attrval)
391+
if modes and not name.startswith("DSP_AUX"):
392+
bels[f'{name}{idx}'] = modes
393+
continue
364394
if name.startswith("IOLOGIC"):
365395
idx = name[-1]
366396
attrvals = parse_attrvals(tile, db.logicinfo['IOLOGIC'], db.shortval[tiledata.ttyp][f'IOLOGIC{idx}'], attrids.iologic_attrids)
@@ -802,7 +832,7 @@ def tile2verilog(dbrow, dbcol, bels, pips, clock_pips, mod, cst, db):
802832
mod.wires.update({srcg, destg})
803833
mod.assigns.append((destg, srcg))
804834

805-
belre = re.compile(r"(IOB|LUT|DFF|BANK|CFG|ALU|RAM16|ODDR|OSC[ZFHWO]?|BUFS|RPLL[AB]|PLLVR|IOLOGIC|BSRAM)(\w*)")
835+
belre = re.compile(r"(IOB|LUT|DFF|BANK|CFG|ALU|RAM16|ODDR|OSC[ZFHWO]?|BUFS|RPLL[AB]|PLLVR|IOLOGIC|BSRAM|DSP)(\w*)")
806836
bels_items = move_iologic(bels)
807837

808838
iologic_detected = set()

examples/himbaechel/Makefile.himbaechel

+32-8
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,10 @@ all: \
1212
bsram-pROM-tangnano20k.fs bsram-SDPB-tangnano20k.fs bsram-SP-tangnano20k.fs \
1313
bsram-DPB-tangnano20k.fs bsram-pROMX9-tangnano20k.fs bsram-SDPX9B-tangnano20k.fs \
1414
bsram-SPX9-tangnano20k.fs bsram-DPX9B-tangnano20k.fs \
15-
femto-riscv-15-tangnano20k.fs femto-riscv-16-tangnano20k.fs femto-riscv-16-tangnano20k.fs \
15+
femto-riscv-15-tangnano20k.fs femto-riscv-16-tangnano20k.fs femto-riscv-18-tangnano20k.fs \
16+
dsp-mult36x36-tangnano20k.fs dsp-padd9-tangnano20k.fs dsp-padd18-tangnano20k.fs \
17+
dsp-mult9x9-tangnano20k.fs dsp-alu54d-tangnano20k.fs dsp-multalu18x18-tangnano20k.fs \
18+
dsp-multalu36x18-tangnano20k.fs dsp-multaddalu18x18-tangnano20k.fs \
1619
\
1720
blinky-primer20k.fs shift-primer20k.fs blinky-tbuf-primer20k.fs blinky-oddr-primer20k.fs \
1821
blinky-osc-primer20k.fs tlvds-primer20k.fs elvds-primer20k.fs oddr-tlvds-primer20k.fs \
@@ -22,6 +25,10 @@ all: \
2225
bsram-pROM-primer20k.fs bsram-SDPB-primer20k.fs bsram-SP-primer20k.fs \
2326
bsram-DPB-primer20k.fs bsram-pROMX9-primer20k.fs bsram-SDPX9B-primer20k.fs \
2427
bsram-SPX9-primer20k.fs bsram-DPX9B-primer20k.fs \
28+
femto-riscv-15-primer20k.fs femto-riscv-16-primer20k.fs femto-riscv-18-primer20k.fs \
29+
dsp-mult36x36-primer20k.fs dsp-padd9-primer20k.fs dsp-padd18-primer20k.fs \
30+
dsp-mult9x9-primer20k.fs dsp-alu54d-primer20k.fs dsp-multalu18x18-primer20k.fs \
31+
dsp-multalu36x18-primer20k.fs dsp-multaddalu18x18-primer20k.fs \
2532
\
2633
blinky-tangnano.fs shift-tangnano.fs blinky-tbuf-tangnano.fs blinky-oddr-tangnano.fs \
2734
blinky-osc-tangnano.fs elvds-tangnano.fs oddr-elvds-tangnano.fs pll-nanolcd-tangnano.fs \
@@ -46,7 +53,10 @@ all: \
4653
ides16-tangnano4k.fs \
4754
ides4-tangnano4k.fs ivideo-tangnano4k.fs ides8-tangnano4k.fs ides10-tangnano4k.fs \
4855
oser10-tlvds-tangnano4k.fs \
49-
femto-riscv-15-tangnano4k.fs femto-riscv-16-tangnano4k.fs femto-riscv-16-tangnano4k.fs \
56+
femto-riscv-15-tangnano4k.fs femto-riscv-16-tangnano4k.fs femto-riscv-18-tangnano4k.fs \
57+
dsp-mult36x36-tangnano4k.fs dsp-padd9-tangnano4k.fs dsp-padd18-tangnano4k.fs \
58+
dsp-mult9x9-tangnano4k.fs dsp-alu54d-tangnano4k.fs dsp-multalu18x18-tangnano4k.fs \
59+
dsp-multalu36x18-tangnano4k.fs dsp-multaddalu18x18-tangnano4k.fs \
5060
\
5161
blinky-tangnano9k.fs shift-tangnano9k.fs blinky-tbuf-tangnano9k.fs blinky-oddr-tangnano9k.fs \
5262
blinky-osc-tangnano9k.fs tlvds-tangnano9k.fs elvds-tangnano9k.fs oddr-tlvds-tangnano9k.fs \
@@ -58,6 +68,9 @@ all: \
5868
bsram-SPX9-tangnano9k.fs bsram-DPX9B-tangnano9k.fs \
5969
oser10-elvds-tangnano9k.fs \
6070
femto-riscv-15-tangnano9k.fs femto-riscv-16-tangnano9k.fs femto-riscv-18-tangnano9k.fs \
71+
dsp-mult36x36-tangnano9k.fs dsp-padd9-tangnano9k.fs dsp-padd18-tangnano9k.fs \
72+
dsp-mult9x9-tangnano9k.fs dsp-alu54d-tangnano9k.fs dsp-multalu18x18-tangnano9k.fs \
73+
dsp-multalu36x18-tangnano9k.fs dsp-multaddalu18x18-tangnano9k.fs \
6174
\
6275
blinky-szfpga.fs shift-szfpga.fs blinky-tbuf-szfpga.fs blinky-oddr-szfpga.fs \
6376
blinky-osc-szfpga.fs tlvds-szfpga.fs elvds-szfpga.fs oddr-tlvds-szfpga.fs \
@@ -68,19 +81,29 @@ all: \
6881
bsram-pROM-szfpga.fs bsram-SDPB-szfpga.fs bsram-SP-szfpga.fs \
6982
bsram-pROMX9-szfpga.fs bsram-SDPX9B-szfpga.fs \
7083
bsram-SPX9-szfpga.fs \
84+
femto-riscv-15-szfpga.fs femto-riscv-16-szfpga.fs femto-riscv-18-szfpga.fs \
85+
dsp-mult36x36-szfpga.fs dsp-padd9-szfpga.fs dsp-padd18-szfpga.fs \
86+
dsp-mult9x9-szfpga.fs dsp-alu54d-szfpga.fs dsp-multalu18x18-szfpga.fs \
87+
dsp-multalu36x18-szfpga.fs dsp-multaddalu18x18-szfpga.fs \
7188
\
7289
blinky-tec0117.fs shift-tec0117.fs blinky-tbuf-tec0117.fs blinky-oddr-tec0117.fs \
7390
blinky-osc-tec0117.fs tlvds-tec0117.fs elvds-tec0117.fs oddr-tlvds-tec0117.fs \
7491
oddr-elvds-tec0117.fs blinky-pll-tec0117.fs oser16-tec0117.fs attosoc-tec0117.fs \
7592
oser4-tec0117.fs ovideo-tec0117.fs oser8-tec0117.fs oser10-tec0117.fs \
7693
ides16-tec0117.fs \
7794
ides4-tec0117.fs ivideo-tec0117.fs ides8-tec0117.fs ides10-tec0117.fs \
95+
dsp-mult36x36-tec0117.fs dsp-padd9-tec0117.fs dsp-padd18-tec0117.fs \
96+
dsp-mult9x9-tec0117.fs dsp-alu54d-tec0117.fs dsp-multalu18x18-tec0117.fs \
97+
dsp-multalu36x18-tec0117.fs dsp-multaddalu18x18-tec0117.fs \
7898
\
7999
blinky-runber.fs shift-runber.fs blinky-tbuf-runber.fs blinky-oddr-runber.fs \
80100
blinky-osc-runber.fs tlvds-runber.fs elvds-runber.fs oddr-tlvds-runber.fs \
81101
oddr-elvds-runber.fs blinky-pll-runber.fs \
82102
oser4-runber.fs ovideo-runber.fs oser8-runber.fs oser10-runber.fs \
83-
ides4-runber.fs ivideo-runber.fs ides8-runber.fs ides10-runber.fs
103+
ides4-runber.fs ivideo-runber.fs ides8-runber.fs ides10-runber.fs \
104+
dsp-mult36x36-runber.fs dsp-padd9-runber.fs dsp-padd18-runber.fs \
105+
dsp-mult9x9-runber.fs dsp-alu54d-runber.fs dsp-multalu18x18-runber.fs \
106+
dsp-multalu36x18-runber.fs dsp-multaddalu18x18-runber.fs
84107

85108
unpacked:\
86109
blinky-tangnano20k-unpacked.v shift-tangnano20k-unpacked.v \
@@ -173,7 +196,8 @@ unpacked:\
173196
elvds-runber-unpacked.v oddr-tlvds-runber-unpacked.v oddr-elvds-runber-unpacked.v \
174197
blinky-pll-runber-unpacked.v oser4-runber-unpacked.v ovideo-runber-unpacked.v \
175198
oser8-runber-unpacked.v oser10-runber-unpacked.v ides4-runber-unpacked.v \
176-
ivideo-runber-unpacked.v ides8-runber-unpacked.v ides10-runber-unpacked.v
199+
ivideo-runber-unpacked.v ides8-runber-unpacked.v ides10-runber-unpacked.v
200+
177201
clean:
178202
rm -f *.json *.fs *-unpacked.v
179203

@@ -208,7 +232,7 @@ attosoc-tangnano20k-synth.json: attosoc/attosoc.v attosoc/picorv32.v
208232
$(NEXTPNR) --json $< --write $@ --device GW2A-LV18PG256C8/I7 --vopt family=GW2A-18 --vopt cst=primer20k.cst
209233

210234
%-primer20k-synth.json: %.v
211-
$(YOSYS) -D LEDS_NR=6 -D OSC_TYPE_OSC -D INV_BTN=0 -p "read_verilog $^; synth_gowin -json $@"
235+
$(YOSYS) -D LEDS_NR=6 -D OSC_TYPE_OSC -D INV_BTN=0 -D CPU_FREQ=27 -D BAUD_RATE=115200 -p "read_verilog $^; synth_gowin -json $@"
212236

213237
pll-nanolcd-primer20k-synth.json: pll/GW2A-18-dyn.vh pll-nanolcd/TOP.v pll-nanolcd/VGAMod.v
214238
$(YOSYS) -D INV_BTN=0 -p "read_verilog $^; synth_gowin -json $@"
@@ -299,7 +323,7 @@ bsram-%-tangnano9k-synth.json: pll/GW1N-9C-dyn.vh %-image-rom.v %-video-ram.v %.
299323
$(NEXTPNR) --json $< --write $@ --device GW1NR-LV9LQ144PC6/I5 --vopt family=GW1N-9 --vopt cst=szfpga.cst
300324

301325
%-szfpga-synth.json: %.v
302-
$(YOSYS) -D LEDS_NR=4 -D OSC_TYPE_OSC -D INV_BTN=0 -p "read_verilog $^; synth_gowin -json $@"
326+
$(YOSYS) -D LEDS_NR=4 -D OSC_TYPE_OSC -D INV_BTN=0 -D CPU_FREQ=50 -D BAUD_RATE=115200 -p "read_verilog $^; synth_gowin -json $@"
303327

304328
blinky-pll-szfpga-synth.json: pll/GW1N-9-dyn.vh blinky-pll.v
305329
$(YOSYS) -D INV_BTN=0 -D LEDS_NR=4 -p "read_verilog $^; synth_gowin -json $@"
@@ -316,7 +340,7 @@ bsram-%-szfpga-synth.json: pll/GW1N-9-dyn.vh %-image-rom.v %-video-ram.v %.v
316340
$(NEXTPNR) --json $< --write $@ --device GW1NR-LV9QN88C6/I5 --vopt family=GW1N-9 --vopt cst=tec0117.cst
317341

318342
%-tec0117-synth.json: %.v
319-
$(YOSYS) -D LEDS_NR=8 -D OSC_TYPE_OSC -D INV_BTN=0 -p "read_verilog $^; synth_gowin -json $@"
343+
$(YOSYS) -D LEDS_NR=8 -D OSC_TYPE_OSC -D INV_BTN=0 -D CPU_FREQ=12 -D BAUD_RATE=115200 -p "read_verilog $^; synth_gowin -json $@"
320344

321345
blinky-pll-tec0117-synth.json: pll/GW1N-9-dyn.vh blinky-pll.v
322346
$(YOSYS) -D INV_BTN=0 -D LEDS_NR=8 -p "read_verilog $^; synth_gowin -json $@"
@@ -330,7 +354,7 @@ blinky-pll-tec0117-synth.json: pll/GW1N-9-dyn.vh blinky-pll.v
330354
$(NEXTPNR) --json $< --write $@ --device GW1N-UV4LQ144C6/I5 --vopt cst=runber.cst
331355

332356
%-runber-synth.json: %.v
333-
$(YOSYS) -D LEDS_NR=8 -D OSC_TYPE_OSC -D INV_BTN=0 -p "read_verilog $^; synth_gowin -json $@"
357+
$(YOSYS) -D LEDS_NR=8 -D OSC_TYPE_OSC -D INV_BTN=0 -D FORCE_BRAM -D CPU_FREQ=12 -D BAUD_RATE=115200 -p "read_verilog $^; synth_gowin -json $@"
334358

335359
blinky-pll-runber-synth.json: pll/GW1N-4-dyn.vh blinky-pll.v
336360
$(YOSYS) -D INV_BTN=0 -D LEDS_NR=8 -p "read_verilog $^; synth_gowin -json $@"

examples/himbaechel/dsp-alu54d.v

+101
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
`default_nettype none
2+
// alu0 mode 0 - simple subtraction with accumulator in C
3+
// alu1 mode 1 - addition with CASI and accumulator in A
4+
// alu2 mode 2 - addition with CASI
5+
// Example DSP block built from three ALU54D primitives chained through their
// cascade ports: alu0.CASO feeds alu1.CASI and alu1.CASO feeds alu2.CASI.
// Each ALU drives the low 54 bits of its own output (product, product1,
// product2). Bits [63:54] of those outputs, and all of product3 and
// product4, are not driven inside this module.
module idsp(input wire clk, input wire reset,
    output wire [63:0] product,
    output wire [63:0] product1,
    output wire [63:0] product2,
    output wire [63:0] product3,
    output wire [63:0] product4
);

// Constant zero used for the unused sign, load and cascade inputs.
wire gnd = 1'b0;

// Cascade links between consecutive ALUs.
wire [54:0] caso;
wire [54:0] caso0;

// alu0: ALUD_MODE 0, constant operands, output register enabled,
// start of the cascade chain (CASI tied to gnd).
ALU54D alu0(
    .A(54'hde1ec7ab1e),
    .B(54'hcad),
    .DOUT(product[53:0]),
    .CASI(gnd),
    .CASO(caso),
    .ASIGN(gnd),
    .BSIGN(gnd),
    .ACCLOAD(1'b1),
    .CLK(clk),
    .CE(1'b1),
    .RESET(reset)
);
defparam alu0.AREG=1'b0;
defparam alu0.BREG=1'b0;
defparam alu0.ASIGN_REG=1'b0;
defparam alu0.BSIGN_REG=1'b0;
defparam alu0.ACCLOAD_REG=1'b0;
defparam alu0.OUT_REG=1'b1;
defparam alu0.B_ADD_SUB=1'b1;
defparam alu0.C_ADD_SUB=1'b0;
defparam alu0.ALUD_MODE=2'b0;
defparam alu0.ALU_RESET_MODE="SYNC";

// alu1: ALUD_MODE 1, takes the cascade output of alu0 on CASI.
ALU54D alu1(
    .A(54'h1111),
    .B(54'h2),
    .DOUT(product1[53:0]),
    .CASI(caso),
    .CASO(caso0),
    .ASIGN(gnd),
    .BSIGN(gnd),
    .ACCLOAD(gnd),
    .CLK(clk),
    .CE(1'b1),
    .RESET(reset)
);
defparam alu1.AREG=1'b1;
defparam alu1.BREG=1'b0;
defparam alu1.ASIGN_REG=1'b0;
defparam alu1.BSIGN_REG=1'b0;
defparam alu1.ACCLOAD_REG=1'b0;
defparam alu1.OUT_REG=1'b0;
defparam alu1.B_ADD_SUB=1'b0;
defparam alu1.C_ADD_SUB=1'b0;
defparam alu1.ALUD_MODE=1;
defparam alu1.ALU_RESET_MODE="SYNC";

// alu2: ALUD_MODE 2, end of the chain (CASO left unconnected).
ALU54D alu2(
    .A(54'h100000000),
    .B(54'h00000f000),
    .DOUT(product2[53:0]),
    .CASI(caso0),
    .CASO(),
    .ASIGN(gnd),
    .BSIGN(gnd),
    .ACCLOAD(gnd),
    .CLK(clk),
    .CE(1'b1),
    .RESET(reset)
);
defparam alu2.AREG=1'b1;
defparam alu2.BREG=1'b1;
defparam alu2.ASIGN_REG=1'b1;
defparam alu2.BSIGN_REG=1'b1;
defparam alu2.ACCLOAD_REG=1'b1;
defparam alu2.OUT_REG=1'b0;
defparam alu2.B_ADD_SUB=1'b0;
defparam alu2.C_ADD_SUB=1'b0;
defparam alu2.ALUD_MODE=2;
defparam alu2.ALU_RESET_MODE="SYNC";
endmodule
98+
99+
`define FIRMWARE "riscv-dsp-firmware/alu54d.hex"
100+
`include "dsp-riscv.v"
101+

0 commit comments

Comments
 (0)