Skip to content

Commit e22bc78

Browse files
authored
Export streaming zipformer2 to RKNN (#1977)
1 parent da87e7f commit e22bc78

16 files changed

Lines changed: 1069 additions & 144 deletions

.github/scripts/librispeech/ASR/run_rknn.sh

Lines changed: 107 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,10 @@ log() {
1212

1313
cd egs/librispeech/ASR
1414

15-
1615
# https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed
1716
# sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
18-
function export_bilingual_zh_en() {
19-
d=exp_zh_en
17+
function export_2023_02_20() {
18+
d=exp_2023_02_20
2019

2120
mkdir $d
2221
pushd $d
@@ -70,21 +69,20 @@ function export_bilingual_zh_en() {
7069
--tokens $d/tokens.txt \
7170
$d/1.wav
7271

73-
mkdir -p /icefall/rknn-models
74-
7572
for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
76-
mkdir -p $platform
73+
dst=sherpa-onnx-$platform-streaming-zipformer-bilingual-zh-en-2023-02-20
74+
mkdir -p $dst
7775

7876
./pruned_transducer_stateless7_streaming/export_rknn.py \
7977
--in-encoder $d/encoder-epoch-99-avg-1.onnx \
8078
--in-decoder $d/decoder-epoch-99-avg-1.onnx \
8179
--in-joiner $d/joiner-epoch-99-avg-1.onnx \
82-
--out-encoder $platform/encoder.rknn \
83-
--out-decoder $platform/decoder.rknn \
84-
--out-joiner $platform/joiner.rknn \
80+
--out-encoder $dst/encoder.rknn \
81+
--out-decoder $dst/decoder.rknn \
82+
--out-joiner $dst/joiner.rknn \
8583
--target-platform $platform 2>/dev/null
8684

87-
ls -lh $platform/
85+
ls -lh $dst/
8886

8987
./pruned_transducer_stateless7_streaming/test_rknn_on_cpu_simulator.py \
9088
--encoder $d/encoder-epoch-99-avg-1.onnx \
@@ -93,19 +91,24 @@ function export_bilingual_zh_en() {
9391
--tokens $d/tokens.txt \
9492
--wav $d/0.wav
9593

96-
cp $d/tokens.txt $platform
97-
cp $d/*.wav $platform
94+
cp $d/tokens.txt $dst
95+
mkdir $dst/test_wavs
96+
cp $d/*.wav $dst/test_wavs
9897

99-
cp -av $platform /icefall/rknn-models
100-
done
98+
tar cjvf $dst.tar.bz2 $dst
99+
ls -lh $dst.tar.bz2
100+
mv $dst.tar.bz2 /icefall/
101+
ls -lh $dst/
102+
echo "---"
101103

102-
ls -lh /icefall/rknn-models
104+
rm -rf $dst
105+
done
103106
}
104107

105108
# https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t
106109
# sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16
107-
function export_bilingual_zh_en_small() {
108-
d=exp_zh_en_small
110+
function export_2023_02_16() {
111+
d=exp_2023_02_16
109112

110113
mkdir $d
111114
pushd $d
@@ -124,7 +127,6 @@ function export_bilingual_zh_en_small() {
124127

125128
popd
126129

127-
128130
./pruned_transducer_stateless7_streaming/export-onnx-zh.py \
129131
--dynamic-batch 0 \
130132
--enable-int8-quantization 0 \
@@ -163,21 +165,20 @@ function export_bilingual_zh_en_small() {
163165
--tokens $d/tokens.txt \
164166
$d/1.wav
165167

166-
mkdir -p /icefall/rknn-models-small
167-
168168
for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
169-
mkdir -p $platform
169+
dst=sherpa-onnx-$platform-streaming-zipformer-small-bilingual-zh-en-2023-02-16
170+
mkdir -p $dst
170171

171172
./pruned_transducer_stateless7_streaming/export_rknn.py \
172173
--in-encoder $d/encoder-epoch-99-avg-1.onnx \
173174
--in-decoder $d/decoder-epoch-99-avg-1.onnx \
174175
--in-joiner $d/joiner-epoch-99-avg-1.onnx \
175-
--out-encoder $platform/encoder.rknn \
176-
--out-decoder $platform/decoder.rknn \
177-
--out-joiner $platform/joiner.rknn \
176+
--out-encoder $dst/encoder.rknn \
177+
--out-decoder $dst/decoder.rknn \
178+
--out-joiner $dst/joiner.rknn \
178179
--target-platform $platform 2>/dev/null
179180

180-
ls -lh $platform/
181+
ls -lh $dst/
181182

182183
./pruned_transducer_stateless7_streaming/test_rknn_on_cpu_simulator.py \
183184
--encoder $d/encoder-epoch-99-avg-1.onnx \
@@ -186,15 +187,89 @@ function export_bilingual_zh_en_small() {
186187
--tokens $d/tokens.txt \
187188
--wav $d/0.wav
188189

189-
cp $d/tokens.txt $platform
190-
cp $d/*.wav $platform
190+
cp $d/tokens.txt $dst
191+
mkdir $dst/test_wavs
192+
cp $d/*.wav $dst/test_wavs
191193

192-
cp -av $platform /icefall/rknn-models-small
193-
done
194+
tar cjvf $dst.tar.bz2 $dst
195+
ls -lh $dst.tar.bz2
196+
mv $dst.tar.bz2 /icefall/
197+
ls -lh $dst/
198+
echo "---"
194199

195-
ls -lh /icefall/rknn-models-small
200+
rm -rf $dst
201+
done
196202
}
197203

198-
export_bilingual_zh_en_small
204+
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-2023-06-26-english
205+
function export_2023_06_26() {
  # Export the streaming zipformer transducer published at
  # Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17 to RKNN.
  #
  # Steps:
  #   1. Download the checkpoint, tokens.txt and three test waves into $d.
  #   2. Export the checkpoint to ONNX with ./zipformer/export-onnx-streaming.py.
  #   3. For each target platform, convert the ONNX models to RKNN, bundle them
  #      with tokens.txt and the test waves into
  #      sherpa-onnx-<platform>-streaming-zipformer-en-2023-06-26.tar.bz2
  #      and move the archive to /icefall/.
  #
  # Takes no arguments. Must be run from a directory containing ./zipformer/.
  local d=exp_2023_06_26
  local repo=https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17/resolve/main
  local platform
  local dst

  mkdir "$d"
  pushd "$d"

  # --fail makes curl exit non-zero on an HTTP error instead of saving the
  # server's error page under the requested file name.
  curl -fSL -O "$repo/exp/pretrained.pt"
  # The export script is invoked with --epoch 99 --avg 1, so the checkpoint
  # is stored under the matching name epoch-99.pt.
  mv pretrained.pt epoch-99.pt

  curl -fSL -O "$repo/data/lang_bpe_500/tokens.txt"

  # 0.wav used to be fetched from data/lang_bpe_500/tokens.txt, which stored a
  # text file under a .wav name. It now comes from test_wavs/ like the others.
  # NOTE(review): confirm 1089-134686-0001.wav is the intended first test wave.
  curl -fSL -o 0.wav "$repo/test_wavs/1089-134686-0001.wav"
  curl -fSL -o 1.wav "$repo/test_wavs/1221-135766-0001.wav"
  curl -fSL -o 2.wav "$repo/test_wavs/1221-135766-0002.wav"

  ls -lh

  popd

  ./zipformer/export-onnx-streaming.py \
    --dynamic-batch 0 \
    --enable-int8-quantization 0 \
    --tokens "$d/tokens.txt" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir "$d" \
    --use-ctc 0 \
    --use-transducer 1 \
    --chunk-size 32 \
    --left-context-frames 128 \
    --causal 1

  ls -lh "$d/"

  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    dst=sherpa-onnx-$platform-streaming-zipformer-en-2023-06-26
    mkdir -p "$dst"

    ./zipformer/export_rknn_transducer_streaming.py \
      --in-encoder "$d/encoder-epoch-99-avg-1-chunk-32-left-128.onnx" \
      --in-decoder "$d/decoder-epoch-99-avg-1-chunk-32-left-128.onnx" \
      --in-joiner "$d/joiner-epoch-99-avg-1-chunk-32-left-128.onnx" \
      --out-encoder "$dst/encoder.rknn" \
      --out-decoder "$dst/decoder.rknn" \
      --out-joiner "$dst/joiner.rknn" \
      --target-platform "$platform"

    ls -lh "$dst/"

    cp "$d/tokens.txt" "$dst"
    mkdir "$dst/test_wavs"
    cp "$d"/*.wav "$dst/test_wavs"

    tar cjvf "$dst.tar.bz2" "$dst"
    ls -lh "$dst.tar.bz2"
    mv "$dst.tar.bz2" /icefall/
    ls -lh "$dst/"
    echo "---"

    # ${dst:?} aborts instead of running a bare "rm -rf" if dst is ever empty.
    rm -rf -- "${dst:?}"
  done
}
199269

200-
export_bilingual_zh_en
270+
# Pick the models to export from $rknn_toolkit2_version:
#   exactly "2.1.0"             -> the 2023-02-16 and 2023-02-20 models
#   anything else (or unset)    -> the 2023-06-26 model
case "$rknn_toolkit2_version" in
  "2.1.0")
    export_2023_02_16
    export_2023_02_20
    ;;
  *)
    export_2023_06_26
    ;;
esac
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#!/usr/bin/env bash

# errexit: stop at the first failing command; xtrace: print each command
# before it runs.
set -o errexit -o xtrace

# Install the Python packages kaldi-native-fbank, soundfile and librosa.
python3 -m pip install kaldi-native-fbank soundfile librosa
6+
7+
log() {
  # Print a message prefixed with the current timestamp and the caller's
  # location, formatted as "<date> <time> (<file>:<line>:<function>) <message>".
  # This function is from espnet.
  #
  # Arguments: the message words; they are joined with spaces, and backslash
  #            escapes in them are interpreted (echo -e).
  local caller_file="${BASH_SOURCE[1]}"
  # Keep only the file name, dropping any leading directory components.
  local where="${caller_file##*/}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${where}) $*"
}
12+
13+
cd egs/multi_zh-hans/ASR
14+
15+
16+
17+
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12-chinese
18+
function export_2023_11_05() {
  # Export the streaming zipformer transducer published at
  # zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05 to RKNN.
  #
  # Steps:
  #   1. Download tokens.txt, the checkpoint and three test waves into $d.
  #   2. Export the checkpoint to ONNX with ./zipformer/export-onnx-streaming.py.
  #   3. For each target platform, convert the ONNX models to RKNN, bundle them
  #      with tokens.txt and the test waves into
  #      sherpa-onnx-<platform>-streaming-zipformer-multi-zh-hans-2023-12-12.tar.bz2
  #      and move the archive to /icefall/.
  #
  # NOTE: d, dst and platform are not declared local, so they remain set in
  # the calling shell after the function returns.
  local hf_base=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05/resolve/main
  # Remote test waves; the one at index i is saved locally as <i>.wav.
  local -a wav_names=(
    DEV_T0000000000.wav
    DEV_T0000000001.wav
    DEV_T0000000002.wav
  )
  local idx

  d=exp
  mkdir "$d"
  pushd "$d"
  curl -SL -O "$hf_base/data/lang_bpe_2000/tokens.txt"
  curl -SL -O "$hf_base/exp/pretrained.pt"
  # The export script is invoked with --epoch 99 --avg 1, so the checkpoint
  # is stored under the matching name epoch-99.pt.
  mv pretrained.pt epoch-99.pt

  for idx in "${!wav_names[@]}"; do
    curl -SL -o "$idx.wav" "$hf_base/test_wavs/${wav_names[idx]}"
  done
  ls -lh
  popd

  # Options for the ONNX export, collected in an array for readability.
  local -a export_args=(
    --dynamic-batch 0
    --enable-int8-quantization 0
    --tokens "$d/tokens.txt"
    --use-averaged-model 0
    --epoch 99
    --avg 1
    --exp-dir "$d"
    --use-ctc 0
    --use-transducer 1
    --chunk-size 32
    --left-context-frames 128
    --causal 1
  )
  ./zipformer/export-onnx-streaming.py "${export_args[@]}"

  # File-name suffix shared by the three exported ONNX models.
  local onnx_suffix=epoch-99-avg-1-chunk-32-left-128.onnx

  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    dst="sherpa-onnx-$platform-streaming-zipformer-multi-zh-hans-2023-12-12"
    mkdir -p "$dst"

    ./zipformer/export_rknn_transducer_streaming.py \
      --in-encoder "$d/encoder-$onnx_suffix" \
      --in-decoder "$d/decoder-$onnx_suffix" \
      --in-joiner "$d/joiner-$onnx_suffix" \
      --out-encoder "$dst/encoder.rknn" \
      --out-decoder "$dst/decoder.rknn" \
      --out-joiner "$dst/joiner.rknn" \
      --target-platform "$platform"

    cp "$d/tokens.txt" "$dst"
    mkdir "$dst/test_wavs"
    cp "$d"/*.wav "$dst/test_wavs"

    tar cjvf "$dst.tar.bz2" "$dst"
    ls -lh "$dst.tar.bz2"
    mv "$dst.tar.bz2" /icefall/
    ls -lh "$dst/"
    echo "---"

    # Remove the staging directory once its archive has been moved away.
    rm -rf "$dst"
  done
}
72+
73+
export_2023_11_05

0 commit comments

Comments
 (0)