From 28ee1ab82389bfb4e4000533deb2269a134bd3e9 Mon Sep 17 00:00:00 2001 From: Mr-Xiao2021 <3493602396@qq.com> Date: Tue, 2 Dec 2025 11:07:18 +0800 Subject: [PATCH 1/4] HUST_ASCEND moe submit --- ...00\346\234\257\346\212\245\345\221\212.md" | 144 ++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 "2025-Ascend-Innovation-Contest/S1/MoE/HUST_ASCEND/MoE\346\250\241\345\236\213\346\230\207\350\205\276\344\274\230\345\214\226\346\212\200\346\234\257\346\212\245\345\221\212.md" diff --git "a/2025-Ascend-Innovation-Contest/S1/MoE/HUST_ASCEND/MoE\346\250\241\345\236\213\346\230\207\350\205\276\344\274\230\345\214\226\346\212\200\346\234\257\346\212\245\345\221\212.md" "b/2025-Ascend-Innovation-Contest/S1/MoE/HUST_ASCEND/MoE\346\250\241\345\236\213\346\230\207\350\205\276\344\274\230\345\214\226\346\212\200\346\234\257\346\212\245\345\221\212.md" new file mode 100644 index 00000000..7bcedc90 --- /dev/null +++ "b/2025-Ascend-Innovation-Contest/S1/MoE/HUST_ASCEND/MoE\346\250\241\345\236\213\346\230\207\350\205\276\344\274\230\345\214\226\346\212\200\346\234\257\346\212\245\345\221\212.md" @@ -0,0 +1,144 @@ +# MoE模型昇腾迁移优化技术报告 + +## 评测结果 + +| 评测指标 | 平均得分 | +|---------|---------| +| 峰值显存得分 | 100 | +| Prefill时延得分 | 109.3774 | +| Decode时延得分 | 360.0786 | +| **总分** | **189.8187** | + +## 优化模型 + +本项目针对以下两个MoE(Mixture of Experts)模型进行了昇腾NPU适配与性能优化: + +1. **DeepSeek-MoE-16B-Chat** - 深度求索开源的MoE大模型 +2. **Qwen1.5-MoE-A2.7B-Chat** - 通义千问开源的MoE大模型 + +--- + +## 核心优化技术 + +### 1. 
MindSpore算子适配优化 + +#### 1.1 使用mint算子替代ops算子 + +将原始的`ops`算子替换为Ascend硬件亲和的`mint`算子 + +```python +def rotate_half(x): + """Rotates half the hidden dims of the input.""" + # x1 = x[..., : x.shape[-1] // 2] + # x2 = x[..., x.shape[-1] // 2 :] + x1, x2 = ops.split(x,x.shape[-1]//2,dim=-1) + return ops.cat((-x2, x1), dim=-1) +``` + + +#### 1.2 使用mint.narrow替代切片操作 + +```python + def forward(self, x, seq_len=None): + # x: [bs, num_attention_heads, seq_len, head_size] + if seq_len > self.max_seq_len_cached: + self._set_cos_sin_cache(seq_len=seq_len, dtype=x.dtype) + + return ( + # self.cos_cached[:seq_len].to(dtype=x.dtype), + # self.sin_cached[:seq_len].to(dtype=x.dtype), + ops.narrow(self.cos_cached, 0, 0, seq_len).to(dtype=x.dtype), + ops.narrow(self.sin_cached, 0, 0, seq_len).to(dtype=x.dtype), + ) +``` + +**收益**:mint.narrow避免切片操作的额外内存拷贝。 + +--- + +### 2. FlashAttention优化 + +```python +else: # prefill开启 + # 融合后,采用mindspore的融合算子,flash_attention_score + sparse_mode = 0 + if attention_mask is not None: + attention_mask = ~attention_mask + + if self.is_causal: + sparse_mode = 3 + global_attn_mask = ops.ones(2048, 2048, dtype=mindspore.bool_).triu(diagonal=1) + attn_output = mindspore.ops.flash_attention_score(query_states, key_states, value_states, +head_num=self.num_heads, input_layout='BNSD', real_shift = None,padding_mask = None, attn_mask=global_attn_mask,scalar_value=1/math.sqrt(self.head_dim), keep_prob=1-self.attention_dropout,pre_tokens = 2147483647, next_tokens = 2147483647, inner_precise = 0,drop_mask = None, prefix = None, actual_seq_qlen = None, actual_seq_kvlen = None,sparse_mode=sparse_mode) +``` + + + +--- + +### 3. 
MoE路由与专家计算优化 + +#### 3.1 Qwen2-MoE: decode优化 + +```python +if routing_weights.shape[0] == 1: + # 遍历激活的 top-k 专家 + final_hidden_states = ops.zeros((batch_size * sequence_length, hidden_dim), dtype=mindspore.float32) + flat_topk_idx = selected_experts.view(-1) + # idt = ops.zeros(1,dtype = mindspore.int64) + for i in range(self.top_k): + expert_idx = flat_topk_idx[i].item() + weight = routing_weights[0, i].to(mindspore.float32) # no item, no precision loss + expert_layer = self.experts[expert_idx] + final_hidden_states += expert_layer(hidden_states).to(mindspore.float32).mul(weight) + final_hidden_states = final_hidden_states.to(hidden_states.dtype) +``` + +#### 3.2 DeepSeek-MoE: decode优化 + +**Decode阶段** + +```python +@no_grad() + def moe_infer_decode(self, x, flat_expert_indices, flat_expert_weights): + expert_cache = ops.zeros_like(x) + for i in range(self.num_experts_per_tok): + expert_id = flat_expert_indices[i].item() + weight = flat_expert_weights[i].item() + expert = self.experts[expert_id] + expert_out = expert(x) + expert_cache += expert_out * weight + return expert_cache +``` + + + +--- + +## 优化效果分析 + +### Prefill阶段优化 + +| 优化项 | 技术手段 | 预估收益 | +|-------|---------|---------| +| FlashAttention | 硬件加速 | 40-60% | +| mint算子替换 | 底层优化 | 10-20% | + + +### Decode阶段优化 + +| 优化项 | 技术手段 | 预估收益 | +|-------|---------|---------| +| 分场景MoE策略 | 减少padding | 40-50% | + + + + +--- + +## 关键技术总结 + +1. **算子层优化**:将ops算子替换为mint算子,充分使能昇腾NPU加速计算。 +2. **注意力优化**:集成FlashAttention,提升prefill阶段的推理性能。 +3. 
**MoE优化**:针对decode场景进行优化。 + From 274d052fd1011828bffa12a5be563dac448b6f63 Mon Sep 17 00:00:00 2001 From: Mr-Xiao2021 <94785565+Mr-Xiao2021@users.noreply.github.com> Date: Tue, 2 Dec 2025 11:12:46 +0800 Subject: [PATCH 2/4] add patches.zip add patches.zip --- .../S1/MoE/HUST_ASCEND/patches.zip | Bin 0 -> 46298 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 2025-Ascend-Innovation-Contest/S1/MoE/HUST_ASCEND/patches.zip diff --git a/2025-Ascend-Innovation-Contest/S1/MoE/HUST_ASCEND/patches.zip b/2025-Ascend-Innovation-Contest/S1/MoE/HUST_ASCEND/patches.zip new file mode 100644 index 0000000000000000000000000000000000000000..65d236f0014581a7ec5f8e8030088f39ff3d5dc2 GIT binary patch literal 46298 zcma&OV~nRkx3~Lm+qP}np0;h>DT#!kUG$&R;*bx*J>tBx2Tz zxS6aUIt!Th>^IiJQ1B!5P|vM^etEV^?#LBgJwVtqLn|2?D!%aPSjcOqC~ zpc(ZJ)?$iur4^znsD}B{Vv6E%)8;(dXk&h@Yujoz)|lHAr*Z&nSB~2c%YVmnY$4z0 zhQ8t>go~-gGPz(kc`SfA0rvT}87AQOh8E68zSwIx?M#|sh8YT8Vj0VVZ%93|>HlY4 zXyh)F(6Dk((c}anZw13a7xOyxu8oxtk%CINt;8l|9?}K}nY=C@%G`)?$vPr3au+nT z#!?Y^)Bv+m6d2DODoP_G&QvHp-1CniQBP}~DS^Fg`vKq7X^k{4Gt+eX6>W9=OrS=} zX~%Nf`*LZy;#1Y#1vG4`2Z=ZM;C3l;2o)NzUoEUXr(Zm>CfhcU@d;eq1nN@=_($@P z;0wBt_%Jv!P_#4{ekfh16(7^%ey@m>aW&S*w%GA7u--&h(nPV9`cao);^{!NYWd&1 z2x8%4tfp>#txe>pFhR)>SEeB46`W7MO`eRDwpM=AE^%zIswow9;KenS&Af!{XPGo? z@%RM@z1gGtQ~6c`26?aL;Hr~Lg4nO3LP*PBQF(T9#UzK(6U|gDA-GE+t1+TdCArZ} zLQ}zAd(yd>&DP3NDz`e#&K+r4=)l3ybIW@f_PVhhW~^3V>)h_dLGdX;B~jupyupi< z>Qwp*tR7i~3u=Taf*2s1PUaVeD!mf-L=wG17YPrnbF!MG$ArJmB3UxtMeTue5$;4F zTZz0tgK9Os1K#EL)pyO}t%c*(G>WbLZ~A zF}QtW8-+_Zv9cVD{7P?>-H9aL_R2DY#P0n1^%b;6nlclDGf>`G2|u4mxKCQ&ST< zR|h&1XF3NbQ!`5&oB!QGTxsarZ;m1Pua;u2Ar#CUL>7FnF<-FH4BG(@m0R=BLH84? 
z9ZhVojGMMiYV_y@(s&6HT+gwdi3C2M!&5?|+HQmuNCtjzIy${9)y`=~CP->FnCQ=k z4bp)|9?g8ZnYo{EOm4@C0a|QVak5v;yeo}gw5qwS*R-ovYu(S(a@}-RY+k&z`Rrlf zMIlFNgWkzCR@~k}HXAOAeSfcq60(Q2n6B=7$C#DQw_J*s`rfRw97OJY6ho-g+j7bc zP`m6!c*I7!!CX{dg?8I68xC$XR4q6zd<3*x;dF#`Qrrj(1zJ5#E5{=lhrA zu_xyw5ZVll(P5FO4tTf>3TZ)sWCzRWnuvG!WRKMk0=M8HYXiZ~VXB3mrrXOmsXri2 z8WAssenuyR#_49HDNY#u@MWB4sR>v?f8W+#Q$5HZ4K?8Kx{@nxl!CPN_{v$n&9l?I zGEIS`)6AXtlzP zz?gqujQ@8K3kD80mrzL+@@x)jJ*04m3&#ConJJHfapQP~|B0;9=C21 zG}>qMJFLYS&_o%~4!ISf5uz{ToPX>%@Skwx+z~{bU&9B|G6AP!n4r&|v`l!9(S=TL zMEBS+43nUK6^Evvr;9S6G0dgpcoaf{#@|HJA@9JtZGsa(!=aM7@^Sl!r@7v=Dz}2a zmtOWr<$@r=Rf3APg8MC|6exW##8OS$_Apa777Imx#m06JL(T6<^1P$udhzyw@ynn} zz~{z#5EjwU&uKM%!5#ur&Y(OwTA{km?k7<7HQYVwjB=4!b4T^S*%>g3Ga)3`yK&x| z;rM+=u`OUY3aX;7CqtVhlQPB5f`}iPp1~U-6?BRhkDX%4TCqbGj~LYJR0}>`*JP@K zrao8mTPq9Gu2gVbsdZ`~UB?PgW4W)&TmLd>hs4w;m!9zvIJLMQsREjv7E;s)MOTyK$GI5HmH=Iq=kj_+}{-b9Fyz; zyKY40uIt(@)a6$aW2C_{R|bl7iiO<~+p0as1@AUb3=OfAF!t!dKpImXwA%Y*9(8MI z#zrIb5-7QD9;-rzYvo4Yl`6^$0o{jJkpUaM_$cI&UJFitv`nt6?Lb15Sn=$^QWLeo z_}>o|t~T|4I&3%3Ez^eA`=GnVzywHM%|jRG!Vjh1`0J$bZJ@XKVNP3q0rQbM=IkWb z*Y_y-9ME1v@l@% zy7NUNxN;DScq2L)Nhq>o&rP_`^{d$+y)rXjICg5 zEQlP)kYj(Lx%C0>K*007!=FbV>zp}lIis;DvX^$T;Cq1UbEq2(L!zi%3Wa5jH$<0G z>K0ssy7PeWb(d6}8kyi%z;}YxVOeI$CrM%OXrIvwut8%cdt$%8?{rn2(K9;rB(FOm zSDjK?>=~QZY?+vjCV=HqzZL6+wdVT0J#+YbJ-i>b`hR_1jrxP18=0El`S*NYUaT_I zeBUv9yOo_9g6n-AAUbmID=bsG8DQ=A&txe~OB_aD6j|rr zbB?=;CBLo@LGDl`uxz)A_cyBVgnk<^mF9j4|O?tR5eZdo&ZDJrIm zWu=E&n$Kib#hzqiyPm9CQ`PpXxRIGn&fi~2MYbshsTYNvDGBJ77IfyzIT(POo1RQl z)%2MwA4Z1!YE=!lt=K&_5YE}Z1}~68jzom9bP|_qg;Jb(ltniP9Xc7?4pKUWcLFJ& zNhNSRJ~K>FnHIwa6(GkuWMArlq@6BF^& z=j1S&Y~v6FqjTNLyPBQIu5fd99ssR$X`RXzW%Cfe3-60Y1>mq$y+*E76pdDVNdG&TjN| z7(*FR+spit1F+^t>?ubo4a@JvBbEn65m!z1Md=;bn+cZ$%7IDzZw!xF-Y?xz5M-;1 z<0hl%FOJuq+s?Cx*P2-k)BNS`R@An*%BiWFF~=kKTSgyiTSndD=k+1QA$FZDQzx5E z;-}*kmhlf$#7|^Om27YRMT(WnFr@(5#K)@(xx$sQ#NO;o2-xP4t@>ee2E8O6vmc6( z#3fT%r~FgZF=8%jyZ*ir;PP1xjEm_vF=l1$nq}zI?&rT%hwS4U?S~k0U4m9m>+cKP 
z=TMSu8i%hm^lq!|Wz=k|c_UBXSuIYy}4RjSW)GW9qkFuZorX4nL zk`A9)tmrr<m6AiqRQe2JH#wmkqC2cFwaAa*L2SN1@ zVAZclB zigXx~qh^dKpiuE%)h(O?0@?2+m{?R*3JT43sVbE?8PaE$E)R=2@HGZ?w)7rZi9u#% zHV{=xxD7ftGjt5}O`8^kpk^l4E4MD)fXK{@na0@!t7;_@mM3WJsm57!v!))?4f=Bs zwo&1a$t|KKB43KG8>=tKvDi`T3_8N%wYO|YRNfeC=*f0t8fdE|1 z*XVU0ur?Ha&OC0bu$&d%>q{Hctzo=~NUOHDzk^o~JrTInU}XM6)w^GX-2j`rmbN&e#jDJbYl|_MqEb z=zZ*h&m`T)K3+JhCi*pe5K%mb-|$?GUK?l3nSo6-2EF)z&7LAosK^4Wj7mzfu=8zb zeX0&4#a4_goTgdop+}ubXEfDAt4P$4!XrpTQ#b}C1tjQzNO~m&I|B(oQXLJ}^sjnB z7WQS>l9YoguR7AsB}eVjvTMey(}UeWfh?M!)F?+$O#Y|kM6`a2boN`=V+&e*sAR5R zoQ1ah#ZSYK9a*(pggClmXC}X(BsGGE3fQj-8JQ&g;@azYr5!hig9Q?78KI`l;PqVv zLue@aq7!@jVf}adNnYq2K-!S|ZK-{*wVoUfTv2eFNS0`535nOtV?{U!yPC$d5@K}6 z*1A>2Vsf7?orfvAr)8B?phr7doa;YXX*4aPj$_T|*$~H{q`yF;^Ag1qX1m z)ee{;wo>&%!oh98l$nsd7g|u%rS^uLV11l*UIjhxhKGN*ngR|#c89c_GkCvixN5p{ zwA^nYr+~?g^3{l9SFeMEM+kQkimDJD*`=N+c(X{9@E2AyrDpKtPg%M$hm@mrOzP90 zvd{-9_S`oaj2oXnk%Ftk^~Eg-Bjwo02~tz8_LmU|p6U;OcV3a`NR%=a>B_B!&Z zms$AzI|nrz3&(C2$jbV!CL%agPR-ss8#nf3n35($Q91w>Gpb@wDFZV2p-3<%1%|>V zd=yl$sYR~f)H@wBKu*Ie@VrBl$WA_3XA&r`P4>!fxe;(w5Y_?`3jrIZdB(7vBWdZ1 zwPnzqa#O^DBx<3o3<5G_F(3$B-{V_21&Vx~SCk?eD~u;@CVA(%+lz%G)&`rCHu7o+N!Hs3WmRl-Khwr}7e|`E#UaXHF+(I*`_(_fp;%GO)1w0%$n|M%JCSLF}bY5vX3!LtaQo%1W0hWduA*e#zahmMWn zqu%ZVii#=6P%j0}3L(XKxMF`^C|DNBVftrBc?34G+OBF4q()WI8;!XmND2S83F8bE zn8rSEzBE;bX_=Edp{3H)w88n2rMJgv1Lo^lf9^}|&-cFi?k!yM7s55}*2nPe-%fg zlES74K`mCMN#;Kro03O+JE?>A{CIv&p0C#wn$h7QcBU$75t7Ge{H#%;BgkHRP4A76 z;%$#zqEtiggVocp+cG5>NYmW0ijoWI2H_+$eiap{uM&`LYQem_>r1r5^%@d|UJA<& z(sCc|2APbo;0nbZAc}O!6I_~KTJu}LE&XVV)iG`0P7A)jCAsXzjQfKWq>CNPg}AaW zh%1V;9vDqe$;fZ?VrhL>IIaSL57O|mpy|ccs4#UF^NR0ny4@bn>gP4~GuE{i3E>DI z9PX*-B=BlKaBFu<+1G*(1Rf{(e=5=|<_OPApTV6$EcAwofu(YyaQ8iYX@_yuu?$3T z97U`8D7FT5I@}#@y1#Zmy?zpD;_S8~4Ei^?GtagGoO-@)ga(3C1)^kRDqLq?y=`2d zVY4sxy&t!gKZuH81KWk0x%gY1UH%_m61(4pgeXLjH=(?K5AV0xlXv}bJs+?0H#HG| z#+}4P^sNv7Mk9-+ESwE%&!n*Ou0V6+TejFxAC zc4O&s^7Ob8>8~+({P<80+vVeY-HVG8_;T<#-3%7+ZTuPk+V9oD`QC5b&vp+>{dxNM 
zus2_*6vW@6ovTv9GCE^#BwA_Fa;#a5b6%(!60IFBwb|zQiZDh-NfUs6;c{~B36+@% z=e-H$8I*nzN#^Z(S-I`^7MA~aYWTaJpAK7VL|R&Zp3M#ZVkO_MH$ZTaf~M~?2zWm3 z0U6--`10A`P;)BO)>9I2q?+8$PPw87OKQ0!RurRdWT67Z-Axhr_`qD6<34;3QM}1( z+tO-dQHTSp%Oudoh(I^~+@wDHe~j)2?-G38(;vOX)%djgpEvp#?9P1oB$%@u*=tFy z3|%2T#7Z1xnrnep(E7KUs&76x^AjV!u!Uv%CPwe_!^YJ-`s@M5>@sHg8jj;(vJ@LK z2XFP9Ho1Lev<$fTnK{L)niNX;u>})e%$O1i6P6l(_^W3soWDO{74Eg#mp+{}iT!_dMW>u?W$`C5=h3Ld>~PaRN0^O@750a0C8;n(#4;403{w}s^g{(XV}k?L-5Bzq+$_HIq`1gX5bAN3j;L)c5GHvX zXt0uZjC2&3-;<%%raxVQn|z7F2UG>Kn0q$Jb}DGGof9)8yQRhnUTf7QA2Mdn2tRuu znUXb^$mnpW^R{Cn#$6b!A)Q*Cuozor3>|eN~Rem*^KIo ze5t8mf~dt%X^UR?&O~sy0t=_Q;MA>ha+tZJ#LkM1D7fL`sTv?1BA6eBCmy<)Awe1o zF;OHZu{f~G?0M4mGh;qj=^l2o8OQI-0t~Y|jBC4OR+Rd=&ObXj z9mq<8pR{gU|m*vnQVlsz zgEYr;AY7*Gu-3YGdGQoX=N%7le;Dtvg{Dy;?CBd9rcK(KijBXJg_s7+jF>>GWQE{C zNnIMxh&-L z_dRv;Ru+&Lw~3=ikTuBlP1=%Q##{>WM0+AwU9ej^JeIDC%#HwBUMxs$TnY75pB%YN z?Ovv;Gqu;{0fT(a7z5u!{a2|jRn1YYm_GZsT2>lbU{r2F(KrMdG_>R)BP1fC6B)h^WfQ(tf(`mJNJXr96}iS~mNln3LpYB#31`aN z>L(#6tONqcQgb9P8EMXQ&o{Swu&W6?|5CvoTqlA*2Ywd8y?_F4?rTg4L=Z%h#Gp32{G|;N?47_^0!kB2IAOH=CyrtSVL}HUk|Z9N zRofFsU{vF9!=*;Y$^BxpRd#Qb!F{L>5#|^+;uvAtza44Y2U&YqLS+zCBv}xxF^O0M z*}=PY`|J{x$F~%#e&?=bZmzSxJC5vqTA6ziphl^l)T{jf&=+_r}bMX>4ITqmB z(Ro)gfqy{aBrpDKI0GhVC<$!)Sdg&xyelcA%Olv6rgyb#IBdvk7OYq@KE5Pt(H2X- z>J1p_>@zCOv$R77EO)#m8W-a^BWD)H5c&IxxEnbZb0lqW zfsDsA{5O&3VS);bZ>f&e(m8#*!ndK5dq=oqM_Ky#&+ohWXxTFq|Dc`Q!RO$gHoQ$q zhIRN?P~YJ4liooNC}@hirtX{MF1wc#yP0Xe`;0&65I~9U&pH0^isT;f`9U}U(&R4_ zB1ON~@hcl%v7Bpn*bRC9m0DHqs8RHd(UVhCPtP2-jmf8wyp;ZgV=9k_s4NPCIMunW zh58BN^nL+p1xTd3?I$S$=!S;`!?AdBFK#m>Vh3S#aF>?y>i6w@K3UcDz$jSgY5*)k z&v0>jn*}l>(r-&^oJfl&ss8ZfKeECFNXL*d)>@f$d4xA7=9@B`XNCkFtspxiP`xnQ zeL6(rw!cXn1qP>6Mp~qoy~yIEXz`4~JGjlP`6YP4kV}DLnO8p-28@b(WXw{~Q>dL` zG`9&5{6I;0K<)}@LIOm?W2%))N-K+|8TEMDj61chcr&QN^LI7uQ(7Uch;kD8(NOvvGnsV#-tafG6|2vLEhdismVFM1xz4 zJG9p+qGTX8=1^!tL(L|afxFJrsJyI2+Ge^@!6c|LncEimIYWg}A8$<7!M#r^&{5bA zC&%JM+)bt@(L*m=T(S%`a;I5_5Vm!%H=khFbNxC1*Z%8e^f&cl9zkp)^M}B(^5zw!ip_P`9hJGE_ 
z9zR?{xtD?g(=Gh1eY>keu+zrv^WpV*VnPxJB+vMOd9;Y>1uffCehT=a9LW)B&FaC* zO@9}Sl5cv8u77(iDu$F{|9DT_{91B(km>z*o6R`v34eE!Ayn63Z7x$*J;**K(8 zhu`_(MaA!p-;+nM^JV=!00-RW)+rI24a;kYY0n*8LG3Qu{?U2_2=q%()$t|SOB8ns zA%mP2gyD3omvjsa@1xTc{RTTu!ft*$hDQ1+DXq56=m1VMv(}SZJ_DB%qWP|NS#T5u zLGrFiidYXb?Jsy}bIZMtp&haNq^wSkJSnJ{I|pdkq2=;;f1A`BvCKs;E_P4?y{DBH z*(###K7e|iU`my}shQE|BxEh-b2p>abgB8a%yQT>Y}8n!9!3U;X^N||z_iwHRC~D& z8F~Kdw>`0c2w01G|6KUI-#YkI3c*}MPkIq$fi*kSvywRU-EJ;D0vt$u{9?*FPkk-nP2ET19x8_$z{Ce2+up$mWyu_Vvyn5L^$zX=-+FLbH{+nYoY??c~(IFupji?*bzsNT2 zA$LEGJ4|ZI)GMA|MZ5|@68S;LHA*_pl$9EsY?02{XDBV@8Z<8$F3tce%Siy*5HA3E z#E|SDpfMpC!1KjqOX}X29??wutvO zwjs0T-i0Vzcdb-;(6?9J5Nc4Zvz_QI&+`&T#Za~_4QWed!Y%V-8Y%W}mRpWWK7(K~JU59zEFaaM95qr5U8O241 zQ4WHpQGRWFYIAD7W1~-m1dWMSk8|*;3m5_|J!}uWslh#k`A>Ktw3tpKgaZJuMgsse z{|OIR>6}b$?cGf2{!fh*or$HBsj-W_ljr{*cJSrADel($w|0&AkjN4VQOY+%d1tu( zc!(wCa74~dNi0|sSsDoma_%*L=#2HS31hQ;BVyC1J@v2dwO8uVM$}b$Ne*dCqCiQB z$xo7h{>>He-a#<2X=(b*v>`H#Mwjo+)#gk4+A-DYIdb7NwT)rULi;c4c?-1^bTbU= zYPDn~cJC@SZCe6s`k7^PDosZlr<^BA*FP%MuHOXZs?^Kn$^j*E9W@=gl}i;z_U5Ot z)ATAfd%lvG`*t#Zo&G*+8iYMba! 
z+|*CLW*;TSm3oto=_CDPv*WxD)n>JzIT;ts0uHvzj?=D-@E;Savr?{pe?)byi)^*} zv?@ieT+RA0_lx^!Lm#^fv@($8qh0WV|5z0_Fmkw4j*JqG9eY?!FmBrJIc+~eWpgFv zWrP9eB5z^oz%$1wm)O?XF0h{IxZ2fJTPEKQwDXY4D=#iCnv57^vbY&&D$nvuFIi!g zI(I15#GB`4D=8vLHLEgNT&2}FEmhGirrZ~nDzNGJWedj<%iL@y&9n^!8)TAk^DL%@$ zLqd&6`93I_ja7rEl^i%n@rDhJwQxmU{+JVM^+t}aR+>F~dxIo;enFs&49pb-oOy?q z_?6H{I(sMI=#vx%r59gKd^))>-@;QUgHIz(;JOgT2n?Z{{n~5C>DnNL#Kjg6Nu3Pl zm`i3DaT>&s#K7vj^W)CVi=b~otGUc9Lt>&C%r~Fi3kl)TQj1r{jnbwyk6miJ-qNwh z9uEy=pmMBt5IBDGOjVNSUoi9F^%I;a*+QYt=#I_Uo@*r#!99WCW++$Pja9zVEBEu3 zod(=EG|P5PH{Ve!m2@qd#j_ffF#fVjGo0Jkp+~_|U#r$SiPc5=K2$C{*boddIOJZc zg#}5_X(Y%DD9~w0p>PcGGi#!oJ`m6qI}qLqASE+6kKGpBi`kU|z3D8y>H2O_jYuA) z$;cwnJeE|HZ)I{Fg{0kvD4??{*{cyS4ukM=u$=`Gb2Yh5+Y@HYdu9t)z?>bcP8OUl zhwb9(Q^9DJTl!{vvB=q(=nMN*1G6J=FUBkP5t5EZLRVcbJLoXbD1v1j5jp^ezo`FH zvKBy{Z;ox1z60{sw$zlan6wVpr?t8CfKys_%#?LEup2m=MHfJloqSQg}TI`pOq-s(Gp{nwEv!6l3b@YO~I_| zm1i0ZERb~Wa0K#qTYTu4BHBtio)r-1jU56nC^luAdbYe|A$DcpaR9~3?>rLU_%gTj z(|K1&3Aly~BQps{yeKNy0uSF=bKPQ@q!eRg(a{n0Y``bTtMr1QS}mT&9y%Wi-O6gx z_PwMD^7Gau zMwfVcXDV@h-P;j3wqo@8FP1)mG{5@ltQMiYtlf9(XZY`J-)X;TH4}?`Z*pO;bVW4v z`b90SM`}i97aKj1b1x`T?ULPyELXLNp&@T6IJmGRBxF5_xCxzNu%OiaGkk=OH|9YY z*$y$H_2Y(#U`dOzRY~5H{xD%iLIAM!d{OjBuFC6>GBmeo`5%PT5JV{^OE;M|Q*ed+ zLOG3m^;045<%jL&B(VV+1?05Ew_oWVLX9x7xFY@{icX=#55JajZ%G05_o4>XIcB>s zwEVj~R4O6)G;~XgI|6hAzzrZLn^#1a@gk2i_araliYEo@GivokK-RWCwCJ&275L{B z60l!mFc*K&p2psUBTOivEYHy!SHEH*j?`A23@}Y1Q({wWTV>dRrwr&Y=Q+zH-C^@j}sg1hM-JmTRoGHCs3iNoo z->moxbUe=QNVu~Mh4=-GbXch4QmyvG&i~epq9B?b6FUGEb8LG5%`5TAxXBrxS)}&@ zq;_3SKAKE(+mbM#xGTkWNo`q?AEz{=7h-b8yQ6DW3ghU=@u5k0E|B^vGe{P5VH3_ zAjILSy8j;t`S^4X{s%&uK4*(LgtQ6#ACGsIe}X>NDo+U!M38FAf1~MYuS-ti# zcN<3QZ^Aii`%;kCRjzmY>G2c)=`RaI%MXi6!Mk!DMNJM@VN5AR)TAuMl98v@3#gFh zz;tHa zs1|v1K|wf;>&Szcb)sW*K;-I8loL(&CE~-i4e;N+yz`R|+R!d$AR(o1TxR__vLw~J zysi8ssaPa+r4G-shD8ztFAjTN-4)lZ9(WNaElx5YO3itLGudEtX_n=Q`NC0nL73U8 z?JQ7h(DbMzj0B=*0?3{Nz0-y6FYlYHuai~pTpdrBzpgq?O;cmK!j z(KtVM*+*G4VRA1+jNV#QqH9!%O=qf7%!7UfmvJWanj^;{&~5gI6=LXc{hI@GJ^)s` 
z1u~aTj~Omd$)20?Z2Irk3Z!rY@80i+y?gCoZGCgl0SYpQCkG@1j zu?R$yG%!AFop#=~=X?WA#*eSB%HSLPOaI%^(Of^ywuh_l+@AH1qwhWa8o2M<^}U(S z;K`r&udmj|b`4f?9bZQ8VPw$_x)WhhJ64mC#o7m_rlF$RiNscP?7>&Lh;rO{xK+7( z=b;SoneeXLV4h&<7vuK-Vj+9~Sct}dv5>a^VIgj9FVE(ApPKErn$91dqM-Tx|HDG| zaJzi@+#P8<6uOl739aRrWyfZ`&_l)?Xv46FQFpp*fNPmR)_z{m%N7DbAYBQ$LYv^F z+ouCXqzDtm6Um&;fUdqiio18e9dGwuK6<-g&;|-=^sjZ_Z9rYho(+*F8VTZTVquQ3Sr2>4(OF)&EO?{Y4! z+MHzs7T?PPm%guJ8vHw5d~J8-4iJ0f_`6-udd9ccwjNvl4m7(AL&mnFgM5;tLF*zP zoNw3^u3aNE#zmndOTo6Sm@XP8{xYK@vkxTY2NUw{_QXO+4EQ!di$++Am_($y1%Z(HKq8ixKQOt7mr}?r-lN?@B6vtwc(REyI@o(hpFXrb!BoFJ$cW z0HVLTQ!L>HppGw2c6f5(YIg9tyiey_Z{TWtP$u4T?fKsyW&afc!@D%8rK{;dpMLQ& z=zL5xc_>b}Sd8YiJ(}#fS7kr;B~G?0_8ywgoSnwi(gx>?m&7Bq1!clKp)mCLJPT)E z9$DXBPs#N_$L4)aJiNE3rpRZ5*8Ta#J4+SYXcjVW?=dK-$O zNJWq(Z;nP*O5c|jn;2_HSN1V5#FKlpewI3Uz?6_1QmoDCi{1g)a*PrcEu+~?A~3R{ zHx}ROpmmrG5d8#HRb}xr>@nx!#goO2&1>dip>JDv@=}s?Z@c`YU9^p%AqAxs#~U@y z8mm&p#A=d+G9mq%Cks{?6Zj49n;-GfIhT0Gja~ym`e3somJd`^&%G7qvEhK;JO$a}}2NsSoYkZT|wg#~u?82osP#>Gm zP!I~OSf~)|NaM3v;ZY2`jvY+B;Yx&HGIq@=E=pe{t~o#y@`pA{48T`bh^QgQP!E(w zGldyC_yrv?zX8}|gI-LaRWIyD!!wV%k^IZkFP|Lj!825>(+7t$+BwChy#!8yfl2-< z{i!A6druvbudvx?A5a$0ddsuNl+At)Y;J}Caq$Qo)ln4XeYVAaw3fLCU~`&q;mf5# zbffPA+<_L@2$2Yg*kYW~Vh4RX3@Ro)%nP$p%)1G?Xt(`MyU`|L$fY}9%X z7PK9B(3!)GSmmemDNsqKYY}REB8$JnerCw(?d)}OvLW_amR*HMDPK02)%8yhS&yk@ zQ&MvAQ_k^4J%Fo%&Ps5by_6n-ffun0&PZkk**`lLcufUbE}(kIGA;;r1qZ-xb9Rd6 z%g)vV=W6;cIGyA8f?*-??n{OZcn#O%7Vchk9^GIa{oWDJWxUWLH#jo-zRrBlWLOGjkkh-Bx<`8za9D^RM+bY69%=giPQ>J4H^4x z5%YO!+BZ9J$ZuyT}(nLH#3U94V9zmRQoULve_twaBkddN@A&4TcP)yhkFxRWQRh@TJiXS8ry-M#X zcU_;#TSbQ4{FPRjSK_Q830aJX;0Qli;QJ^@Z13&M#1?ih_b&cj@(zAOY}T@{1D}wydhc|X!5uKlvf9+PQJ5kU84D5mqxn9W z(Q>76+E?!xZuchjx6)I88V~9H`OfbjDRc*%L1KP5LKsdh6P&>zZ1VA4bN2RFKp}E) z#&EGX13rZ$K;XS`_Z%buITo|uq(M`=x&nR@UqYQ{rhjG$=_RHKqf?1lxrB&VB+Usj?I;~(Ot3fB8;J`ZbKStWGS^;*v5+ytvq2N2>pK|y7? 
zIlw^Z8jVq-6EG&&KeWOzVnb5TiA0zt?_!%PgzK=hIlm(?VX$x&HVSHx?1HY+Li=r+ zxXTWVIJ;gqGR_N;$O1?aKcz5rZ8UxXK`D=qYlrIoLMGllw+)C9LZ7{9No(KfmwoML z_aI{6SLlo#7fUdPk~$Cz&6iEIMO(Jwij$`mGm)`{LFtaC$l-|Cgp`t+>O1%pZtoB+ z!8pa3Y7;~m!6RsQ`e3liooQy191kPB$N&PvpvXmm~FJ^T}TTGG+ei1xD;^a zfn+R`Zew$?H+kCwnz99ZtB#wbqh%8I-h(&6x$%<>9)dIi$4ENj92YOqE!_?Gb{PW- zb3A9!)&ZIZ!@H$MF`_k%qlXPMTT=$Lyh5`Cz1HxT`!%1A=P{{>%bjF6OPV`{Hq|iH zG07;@hH9A{hCrS9c=SApx8xSX`(S4_K5up4krsSFb;ZrTg5We1{{iJit|&nw>AGw4ymAMGtMRyB!!&Js!a}|z&6sNwja%ta|aQY7ofk_x(X)v zm_&p@!61jQdg3$Yr%dWl9s~C^6mI|A+s@zS=z9D7B>ESWnn5T|*$e8ie2G8A{P)4B z;1_2_u1H&wWpZg5x86^pcQSkf!1Gy&bp%J~iK@!LXuva6!IxNCG7+^!0nZk8xr*&b zvp>eYr`P}LE}7=z#vdOB5Epsd`q9dd?c)dnctwal7x5b;z{H^si<;&3v1}KjUcmX} z1Hpy94m_^ZPtT@}sA*i-be*(Rl0FSSxS$z<20uZwPHrz|{x338d(N2xpLm)fG|za> zJtIouxWIcZ8_I{|d#!+C1rQ$yeQAi#eePr$ap!Gbl@#+)jMr#F;N^gx&F1rOP;kQ< z8~K}Niwo?PT}rl72<{gYDLSo~pz#{>V|!!ezWF=}rZ*cRhbY7Iw!&1IRJZtJH68dp_V@;^M(By#@qp5yEps{>qrJ{NVjA9Q)Db zPrF>7K<($4WC(@b@gc#={|6GRF)yvf*_!pl=UeXsXBMHez{O2sBvoL0XiTP<&Zo^a z`vhT&7;*>p`nt&hsE#!dI4aK|9{_#{hd+XmlMYW;%YDBvKrElDpYeMC zd(6fc(Ngv7ngnlfd_Psq+Sk?WTH0Dzy^DSAFq-DEEVhxJ6W7WjNWPm5Yt;of2YVBg zmtv0#NFHo&9qoAQGbn~_ZK&A&AmpI+;||h2u>iigk;+S?arJbk9d_+XC+XsekI}mp zuc9ChAYO?M*#xGC>eUQGEW^}4oBu~TYD`HARrUHqwc%2ltlvH@l^@vjE^kqy>b_#l zOcnjNP<^@iog13%1EV(CIQc`cDn3P~M`!iFCyx?Ct$t2_!wMdefPN9ohbPIMlK;ZW zk3T1JT2q1hC52zmG|dWhUwU@x133#9C_W(f9EO(pnZF}K;a`{E%nwIjQ2g9QtgC8> zeFQ5-VkanOdj(OVz*%G2Mg|@hPhW_-D#a@hiQ_3Qc`|Fj6@wQ=rds+hplj>fCHFbhRthGcR2?0 z&elQQ>8~1Uq$@H|iKk0E$jv~&%h%aD5?z!gh z@WSGXfd$j$(xg_W(52qeN24_aJP#+TA3FurHW%gVs_gPMzc1M9kx+2%82;HqMuSYu zun5OAObEV_K)wK^km#ln1YaxuuJ`|o(xFoXA~S*m00{qnL%{PNl#Yp!&c(@-&dlD4 z&e7e}j?To?*xtmH&d}J{)W+1w(BXC3f2VfK8uIpsqNu$e%Dl~C;Ry8<8bpqJlXW3q=w0>M zOHx@)E+L?W74_yjKQG?)Wn0hoZQC|SY}>YN+ZeHJ+qP}n8L^X- zweG_`bKi2y+;9D{zx=H>)b_P7@TXrR#ep_le?NKl-?eA9x9&G$dJJ8kfGl@fXzCB# z{9)jLLj^Gdw?HP&jeU33lC80GQ*njFFDC1-v9>K;pEf#@7q)l0JUuKt2t9UQQpwi? 
z1@qHu!Ez!>Mo%yOwyv)(pDuCC)I5`ov5Nz1N=rQK9{z3@#9jwlTW@R`%8da7#rK}2 zI{Ymo^MV|_HxoljgftRE4|Unc5~)~X^vjMXhb`MwQ7XS;M*xH>!;sB&;07}j{Iw>v zuo)xgMBdUwI1&v!MTU;o-cTkm>uBbh%Yak0M&AoqB^$s-=q2_^y87PRS{QB+FriTp z!{m7?B6c6HvPJKO3Os88lLl1ZHUHH&P7-LJliinVt=ZAhY?_``y>j;A2^%uIe&yt( zw=Bjw8|*xPvmLxF)3Fu|`nB`R>(LxQaNJT1c_?)uEVC^$Mw>*2{ANQMzAQrVC}B#IQtP z3b7Ed9bTMB^j`*LqM!(Hd21TSvr zwiQ8kWx@+W+l>jJH~reVKB1R-Ef{LWYmZspzO&cU1z+0ChrzeBh)!2dPtO_6Akq71 zCESyFK{G6Szxd${6T~9m@Iv~z%Is};;e9YoP!w^S0OhwwlTxu-esybi3dk3q_1rzjkZ@eoK29{0tkorVhPD!m6-;N43&HuVlG?T+_OlT!|8)V=jU zSroywz@2Haprgp5pG)AyRvO|sb)?)FH!nD0Z%*CqXs#6CvXwUEFOL&}hxbo0@{a8R zNVk#QccYg#cCtWaeRtit{O#% zG)X9vY)>9=+NZP4@3Wyt1WH0n%$ur+i`J9W88@e^2P>h7N-W)|l@u)WQCd z9Q4=(kH28A88RffhnZCyYoUx?Lril`tU7bZkmHJm@PJ9?)}jzVcJl4rzgtwAZL1%uQw!oR zV!F5D?hx^|)zMf!brGGfVJC8*p8%yikb?rDI=lae3?nXsEi;P$prC)+g3>6yFgjzb z`NrqI#n#l#^4rOBC<4qWqRdJB)Pk8uqgvpNrGDLDSi{=#^?5&h9dq06j2U5Z+2S0P zq#00rwZtvxrf*_lYU45^C#&g{!1K?FDI1Taz>@;D%*?&aSE}jxJyWnI;gs&gU?kY6 z)*6|5KQk_Oyxwc+J}w9d{UguzK%_&t!y>Xc4bs`FYWQ?eF?nJ|0L;xTGW!$)QaqB7 zV1%plXM!(As*%*IYn*^+(qVsdA8lZcgwhGJ%u*+PDMD`GB#svm8sfv9!db5crakFS_KFq94y;i zKL=g_D4b8wb&c!`Xe4DYrO{rJN<`jjCUb(#v^$OCM8BV?7S3f7zm})Q7T(cx^Cn{nYRqd(NHr5KtV>m{i-19z=Z14}!>i6!p=nW6h z!ZvdY8T}ChM^c6(D59Eeln7Mf~%a%DRsxg3v{hFkS6Iv zva}{Mh!nV)SZ_xAW!wv;+OJ%xI3a~;p^}xMY?|K{#5c3;5&H}H0MZli)*^|o)KP54 z`-}*Gi3!77DQg2W%&1BwWb{Ys;h<-5&fD3rpQZwPffndNKYcKZGxike71##do@vdV z>`K?YPf=tq?*vrAU+7Ubj}EgFgNo*^=!;^WETlb873k7ZiF*qE925In}7@Q63Yt;XUFKj|iDxhv-2Br}Vkq2J56Nt}Z*3S&= z5;i^{Ev1t#sPvf;is3?KwTbu3ayr{mu2Q0(2dj0UVTd>%XTp21ClTg&zphoZDSJR= zX=+9k4Q;4}rPpn!g{1dwXa=D)tf&E%((Zvy8qN&4(RKwM!VfkpCt0YC@SsnorATz- z(YoOiJvS<%DD;)gsWIJB%os4r%zz=>2c7bdYMX0nYu`=JWfD)U83$}pc|IcH-nUt~ zMUMmo`Rjz#!z%*Hl8*nq90Nhst@x5-&z>+aj`N-4wt8WA8Q`FhQ&lmLCQf1eHpH$u zzHiP~dv@7E?++Gzjy(DPxSh%!oOx>Ia?y!E|QD7#u=;;v=Ub;e2<9PWTq`sh&p5`SH_ZjWn}To zks@oEoHfCY`F$Su~2pxc-Z_}g$|c>cj{zjI>-0*d}c=H^Nwqq(am*a9s29-YU4hoXW7`EP7Ey6 z6!uZpQAWQ^!Yed$SPcpCGd1B)jEPG|8O0-->;4GwUcO=Znzr_aRpJTg%oxITom&F 
zBnJYOXcTz64ZUlf%QiyRcaqLA3q(%}$e=tRAo+$0hlFG9inLjeM5^iPusZ|Y3VS&6 zW5wyjH(vdrKHx3}r*a$4rajtD?MZgYeG9`#>S`e@$ol@ZXAFOeW4It=Wl;@Mno%_= z*+P!el(b_f6ff`7zlde;9I5+bw? z){eS1whq?+9*eIMrENFZ;X`gdP#hEW(~7%k3>6E_^dI)~JI1QWfi*BWtVk3Hii!cB zM(E5nW1@kbXY*S~p!aEuco>OuVM_^zPH6~7-;2f5dnNaK}c=Av`v2g8m~1&;XT7n z$(#+B0&g;#{p~(-gi3`61S){J*d;gbwg5qpRBD+z)tnE;`|bRkp*usuir@>tXv39F zmMEf+F%lW2-a;L1=vv!z5WQOPLgTx#LLZ6d1F85v*(YP^$`2RdbSiB4yD@8?#|?PX zY|+Ki*b4{=$jAd#mr_0QiouXx%_$bENa~wfjws0ix)Myiz55ol)gPtqb1N7R7;I|m zdhRv#>5JaCC9>YeZu@T#Y z@DiVk*#x^{e#cVDY{QgkU)PQ~|I@)l?1vP};5z|0)q>;q$x=r6;E9UJ|szhOss?_!;(ZmT#_wcuN0z>$Z@ zZ*y1Qv)}rs%BD|IJwiQjM6akHzn+uH5a-$RqvE~+{`ro~+?WM9e;|NkKSlZfAbzp@ zH`k#tr*Sp^p>CL1+3GtnG5-5wDpHoT-4KEAzNtYUL@PrcD-z?8M?3!|;%PVE-vt56 zA1`QLaYQ?%{VM-}i2oa?aD)?Z-f`pfINu(e3u-O^a5MRRxNfVCIjfKUG(hk@ zF(GT{kW1g}n|9JDym!~H4C7FXl+y}nzn#`UAYAJ2JY6g^pC%P1vNs9y-QP1=_mq-cj*ND5< zgAxP5(eArvWuY^1GncPS+zViW=<{K#vGmQ@?Vv$ThtC=DoS6aBTFcu7sKAL3?MGdt zbPTTfHQ?RQ$!2GMJGj1nI5~efJsxha6};1Zy31fSHy^b|3osF&Kz|DNK z>wG?JM4G}`gU9C3=J4r!eqP-5-3;zIV3uU2-8(n+ro5*f@HzzgID1}9d$jG8aFOEe zdn;GuA|1MuJ#$||zv%~!DbrYw@sf9A8T;UYZop;vHG}Rh`htqN=OcR=PBoKUCyE!% za(fLMbJ~LMSx;E?)o~EpMiJvCn`js$agYXKepLsU7*59xlr zl#1B{>5@i{!I{Y4m&E zwpx(p!Wn--l!UbgO0O7E2S;xm!gkT?*EKkEO9v=&Sy#~mbXDsylHZ!K%DB|q5>Hx~ zB>Yc5e+mG=+*AHkUf>JlpD$BbC7|5qhxeIE3IKri4~1i3`0va7_oq07v0<|@`lzjI zpUy3&p~x&gmO!Fp?Sch-CK%8-?IO<(Di}``Y3UGh3{7L%MWLWKcLg-X`WvcH4cVI( z)hv4*|2Ncw>AunZqV6e10wP^LGS^*mkbXSViBARxQ;WdyfSFV~k>w`CTa3Q*o3`}L zMtad>R2}d38V-ig4MzGF%Pwp|ttil}XRdkR3mmlcf@MqGB)~_lvxQj*ET5I@+f}tm zmuwDt*EcEKU3rkfqS~M?7GkWh=7u?PF zA?xW|^EQ_s1e93SvPF}{Zy6fCzu-HlL6KMZMusay7W9GRCysuwRs`wg6fV4-%$g~BWbxVWswD{@? 
z(2eO$=?5_*vlck*K$TZXcd;rmOIqh~il+~kG9zm&N=s(3?Vmn9VC}`uTsY>bEQyw- z{ha4)Hoq-PsxJFczgAd!Z9}8>;$}ewUI95LTV$jqii`C`gJ7LO)QAJbQgH=~Uj8N@ z+0SDy<~GxNi3uSqgwa^FBm2K%M-pu=JByhOz*ldPW{)5{-62=T^+pNvb^l-n20z0m z_5^>ZOxE(dgbrH!`1F`cy>v9Oe!@7v(HZ(y#H7@~t)P7lA<&#d{6OMOgLh|I@%Ny~ zx%$N9_L8w202lAVg{pU4c!JWGlwXVt!aW408x-J?%O7F$TIyfBMxU!@KcfbPhN1IH z&@J#sDMFq{JniU+H=V&z?Ec74IPUtOzXD28!)`j@q!KgAliE>(n>eSo8vxBC^2M^d z&cU-rlAj}q34Zi9V6V6ewxU6#P;$h$qPMIeH}P-5&OPhp;n2WIlTp8en+O3EKaPmz z9Vz7JBeFDhN(vvWcL`+xX7aQb$Ecrb62gR=G=TD|3Lf6P2^}$6$o@2^w?vriar*w= zZGW8&U7UQsT;DdV%;+F5^p2z!>(tT+L^_D42Yuv2aIOP$Nf&1a4%sYTms#}eoSj;G zm!wK%*ooa)BSA8fY& zak}3p{@j=J@O+<(SFzwZjW9wZau(a(5@9$p9$qs>EMdlz4_u1I(Tjr$zzqrcQ~1jg zL6cm!u@0Awl9N_iQSB$E$YCd|UBV%YeeZ@%O3~UG?^2GClfBa%?B6D;a121(SK7qN+w?{lA-&yW+t;|s_6(R zghTPF84h7De$U3B9eF4xQ{Q(~(&W`U@qNDWeZOsO(DiuTkMz(g`QA^Vja_E^R6E~K z$biGGGE$RLYFb}{39)uDT%a;iZBMyE8a@`@G~MP`py805t<3Mx1CGeK%G;zWF7W?l?O;Hvwi!w=M(XOa4!1s$Gj7ZPH#oL>7e zMX-!|Y{$52F+?qaWle_ZwYA@NV?{umi{rM8CV>7^g+$kA$_tLSQu8S}fu z^!CqunCDd}OLm4wf(ebO^|V+XB`a&zu3S7I5A5c|pmFUX~!i?0sl|RI|3B`ge34(2zSW z8}uz~2jB0)yAV34q%WqBdm;+A#R4j1&!)5*DD$8?k{`fHr;|5nIi)EdN_a4OSx9m$ z%8%TeN7&5|LG5d>Aw$l@HPQ+C8eAQyn+i~+;r^yie2hFca90%`coZUo9o+pDbH~3M zaFip5F3x>4sW(4=PHGOxApMYr#f{7;?po!1xr-mUELb|hj*e#z9Xm4L08@jiOO?w= zz2|k`=Y-8O?<8xY#uM!ppwm3@Le!5ILm;^rN}%rHY7#OhGXp2{C~+$?Vc z_DzC~A*lpZvJ8;Io0ZT{88z0u>try!l%RQT`ep9oEyruh`Tf~a^Zl_@krJ!h^?|yt zp%7F$OQ(IvOGI!GeMFQwF*wiobbdd|bP%@dV_44k4w5sGm&r2R2P|K7jmsed#}o1?W_+@x7*$UvabsF6R{Ws~?OYK6F^#S;Cn+X@Jf#9N z3NJxxC`kzvQWu8eM5v|%c|Yl_-`9CO zfSb7vsCUG|B&EnnOp_(;C;WqBl%NWIp>?5n#X_~R13Gn>-5&fk>%-KC)@J5P6^zLC zYcii|0lIYp$L;I$SySs-5|=6G#tlx&b&bx{x*c|ZGqj{hM$TAMpM z{s;N^zsPab#bXbKQM&I{JKbP^h~XI-?IZ-y!yCOR3|3BYk{$dFWBEdCY|8OWJlng0 zdv*Tose(}e{Ba<5e#CxRlCU8NhbvN^nTlTmI`LNa3-de)dlvkJ727H`-OuOBk38u@ z^iPZTj=C9UZ(@&HQ_M~?c<$9Vv7|&>v!tM3=I5(H@8Zy8#2{DbO3oTyzg3l`TSE$VV_`Hr^bhRA@8y{BD-?ak*&m*j8hPH;{*=DAvt3$XcotF;drzFU+sn2^bu$i5B*~ z&!EmkW8&j&lmoT495;6{P({FWYy96^_Fz6Q2WYCYyd7^JQ2gE3&|Y@cN2~DzwD@}P 
zWElPQB2t8knZF0c$&DO=2*OR*v;snbx2M!WsiC!i_*EpSmD^;dGiwvgoRuPj{VPK>b ziU(nf#surCB!L8fwgb#ha_Lah4BrVLOzxwS?yGe}X^H~09C!L&oN*qG71)d*Udr-T zm~qBh4}EYuGM6Dxl3&yhwIZYmAv*EvjbMBmL@DanvDMS%@?Z4#jY4?k>V>opgJ?}v z;9+e_7YfAF57k9YEf^5rB(4S{q=29U@kMe|fLCm_t;N!h)td;zI>_O;Fmc)SWhf)T z`fL3Yf}>A?EBpp=+}^HSWqaNq-ub?d&TmUlaOqonPkdyutiF1Dzu(sIz8)^0bF@BB z8Y+5tzrGH9_27JbUQSXnZzg>APR?3%wY?voHO6#^ibKZCL?Zsn za5Zza?s811&g5CUm>0F`Ma6%=csy&;$VyS+&MHpxQo5zMz{at$U51wQZeDa{plaK~ zLV(}Ldd`X4yug|wwl2a=7GpZ%9@IcN>mSQ{voVb^cnZzsgi8`E3#;REaJUp$r4>xn zmQyuN+87<*CUYv+k&0cu0E>|A7))bEiW-83)VGRdmE^=PW>Lb-@q>?+9<^dA4jvxf zARXrS{Q#N6C7+EM# z>jB7g_&&S{#wE}LYh1c?8!}>Jokgh%RxaarBPF^*z_$Kj_++QL+w4#(w;d>HrLPMH~#b>TEwWi5|*;x zUU~aHMJGPXfQ)_rEDVy!pa_`?GP5MTT6pDoW~UD+6mQRJc$FvYb$=kgZjbL@A4L^) zC2#U3u1KTXya$)%%Wl$O6;ol4Sqj4Tjt83Z@uZ=x6ADFvzbwUOI&@O71YAv=t9Fi z>%I^p*&Nak)7Sh*w4*ru=$t+Jc4PyYNJhnoVc)cl`k*<;rxQb^ofWv0Nq)0(TRzF= z9=&EsCcX%UV|7#PWe+T4hQ!B?e4eMuJ=HC&pEi6+$K&apH=OvN#J7&i(l=_{4KAPR zMR(DuEL;kj=~5#5%+n+}W&T(Oo61w5Y+RMVJcW!QPzDm6P(p~T>#0=SIuy0KIs@_R z=ym|51gY~p)~KlklmMYoNiUTt#lYl#*G&RMucYx9m6H(vb8e%5fKu{xq8$PxM<3=`{dgu|d|_ z_W=Caz1l!c$*7X|71(M?b>#wan%e!wijT0Z`k1Roh&Y;~3=QlYuRoevryLdEI4nb9 zv+$wyv1axX&{H-uA`gC&ig@Ab|Y0Il8iEf zw;NzaP=bGh{Zqwjgd>~`e@5X9Z~y>=|4?yymj5D|4fP$J^{sTR^&Kt$U8|Q=r~V&X zR*hR>iqQf4*Qh9x{9E4UAYPM5rJREg@ zu^lE{hG4$J>Q(i^PV$Y3)Ja)PVxhkFZfyg(frE^qJf zLrCDM)N1=BQn;A^F<}9aQBNTzS_dmMHE0Rh2dKC}T7KvVfTfd2VBW`(scRlZO^6** z9M|&*#BeCq?Oq8Cg=_h1FyOu3r%_x=+U??o6(Ai}=Rj)d zX-S*MrOc8DOGtC(Zs8N4Gx7G%2H_5B8B|BdKbi~#N#lcO#%RI}fFF0Kqs z@Tk3x;}6*b1zzWF+^kj`JJVyfZsGnKY4*&D;jQ9|gHiaPeYmnVb8l{g_*wM!3pvCd zRhw3*hn5IcJAy}PtK3G%%&mbZ%5|(nQc9;lQeeI1$bON)Cfsq5f)5^tJR%FAPSNC| zju*4kbkfGLi%_UXaCd}yiW;BmDLo?WTZgx}Q=*izBwYmRLvbh~<3j?e#rE$*|3giQ zsHZTjz))Sep4J9gksDY}DBZ+I)m3Yr;cfQBb|y1`tQep3MlCM%yACBGCS8CLi9^G^ zPdPKcD7u?g&9)Key)V$8A#^mpnWr01pSA@cw<76?LG`gTg!T<$?|#?m#lYOoWL_TF zNg}dki2x91lVeBGU1pYJ(6-oCdN86XHIcK56BX^q{zT4H?RaCN-u`BAjbb+nId*JcvR zG&+S$!G*G}+KX5#2EH~yw3aPuO=vfo3@A1Z6YJ2OH836TQ8(&5hIxie$Lm|TP4ZdU 
zE0F=t376?yn==FN@WoP2<2F&afgnhHO9mZ4c$8aha=hJRBNVD%U#}ng9k#UHJi1Z} zvG5fe>>wKjReRb>&g-J0=qFFIo?h1 z5tUk2vC`?^17)-qGvrUq#~3E;YL2dNR2}~!CNdRJnYz=;oGYs#aU)sXtBi`EB%t28 zvr8{<4nwRB*h8w4ieJa%t`+!Y=R+1mMpNA(+BIyPK&{RruM?bYsK8udahk8%0Ioo~@^@D-1(sseUvjusGOvst}I$EtO%~)M#v%CrrKD1YN3n zllOm3GA_c;(tv_qId@C8M*jx-XZY6Z{~4nE6k)zUGX(s9hHu6nD2|h}gAI+XgSn}> z&A$ilEA^?EpG!X7_Y{;&VB+xN;e}raAk8ZWgS{~gby-@OfOCWy3ZCQ%12)q$>)Q)J z831d&p#XpmqZ}!R5OsOwiPRNBb{YsQ5nWURJxKhANuHa*tej zdM_~I7+})0Y&nRk^lV)vbn7>ZX3?ItR<8su(EF(}0~J6`9piugYFJ}VGzDk!Gq2Rn zkY4c1(X8rTGEZ0Oxj)?(aP2>%ODqwZ1?G>=uqS#fqNgKZk>kx|q>rqUfF! zwXGluy*s7)|k>!g~b6-)1Icf69<^J_l2Y za|5U$H)7ph%~qTz>Ah`75zSt7(y&8~zHy%fGU;jdn9GZ|Zjfk4TWzMdz7Ti(JF*D+ zj7JSB6MF!Nf(gu2Ab`PH*{sPQBy%_*7_es9@(fa7el0*n5jn*j*K9o%0zX>Qn!f!p zz8@$>N3bXQ+L}dL+ttH*<~)svK7C52f+V^FP@NxyHIL5&I9U@iA9Sw<)GNRe~nHzw) z|Bj7AhNuoeu!TpdaUS`K^ z7(>kP!cZH8A}<^v1n^o6TOL?skmy~^7pp$^%_KMnGHK=XLw8soQF+?pbch~7 zp3o{~NP(`4pDx^XB$2FpH_2hwQa6-~|YHB$;UWFjCO%SkSrv?PvY`4|at&KQ$NTe*?rh(U+}N{D4gUh$kG z4!poIqYa(x>@uSMR;cuMg5?IBd=Y_|`XhsY>crhKdeI|_J1s_6xkX63j12>uLN zf3{@M@B1_Cg5>q?jq8eJz{g9^p|{xJxMQe5WxPpIe^XwrknfMrcUipezh|*KXj7rH zYUlow@>djlC%x<{BL23cxki@9*7(@S6pDOwiZhi!d{_d`gFPyMRWADP%yN4l^T*6t zD=K=2z5Rm}A2BZwJmClQ)zw52)?SUZaD_m3;;ApRQRG3mc26w9u2`wQa{7tjWlJ5- z`M&HLBQ4FHVVZ%|%dP$~v|k}_3G&EQt)J)#qXeD1@X$zq2`8vItiu6C0#bluh{?LA z1!bkioSTG-0w#(DF-ogj!%yW_F|Z;Jt7Z_IL!SejB<<)< zpCGOfN8fsN2wvs;J%TN(I+Y;ztP!+lo^8TT-e^`g0$;!CIoGm+B2%agZdmxWtN?LJtO$skDs! zxjo+wL4|QuvOC=#8}o;eX$Ht9|6q3|Kr!D zJ@MF^OShuq);5Sc_T0M_H1z^}3!Qi+dA#O)ntbDF+u^x+HhRfT>#4bei+}eV|ND2X zrzX&5;(LBf--VA?f~mv~asm;$>s$)dt`K!WhOGN$_cK)`a>N$J0KuSMp~_t1&P#@_ z&<7AAWF~CJTw0>mvBX&Oz`>EuaN@dhW8Rr-(P2HNx+7$b45y;Qv&9%I^y)L&8r*!O zXhkuacd_ASsEY3@HY?-@x$wQb@pa4zLGwJFMo~dX+>C5_&xZx6;MFnT@u2ZSVP2cRhm@vjGCp+?WEHm^Lju zPvF(CXsAA0QWKjzCm#*_^P3&ryvsR!GSuawah`A7g-+ER}92_QXK5)sLBqlU-J3Y4v z2@TO^uIIdR*c`lmYUN^MQlWyIDx_BY0u?9##&OX5S`}ETg8rD*iKJ z7t@GzEVtTjpy|98O)PBK;Bt5)iU**302zMs$SOq<&6*Q1dVv1HE6-D=tmdaPhPK>sFmeE`GqkF>e{8Ts!-2oss?AZB6q%-$h28Gi! 
zhU_eU_NYI8aJemQIY4PfB$dH49nmMCbeMA*g`sd%L@C+Jb8Ta)C~ruhm$bI!e#@0w zBefz;H6quPqDfC%DIklncA+EbOw%xw>1Fp8;FK<_LJ>QQDExS@kw0ko81Hynx37gkSMnuk5M zwb<27RJugm(5Q8)IhPw_>lI$?D%+ks}(p#xfN}W2ywV)Y9dz=T8g9&#yh~!=Xkd#;%tO6NZYy$JJk-%}aWsfv z;9X1tG?#bq6G`Lqy5;41F}3wUzq83J%Wd?y>Q;&pj%a|bbYPli*>ds{3MiWny} zX&b8%7eux?a!S7T_6$O~wWRU%)sZoH(@PK17Mc_ZGzfw^0Mtu(Sv#~!`0~Mk6!So^YDiw;B2-I!sbW!I-UXHSCmcXlCc zy;C8(L~!Y5jSL-ppTfJry{nJB0c2P?Ol6}(5SiL>ah= zY^I_CcwTWGgjo!~E4St=7?Ih!kmMp9x?x>|TyFN)hrk&=4~XSb%bT2laasZiMs_Yy zKmt(lFk~k`1`{Kc)gXJo7;u&;dT1D_h0AGivg9r7y_b97iC?s&U!U=5IzVG9$PiA; zL*$pM@VDp$sf-MLa$p-Ew8SP6WCG2c);Mp-(osj-W|kVG1>n$z{V289nUQjX=Bmb0 zVvxQuX#~ z`}sr}FxufU*733|62tH>V3OpC1w7@e*HKqDv4a02M<_^kD_{ZL8_!5yRClt-j z4xm|8b&O=HRj$fR_HFUF27=!^DlZm|f+dRiD6z<`$kun%TlXyP^#ph#A$UBEQ=jrwV&LBk-UJL#pXdD&ewTV|%>q7re7i`;l?STPLzn@wu50X)qB zm-$4%rUyQHuNq@g*isE)FIqiYtwcGp%K4S4_4Dj%t7d0MtEa8a2Z}N+kG$AS59MG1 zV>L!bbLSLf8Jv9>- zXY`J&RC!R4+;`aqnkXgVt{mR-OZKH~3}sp~y4vE0zFHp@ub%YR$oSa_ja&XcQO}v` z;^W52OLq#B1o>J?y+DyG>&o(8H!4o4<=**C+S!Q-A4q zM&RoV8{g*F%hr~z?X}PM{f%z-R0F+$xN{}re)XWY1G<1g^=k$>BjQ6aZl=a9iv~&BbU!ok_-RS#CI79?GE$P zKMjWg0ATxvPSdmhuL8us3wV`^w#|><*5`x*ls-ZZMf^`(#&b zoZ1IuW?Z-iw*OR6QH{=ztUK!NgBN!*>j9LkVIey4*k#I>u${Z)TEkgsX3(1U=;Mpp zechd{HRSZ-Vsm~%#SPI2sROF6j1mmuLm_wiTxpm-!p3hf-vb?b8cQ zg6tbS;@)#;Dpr1=TI4(f&^BiDk0(ydL$}OhDF}Y>upfTD879lL!Vf_}g425Qj?4m# zgrq`I8)e%6NI$3>vS>hsYeSK_KDMx4`n_6!Z|X4jp|}q@R1Op*_mBWLb@`fa%90qo zt=?);#B)})F#`A{xF@f>?Q^?!(vIz|bisqyA79yQKXHI7Om8z)SwfI-I$-XamzakA zv>s$12gbRT#uSijlpuI$G6`$T!lg(!cTxjoFw{_>;a$n_PA^&Sx>}=Rs#xJ{6tf{M zm(64pYQ;J!=LK_r2pPomNFY{8SIruE+C!Fdc9=4P?jeGPwO%gDrd@A8 zhtsk;*$rf8rekDge0=Q3v$MFfWMYez4qeNAG?u(;66?i77B=aC=VS5yP^+D0YA)JO z0GAy}2!)^n8ZyL^7`FEkGvbfBAeCRV zz(Lh5hPDk8r*x}EUJmEFWSm$tpd%` zy#Cmnij@88HL|I7)yW{|B5PLq8myDn5j6nMfpn|M{KRhCOZ_zTvMLLFZDN*@B|Rl$ zK=6pd7~*aCyB8fcde3E_H}^Q2AaBMCLo0c^@7d_lBAokoGJej4F7lA&bfcbd$e!qAi|-|VcsBdYvh+nT>eazepVQRl zJf$QV94jqS+7DYGsRn_5WLCGaK}>opy-JsKK{5XlC&7tNOOr~WZ`@tFaqCPB4Rfqnwd#8c 
z(%NULsU2rxxzQK!pXr&xDblVGE&xD#`u}Ho#q{6lm4mI*4_QFhOyA1n-vjN>8PPu* z!|^-Ms+0UhgcOLohB*c}NMrNkKj&ZO#4Ss_E@&3x&}sKT05JiYCajtzx+9G_$ke-E z-0f-Y9qO_+NfOFsN)l)$z8J_HRC7Misy#sgz~E?IQM9*GdY2Ob+CAGnJ>G%=XWC>^ zsF~3!pemKCCKZ--v@Ei$R#4G4rqqYcD#njBr-EIaU8=W;sbZw7YjMkH6C{eJzsZ^; ziZ~r%M;FCclv^y5G?G59E;q;4f+eh<4*Sd4wr4eDp23(*%Vy4vnilBdK^*4VAj6v6 z6|~D$O)iq$6)LHfDVbE?rdHG^YW_4ymZ;8j3Q;y3o$qs+XKV~Qd*$Z3mM&$d$tydD z)LO4!sZUb9Pu@@Pte9t@E_nP(aQ{I5nu;gf{prG9bAmQ>*7vE+7+NW2W;})9>V%&H z=vcFcKM$ieSbD7J8xEc-_=O)#Idvo6noD(PrVH zZ;$yah87VD6cx3PpS3lewHyZ;1vgtemtKVt_otDRVnj7?ENtn29qW~eNNpI>I_w@r z1|zBy<3l2ySm=i|YJp;skrR}uP(nF6iXs|g`b%M$m_Rpdk(RgXzXc_e$=EyYS79u$ zuCkibOtqM%SV&8Aarip;*jFn+sr}yI@VkR9y}&;&kwIwEprVN<{DvyA))LT0UuyQi zjF6qQnX&ga;Cc&@jK*LTjPb9#v!qgeIEpOMZLO`;^?BVt|7p&D&L81rarx@MIutTYD9sVTBv4^3mKg686?c{1-%TkB}sCoe6qSl@y?N`=0)VLYOzh2W*P zNKDm6_vu92ETOsFwApt_MUNLr^Fg^glgwssjYd56KBsqdiU8on& z^i-EEIR}g{vo<2~e_8b6{e21W*anUAnMi^7B`mDkxPr4VT~jnn6~9R-^%YsYaC5W_ zMIR}%bMxrEMX#MGN!3uXIIn&DNV79B1jAMfhP?5#9Qbh~soK%^BYXDuVs%4^MLG(~#R~0gl`cZ8ZH=h3oyr6pOgUW`unaL9*7Su zR%TeyR0S}94G!Ss@}H4CjG%?m2_0f$Zufp1DVz%Bs1$Jj!S=|Ma(|ZyS;+Iw_c9lQyYMiB_WOYFDE6M?Pj!k+d9PTnEayv0ok7`dwfN@-1`SPnxDJ0%o?~4^Wavctg z0nj~M*}}N<#N8*3+-ejeJrD{+59Am>7&@vgOFBb+zCqB zP(2l6oJX6;iSkN`@3M71N9_ihGxm1IIRSWk@Zt?F3H0Oi7Mcyh=HHmR?0&2ntU%4x zg_@(fy)ll}*}lZxK<1o&8i|}cG#h`Wpe-H8hluo5BoUsLrBr(eI9rs2(IF{WtHNpWnA6g+S;{mN zF9#otEQ4!*Qb?F=%1kC>Uo1l7=4h!?;uWknjMk813Zx+b(?f%ExSkN2#uC#S-KJu zGA`|Gh6&H&^*Bw`UQf*G@#;!GT zQZmRET`RN$S)-K<+mlf8Qa0-K^0;fJN#=3+ z3$lpkVIe~u1q5}XU%~T&2^AXqrF;UtXrc>!u4_R2n(qHSD*~;*6fs^?t&|L8SX=(b z7t!Vd%JQqRY^Flkh z2tq~5g38`R>KVp@vP_+gUn*b#TX{E;G>u>dO>p2$R)nUn5u>UjJwA2$ywAc~dCE4c zP~mzz>X30YW5NKOh$-xz5IQ+2g&32?{JJck%$V0O;`YwWBGb#}D=e~`K?2AS8r3t7 zlkgK5t(qv`$t?XSAP>NDJXAF^7K;V{CFdZ(KaKwOhhdO^bHZT%3@UpXp2olO0O%F$ zs{rPBpyy4sPp}Pd-lk;sf_1v^dw6=8y^D@MS@- z&h^Uik?kj&R(yW&`E}H~j8w9oNYTC0ma5>t;sBV40dW9CiZf}f#7x(^Yr>%MO47Z% z{m|)Oi-fNjao2Z+IJ7>yZ6zG2(k-f@i;fF-h@Y{-@+*L#wrX64Br>P_tZf;6hCb?- 
z{@~qQ5u^}#Z88+31uyOBG+(27^xb`0_twjmuZDOV9TR$d#H(1e-RGY;0MPQrdP?Rh z(N|!3kT`02u!U2pho?D^ypBY|WSq8HZNB5zMCx%Oo@%c)z92luVH~4bs>cz7zw-b( z{p3HcxMfV?`Kt15sK(+vU#HIR(wtn=w17xOlhzk&jc;7>;KMaF<^cC)rgK+XwVBjH z>ld)Gf1s|%6Bz<$$AkJT8>-EKqp*aEKqzQooh=E}h6N0o-Ef0kNBafrzI8+^JjqJ2 z>Fa|h(2Y0&9rO!80d|@%KTxh)p>>t?UAmEF3W`Ixssss}Gz1^QA*`4H)13Rn?!A~5 z)d8fOSBbO*0~FHCc5jQdU3qx0jWnb@F){&kD__tI6N{*BaZ-W3_cC3&yZKUQN*L+un(#^B5=Kr+MLd`7&iP~lWF#y4P}sN<@v z*BIY$OwUlT^({5yqoU7qytz;X`g3&8Oodr1(twUHRi{Vd(-~4x;-?%_Gi2JvYLqk! z%@;7A@j7xzNf&JGbp0~qkKPj8%XDN%i>^C|<|r4;7N=At$QYxf41fp#ZgmFczas#w z`aXnyw9{`iLa7RWiW0z-9=b6yz6{)qvT5gd*}6y!U3~C)V)18xK02p}#fRH$yMN{A z!rkz){ITI1^LDkmUG~)tM}0Fq zWrK6D>Jp;mTPMngzx4k?YW&0>P^^0Fc%v`Vw{Y-~!HuZk;#-swdiIKTfQwq45`ha+ z5k`8U0}fF@G4k9tLf~Xv*$YjQ%5JthPC=aCuXDwld>;3<7uy|Kp074G3Ufbnd0g$a z`Qp4ip)^M!N0z10HiDBFdV`56Flh8+-*fhO`9ta+Pc{ccf0CNTlHNPVM{M$p6P;Nw z)y1w8rG#27MO7%6nQ*mB>X3L0)$?U~IeWf(_>4sw57&_~t7Xoswg4x3fHY=B+?ACT z1P7C-?XC6^ZN@u>hKWU3565Azo)iW3r$C2L zVQMaMP5X1xcxm}q+B=(+zuo`-@WfnnS;+L#nrxUJiFr@LEt*{RJA1&!+gUB@2qJ7j zmAE=43ZsV|#n=BMuhvs71vA~eM zX-!@vu`BdP?nV4_J`dl!Q@v<@PtW&BK4?tYs)lfW1lRTvrow;Z|Jo^)*B_j=Y(w9F zgwCw5%HzLnu5q(S)pcSvLgec(WLOgkZ#{>cZ{S#ZV3bWLj9$ zuJ?n%tF|Fe0-v|L$k>p_&h?+Wi-l92SZ_NASC4w|m+Sbt?~9jR?iAUP!OY-4*ldZsxeDQ21*=2G^@L)NDRu%_X37t1==~#Pe=N+`ZD$Syid`MLknFy!FJ_OgQqfhhOtHASYMt#|BNL7o!I7SV$L+$_CHC! 
z0bX|4F&Fk&m?ISpG2)sZrZ_S+L2;8s(!ZSJ(U*pVL0l5l4k`NZm+!xzKTK-22$HK- zu+X=Q{dwZBZ>Vea=M!it;=^XAKq{`-}d4DRjS~C07g_u=4u6 zK!ENa0-*a(0_gtLg|cPMmBb~q+?kvJy8n@5`j(S;)xUKA`tBSvH1ha!=-MfdaIsP} z3`$mlP{YQ2ces&C5i~q*f9w9Qq7b_9X#vg{$L+BD?5kCq3qs!N7o08v@4Lf1ua~`x zs}6=B9w3hEyF{EVwveXF-s6p3o@F1Rga*8D&p2tYI*AAS8z#9^H(8FsX*h~B$WB#*pkqN3 zE~Pyuj0)w`Kf3=!ciyGYOx6Zrbe+F-|8oG{zYwir9X?3`0Scy1EglNtK&!TakTQC% z#r%%aU^Ih?w|XmnL+4g4!1nKe62hqxN;9H-a$PZpgMqI^K;D!b_>b+M1G}YjQ!Uj2 z-09Q#-iNg{A5C+Mg1~KIDnsSM$j3(T%M{mIIlJDas`gU7)^@K8-YdK9yOWy% zma5Sr3McsmW>2!yzh(b&xQAyq=>*w2{5Oo@?9zc$V5 zVzR;ubos=wI)E$J;X>nB)xFRPW6NsN=z`am&PU(B@&NHNmU^H4(zlAE4Q{epocbJ z<2P~K-DZC3LBFC-@o*$082BlSxVin9vAN&2jW-@GZ^hiTFxHksXUBE#ptz?UU1cbZ zoz0=AghiabBF-#OBqKE1|Y5>TQ)TQGcuaNz^0E{!;lj z7e_5;2ve^)6Tvc@x?+k+vhC9IY!`UUfw zPF$TC(9KJ~ViJu3Nd^MI=JM#^K|#0bQKp@x1OA7+tojcR~Y zJ#FJqpb<`v9X4-eBl1Cr85})JOVnvC(sbs?QlC4`Q;U$)ajW*N{43Bkp~Iww#^yl= zb`lxAW)l2b+S^B5?Je^BZWUIMFup9AOuUiPfL9P{ITz#|^z$(5BV=)u61(xwGPMbi zGB9CUpxi|nDPK+Se^iO(VwB4OX@VBl40A<{1CFkfL0!4S{d&n_DX2;RIwB|}4kk}| ztnsOeGsOt56*Tn)G%#y=6&F!lXACSt+ghMR7gGGeVBJmhn$gyZDOeu@Xs13-w3I&| zDAvk=47SB^vlwUCDX*t_rRfVdkHAm`W9W01uX9O7%5 zCBL-y@1+{ZX62{lR*6QI0L^jl>1``GZzdih4q8-h4)3&*KDx*3X+++mHao^eM;108 z5=rW-wIkqzi*M-Q&rK6?r|>56RQ3)ml*~K}%pa^34Ya`eDQ0BOsmBnYZ%2&dY513% zzw`gf`OlHt8J6GEdhV1*hGBMCW+i!Jc(3^K`(8^&i8di%d@c7CY(Yo%iZT3B(BGcl zrW7~dxz;Yn!18>TW7~LB^<1ILc;9nfx3UI$ike=fJ+GQ!+qCt_PWxX$wfIEavyZ}o)eR1f0nN_Mu z$<5mxqd2^+<#0_MoGWhqBD2TA$c;Uax+gU-MF=XKeIOrqE)sXl5od=rte?A&tuT+5 zmj)|HHQp&LHac_v6!uJu%2zR)lLC@4HLQ)#*$GG3$q4nR%7 z?Wq#{ddi&j{ylr|pyR=b2pt;b#`U7hDF&l#?Tc@TJ0+?h;TauWGus;VbZ7mawysG zNOSSMk}DdyBD6-oxzp1@K}#M!i6)N`_8(b;`tkK(?ImBLm|=8-4fMy>y$alcb(cAo zOubyUt$n81&^J%TwmwduxJ52v>a@ADi92GwZ3vSruWnPqe^Jt1$!=SxHybX8dM8UL zK(1}}D3t_`=PTfi(QD1bIr0QGi|0=I=n4*1+|N`&Py3aKCMOLhzEYC2$9C%)&jP+~&h!9jbULLj3W zF5a~TqLTgi1#o$@JozYpvV|(JgkaC_Ao5j*GE>>HQu&gPAI2gemJWK)RzP)N-y=ldcw*q663ccW|IehmvYVS0v>WHu2356M1 z^A5b><@UY#zNnXVdJ6P?;$Xlk0x%SqT|TZ(AKonaHG{OQW|V*|qma5Q`f_spK5c$; 
z_{SMp4fbYb6VXeo3OIHio%U)EOy)>?ynQ;eO*y(Zp6CIIirri_o~Qo3wZtMi<^riF zvleXtTjSO<3y9IB+5`wawBG%AIetS>*MNV;)t*F4FwO!Y0H?+f9f`%qr;vLx{> zVNuwO5)y%t+^k1-!~V@W`s9%{uR1IckGq%lU{%xYQ3Ece^|CGf!NF`|E(br;DD!1f zmPnv)u+7U;F6AIiIgXsNqy(y8a`Xh1RiSP**(NPZzro|;l}(ezHPb=gmsir1oiJq7 zY6i4j7>56(fXGLrp=4S;mc*IsTqSZhV#$5-XJ3V>^=b~Ai63?R@gTieAW$u;;63N{ zP!XpK#K|Tt6fNCT9KfugcZg)XU004LDTz=dZqgGRSyHix;WgT7dTbV-P@<_}7%Ge7 z?p<~BJcPImYuN;HF6o&$eS8FKj#fw+@}<1>ZfE~a+ktZU5c!MLvx5con6ii|ZQx!f z%z$^45fu2_+M?kn9n7CCw!(_IOyh`yXcqNh)j0`xm6ic|>9#Nh=7nzP&DaBVio7@j z%0kz&0S=Rw`iAi~1-Bd{qV`n+>%FTe7xRj;}H z+EgXA_{=l$R!<0#NblXCpX|eR>BCo;kl*H6qRQqbc*KP#K?wD({iDy0!g4t7Df*75 z>t#2^NZe-YD00}&xMFx%$8#24*WQeo@y-s9!rqd%wXxJzW{}AFxdZT@grpN)w1ax7 z5cX6icOZF}0)Hfh%eW+SUiKJ5`-@iek09_}Z$UbR3jD~2&vIUBp^U+Ht~|n(=@CqH z_A=q0ZQTM+>DMiRVj5&mDo5O1Q)Y}5jFQ>SvgDR^w1xcC2+NrtCV7}uNE-{KB9hOV z4bQi510}>ZSrcgCcP$T6Zte@xoRM#Xg{dyWFSBhysCAWGckS;%8(=@e5e}p@dz5TX zpN{i}Os%vW+Sh7_^VYi_fb-UFtu@l>Z3bGvy!#9nMm)w0A(|nIWEEL^UZun#UvR^s zKo@$x1KntdTWDxUuWPBw7(?K2C?0KY&BH>s0{{# zfrPQVWfMusnb&sL24keI`aQfZeu=&4e;xpNz028E!JYsa#=49<8aWO;(86&0$B8M(^4<4{}IfIt&pW zVVBN^aSPumG+2&-)fOi*0UQ+B7s?(J&~;2hJFF*KaC#XODsYh={5WGafB&F63lLDW zxdU)UL~mCFsB+EnD4B0IcS#yL?hVCkDY1N7{MgYrwKT={1PaHK3FAWKH?k^ztkfRx zywM$qm2{0Wi_x76G5oMqbis;pShnPf6VYtK_ZAgM(QG^K#?fZ9@MM*PJJTAt*pVVk zk%4Lcd;ys5UIcGwubfW^zu%vpy+O!Xh69Y}Py;3`e_tKM@jt8%`k!xCpMP4nU0p!) 
zUDUIW=S|Cyj3}}iK`$u?w`>A7q>xcM>tRf@ypir?>+1E~kF5l6&0klu(t&_m_bXT( zV|1dacIgWtGGn^S4WB#cTH;Dg>PHw+Q~d0SWgtoFJlS&i^X1~+1v|8#=ja<(jD%-n zSLck5(SlIZ;oQg68t17d-h)p%Z6etpYEdNi_+`S@Nm9P6`}Wt+w$*$vd?uBRRt790 zym_5G5&vSzl^uOC7K@**i`zNx7A(C$@mMxL!o3_F>p$~}q;b6|`z=FGbD`CQ?IIRo zLqF#%v7?Ox->Or*LU;Az93?#`-XJSMf`XQq|JE=U>BOB@pGwl26|T<=(M7tCRKI{O zJ?@+|I+R!6)It0}*A}T(TC*GdW;iyWh)x+BT7%TM%q&(TsK6b!VJHF8kotgbhqEXc zqdKEx8D1z=u-+cwXMGLpHfIuC+a0SZ%g;INR!j=G$jea4H{_8e+zhSv;g# zF=SJI&SUD&@7=yFL9#e>vZqQlfdisa%z&m_wDSpE9?EW?{-MQ z{b74Ef>hy{7Ce#|dU<W`PLe1Yn8OcIS$W1P69Y_@~36ecsQ_ z>AIca(0iqoVILtz`U1%`pea>0q@DJK-|X>T;?ndGI9v z?K-#XZRe{k|I==rz>CLBhJ8|`9M=8RfxAII#7e`10dDOBW=QK?ial;U8JdL4TRLoa zio12q1WI{au)Rkk;RN|&)(!T!`y#0dWzXxUE-tPh)iC{@X|UH|9*EvBSg2~6r9!4i zUh;Zzb2j(k;7mr!ZfGj78#pL$KlveCOsAdb8s0*ow{MAYIXe@Q`0%DpEG-j03$oT< z3p2{jm!1d3I_f>pxXYyygTy)q3yF>>jk6E~W%13ZGfgf|_kb})w-^k^Kl8^kaYMIb zWSMYA?JIvw6M`$0&70#b6^AK|h&O`Z`JEp0%b)!z!(I|OYZAw=M1cjFD z9VD(ARe=2YguB<|$JxSGtuRJnSyn1Om@k;&9R%D5!G1Ki=mx%hzsgZo#1p}WI<_4&Y!!PZf2#8cogbwwE#{05$6zLOy=cKKX5Ft4^6>0F;CK0;lduWf=q znkgDz^mDRj1mCDeAEuRL31c;PcY`i{gNIs^L!S98LQ9**)ETnJ&9&D#MNDhD6z<1b z7#F2T%aRYH9XJKWx?iANPYkL{mAK1FylRRK!yzmub8|1d2EAJ-QZ}XDtf0rMWT%ls zra?m{RpP-zLiegcl2XHt!`(+%QoD<~kY1{=E3<<#VV*%$7;oAgHUJM&M~)f@d5k%o zzf$2`;cQiR)d92F5NADfOpXVOy|yt!etA)H2HcwT#FI8ZA&v0Sut&Aij=(w}d9cs} z-ry)XdK)Rj&|_C2N@ThQ`h2c%$Z+ye>#28&pkv~IH^7XduR>dE>VP)&G-)bdCZ>idhasu<>rz6?~Qlia+l4G3_D2F8{ z4tuMKw@8y}Q%_0^1x$6D$wfIr8?6<)O$)rv+Kn*|qpK2}CbXN%2|5z4lW#vo^3CY} z=kGdRAS{V2-@+RLT{Td*h@js9et6?0t$c2LEGZz&T*ln7TDxM)kk6;l3^b~J8w?zY zLC^dLB040r_b*k1Zn^c;3GpATHE-q3z*T=d_AZuI_{p&sn8SdUR%1HRrwOO%-YGk# z7Y~EWBopN8E!F@o16emKY1GU6sJMA(F_;8LpeUd^igb9kvE!2$-4cJ8C;LubKak3` zFI`v8V(i3}s!RR(gf;P5wKI~Xo%I36;z;8JOQFmhWIcwi1`cAaP$^U5Q^!N6mb2Z#-FqZ8{7cmnnQe+GRPjK@8C1;=DT_&?AU( zE?n%p$O8u4G8(>E5I;iO=i~5ZJV*186?GzwN$fLh4v*(@<=7Q(XAL+Gj=&|J3P5%nt$tS9{RtQ%PwB6a8m2)U%!Kv?erm!i6iW)vm|C?xRo9|!{d$%sG)QU{+8 zRg_AZGMNz};wSaWrD;Zcl*t`H!ec(u#=)IvhbTgAV2w=cy9g)LfnHM%t^rf)BibX{ 
z#TNFy$y$i>nCorjVO200oHge)u`4c2B!8D;h1c0PCx@U;y%Kw~oC~V5qfI~PP66h| z!tcVUXvQ0Y4vz&UQ>+fcSH_nWIVpAL9jzgwAQ^eo9mJyqF^hOg>e2Lux*B}rXltvVFFWcdr$=IOjhdZRJdWuX zyM0b*jhsR8PaDn-GGfdu>~cNV?$7b<{nKRWIJGLHg*v3d7%BtQ{ziyUIN4DHqyok# zeh4U}MG|quyDE6?!MlPtV6`ymwMI4CKB^X#b3It@X&^+6D-}cC{4_D9YJ#9)FSHo3 z8l+(m^ek2;up3iUR6v1K=q|@qkcI+-t@y zjA%+i)ij%-`3enRb2L?_WDPZopC_xZFw~du zZ`yJK9n_DOJWEW$@d&olz>gD!HpFTyR1#I)4(0Oy{PFrle#8BFZ;tlMDC+Hp-sk=u zRL$48$JHo5?#G>c1>$erqXvS-upfDqqAdi=qZfD&&cI>=N39}AZc!Lj@c=%--HeFE zocAArw|cL4%j%cBKMzy&eD5l$J4kNsMr^T^ z)UH*qTxcnQKpk%h^I3k-Hp)tc{U*(q+Ji|dSTZHMr2X=q4dE%afCm4Vu=hUdXRG?n zT~VYnrAP+;`RiE8CGVA%bGkam-COdg$NuJ*Y^*@3b zzqNBSR_pr*`{~oaz5}76##|679M_YyxYHj5Yd(LQZDUBTwua${G0Hjb8>?MI8ZL$E z&znT~&ImVaH~u}woDKUOu*Me{1RoSIaRqpgd6{edz45OXC?L##Z6N}^3jMWdXW(RH zX5z@8ssauK+9-l(?(7B&1PpcpnA-aL<)3PRI5HrpzpK@(K%4gfb}}IX0c!nI4H&Su z-e35aYK)9b%zw{WGX2+y*q>GZ9LAZ04+R7yiTjss{nzTMVE^4KpgPBYJMxSj|I*@_ znp;`@XDl5#b#U11BdF8>hb%e2zx8 z4krH@QD`omMhFK8h&2ibh~_sUBH3?5R)BoM+SbK{?q6Nd8Jjzp7&+NGxc_Is-{a(d zJ)Dh#zXJak;c|cWdHx<1^lJlJ62CR@p9Tm08UFVaj9>BZvcKa0Z6?N_VSWz@_!T3e z@GFMt{~HNn26@xc5U<@Yi4Uny^nzf%6+L+O9U_MHqrNI@ZU#Reub0z{|5iJ11*2X`TbJlublqe-#CAIVe;Q$!2v-d SKtRZVXFI@M0QK#!cmD@(9G9j5 literal 0 HcmV?d00001 From 31e3cf9b8092f6a64221df6dbff1cfa8f86b51b3 Mon Sep 17 00:00:00 2001 From: Mr-Xiao2021 <3493602396@qq.com> Date: Fri, 5 Dec 2025 14:07:54 +0800 Subject: [PATCH 3/4] update README --- .../S1/MoE/HUST_ASCEND/README.md | 137 ++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 2025-Ascend-Innovation-Contest/S1/MoE/HUST_ASCEND/README.md diff --git a/2025-Ascend-Innovation-Contest/S1/MoE/HUST_ASCEND/README.md b/2025-Ascend-Innovation-Contest/S1/MoE/HUST_ASCEND/README.md new file mode 100644 index 00000000..3d940bf6 --- /dev/null +++ b/2025-Ascend-Innovation-Contest/S1/MoE/HUST_ASCEND/README.md @@ -0,0 +1,137 @@ +# MindNLP 模型优化详细说明 (DeepseekMoE & Qwen2-MoE) +## 评测结果 + +| 评测指标 | 平均得分 | +|---------|---------| +| 峰值显存得分 | 100 | +| Prefill时延得分 | 109.3774 | +| 
Decode时延得分 | 360.0786 | +| **总分** | **189.8187** | + +## 优化模型 + +本项目针对以下两个MoE(Mixture of Experts)模型进行了昇腾NPU适配与性能优化: + +1. **DeepSeek-MoE-16B-Chat** - 深度求索开源的MoE大模型 +2. **Qwen1.5-MoE-A2.7B-Chat** - 通义千问开源的MoE大模型 + +--- + +## 核心优化技术 + +### 1. MindSpore算子适配优化 + +#### 1.1 ops.split代替切片 + + +```python +def rotate_half(x): + """Rotates half the hidden dims of the input.""" + # 优化前 + # x1 = x[..., : x.shape[-1] // 2] + # x2 = x[..., x.shape[-1] // 2 :] + + # 优化后 + x1, x2 = ops.split(x,x.shape[-1]//2,dim=-1) + return ops.cat((-x2, x1), dim=-1) +``` + + +#### 1.2 使用mint.narrow替代切片操作 + +```python + def forward(self, x, seq_len=None): + # x: [bs, num_attention_heads, seq_len, head_size] + if seq_len > self.max_seq_len_cached: + self._set_cos_sin_cache(seq_len=seq_len, dtype=x.dtype) + + return ( + # self.cos_cached[:seq_len].to(dtype=x.dtype), + # self.sin_cached[:seq_len].to(dtype=x.dtype), + ops.narrow(self.cos_cached, 0, 0, seq_len).to(dtype=x.dtype), + ops.narrow(self.sin_cached, 0, 0, seq_len).to(dtype=x.dtype), + ) +``` + +**收益**:mint.narrow避免切片操作的额外内存拷贝。 + +--- + +### 2. FlashAttention优化 + +```python +else: # prefill开启 + # 融合后,采用mindspore的融合算子,flash_attention_score + sparse_mode = 0 + if attention_mask is not None: + attention_mask = ~attention_mask + + if self.is_causal: + sparse_mode = 3 + global_attn_mask = ops.ones(2048, 2048, dtype=mindspore.bool_).triu(diagonal=1) + attn_output = mindspore.ops.flash_attention_score(query_states, key_states, value_states, +head_num=self.num_heads, input_layout='BNSD', real_shift = None,padding_mask = None, attn_mask=global_attn_mask,scalar_value=1/math.sqrt(self.head_dim), keep_prob=1-self.attention_dropout,pre_tokens = 2147483647, next_tokens = 2147483647, inner_precise = 0,drop_mask = None, prefix = None, actual_seq_qlen = None, actual_seq_kvlen = None,sparse_mode=sparse_mode) +``` + + + +--- + +### 3. 
MoE路由与专家计算优化 + +#### 3.1 Qwen2-MoE: decode优化 + +```python +if routing_weights.shape[0] == 1: + # 遍历激活的 top-k 专家 + final_hidden_states = ops.zeros((batch_size * sequence_length, hidden_dim), dtype=mindspore.float32) + flat_topk_idx = selected_experts.view(-1) + # idt = ops.zeros(1,dtype = mindspore.int64) + for i in range(self.top_k): + expert_idx = flat_topk_idx[i].item() + weight = routing_weights[0, i].to(mindspore.float32) # no item, no precision loss + expert_layer = self.experts[expert_idx] + final_hidden_states += expert_layer(hidden_states).to(mindspore.float32).mul(weight) + final_hidden_states = final_hidden_states.to(hidden_states.dtype) +``` + +#### 3.2 DeepSeek-MoE: decode优化 + +**Decode阶段** + +```python +@no_grad() + def moe_infer_decode(self, x, flat_expert_indices, flat_expert_weights): + expert_cache = ops.zeros_like(x) + for i in range(self.num_experts_per_tok): + expert_id = flat_expert_indices[i].item() + weight = flat_expert_weights[i].item() + expert = self.experts[expert_id] + expert_out = expert(x) + expert_cache += expert_out * weight + return expert_cache +``` + + + +--- + + +## 最终收益 +| model_name | memory_reserved | memory_allocated | avg_prefill_latency | avg_decode_latency | +| :--- | :--- | :--- | :--- | :--- | +| Qwen1.5-MoE-A2.7B-Chat | 31.138512896 | 29.234176512 | 1.8952324390411377 | 0.14382788760748297 | +| deepseek-moe-16b-chat | 34.359738368 | 32.813018112 | 3.0526745319366455 | 0.18968531806339592 | + + + +--- + +## 关键技术总结 + +1. **算子层优化**:替换mint算子,充分使能昇腾NPU加速计算。 +2. **注意力优化**:集成FlashAttention,加速prefill阶段推理能力。 +3. 
**MoE优化**:针对decode场景进行优化。 + + + From fe4eacf723ac364620570d54f772c71bd4780b76 Mon Sep 17 00:00:00 2001 From: Mr-Xiao2021 <94785565+Mr-Xiao2021@users.noreply.github.com> Date: Fri, 5 Dec 2025 14:10:09 +0800 Subject: [PATCH 4/4] rename readme.md --- ...00\346\234\257\346\212\245\345\221\212.md" | 144 ------------------ 1 file changed, 144 deletions(-) delete mode 100644 "2025-Ascend-Innovation-Contest/S1/MoE/HUST_ASCEND/MoE\346\250\241\345\236\213\346\230\207\350\205\276\344\274\230\345\214\226\346\212\200\346\234\257\346\212\245\345\221\212.md" diff --git "a/2025-Ascend-Innovation-Contest/S1/MoE/HUST_ASCEND/MoE\346\250\241\345\236\213\346\230\207\350\205\276\344\274\230\345\214\226\346\212\200\346\234\257\346\212\245\345\221\212.md" "b/2025-Ascend-Innovation-Contest/S1/MoE/HUST_ASCEND/MoE\346\250\241\345\236\213\346\230\207\350\205\276\344\274\230\345\214\226\346\212\200\346\234\257\346\212\245\345\221\212.md" deleted file mode 100644 index 7bcedc90..00000000 --- "a/2025-Ascend-Innovation-Contest/S1/MoE/HUST_ASCEND/MoE\346\250\241\345\236\213\346\230\207\350\205\276\344\274\230\345\214\226\346\212\200\346\234\257\346\212\245\345\221\212.md" +++ /dev/null @@ -1,144 +0,0 @@ -# MoE模型昇腾迁移优化技术报告 - -## 评测结果 - -| 评测指标 | 平均得分 | -|---------|---------| -| 峰值显存得分 | 100 | -| Prefill时延得分 | 109.3774 | -| Decode时延得分 | 360.0786 | -| **总分** | **189.8187** | - -## 优化模型 - -本项目针对以下两个MoE(Mixture of Experts)模型进行了昇腾NPU适配与性能优化: - -1. **DeepSeek-MoE-16B-Chat** - 深度求索开源的MoE大模型 -2. **Qwen1.5-MoE-A2.7B-Chat** - 通义千问开源的MoE大模型 - ---- - -## 核心优化技术 - -### 1. 
MindSpore算子适配优化 - -#### 1.1 使用mint算子替代ops算子 - -将原始的`ops`算子替换为Ascend硬件亲和的`mint`算子 - -```python -def rotate_half(x): - """Rotates half the hidden dims of the input.""" - # x1 = x[..., : x.shape[-1] // 2] - # x2 = x[..., x.shape[-1] // 2 :] - x1, x2 = ops.split(x,x.shape[-1]//2,dim=-1) - return ops.cat((-x2, x1), dim=-1) -``` - - -#### 1.2 使用mint.narrow替代切片操作 - -```python - def forward(self, x, seq_len=None): - # x: [bs, num_attention_heads, seq_len, head_size] - if seq_len > self.max_seq_len_cached: - self._set_cos_sin_cache(seq_len=seq_len, dtype=x.dtype) - - return ( - # self.cos_cached[:seq_len].to(dtype=x.dtype), - # self.sin_cached[:seq_len].to(dtype=x.dtype), - ops.narrow(self.cos_cached, 0, 0, seq_len).to(dtype=x.dtype), - ops.narrow(self.sin_cached, 0, 0, seq_len).to(dtype=x.dtype), - ) -``` - -**收益**:mint.narrow避免切片操作的额外内存拷贝。 - ---- - -### 2. FlashAttention优化 - -```python -else: # prefill开启 - # 融合后,采用mindspore的融合算子,flash_attention_score - sparse_mode = 0 - if attention_mask is not None: - attention_mask = ~attention_mask - - if self.is_causal: - sparse_mode = 3 - global_attn_mask = ops.ones(2048, 2048, dtype=mindspore.bool_).triu(diagonal=1) - attn_output = mindspore.ops.flash_attention_score(query_states, key_states, value_states, -head_num=self.num_heads, input_layout='BNSD', real_shift = None,padding_mask = None, attn_mask=global_attn_mask,scalar_value=1/math.sqrt(self.head_dim), keep_prob=1-self.attention_dropout,pre_tokens = 2147483647, next_tokens = 2147483647, inner_precise = 0,drop_mask = None, prefix = None, actual_seq_qlen = None, actual_seq_kvlen = None,sparse_mode=sparse_mode) -``` - - - ---- - -### 3. 
MoE路由与专家计算优化 - -#### 3.1 Qwen2-MoE: decode优化 - -```python -if routing_weights.shape[0] == 1: - # 遍历激活的 top-k 专家 - final_hidden_states = ops.zeros((batch_size * sequence_length, hidden_dim), dtype=mindspore.float32) - flat_topk_idx = selected_experts.view(-1) - # idt = ops.zeros(1,dtype = mindspore.int64) - for i in range(self.top_k): - expert_idx = flat_topk_idx[i].item() - weight = routing_weights[0, i].to(mindspore.float32) # no item, no precision loss - expert_layer = self.experts[expert_idx] - final_hidden_states += expert_layer(hidden_states).to(mindspore.float32).mul(weight) - final_hidden_states = final_hidden_states.to(hidden_states.dtype) -``` - -#### 3.2 DeepSeek-MoE: decode优化 - -**Decode阶段** - -```python -@no_grad() - def moe_infer_decode(self, x, flat_expert_indices, flat_expert_weights): - expert_cache = ops.zeros_like(x) - for i in range(self.num_experts_per_tok): - expert_id = flat_expert_indices[i].item() - weight = flat_expert_weights[i].item() - expert = self.experts[expert_id] - expert_out = expert(x) - expert_cache += expert_out * weight - return expert_cache -``` - - - ---- - -## 优化效果分析 - -### Prefill阶段优化 - -| 优化项 | 技术手段 | 预估收益 | -|-------|---------|---------| -| FlashAttention | 硬件加速 | 40-60% | -| mint算子替换 | 底层优化 | 10-20% | - - -### Decode阶段优化 - -| 优化项 | 技术手段 | 预估收益 | -|-------|---------|---------| -| 分场景MoE策略 | 减少padding | 40-50% | - - - - ---- - -## 关键技术总结 - -1. **算子层优化**:替换mint算子,充分使能昇腾NPU加速计算。 -2. **注意力优化**:集成FlashAttention,加速prefill阶段推理能力。 -3. **MoE优化**:针对decode场景进行优化。 -