From 5dc349b1dcb381efd0d9df55b5cdd8c6f10c598d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E5=9B=BD=E5=86=AC?= Date: Thu, 1 Aug 2024 20:43:08 +0800 Subject: [PATCH] update-2024-08-01_20:43:08 --- .DS_Store | Bin 0 -> 14340 bytes ai-compiler/.DS_Store | Bin 0 -> 6148 bytes ai-compiler/Treebeard/.DS_Store | Bin 0 -> 6148 bytes ai-framework/.DS_Store | Bin 0 -> 8196 bytes ai-framework/huggingface-transformers/FSDP.md | 2 - ai-framework/pai-megatron-patch/.DS_Store | Bin 0 -> 6148 bytes ai-framework/vllm/.DS_Store | Bin 0 -> 6148 bytes ai-infra/.DS_Store | Bin 0 -> 6148 bytes ai-infra/ai-hardware/.DS_Store | Bin 0 -> 6148 bytes "ai-infra/\347\275\221\347\273\234/.DS_Store" | Bin 0 -> 6148 bytes .../\347\275\221\347\273\234/pic/.DS_Store" | Bin 0 -> 6148 bytes blog/.DS_Store | Bin 0 -> 6148 bytes docs/.DS_Store | Bin 0 -> 6148 bytes docs/llm-base/.DS_Store | Bin 0 -> 10244 bytes .../distribution-parallelism/.DS_Store | Bin 0 -> 6148 bytes docs/llm-base/distribution-training/.DS_Store | Bin 0 -> 6148 bytes llm-algo/.DS_Store | Bin 0 -> 6148 bytes llm-inference/.DS_Store | Bin 0 -> 6148 bytes llm-inference/ascend/.DS_Store | Bin 0 -> 6148 bytes llm-inference/ascend/mindformers/.DS_Store | Bin 0 -> 6148 bytes llm-localization/.DS_Store | Bin 0 -> 6148 bytes llm-localization/ascend/.DS_Store | Bin 0 -> 6148 bytes llm-localization/ascend/mindformers/.DS_Store | Bin 0 -> 6148 bytes llm-localization/ascend/mindie/.DS_Store | Bin 0 -> 6148 bytes .../ascend/mindie/config/.DS_Store | Bin 0 -> 6148 bytes .../ascend/mindie/mindie-1.0-baichuan2-13b.md | 461 ------------------ .../ascend/mindie/mindie-1.0-chatglm2.md | 357 -------------- .../ascend/mindie/mindie-1.0-qwen-72b.md | 302 ------------ .../ascend/mindie/mindie-1.0.RC2.md | 132 +++++ .../ascend/mindie/mindie-1.0.rc2-config.json | 88 ++++ .../mindie/mindie-1.0.rc2-llm-server.sh | 170 +++++++ llm-localization/ascend/mindie/mindie-api.md | 5 +- ...5\345\277\227\345\210\206\346\236\220.txt" | 33 ++ 
llm-localization/ascend/pytorch/.DS_Store | Bin 0 -> 6148 bytes llm-performance/.DS_Store | Bin 0 -> 8196 bytes llm-performance/mindie/.DS_Store | Bin 0 -> 10244 bytes .../locust-lantency-throughput/.DS_Store | Bin 0 -> 6148 bytes llm-train/.DS_Store | Bin 0 -> 6148 bytes llm-train/ascend/.DS_Store | Bin 0 -> 6148 bytes llm-train/peft/.DS_Store | Bin 0 -> 6148 bytes paper/inference/llm-in-a-flash.md | 10 + pic/.DS_Store | Bin 0 -> 8196 bytes pic/llm/.DS_Store | Bin 0 -> 6148 bytes pic/llm/train/.DS_Store | Bin 0 -> 6148 bytes 44 files changed, 437 insertions(+), 1123 deletions(-) create mode 100644 .DS_Store create mode 100644 ai-compiler/.DS_Store create mode 100644 ai-compiler/Treebeard/.DS_Store create mode 100644 ai-framework/.DS_Store create mode 100644 ai-framework/pai-megatron-patch/.DS_Store create mode 100644 ai-framework/vllm/.DS_Store create mode 100644 ai-infra/.DS_Store create mode 100644 ai-infra/ai-hardware/.DS_Store create mode 100644 "ai-infra/\347\275\221\347\273\234/.DS_Store" create mode 100644 "ai-infra/\347\275\221\347\273\234/pic/.DS_Store" create mode 100644 blog/.DS_Store create mode 100644 docs/.DS_Store create mode 100644 docs/llm-base/.DS_Store create mode 100644 docs/llm-base/distribution-parallelism/.DS_Store create mode 100644 docs/llm-base/distribution-training/.DS_Store create mode 100644 llm-algo/.DS_Store create mode 100644 llm-inference/.DS_Store create mode 100644 llm-inference/ascend/.DS_Store create mode 100644 llm-inference/ascend/mindformers/.DS_Store create mode 100644 llm-localization/.DS_Store create mode 100644 llm-localization/ascend/.DS_Store create mode 100644 llm-localization/ascend/mindformers/.DS_Store create mode 100644 llm-localization/ascend/mindie/.DS_Store create mode 100644 llm-localization/ascend/mindie/config/.DS_Store delete mode 100644 llm-localization/ascend/mindie/mindie-1.0-baichuan2-13b.md delete mode 100644 llm-localization/ascend/mindie/mindie-1.0-chatglm2.md delete mode 100644 
llm-localization/ascend/mindie/mindie-1.0-qwen-72b.md create mode 100644 llm-localization/ascend/mindie/mindie-1.0.RC2.md create mode 100644 llm-localization/ascend/mindie/mindie-1.0.rc2-config.json create mode 100644 llm-localization/ascend/mindie/mindie-1.0.rc2-llm-server.sh create mode 100644 "llm-localization/ascend/mindie/\346\227\245\345\277\227\345\210\206\346\236\220.txt" create mode 100644 llm-localization/ascend/pytorch/.DS_Store create mode 100644 llm-performance/.DS_Store create mode 100644 llm-performance/mindie/.DS_Store create mode 100644 llm-performance/mindie/locust-lantency-throughput/.DS_Store create mode 100644 llm-train/.DS_Store create mode 100644 llm-train/ascend/.DS_Store create mode 100644 llm-train/peft/.DS_Store create mode 100644 paper/inference/llm-in-a-flash.md create mode 100644 pic/.DS_Store create mode 100644 pic/llm/.DS_Store create mode 100644 pic/llm/train/.DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..63e96695c2822ac171a1406f2b4f0771f1a75a7f GIT binary patch literal 14340 zcmeHNdu$v>8J}-$$GhiqyiRQA;;Z9JOyksX96w^$&Z9ZMlDN)GIXiY9t#gmHv&#DR za(lLuhLi*Z5`9n};vqy`R6tPmfr?O7QXWO9k3aaQ#Vw=~r3ID=I5a^jov=Guv2odq3x$MW=rw9qr+?HrNy>e0k zQUI@Qcwt#4WRh$mQKFFvqUUhu5(Fes`9i!zl+x_-(~)#i(~}d72fHTScr|^<5|+H= zX~5HfrvXm`o(4P({CgUpdoL!jgy-Jf(}1S|PXk#E(DfmN7tOA)Ke;b}V! z;aS2m4b~|wWaeQrzrb^B!310sb1n)d!~iai^pMFHHuDQS=i&s|$k>lLGMESj*yu

3f$Y8FMq*IzAcvzlSI*#0vD4)hr4gBSk{;o^9B0F}1jTptV2l}5UP&$bv1$(2V z3Fp6n8dpE_Cl3mlrl%J!{`+10^V*`lkF#;;xN)MmBvD(A=e>etY{p2LdVV&R(o`eU z-26HS1%*Y+ii@S~CYqTtdZOuLv^J#cTFfd(V~15`W=yW=)sva1npDzE zKusvfa(qNhCiIyeeJYtSm~4bMNRlXx$$@j{TH2Z$LoHi6&Nqh6b+ux-qpj`yc}c9S z+q~`2*ePu?so%$q2h0T+t;U&9Pc}=+(M`gLYcc%CROkwda2)$kqqB~MG&B)vTGYhu zxuS{PYcsJ$<(FFVZOt91_|7fYsQ5*n)2{wSnO&1G?;(?@_$V=)c21~g+Lb=PDx{cq zjN{a$(Z8d4y@dMXRY))Sgmt#!gmqo(ge{^Y`*bbBsu|P0eCa4yZ!H~fm`jH!`AYny zfyhu~*grBRub9R);y@~+>Ph3Uk~T0OlgkWTO9tY4@?ct_?IUqro0?1-V{&;ss>P?Y zXh!MQw1|3-g0wOtp4K((pl+zNn% z#Fb=1O->xe#Vnao$5kaQl|!R6l#EU)Oe(NC5ESJ)adaxCo}N-OXL(<10wGC$R6L%* zKeA6xP{{=6UlaIuPWIOaZj0 zMR4l)W+IAWoizb<;Rar03Y6-?&1Q>8YDg>TBYVjSbmVj7LGl&yIQc$#fxJjwBX5#- z$h+h{01%)EmO%+rKqah#0IY=u*a(}T3ARBW^usPV1jpbw+zn%pfJsO~3Qof`d<-tY z$Kez3Y4|*R0lo-df?4=Bd{1N^J@5A5W13?ffgi4`G zs1;g;HerjV7u^7~ZnuHC!#`0xCm@u|zp zsTZxRsH|EQqOR4sqdRMZ++-~)wOcfkLk{@P*m2<431M=jr# zi>Zf-bCi4SI)ANPgg(Qo>+7+)4E=>yH#XMR$^yD6uQs>T`sD(2LW#+?H)0jcO4nAu zUoJ#<a@Tuo8{C9-5&8wxdz+hl3D-As9x( zJ^?X^Gvn6Lw9{a~N8w&H?~7>Q_rrtm5PTLs2VaG+!NbhRAA`r?33w8ofoI_da0z|} zFTjiN61)trz#H%zc*`_e%a{jw-MYgcu(XIVyLNMbHl#{nC`ET@8d71z?A+CX#=Z7^ zS<1>~a1n@78J7f&cmHe)zVb*c_3mxPx$V~&ZsmD0`;MKb*)<8vOl9gg0j_Y0f-HyP z%Q191MG~m+G(sFJAIG|elh=fB>7;Wa7|^m$L=lb>WmBDYanjJu&RKw52;rPu3^L{6 zTI$KXPO-E3%=QU1Nv=tg$RW~?%IzcjFmJ+XNw^e6ea5aR%S~_Gd^P^H66VdD5MFT< z&JD|LDu~`4@81`Yva8W=c0A4&40@z6!=S(^r`jlhaCE((+)`WIby_|ae7=84c`mCI zO(k>0jVrTLxzu?b%FW@wL6mx}XAe2gWmjfSqf3c}B!;W0o^x144>g)>TUwUyCQ(}U zdF;x{5_6Q4Ko!OFZCEZPnS3{mlFa9ED#;|;@b?M}EBwlazk|bvM|{@-CuEVf4OeGD zbRK}62Uq(mYu2sb*xbH#XYcO42j@x|Ch+-e_<5nQokjfRa1n#*(yCQ7cn>bbn#J0P zyG1s}n%bbx0U|J{$9{11y4qmSjyFt!`VB!JgB~p0r;Lr8d_D#}SddRCEv>#F2QCai zP*TV3zMzF-7>uCAZQFgpEYjiFgObBIvJ1h`!SUJpuw@=_Q}j`_sx&wd925~~+hDgv zKOl;^pxbJU(8gTQ?L^vHpe^A`u+P14LvZ3z~m5vCU+q$Nq_?5pu-f*z!^A; zfaE@S03pd|;LGrJ_y&9vz6DPq*8Va41mVZ4@C%rOU&EX57NYKV5p}-@?+cQ!LRg8o zyI$BJvBmEyu7Av z!=~1bZQ-m9T_3c}oyb>X;bONpaTG05gO!Wt7y0k1Yxw@ac7ywd60O3$w3LSEhB+lj|!7bTgndUZo}uP7p9zG@?V 
znp`5!lb6XH8tFUaPl%R31QAWM3PP|Rjgub+b-?Y=1zQn0(*vPBFo0%CvGWLw!cm5u z$6*3ghMhCE!=X>2p+00D5Y56PXtIx@(LMoB!zFHRStA_ab?Xk#*^SV(rxoBEoG3Z% zjF~8aa=hn~*;}Oy!mkvWV$hA35PIbWU^JkewS^7Ecyu;EdJ#43ClS)`9F0Y2cJBG} zcJo_W7e5V3uE@XC`)+G{XZDP|v#q@`)ZW#4wQrODqwl#q{EsF?zm)LsKmBx_oq5=U zyZ;aTUxWsLsksDi{b&yheE-jwyql*1PXjln2B5Gn($|B}X1!!^KPLUwE`-+!yeQA} zc+rGqCW1H0LM!ZZJgm^q@j5BZO4t^rWy~+|97~w~>mLGM{Z}nn0At@GvP2L6^SAI8 PlJW3AzI4Cw@&7*ouQS5Z-O8O(;SR3Oz1(E!fst5icRu7cfN+Dm5WNgE3qBV-KZ}v%Zi|;`2DO zyAew(coMNQVfNddpDg=r*vT@+xHpYjj5&-k3ly?JJ1>F9wh4Qzl>L*Z(6unQH=xT}#`Vt^P} zW}vKx4%Ywk@9+Q1Ni-q`h=G5_0I&4?o(D@ZwRL52SZf979ViOMH1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T00Q+H-} z!G>5hz93+tF+Pa~^-W25F$Uw`le~(Jkq3!r;)^^O6ZOSJ&z(El(4|fAK|^G2Gxy9r zKXd1t`Ocg@Qvkr;yxtDb2mmr2oU&?~78zX5k9&p`aF!EAvIoe50h6GC2f2H+Lq-^Z zFalu&!U%*B2qW+yM1X2GAMz4)Ur56_j6fKHhcW{E{Sc#r(?mdL1@&JYH2Ez6(Mn?b zgU+cB@NHrNO$2mSP~ViUDY^#?OfhINP?(cE#?^@?0y-yu;W3PcymMRk*|Un=(6n{ek<=+G zYdhzKgBZLdpr*{>3-{}-htEkn{k1yiGIQ(8975&FiiUL?H*IO_+PSB1|DnTkHIJ-hwRNgGLGiOa^Q39y zMvA8HO&!(TjA84hJvmk|T{CSN!?u<+YVmnRnX6g7rg3c~8eK|tJfAk7$(!C8)+EdR z9_u$m)#Ezd)02+QchT8pnXW^-H*JVUk8w#~&Y)Qj44OAb)nS+1Vp^aWZ&lUdQ}p1O z%Wr{9+dI_gh^3_si|LBkr+d2^9V>{~vV#G0*KRdBo^i5Sa?!2Dc)(83&QI~34{5G9 zG;P>obMmd9BuBYqude&d#?@#6K=t6_yK-|JMb&~iIrG`E0JL>Mz9$l!xr3votVJA*oz18Fdo4q4&w<-qmBli zL% zs=i@;)8^KW?p=v_dj0w2!)^Do2UoJcTrT`Bp>Gi`+Oz&I^)FtaM5 z9aA^Tlm~2?SU##Y%aj3ZxmdPD;xgqNll-N+HPWt7X0ZyfY>9L!GDET0q3%>D$5^FU zKB4YWc-k}Y*O8V;pDZ)^p9ADpjek?{P=zWT1e(>%dsa(pM$*;3SiM+_O4?0Q3<%EVppY9RagBx z#ZK{yUE|D5N7s&cti8uCbvw3o#baGP9c88NobYzZQup%R{*?JCOrb*1M&ADqEdBjI z&<;r$fiMF9#|U6$f3m-qJS!FaawmDO9i!uEI{4svvx532G;tNq_uFxz^ZziU^H?BD a1awwVlF<0iKLjlLD>|(IVf}x&>i^#o9KJyS literal 0 HcmV?d00001 diff --git a/ai-framework/huggingface-transformers/FSDP.md b/ai-framework/huggingface-transformers/FSDP.md index 690c6a2..e54a24f 100644 --- a/ai-framework/huggingface-transformers/FSDP.md +++ 
b/ai-framework/huggingface-transformers/FSDP.md @@ -3,8 +3,6 @@ - https://pytorch.org/docs/stable/fsdp.html - - - https://huggingface.co/docs/accelerate/usage_guides/fsdp diff --git a/ai-framework/pai-megatron-patch/.DS_Store b/ai-framework/pai-megatron-patch/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..cbf9ce2f5606f2ec8e9da4a923b1306d7d64d602 GIT binary patch literal 6148 zcmeH~O>P1)427Qyl`3_^l4Uj=pf`vRJpmWsSEv#d3&cK0=h^Xwq3dddo+anSPCP%~ zVloC`%iG}#m;v~pyJF$Pz>M)be(;MePBH1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0~2J@-b7S(VD`=K&SZBU>~01CSf}r`02%;LLnW+CVDpL4Jn4v()QEw|#5Y3d zLjo!I5RZ840nLDB;I}bAXLkY|2w)zL;m!H=lH<_jJfscRmBT4w3?5_x{lW#$K9u(N zHXqz9xo|`7V*k;-2!kYTHlJ9jTp1g$R*h=CdT5{W%ub!OpLCpfpL*N4hFjQ+pW$B_h_A6NXVUdX1UEpw({9nXRSe;hZ^GSwwre z-5w5&+Vt$gYPWY5o=5zK^1AsJ(vG8$M<>`92@meN_x~kf z%B4v7HMx%oUvFJK7f!^AmOsvbIs>)(AJ0Q?Uo)T?_Y6*%l6-ztFknU*)Gz0&N0ajVJ*E^V!dRs@5qqo*XJwhcRzg(gS k!A3pD!l0*k22}{onPebZ3Ui4#f}(!}1P!{;4E!hqFYkb)Hvj+t literal 0 HcmV?d00001 diff --git a/ai-infra/ai-hardware/.DS_Store b/ai-infra/ai-hardware/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..49320e7cdb668a4f8ab64b2606c2ba638cfd101e GIT binary patch literal 6148 zcmeH~ziPuk48|qbgCUE@jJNaw`UZz__CS_Emy&u6WGLP5ar&6~^oNi-ol7a7K>BpL z6UM*6XNibTpUaELC?W&gRGuxYOtJ1>kNdCX60Wh4O-Ab}eJ`#u!7X-#dR{^>yQ5dhkw?1tQD323nbw5GODRA3sd z&}da3L#*!Y(30nBY732a(HuTBpR6{;z%<%L3l=b~E(|0<0xJSfu^#RI-@$*H|5q(s zk^l+(GXmPrhk3$F<=y)5dRBkSs^tw1_2UTNegd$tt9T1{!+x;@T2osnDlq;CI0gn1 H_*DW|g<}y0 literal 0 HcmV?d00001 diff --git "a/ai-infra/\347\275\221\347\273\234/.DS_Store" "b/ai-infra/\347\275\221\347\273\234/.DS_Store" new file mode 100644 index 0000000000000000000000000000000000000000..bc1f5a3e96748c6523e99b1a9ac08d1054877437 GIT binary patch literal 6148 zcmeHK-AcnS6i&A3GKSC#gNDmr#soA(j!K1~yE0U>$%q`s$fjYULa;u~xmP{?VW-2gBj2E0-sOURMqe 
zk5{Xf*xNriy%;|yFR6MnbaG%@%Z|ko-a)aedG%*$qS8n3l-Xq#Au&J<5Cg=(W;0;V z1<~GY8ff*z05MR(0PYVG8lr2lFsQc<==84k&X_UxI`WDT#JQ490mQl Q9FQ&onh@%UfnQ+Y3yJVaX8-^I literal 0 HcmV?d00001 diff --git "a/ai-infra/\347\275\221\347\273\234/pic/.DS_Store" "b/ai-infra/\347\275\221\347\273\234/pic/.DS_Store" new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0&1YdaLQo2VNvKqj~VR1#7_L>gC3N+qI7xg9(Z zJPFUk|Ja_i4<^J-RUx0~`0uad#6QV)OhjV5%#MhBBJ$v@od-z%VBF4r%XVx{2P*U! z3(6^_CDl-8j({Vu?F4x3_9(-8t*~mZu3tqZJ)t=zs7_Pl-XUHnO{;yS69)i_za;nKNMMS7V}(pUL(HSr&vs(FBmRY4^W7FVuz3#;CHO=5S?8{L&SodWU4v+hC7>w5Iu6JZN=~=AY;NC0>+;DnBBY$MrRp87LK`=!2$|(^n|(f8ck8(~pgF zdfYTeVbGUhz?hy7kR1)5-N1BVc-)DJG5&R$H?Wu?UApxI?Aq}5xBg4r2RZ_dz@`cC z{vhG3k=53y-a3%zD*%*#BeWsUcL|P>RwJveQMAB>35A+aVXqj%gri>Cev#GIs0k-w zFCW6bS=bwj&~L~1QgS5T32oCKRCug&r5YR{W`G!Aq$60!H+pQWG0B7&_Z zw?(bqM5N4=*>5^O8|F*d?Ervi54u|bH30BX2}?dM-w4G?XQbpj3y8w(5kmwyB=InR zoXKX#e`J8(od+3y9$+0lyuUcpF~(lw$1KhJo11T0#K9`h&b3 z_0H+riPl*>v7`7jO@^J?%7Iq>C{;<%6r|k*Q!dU@)z!nc9;j|^YC{|Feb4XI>Z4I} zyVa0eyF25C9PRBi8?xDIjmN&Xy0)=@+_}#llzz5k3jEy}S#~&q7c^%2dJ20g)9Myu zRJT0sI|T(dkU$Sr!8IgbGAFw-G6T#2GcX?vxU;3#FK}2`wB;b&@FCojg_}@> zc{=VdO*)9cBCpH{UYG%9U_KcT<-_o>jlH?Mb*4+YYbELx qDhcHk7N;d>n5!6b=_;6~5p2H8XilQajz<#P!yW+jQ&rwVT9A?6Q8PPUF;8{LG_uXT8&S$a==? 
z?Al4A2vZ6|ZK(jMg#v9+QB@*nM2iGbRiHnhAc2rtxfQJhe-Kav;?W`@_ygzOJL}kM z2kHQ7D$Q6kXU@6z%6vo%0N)k<5(Cnm+M~QX$)qpmrIhXr(w)J7 zGyD|_#M{X)D!MZyr3|h?1cC^xi~vuY5KGf~-J@0e()GK`9JY(iFq5n%%g~=iC+|0% zn)7B-88=0~mr^qs+sidKzX7GBw5+_MLaSI;ao5n4J2#Xwb9t}J%%0%du_2Gi2XqdE+_jOpf0j_uOo*TXY<^$(P$E3?uJZM=f%&XRRJ`^D2u52kqkes_NRC zZf)MZ|F*7!hX)omY}{n%TlC7YS=+Ny8Ea(DPUohMnck%3q;2Qaaf+~$v+uR7thN~q z+OT6zTMJvS4;kS|c>9&SRHIHE&!_B<xi4?PfUxJa80;g zRZnnxch;h-H0wLVG0ia5VULC;AF)es(hN@yk0~ zX+%8hlFd@ni0s0wk*KYYoussyVQ1OH>bLiWoor#s_yAMcx@!po9lV{Y~FHRWz`LJJ9oA2Iq;!HkV#Ay zV7L_C0@2IzKp1R~%)Koco!!=R8+9;!>km`O}4dfBbE6|l0SxEymlNv&t7CN zvmdhs!trm|@7bT(KMA`VQAxPH2|Ee94cJB4ZNq*XKs$PII}YIvLh>+1F^*$6j!9TJ zg(=vC;~ess#Ygcm!tw)z=11@ud={U>m+%Ch#8dbRF5n`*Q-ssL{Z&=!oEm_ezY$H+^0-Hg4KLNf=t>KH@>* z39@e69@3ZPJmsJ!TB6%Sp{3=)ci2%M(nX~Z*~uSqOH9{Ar4TvEEsafjNY)vV@7xqm z=%GT<5n0cTZTs}lVqub{f}1<&%|!*re|aahTqZy=@;Vnf80u4r_?FOM!QQW`D@m}> zILI4UlHhU*)K*MY1Qph@1Z!u5>=-5298uw;>@oHXyGUgC5_^Tc%3h;F@;VVBC&NFo zKT$dPJIbk`XwY#TY7oVZh@lBBXhodLNhi9|Lq+9IQBKCFpqNxfP9uY9QARi=K7dc- z%c6Wdi|6qIzD0!iU3?!e<0n|a>v)3*@h^A>?-CtWD27s{Y?b9?kp@SlUbreZudO^R zA;8RP$r2&{>w905WhL1rpU*Rm(uFY1{MtJ>@llFP9*o2y-Kt9D zch&g4z+PaNhy;I1_~p~Mx7pvo1PO+S1Zz->I@F`lM{NQfxD82x+Y+`(c4z2Xi=s`*05DX+rlYJcx(zFdoHYG_`viU&L4OHC!k{>7fEXrNoPCLunEH zEq_>`eUKwiB7c08@BjB+`TzePH1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0921ictKq}yW~&Ls;&z_j zq8!!}6{Ua_m@06Z+m-kKYx)oK|CFSi6p#Y{N&#DJwwpCyse0?|<-FH6`VHM{KIv{; p2ZbTpF)`XPH{OmfqA2T{ulc+ej)_5MKIlaK8E{=>QsA!@_yX*@7fS#D literal 0 HcmV?d00001 diff --git a/llm-inference/.DS_Store b/llm-inference/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..a0535d710379de10380f1ad85463ba5f4957cada GIT binary patch literal 6148 zcmeHK&2G~`5T0#K6H`DrP^m{tTvL&srfS5+O3Mpi1P4IDt_@hY-pY1rq$rZl@IpKZ 
z&%-ynn@UY62S6x_cBI*Fc6Mg1`Rv{G5|Nn9;yzJ_h}uv_=RTTW2#>R_$&T^xpipz< z6vyR77D<_{MO%kuz%sCD4Dj0BqmW9vpoCV|?<4v^8QfnKN~-Ayfe96KO-WRjS5YP- zOz=Ir%F?px^?rz*UFY6jtL3)#TknETvJR@Snw68Vc*DJy*chUv9Y(Lxq@H#jpUJ!m z(>y6uLK-IsdHXueV_8q+ERRc-8|VSoZM)OX;e0+Aop!zc_;}Iv=BL9^*Bgxoi-p@h zc=Ghwi|Oa=GM8U9+X9=Dl7|MD@D0XqYrGDMJd^oH#PJ*ts8S@AB*8&}-Vyl8=yF52 z82u#|ch^D#V`BDxl%lawnWUbXzy=H zJ+=(XfMwv{F~IwS2W9jPHX7B|fks^cfDYVBVDnvqb3B8-!A2uSAVQ}Cb*eB|458EE z_e`8`u+gZ~NtnxrFi#fdh9cC{F~6t6N%$IVYZ=(cECZH- ztztkp=fU{|C7H8ztvEhwJ?MKV3&%AYRSFt&94mv5;zOtsjCpJTeS?igcp&yiK+#|u J%fNqS;3tBw+$#V8 literal 0 HcmV?d00001 diff --git a/llm-inference/ascend/.DS_Store b/llm-inference/ascend/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..8779145e4b2337ca563f0a4156891e469cf58525 GIT binary patch literal 6148 zcmeHK!AiqG5S?wSO({YT3Oz1(Em+%95ig5>#|PZ3e*IqpEg2!;@$ zWmyLX=vvITeE(*oc#%9?mJ5}e}dr(tv?2ji| z&mUdV)0t99uxR_ic^pj#we4e-j{P`|Mmiu4BXqgCh|^F_duoz~nU3|$fKzcQgIawy z>onU9**fUX8*+BoYBpr2-JQ=X&d%=M(dpnZ8K&yRP%QB4)3Rl80k7CtvAZz4O#*yd z3SHsy?2XbyrT6HuSYRRRuDU!ECY27ZG9x*sGep=&WWsILw* zx&%NhVYe1+Q!PO;(xPiIH;5xB!h|B4P+?09VZyOrT0hrfZqS5-u*HY4D+^np2)#Pa zFSR=e*C1PFfEoDC0M&jdHOl`dtLuO5#D*DQ2L2}lqSW{LJ=~kktxLP4Tq{v;P)W!y nH~1NX4V{Y7mr`*ZRSWh@Y9P86bA#A}!ao9<1~$yVpEB?PF^N{! 
literal 0 HcmV?d00001 diff --git a/llm-inference/ascend/mindformers/.DS_Store b/llm-inference/ascend/mindformers/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..0ff48be45142d55305177e91ae27c1e14ed1b214 GIT binary patch literal 6148 zcmeHKyG{c!5FA5_Lo_KV{R_YkSW)-_et;w=0n$N=B>i>#HnR_;LLJCx$L zGhRI%r3Gq~0#e{qf$Lb#t^PmJ|LFfuNm@w(DR5B=l-crmx!{wswoV>rwYJh<=s(6> nPv_vJnCPXL3oFHUM|sue+;53vpwk(5I#E9Yri)Ap{D%VH+HM-W literal 0 HcmV?d00001 diff --git a/llm-localization/.DS_Store b/llm-localization/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..177bdb5d1fa65ce2a796d5c0bb7e96557b995e2f GIT binary patch literal 6148 zcmeHK!AiqG5S?wSZ74zy3Oz1(E!bKqikDF94;aydN==%e!I&*gYY(N6v;L4@;`cbS zyA?w9CL(2~%)ZU+yiNBd>}CK!^k-oQpaB3ibi$H@%{NBl zDprvF@nY)0$K)cSb;xP;1f^pSA_ro literal 0 HcmV?d00001 diff --git a/llm-localization/ascend/.DS_Store b/llm-localization/ascend/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..84413250745fb499a0923ec908f7b53e9c272252 GIT binary patch literal 6148 zcmeHKUrWO<5Wmc6SB9_$1s?;x4&0oG;!BzO1+3_U%3Rl~#oCOubI2IBt%TJHvDaWv}G)(%ygcjGh~=zutk(B<+xPD3?nt6>^uI@UJ^*R8mn+QxX? 
zYV6i!b8lx-m*f5IMqTcN0`IO*Ic52<=K6bn3kT2?Ji;RP!TRu^WyB*58{ z>jIZYe~>0By+sciVC93R7@}!7864=t6Xg2r_uy`7LJJEszzi@0zmoxbYgVeib9y{0 zW`G&^K?dl4kf?;7#loPzIAAHt3-Y=$EA==go9-9dN;xnu^IfmsHYOtVJs|MBAde>R9)%m6d+uNV;J zBmbz4OR{h4+~Vl1m8jRKB;;2Z{0PB{eu~kTp5hHuE!Z!qg6LT+3}O!o{|IOrxL^jp Gm4SEHsbdNN literal 0 HcmV?d00001 diff --git a/llm-localization/ascend/mindformers/.DS_Store b/llm-localization/ascend/mindformers/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..f76bd0f09a1787b313b0b6b914a4eabfb7539079 GIT binary patch literal 6148 zcmeHK!Ab)$5PhlDZYfBQ9`_4`3f=_Qvi*YofZbMFA=|BJ@w(sQk9hLnQTz%&!J9M5 z*ltM*ofcLJ8f_Ci|Q#AH(FrHTBszEuDRaSXk*s7qc%RhGFOQos`V5|d`QjXONz21`2+X4{N4*GrC2T$PLA~)oK3JrNd+%wN(XFfn5a-{9~Wb|Iz;Y|89{!sRF9Nzf!=o^Km|;SCX?e ycsV|6Q}SzaHqNU(Zc3=*j$+E?qxgi}8O2lX5N3k4M~aZ?kARaw8&%*}75D;8r*^;q literal 0 HcmV?d00001 diff --git a/llm-localization/ascend/mindie/.DS_Store b/llm-localization/ascend/mindie/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..88aafe13e78ef72f150edc6ac32a66f8a42f2bbf GIT binary patch literal 6148 zcmeHK&1xGl5FR;>?Ii?!Xdy>IuZH}zhJ;)eH+_L@X%0zoyEIog<~| zq#S3lG1*467+xX+e0D7wlcAh`(u4EEfx?l%F{WFZP_a5#=O|$=5I?0Vo|n@&GcgkW z#?ve|4-I#e)jjCBW7!{X1`ifsjywXSUw{$WecHezAc{NJ& zNnsPxWP*^Zi!@Knd|+mIQrg_eIf9@Q47+=a#Zmvbrw>m)Eqi)#{ITED{gb2RGU&Yj zuzz|!yv^=%^V?Ccc;K0|G7<)a zfj7l~YMw=B18m8^ts9%;w^pP5Kx5&!+Tl6{ihYU^%TMthnibeJE5OiW?GP4-{1LD; LNGA+DD+B)kOE6vE literal 0 HcmV?d00001 diff --git a/llm-localization/ascend/mindie/config/.DS_Store b/llm-localization/ascend/mindie/config/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 - import torch_npu.npu - File "/root/miniconda3/envs/wqh39/lib/python3.9/site-packages/torch_npu/npu/__init__.py", line 46, in - from 
.utils import (is_initialized, _lazy_call, _lazy_init, init, set_dump, - File "/root/miniconda3/envs/wqh39/lib/python3.9/site-packages/torch_npu/npu/utils.py", line 27, in - import torch_npu._C -ImportError: /root/miniconda3/envs/wqh39/bin/../lib/libgomp.so.1: cannot allocate memory in static TLS block -Segmentation fault (core dumped) -``` - -则在命令行前加上`LD_PRELOAD=上面的error路径`。如 - -```shell -LD_PRELOAD=/root/miniconda3/envs/wqh39/bin/../lib/libgomp.so.1 MAX_SEQ_LEN=2048 python main.py --task ${task_name} --is_quant ${is_quant} -``` - - -## 量化推理 - -# 量化工具使用 - -量化权重的获取需要使用大模型量化工具(集成至CANN包中),详细操作手册可见[大模型权重量化工具-ModelSlim](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/devtools/auxiliarydevtool/modelslim_0001.html) -。针对Baichuan2-13B的权重量化可参考如下步骤,运行时需将下述三个步骤的代码整合为一个python文件 - -**特别注意1**:本章节依赖**pytorch >= 2.0.0 CANN >= 7.0.0.B060** -环境,大模型量化工具依赖指定pytorch版本(不依赖torch_npu,只依赖原生torch)。该环境的pytorch版本与后续步骤可能不同,后续将优化pytorch版本依赖的限制 - -**特别注意2**:本章节依赖 hugging face 的标准 transformers 包。若环境中的 transformers 包被改动过,可能引起相关报错,此时建议重新安装 -transformers 包 - -**特别注意3**:本章节执行完毕后,在`QUANT_WEIGHT_PATH`路径下生成如下权重文件,请检查是否缺失: - -``` -deq_scale.npy fp_bias.npy -input_offset.npy input_scale.npy -quant_bias.npy quant_weight.npy -weight_offset.npy weight_scale.npy -``` - -## 校准数据准备 - -```python -calib_list = ["中国的首都在哪里?", - "请做一首诗歌:", - "我想要学习python,该怎么学习?", - "请帮我写一篇关于大模型推理优化的任职报告:", - "中国最值得去的几个景点"] - - -# 获取校准数据函数定义 -def get_calib_dataset(tokenizer, calib_list): - calib_dataset = [] - for calib_data in calib_list: - inputs = tokenizer([calib_data], return_tensors='pt').to('cpu') - print(inputs) - calib_dataset.append([inputs.data['input_ids'], inputs.data['position_ids'], inputs.data['attention_mask']]) - return calib_dataset - - -dataset_calib = get_calib_dataset(tokenizer, calib_list) # 校准数据获取 -``` - -## 量化参数配置与运行 - -```python -from modelslim.pytorch.llm_ptq.llm_ptq_tools import Calibrator, QuantConfig - -quant_config = QuantConfig(w_bit=8, disable_names=['transformer.output_layer'], 
dev_type='cpu', act_method=3, pr=0.5, - mm_tensor=False, w_hessian=False) -calibrator = Calibrator(model, quant_config, calib_data=dataset_calib, disable_level='L1') -calibrator.run() # 执行PTQ量化校准 -calibrator.save('QUANT_WEIGHT_PATH') # 保存量化参数 -``` - -- 建议直接使用量化权重生成脚本,生成量化权重 - ``` - python quant.py - ``` - -> 注:要使用torch2.0.0导出量化权重,否则会有精度偏差 quant.py脚本需要修改calibrator.save('QUANT_WEIGHT_PATH') 最终量化全的指定路径 - -2. 量化权重切分 - -- 修改代码 - 1. 修改`cut_quant_model_util.py`中`--input_path`为实际存放量化权重的路径 - 2. 修改`cut_quant_model_util.py`中`--output_dir`为自定义路径,用于存放切分后的模型量化权重 -- 执行切分 - ``` - python cut_quant_model_util.py - # 切分好的模型权重会存放在自定义的output_dir - ``` - -3. 适配量化推理代码 - -- 进入modeling_baichuan_quant_parallel.py,适配量化权重路径和回退层 - ``` - # 修改以下全局变量 - self.quant_weight_path = '/code/models/baichuan2/quanted_weight_cut_1123_' 量化切分权重路径 及上一步的output_dir - self.cut_float_weight = '' 浮点切分权重路径 - self.roll_back_layer = [0,1,2,3,4,7,9,10,17,18,19,20,22,23,24,26,36,37,38,39] - ``` - **特别注意**:此处的self.roll_back_layer必须与quant.py里面的disable_idx_lst 保持一致 - -4. 
执行量化模型推理 - - ``` - 单独推理 - bash cut_model_and_run_baichuan.sh inference 1 - 精度 - bash cut_model_and_run_baichuan.sh precision 1 - 性能 - bash cut_model_and_run_baichuan.sh performance 1 - - 具体参考atb_speed_sdk 使用README.md - ``` - -**特别注意 ** - -# 竞品对比 - -# 800T A2 - -## 精度 - -| 精度 | NPU | GPU | 对比 | -|----------------|-------------|-------------|----| -| STEM | 0.472093023 | 0.472093023 | 1 | -| Social Science | 0.661818182 | 0.661818182 | 1 | -| Humanities | 0.630350195 | 0.630350195 | 1 | -| Other | 0.567708333 | 0.567708333 | 1 | -| Avg acc | 0.568350669 | 0.568350669 | 1 | - -## 性能 - -| 芯片型号 | 首token推理速度(token/s) | 比例 | 增量推理速度(token/s) | 对比 | -|-------------------------------|---------------------|-------------|-------------------|-------------| -| Baichuan-13B NPU | 14.260809086490132 | | 31.69616807901823 | | -| Baichuan-13B A100(80G) NVlink | 15.642417690338782 | 0.911675508 | 36.41638939692089 | 0.870381952 | - -# 300I DUO - -## 性能 - -浮点 - -| 硬件形态 | 批大小 | 输入长度 | 输出长度 | 首次推理(ms/token) | 非首次推理(ms/token) | -|-------|-----|----------|----------|----------------|-----------------| -| Duo双芯 | 1 | 2^5~2^10 | 2^5~2^10 | 327 | 103 | - -量化 - -| 硬件形态 | 批大小 | 输入长度 | 输出长度 | 首次推理(ms/token) | 非首次推理(ms/token) | -|-------|-----|----------|----------|----------------|-----------------| -| Duo双芯 | 1 | 2^5~2^10 | 2^5~2^10 | \ | 75 | - -## 精度 - -| 精度 | NPU | GPU | 对比 | -|----------------|-------------|-------------|-------------| -| STEM | 0.472093023 | 0.472093023 | 1 | -| Social Science | 0.658181818 | 0.661818182 | 0.994505494 | -| Humanities | 0.630350195 | 0.630350195 | 1 | -| Other | 0.572916667 | 0.567708333 | 1.009174313 | -| Avg acc | 0.569093611 | 0.568350669 | 1.001307189 | - -# 附录 - -# 精度测试指南 - -## 配置说明 - -参考 [SDK精度测试指南CEVAL章节](../../atb_speed_sdk/README.md) - -## 运行脚本 - -- 单芯 - -```shell -cd ${script_path} -python main.py --task precision -``` - -- 多芯 - -```shell -cd ${script_path} -bash cut_model_and_run.sh precision -``` - 
-结束后在${ceval_work_dir}/test_result目录下查看测试结果。[双芯结果每个两份,只需看其中一份即可]。 - -| 文件 | 用途 | -|---------------------------|----------------------| -| device0.log | 运行过程日志 | -| cache0.csv | 结果详情,C列为预期答案,D列为测试答案 | -| result_0_classes_acc.json | 测试数据下按不同维度统计准确率 | -| result_0_subject_acc.json | 测试数据下按不同学科统计准确率 | - -**注意:后续重新运行, 需要删除当前目录下生成的test_result文件夹,否则只会读取当前的目录下的测试结果** - -# 性能测试 - -在功能运行正常的基础下,执行以下步骤进行性能测试 - -## 按照推理指导,下载模型及配置路径,并安装atb_speed_sdk - -## 1. 准备 - -参考 [SDK性能测试指南精确打点法章节](../../atb_speed_sdk/README.md) 进行准备 - -## 2. 修改配置文件 - -- 配置config.ini中[performance]属性, 如下: - ``` - model_name=baichuan2_13b - perf_mode=detail - ``` - -## 3. 执行测试脚本 - -- 单芯 - -```shell -cd ${script_path} -TIMEIT=1 python main.py --task performance -``` - -- 多芯 - -```shell -cd ${script_path} -TIMEIT=1 bash cut_model_and_run.sh performance 0 -``` - -将`TIMEIT`设置成1来返回具体的性能测试的值,默认是0 -上述多芯场景参数 - -* performance表示性能测试。 -* 0 表示浮点,1表示量化 - -### 性能测试结果 - -得到性能测试结果csv `performance_test_npu_${model_name}_xxx.csv` - -### 结果分析 - -| 列名 | 含义 | -|-------------------------------|------------| -| batch_size | batch大小 | -| input_seq_len(Encoding) | 输入长度 | -| output_seq_len(Decoding) | 输出长度 | -| ResponseTime(s) | 总响应时间 | -| forward_first_token_time(ms) | 首token推理时长 | -| forward_next_token_time(ms) | 增量推理时长 | -| pre_next_token_time(ms) | 前处理时长 | -| post_next_token_time_post(ms) | 后处理时长 | \ No newline at end of file diff --git a/llm-localization/ascend/mindie/mindie-1.0-chatglm2.md b/llm-localization/ascend/mindie/mindie-1.0-chatglm2.md deleted file mode 100644 index 26a27b9..0000000 --- a/llm-localization/ascend/mindie/mindie-1.0-chatglm2.md +++ /dev/null @@ -1,357 +0,0 @@ -# ChatGLM2-6B 模型推理指导 - -- [概述](#概述) -- [输入输出数据](#输入输出数据) -- [推理前准备](#推理前准备) -- [量化工具使用](#量化工具使用) -- [快速上手](#快速上手) - - [获取源码及依赖](#获取源码及依赖) - - [模型推理](#模型推理) -- [模型参考精度和性能结果](#模型参考精度和性能结果) - -# 概述 - -[ChatGLM2-6B](https://github.com/THUDM/ChatGLM2-6B/) 是开源中英双语对话模型 [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B) 
的第二代版本,在保留了初代模型对话流畅、部署门槛较低等众多优秀特性的基础之上,ChatGLM2-6B有更强大的性能、更长的上下文、更高效的推理和更开放的协议。 - -# 输入输出数据 - -- 输入数据 - - | 输入数据 | 大小 | 数据类型 | 数据排布格式 | 是否必选 | - | -------------- | -------------------- | -------- | ------------ | -------- | - | input_ids | BATCH_SIZE x SEQ_LEN | INT64 | ND | 是 | - | attention_mask | BATCH_SIZE x SEQ_LEN | FLOAT32 | ND | 否 | - -- 输出数据 - - | 输出数据 | 大小 | 数据类型 | 数据排布格式 | - | ---------- | --------------------------- | -------- | ------------ | - | output_ids | BATCH_SIZE x OUTPUT_SEQ_LEN | INT64 | ND | - -# 推理前准备 - -1. 参见 [推理环境准备](../../../../docs/推理环境准备.md) 安装 固件与驱动,CANN,PyTorchAdapter等基础软件。 - ```shell - # 使能cann环境变量(根据实际安装路径修改) - source ${path-to-ascend-toolkit}/set_env.sh - # 使能加速库环境变量(根据实际安装路径修改) - source ${path-to-ascendTB}/set_env.sh - # 使能inference库环境变量 - source ${path-to-atb_models}/set_env.sh - # 稀疏工具在线编译(可选) - cd ${path-to-ascend-toolkit}/tools/modelslim/pytorch/weight_compression/compress_graph/ - bash build.sh ${path-to-ascend-toolkit}/ascend-toolkit/latest/ - ``` - -2. 
下载模型实现文件和权重文件,并存储到任意路径下 `CHECKPOINT={path-to-weights}` - - - 推荐下载方式 - - ```shell - # 请自行确认已安装 git-lfs - git lfs install - git clone https://huggingface.co/THUDM/chatglm2-6b - cd chatglm2-6b - git reset --hard 4e38bef4c028beafc8fb1837462f74c02e68fcc2 - ``` - - - 其他下载方式 - - 如果你的网络环境较差,下载模型参数可能会花费较长时间甚至失败。此时可以先将模型下载到本地,然后从本地加载。 - - 分开下载模型实现文件和权重文件 - ```shell - # 只下载模型实现文件 - GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/THUDM/chatglm2-6b - cd chatglm2-6b - git reset --hard 4e38bef4c028beafc8fb1837462f74c02e68fcc2 - ``` - 从 [这里](https://cloud.tsinghua.edu.cn/d/674208019e314311ab5c/) 手动下载模型参数文件,并将下载的文件替换到本地的 `chatglm2-6b` 目录下。 - - - 手动从 [THUDM/chatglm2-6b](https://huggingface.co/THUDM/chatglm2-6b) 下载所有文件 - - - 下载后检查`${CHECKPOINT}`目录如下所示 - - ``` - |-- config.json - |-- configuration_chatglm.py - |-- modeling_chatglm.py - |-- pytorch_model-00001-of-00007.bin - |-- pytorch_model-00002-of-00007.bin - |-- pytorch_model-00003-of-00007.bin - |-- pytorch_model-00004-of-00007.bin - |-- pytorch_model-00005-of-00007.bin - |-- pytorch_model-00006-of-00007.bin - |-- pytorch_model-00007-of-00007.bin - |-- pytorch_model.bin.index.json - |-- quantization.py - |-- tokenization_chatglm.py - |-- tokenizer_config.json - |-- tokenizer.model - ``` - - - 在config.json中添加如下配置: - - ``` - { - ...... - "world_size": 1, - "float_layers_id": [0] - } - ``` - -3. 获取量化权重 - - - 直接下载量化权重 - - - [A300I DUO 量化权重下载](https://model-weight.obs.cn-north-4.myhuaweicloud.com/chatglm2_6B_310p.tar.gz) - - [A800I A2 量化权重下载](https://model-weight.obs.cn-north-4.myhuaweicloud.com/chatglm2_6B_910b.tar.gz) - - 请使用wget下载,下载完成后请将文件解压到任意路径`QUANT_WEIGHT_PATH=${path-to-quant-weight}` - - - 手动生成量化权重 - - 详见章节[量化工具使用](#量化工具使用) - -4. 
下载 `C-Eval` 数据集 - - 从 [Tsinghua Cloud](https://cloud.tsinghua.edu.cn/f/e84444333b6d434ea7b0) 下载处理好的 `C-Eval` 数据集,解压到任意目录下 `DATASET={path-to-dataset}` 。 - -# 量化工具使用 - -量化权重的获取需要使用大模型量化工具(集成至CANN包中),详细操作手册可见[大模型权重量化工具-ModelSlim](https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/devtools/auxiliarydevtool/modelslim_0001.html)。 - -导出 ChatGLM2-6B 的量化权重或者是稀疏量化权重: - -```shell -# 量化权重导出 -python export_quant_weight.py --float_weight ${CHECKPOINT} --data_path ${DATASET}/val/Social_Science/teacher_qualification.jsonl --quant_weight ${QUANT_WEIGHT_PATH} -# 稀疏量化权重导出 -python export_quant_weight.py --float_weight ${CHECKPOINT} --data_path ${DATASET}/val/Other/civil_servant.jsonl --quant_weight ${QUANT_WEIGHT_PATH} --sparse -``` - -参数说明: - -- float_weight:浮点权重路径。 -- data_path:用于校准的数据文件路径。 -- quant_weight:导出的量化权重或者是稀疏量化权重路径。 -- sparse:默认为false,指量化,True指稀疏量化。 - -**特别注意1**:本章节依赖**pytorch 2.0.0**环境,大模型量化工具依赖指定pytorch版本(不依赖torch_npu,只依赖原生torch)。该环境的pytorch版本与后续步骤可能不同,后续将优化pytorch版本依赖的限制 - -**特别注意2**:本章节依赖 hugging face 的标准 transformers 包。若环境中的 transformers 包被改动过,可能引起相关报错,此时建议重新安装 transformers 包 - -**特别注意3**:稀疏量化权重的获取详见[大模型稀疏权重工具使用文档](https://codehub-y.huawei.com/mindstudio/MindStudio-Backend/automl/files?ref=master&filePath=modelslim%2Fpytorch%2Fllm_sparsequant%2FREADME.md&isFile=true) - -**特别注意4**:本章节执行完毕后,在`QUANT_WEIGHT_PATH`路径下生成如下权重文件,请检查是否缺失: - -``` -deq_scale.npy fp_bias.npy -input_offset.npy input_scale.npy -quant_bias.npy quant_weight.npy -weight_offset.npy weight_scale.npy -``` - -# 快速上手 - -## 获取源码及依赖 - -1. 获取源码 - - ```shell - cd ${path-to-atb_models}/pytorch/examples/chatglm2/6b - ``` -2. 
安装第三方依赖 - - ```shell - pip install -r requirements.txt - ``` - -## 模型推理 - -- 可开启CPU Performance模式以提高模型推理性能 - - ``` - cpupower frequency-set -g performance - ``` - -- 推理前开启如下环境变量 - - ```shell - export HCCL_OP_BASE_FFTS_MODE_ENABLE=TRUE - export TASK_QUEUE_ENABLE=1 - export ATB_OPERATION_EXECUTE_ASYNC=1 - export ATB_LAYER_INTERNAL_TENSOR_REUSE=1 - - # 仅300 Ipro和300 IDuo上开启 - export HCCL_BUFFSIZE=110 - export ATB_USE_TILING_COPY_STREAM=1 - ``` - -- `C-Eval` 数据集推理 - - ```shell - # 浮点 - # 将TP_SIZE设为对应的并行数,例如单芯场景TP_SIZE=1,双芯场景TP_SIZE=2 - # 多芯场景请先执行权重生成(浮点单芯跳过) - python process_weights.py --model_path ${CHECKPOINT} --tp_size ${TP_SIZE} - # 执行浮点推理 - torchrun --nproc_per_node ${TP_SIZE} --master_port 2000 main.py --mode precision_dataset --model_path ${CHECKPOINT} --ceval_dataset ${DATASET} --batch 8 --tp_size ${TP_SIZE} - - # 量化 - # 添加量化环境变量 - export ENABLE_QUANT=1 - export QUANT_WEIGHT_PATH=${QUANT_WEIGHT_PATH} - # 将TP_SIZE设为对应的并行数,例如单芯场景TP_SIZE=1,双芯场景TP_SIZE=2 - # 执行权重生成(单芯/多芯都要执行) - python process_weights.py --model_path ${CHECKPOINT} --tp_size ${TP_SIZE} - # 执行量化推理 - torchrun --nproc_per_node ${TP_SIZE} --master_port 2000 main.py --mode precision_dataset --model_path ${CHECKPOINT} --ceval_dataset ${DATASET} --batch 8 --tp_size ${TP_SIZE} - - # 稀疏量化(当前仅支持300I DUO) - # 添加稀疏量化环境变量 - export ENABLE_SPARSE=1 - export QUANT_WEIGHT_PATH=${QUANT_WEIGHT_PATH} - export COMPRESS_WEIGHT_PATH=${COMPRESS_WEIGHT_PATH} - # 将TP_SIZE设为对应的并行数,例如单芯场景TP_SIZE=1,双芯场景TP_SIZE=2 - # 执行权重生成(单芯/多芯都要执行) - python process_weights.py --model_path ${CHECKPOINT} --tp_size ${TP_SIZE} - python3 generate_compress_weight.py --weight_path=${QUANT_WEIGHT_PATH} --save_path=${COMPRESS_WEIGHT_PATH} - # 执行稀疏量化推理 - torchrun --nproc_per_node ${TP_SIZE} --master_port 2000 main.py --mode precision_dataset --model_path ${CHECKPOINT} --ceval_dataset ${DATASET} --batch 8 --tp_size ${TP_SIZE} - ``` - -- 模型性能数据测试 - - **性能测试请先配置环境变量`export TIMEIT=1`,测试结束后删除该环境变量`unset TIMEIT`。** - - ```shell - # 浮点 - # 
将TP_SIZE设为对应的并行数,例如单芯场景TP_SIZE=1,双芯场景TP_SIZE=2 - # 多芯场景请先执行权重生成(浮点单芯跳过) - python process_weights.py --model_path ${CHECKPOINT} --tp_size ${TP_SIZE} - # 执行浮点推理 - torchrun --nproc_per_node ${TP_SIZE} --master_port 2000 main.py --mode performance --model_path ${CHECKPOINT} --batch ${batch_size} --tp_size ${TP_SIZE} - - # 量化 - # 添加量化环境变量 - export ENABLE_QUANT=1 - export QUANT_WEIGHT_PATH=${QUANT_WEIGHT_PATH} - # 将TP_SIZE设为对应的并行数,例如单芯场景TP_SIZE=1,双芯场景TP_SIZE=2 - # 执行权重生成(单芯/多芯都要执行) - python process_weights.py --model_path ${CHECKPOINT} --tp_size ${TP_SIZE} - # 执行量化推理 - torchrun --nproc_per_node ${TP_SIZE} --master_port 2000 main.py --mode performance --model_path ${CHECKPOINT} --batch ${batch_size} --tp_size ${TP_SIZE} - - # 稀疏量化(当前仅支持300I DUO) - # 添加稀疏量化环境变量 - export ENABLE_SPARSE=1 - export QUANT_WEIGHT_PATH=${QUANT_WEIGHT_PATH} - export COMPRESS_WEIGHT_PATH=${COMPRESS_WEIGHT_PATH} - # 将TP_SIZE设为对应的并行数,例如单芯场景TP_SIZE=1,双芯场景TP_SIZE=2 - # 执行权重生成(单芯/多芯都要执行) - python process_weights.py --model_path ${CHECKPOINT} --tp_size ${TP_SIZE} - python3 generate_compress_weight.py --weight_path=${QUANT_WEIGHT_PATH} --save_path=${COMPRESS_WEIGHT_PATH} - # 执行稀疏量化推理 - torchrun --nproc_per_node ${TP_SIZE} --master_port 2000 main.py --mode performance --model_path ${CHECKPOINT} --batch ${batch_size} --tp_size ${TP_SIZE} - ``` - - 备注: - - 1. 可通过配置`--seqlen_in_pair`和`--seqlen_out_pair`指定输入输出序列长度,例如以下命令测试的输入输出组合为[256,256],[512,512],[1024,1024] - - ```shell - torchrun --nproc_per_node ${TP_SIZE} --master_port 2000 main.py --mode performance --model_path ${CHECKPOINT} --device 0 --seqlen_in_pair 256,512,1024 --seqlen_out_pair 256,512,1024 --batch 1 --tp_size ${TP_SIZE} --performance_output_file performance_bs1.csv - ``` - - 2. 
环境变量 `MAX_SEQ_LEN` (默认值2048)必须大于等于 `seqlen_in + seqlen_out`,例如: - - ```shell - # 若 seqlen_in = 3584 seqlen_out = 512 - export MAX_SEQ_LEN=4096 - ``` - -- UI 交互 - - - 命令行交互 - - ```shell - # 浮点 - # 将TP_SIZE设为对应的并行数,例如单芯场景TP_SIZE=1,双芯场景TP_SIZE=2 - # 多芯场景请先执行权重生成(浮点单芯跳过) - python process_weights.py --model_path ${CHECKPOINT} --tp_size ${TP_SIZE} - # 执行浮点推理 - torchrun --nproc_per_node ${TP_SIZE} --master_port 2000 main.py --mode cli_demo --model_path ${CHECKPOINT} --tp_size ${TP_SIZE} - - # 量化 - # 添加量化环境变量 - export ENABLE_QUANT=1 - export QUANT_WEIGHT_PATH=${QUANT_WEIGHT_PATH} - # 将TP_SIZE设为对应的并行数,例如单芯场景TP_SIZE=1,双芯场景TP_SIZE=2 - # 执行权重生成(单芯/多芯都要执行) - python process_weights.py --model_path ${CHECKPOINT} --tp_size ${TP_SIZE} - # 执行量化推理 - torchrun --nproc_per_node ${TP_SIZE} --master_port 2000 main.py --mode cli_demo --model_path ${CHECKPOINT} --tp_size ${TP_SIZE} - ``` - - - Web 交互 - - ```shell - # 安装依赖 - pip install -r web_requirements.txt - - # 下载 GitHub 仓库 - git clone https://github.com/THUDM/ChatGLM2-6B.git - cd ChatGLM2-6B - git reset --hard 921d7e9adc69020a19169d1ba4f76c2675a2dd29 - - # 应用适配代码 - git apply ../web_demo.patch - cd .. 
- - # 将 TP_SIZE 设为对应的并行数,例如单芯场景 TP_SIZE=1,双芯场景 TP_SIZE=2 - - # Gradio 框架 - torchrun --nproc_per_node ${TP_SIZE} --master_port 2000 ChatGLM2-6B/web_demo.py --model_path ${CHECKPOINT} --tp_size ${TP_SIZE} - - # Streamlit 框架 - # ATB OpsRunner 的全局缓存暂不支持多线程,需要降低缓存级别,否则会报错 - # 0 不开启缓存,1 开启本地缓存,2 开启全局缓存,3 同时开启本地和全局缓存,默认为 3 - export ATB_OPSRUNNER_KERNEL_CACHE_TYPE=1 - torchrun --nproc_per_node ${TP_SIZE} --master_port 2000 -m streamlit run ChatGLM2-6B/web_demo2.py -- --model_path ${CHECKPOINT} --tp_size ${TP_SIZE} - ``` - -- `main.py` 参数说明: - - ```shell - --mode: 推理模式,可选单数据推理,数据集推理,性能测试以及命令行交互 - --model_path:模型权重路径 - --model:模型名称,当前仅支持chatglm2和chatglm3,默认为chatglm2 - --tp_size:张量并行数,等于使用的芯片数量 - --device:NPU设备id(可通过npu-smi info查看),多芯场景则为NPU设备起始id,例:--device=0 --tp_size=4,则使用device:0,1,2,3 - --batch:batch大小 - --model_file:推理使用的modeling文件 - --ceval_dataset:CEval数据集路径 - --seqlen_in_pair:性能测试时需要测试的输入长度,默认为[256, 512, 1024] - --seqlen_out_pair:性能测试时需要测试的输出长度,默认为[256, 512, 1024] - --performance_output_file:性能测试数据保存文件,默认为performance.csv - --print_response:是否打印性能测试的推理回答 - ``` - -# 模型参考精度和性能结果 - -- 参考精度 - - > 因为 `C-Eval` 数据集test子集需要上传官网得到结果,所以这里使用val子集进行精度对比 - - | ChatGLM2 | 类别 | Average Accuracy | - | ---------- | ---- | ---------------- | - | GPU (浮点bs8) | val | 53.56% | - | NPU (浮点bs8) | val | 53.12% | - -- 推理性能 - - > 这里性能结果仅作为参考,并非版本极致性能优化结果。 - - | 硬件形态 | 批大小 | 输入长度 | 输出长度 | 解码速度 | - | -------- | ------ | -------- | -------- | -------- | - | 300I Duo | 1 | 8192 | 1024 | 162ms | \ No newline at end of file diff --git a/llm-localization/ascend/mindie/mindie-1.0-qwen-72b.md b/llm-localization/ascend/mindie/mindie-1.0-qwen-72b.md deleted file mode 100644 index f94e801..0000000 --- a/llm-localization/ascend/mindie/mindie-1.0-qwen-72b.md +++ /dev/null @@ -1,302 +0,0 @@ -[TOC] - -# Qwen-72B模型-推理指导 - -注意,QWen-72b与14b版本模型结构一致,因此加速库及modeling等文件可复用,此处不再重复归档 - -# 快速上手 -### 路径变量解释 - -| 变量名 | 含义 | -|---------------------|----------------------------------------------------------------------| -| 
model_download_path | 开源权重放置目录 | -| llm_path | 加速库及模型库下载后放置目录 | -| model_path | 工作时模型所在的目录,可以和model_download_path相同,但一般模型是公共的,为了避免影响其他用户,单独建一个模型工作目录 | -| script_path | 工作脚本所在路径,本文为${llm_path}/pytorch/examples/qwen/72b | -| ceval_work_dir | ceval数据集、及结果保存所在目录,不必和模型脚本在相同目录 | - - -## 获取源码及依赖 -#### python requirements - -| 包名 | 推荐版本 | -|-------------------------------|--------| -| transformers | 4.30.2 | -| decorator | 5.1.1 | -| sympy | 1.11.1 | -| scipy | 1.11.3 | -| attrs | 23.1.0 | -| psutil | 5.9.6 | -| sentencepiece | 0.1.99 | -| tiktoken | 0.5.2 | -| transformers-stream-generator | 0.0.4 | -| einops | 0.7.0 | -| pandas | 0.8.2 | - -### 下载模型权重 - -下载模型权重,放置到自定义`${model_download_path}` (请下载链接中'Files and versions'页签下的所有文件) -``` -https://huggingface.co/Qwen/Qwen-72B -``` -注意:实际使用的模型可以是base版或chat版,应根据实际需求确定。例子中给出的是base版。 - -### 拷贝文件 - -### 准备 - -#### 1. 将开源模型拷贝到模型工作目录,权重文件使用软链接即可,同时将modeling文件拷贝到模型,并修改开源的config.json, - -```shell -cd ${model_path} -cp ${model_download_path}/*.py ./ -cp ${model_download_path}/*.json ./ -cp ${model_download_path}/*.tiktoken ./ -cp -s ${model_download_path}/*.safetensors ./ -``` - -#### 2. 安装 atb_speed_sdk - -```shell -cd ${llm_path}/pytorch/examples/atb_speed_sdk -pip install . -``` - -#### 3. 
张量并行模型切分(仅在模型需要多卡并行时使用) - -```shell -cp ${script_path}/modeling_qwen_cut.py ${model_path} -cp ${script_path}/modeling_qwen_ascend.py ${model_path} -``` - -修改 ${model_path}里的config.json中的kv对,改成`"AutoModelForCausalLM": "modeling_qwen_cut.QWenLMHeadModel"` - -```text -修改`${script_path}/cut_model_and_run.sh` -将 `input_dir` 修改为模型所在路径 `${model_path}` -将 `output_dir` 修改为切分后的模型所存储的路径,如: `${model_path/part_model}`。模型切分成功后,会自动生成新目录part_model(用户无需新建该文件夹) -将 `rank_size` 修改为期望切分的份数,例如rank_size=8表示模型切分为8份。实际切分份数应视显存大小而定。 - -``` - -目录结构示例建议 - -``` ---model_path - *.py(模型源文件) - *.json(模型源文件) - *.tiktoken(模型源文件) - *.bin(模型源文件,软链接,部分模型权重为其它格式,如*.safetensors等) - modeling_qwen_cut.py(权重切分脚本) - --part_model(以双卡为例,权重切分成功后文件夹) - --0 - --1 - ......(其他) ---script_path - cut_model_and_run.sh - cut_model_util.py - main.py - config.ini - ......(其他) -``` - -执行 - -```shell -cd ${script_path} -bash cut_model_and_run.sh -``` - -切分所需时间较长,切分完成后,将会打印 'Tensor parallelism weights have been successfully saved.'。 - -#### 4.修改config.json配置 - -- 单卡运行时**必须**修改 -- 多卡运行时,会在切分阶段会自动修改,没有定制的情况下,可以不操作 - -##### 单卡 -修改${model_path}/config.json中的kv对,改成 - -``` -"AutoModelForCausalLM": "modeling_qwen_ascend.QWenLMHeadModel" -``` - -##### 多卡 - -修改 -${model_path}/part_model/{rank_id}/config.json中的kv对,改成 - -``` -"AutoModelForCausalLM": "modeling_qwen_ascend.QWenLMHeadModel" -``` - -# CPU高性能模式 - -可开启CPU Performance模式以提高模型推理性能。 - -``` - -cpupower frequency-set -g performance - -``` - -### 执行推理 - -#### 修改 ${script_path}/config.ini - -[config文件配置参考](../../atb_speed_sdk/README.md) -提示:多卡并行推理时,config.ini中model_path路径为part_model父文件夹。例如: - -``` -# 正确示例: - -model_path=../model - -# 错误示例: - -model_path=../model/part_model -``` - -#### main.py - -提供了demo推理,精度测试,性能测试三种下游任务。 -task_name可选inference、precision、performance。 - -- 单卡 - 修改 ${model_path}里的config.json中的kv对,改成`"AutoModelForCausalLM": "modeling_qwen_ascend.QWenLMHeadModel"` - -```shell -python main.py --task ${task_name} -``` - -注意,由于本模型体量较大,受硬件限制,单卡很可能无法跑起。 - -- 多卡 -```shell 
-bash cut_model_and_run.sh ${task_name} -``` - -**注意** -1.docker环境与conda环境有所不同,docker环境中启动模型时需要修改环境变量"ATB_OPERATION_EXECUTE_ASYNC=0"、"TASK_QUEUE_ENABLE=0",否则可能出现算子下发同步失败。 - -**可以使用 MAX_SEQ_LEN 环境变量来设置model支持的最大长度以优化显存占用, 默认使用config里面的max_model_length** -如 - -```shell -MAX_SEQ_LEN=2048 python main.py --task ${task_name} -``` - -或 - -```shell -MAX_SEQ_LEN=2048 bash cut_model_and_run.sh ${task_name} -``` - -如果遇到 - -```text -Traceback (most recent call last): - File "/root/miniconda3/envs/wqh39/lib/python3.9/site-packages/torch_npu/__init__.py", line 31, in <module> - import torch_npu.npu - File "/root/miniconda3/envs/wqh39/lib/python3.9/site-packages/torch_npu/npu/__init__.py", line 46, in <module> - from .utils import (is_initialized, _lazy_call, _lazy_init, init, set_dump, - File "/root/miniconda3/envs/wqh39/lib/python3.9/site-packages/torch_npu/npu/utils.py", line 27, in <module> - import torch_npu._C -ImportError: /root/miniconda3/envs/wqh39/bin/../lib/libgomp.so.1: cannot allocate memory in static TLS block -Segmentation fault (core dumped) -``` - -则在命令行前加上`LD_PRELOAD=上面的error路径`。如 - -```shell -LD_PRELOAD=/root/miniconda3/envs/wqh39/bin/../lib/libgomp.so.1 MAX_SEQ_LEN=2048 python main.py --task ${task_name} --is_quant ${is_quant} -``` -# 竞品对比 - -待补充 - -# 附录: - -# 精度测试指南 - -## 配置说明 - -参考 [SDK精度测试指南CEVAL章节](../../atb_speed_sdk/README.md) - -## 运行脚本 - -- 单芯 - -```shell -cd ${script_path} -python main.py --task precision -``` - -- 多芯 -```shell -cd ${script_path} -bash cut_model_and_run.sh precision -``` - -结束后在${ceval_work_dir}/test_result目录下查看测试结果。[双芯结果每个两份,只需看其中一份即可]。 - -| 文件 | 用途 | -|---------------------------|----------------------| -| device0.log | 运行过程日志 | -| cache0.csv | 结果详情,C列为预期答案,D列为测试答案 | -| result_0_classes_acc.json | 测试数据下按不同维度统计准确率 | -| result_0_subject_acc.json | 测试数据下按不同学科统计准确率 | - -**注意:后续重新运行, 需要删除当前目录下生成的test_result文件夹,否则只会读取当前的目录下的测试结果** - -# 性能测试 - -在功能运行正常的基础下,执行以下步骤进行性能测试 - -## 按照推理指导,下载模型及配置路径,并安装atb_speed_sdk - -## 1. 
准备 - -参考 [SDK性能测试指南精确打点法章节](../../atb_speed_sdk/README.md) 进行准备 - -## 2. 修改配置文件 - -- 配置config.ini中[performance]属性, 如下: - ``` - model_name=qwen_72b - perf_mode=detail - ``` - -## 3. 执行测试脚本 - -- 单芯 - -```shell -cd ${script_path} -TIMEIT=1 python main.py --task performance -``` - -- 多芯 -```shell -cd ${script_path} -TIMEIT=1 bash cut_model_and_run.sh performance -``` - -为了不影响正常使用,将`TIMEIT`设置成1来返回具体的性能测试的值,默认是0 - -### 性能测试结果 - -得到性能测试结果csv `performance_test_npu_${model_name}_xxx.csv` - -### 结果分析 - -| 列名 | 含义 | -|-------------------------------|------------| -| batch_size | batch大小 | -| input_seq_len(Encoding) | 输入长度 | -| output_seq_len(Decoding) | 输出长度 | -| ResponseTime(s) | 总响应时间 | -| forward_first_token_time(ms) | 首token推理时长 | -| forward_next_token_time(ms) | 增量推理时长 | -| pre_next_token_time(ms) | 前处理时长 | -| post_next_token_time_post(ms) | 后处理时长 | \ No newline at end of file diff --git a/llm-localization/ascend/mindie/mindie-1.0.RC2.md b/llm-localization/ascend/mindie/mindie-1.0.RC2.md new file mode 100644 index 0000000..1fca7d8 --- /dev/null +++ b/llm-localization/ascend/mindie/mindie-1.0.RC2.md @@ -0,0 +1,132 @@ + + + +文档: +- https://www.hiascend.com/document/detail/zh/mindie/10RC2/whatismindie/mindie_what_0001.html + +docker: +- https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f + + + +rsync -P --rsh=ssh -r root@192.168.16.211:/root/mindie-1.0.rc2.tar . 
+ + + +swr.cn-south-1.myhuaweicloud.com/ascendhub/mindie:1.0.RC2-800I-A2-aarch64 + + +``` +docker run -it -d --name mindie-rc2-45 --net=host \ +-e ASCEND_VISIBLE_DEVICES=4,5 \ +-p 1925:1025 \ +--shm-size=32g \ +-w /workspace \ +-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ +-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \ +-v /data/model_from_hf:/workspace/model \ +swr.cn-south-1.myhuaweicloud.com/ascendhub/mindie:1.0.RC2-800I-A2-aarch64 \ +/bin/bash + + +docker exec -it mindie-rc2-45 bash + + + +cd /opt/package +# 安装CANN包 +source ./install_and_enable_cann.sh + + + +source /usr/local/Ascend/ascend-toolkit/set_env.sh +source /usr/local/Ascend/nnal/atb/set_env.sh +source /usr/local/Ascend/mindie/set_env.sh +source /usr/local/Ascend/llm_model/set_env.sh + + + +vim /usr/local/Ascend/mindie/latest/mindie-service/conf/config.json + +/workspace/model/Qwen1.5-7B-Chat/ + + +export MIES_PYTHON_LOG_TO_FILE=1 +export MIES_PYTHON_LOG_TO_STDOUT=1 +export PYTHONPATH=/usr/local/Ascend/llm_model:$PYTHONPATH +cd /usr/local/Ascend/mindie/latest/mindie-service/bin +./mindieservice_daemon + +``` + + +## 新镜像 + +``` +docker commit -a "guodong" -m "mindie-1.0.RC2" 365815a95f16 harbor/ascend/mindie-base:1.0.RC2 + +# -p 192.168.16.xx:1025:1025 + +docker run -it --rm \ +-e ASCEND_VISIBLE_DEVICES=2,3 \ +-p 1025:1025 \ +--shm-size=32g \ +-w /workspace \ +-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ +-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \ +-v /data/model_from_hf:/workspace/model \ +harbor/ascend/mindie-base:1.0.RC2 \ +/bin/bash + +``` + + +``` +llm-server3.sh + + + +docker run -it --rm \ +-e ASCEND_VISIBLE_DEVICES=6,7 \ +-p 1825:1025 \ +--env AIE_LLM_CONTINUOUS_BATCHING=1 \ +--shm-size=32g \ +-w /workspace \ +-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ +-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \ +-v /data/model_from_hf/Qwen1.5-7B-Chat:/workspace/model \ +-v /home/workspace/llm-server3.sh:/workspace/llm-server.sh \ +-v 
/home/workspace/mindservice.log:/usr/local/Ascend/mindie/latest/mindie-service/logs/mindservice.log \ +harbor/ascend/mindie-base:1.0.RC2 \ +/bin/bash + + + + + +docker run -it --rm \ +-e ASCEND_VISIBLE_DEVICES=4,5 \ +-p 1525:1025 \ +--env AIE_LLM_CONTINUOUS_BATCHING=1 \ +--shm-size=32g \ +-w /workspace \ +-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ +-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \ +-v /data/model_from_hf/Qwen1.5-7B-Chat:/workspace/model \ +-v /home/workspace/llm-server3.sh:/workspace/llm-server.sh \ +-v /home/workspace/mindservice.log:/usr/local/Ascend/mindie/latest/mindie-service/logs/mindservice.log \ +harbor/ascend/mindie-base:1.0.RC2 \ +/workspace/llm-server.sh \ +--model_name=qwen-chat \ +--model_weight_path=/workspace/model \ +--world_size=2 \ +--npu_mem_size=15 + + + + + +``` + + + diff --git a/llm-localization/ascend/mindie/mindie-1.0.rc2-config.json b/llm-localization/ascend/mindie/mindie-1.0.rc2-config.json new file mode 100644 index 0000000..60f14c2 --- /dev/null +++ b/llm-localization/ascend/mindie/mindie-1.0.rc2-config.json @@ -0,0 +1,88 @@ +{ + "OtherParam" : + { + "ResourceParam" : + { + "cacheBlockSize" : 128 + }, + "LogParam" : + { + "logLevel" : "Info", + "logPath" : "logs/mindservice.log" + }, + "ServeParam" : + { + "ipAddress" : "172.17.0.2", + "managementIpAddress" : "127.0.0.2", + "port" : 1025, + "managementPort" : 1026, + "maxLinkNum" : 1000, + "httpsEnabled" : false, + "tlsCaPath" : "security/ca/", + "tlsCaFile" : ["ca.pem"], + "tlsCert" : "security/certs/server.pem", + "tlsPk" : "security/keys/server.key.pem", + "tlsPkPwd" : "security/pass/mindie_server_key_pwd.txt", + "tlsCrl" : "security/certs/server_crl.pem", + "managementTlsCaFile" : ["management_ca.pem"], + "managementTlsCert" : "security/certs/management_server.pem", + "managementTlsPk" : "security/keys/management_server.key.pem", + "managementTlsPkPwd" : "security/pass/management_mindie_server_key_pwd.txt", + "managementTlsCrl" : 
"security/certs/management_server_crl.pem", + "kmcKsfMaster" : "tools/pmt/master/ksfa", + "kmcKsfStandby" : "tools/pmt/standby/ksfb", + "multiNodesInferPort" : 1120, + "interNodeTLSEnabled" : true, + "interNodeTlsCaFile" : "security/ca/ca.pem", + "interNodeTlsCert" : "security/certs/server.pem", + "interNodeTlsPk" : "security/keys/server.key.pem", + "interNodeTlsPkPwd" : "security/pass/mindie_server_key_pwd.txt", + "interNodeKmcKsfMaster" : "tools/pmt/master/ksfa", + "interNodeKmcKsfStandby" : "tools/pmt/standby/ksfb" + } + }, + "WorkFlowParam" : + { + "TemplateParam" : + { + "templateType" : "Standard", + "templateName" : "Standard_llama" + } + }, + "ModelDeployParam" : + { + "engineName" : "mindieservice_llm_engine", + "modelInstanceNumber" : 1, + "tokenizerProcessNumber" : 8, + "maxSeqLen" : 2560, + "npuDeviceIds" : [[$npuids]], + "multiNodesInferEnabled" : false, + "ModelParam" : [ + { + "modelName" : "$model_name", + "modelWeightPath" : "$model_weight_path", + "worldSize" : $world_size, + "cpuMemSize" : 5, + "npuMemSize" : $npu_mem_size, + "backendType": "atb", + "pluginParams" : "" + } + ] + }, + "ScheduleParam" : + { + "maxPrefillBatchSize" : 50, + "maxPrefillTokens" : 8192, + "prefillTimeMsPerReq" : 150, + "prefillPolicyType" : 0, + + "decodeTimeMsPerReq" : 50, + "decodePolicyType" : 0, + + "maxBatchSize" : 200, + "maxIterTimes" : 512, + "maxPreemptCount" : 0, + "supportSelectBatch" : true, + "maxQueueDelayMicroseconds" : 5000 + } +} \ No newline at end of file diff --git a/llm-localization/ascend/mindie/mindie-1.0.rc2-llm-server.sh b/llm-localization/ascend/mindie/mindie-1.0.rc2-llm-server.sh new file mode 100644 index 0000000..df28164 --- /dev/null +++ b/llm-localization/ascend/mindie/mindie-1.0.rc2-llm-server.sh @@ -0,0 +1,170 @@ +#!/bin/bash + +echo "入参:" $@ + +for a in "$@"; do + #echo $a + if [[ `echo $a | grep "^--model_name="` ]]; then + model_name=`echo $a | grep "^--model_name=" | awk -F '=' '{print $2}'` + fi + if [[ `echo $a | grep 
"^--model_weight_path="` ]]; then + model_weight_path=`echo $a | grep "^--model_weight_path=" | awk -F '=' '{print $2}'` + fi + if [[ `echo $a | grep "^--world_size="` ]]; then + world_size=`echo $a | grep "^--world_size=" | awk -F '=' '{print $2}'` + fi + if [[ `echo $a | grep "^--npu_mem_size="` ]]; then + npu_mem_size=`echo $a | grep "^--npu_mem_size=" | awk -F '=' '{print $2}'` + fi +done + +if [ -z "$model_name" ]; then + model_name="default" +fi + +if [ -z "$model_weight_path" ]; then + model_weight_path="/workspace/model" +fi + +if [ -z "$world_size" ]; then + world_size=4 +fi + +if [ -z "$npu_mem_size" ]; then + npu_mem_size=8 +fi + +echo "平台入参: model_name: $model_name, model_weight_path: $model_weight_path , world_size: $world_size , npu_mem_size: $npu_mem_size" + + +npuids="" +card_num=$(($world_size - 1)) +for i in `seq 0 $card_num` + do + if [[ $i == $card_num ]] ; + then + npuids=$npuids$i + else + npuids=$npuids$i"," + fi + done + + +echo $npuids + + +ip=`hostname -I` + +echo "docker ip: [$ip]" +ip=$(echo "$ip" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') +echo "docker handle ip: [$ip]" + +# DEPLOYMENT_CONF_PATH="/home/guodong.li/workspace/config.json" + +DEPLOYMENT_CONF_PATH="/usr/local/Ascend/mindie/latest/mindie-service/conf/config.json" + +cat <<EOF > $DEPLOYMENT_CONF_PATH +{ + "OtherParam" : + { + "ResourceParam" : + { + "cacheBlockSize" : 128 + }, + "LogParam" : + { + "logLevel" : "Info", + "logPath" : "logs/mindservice.log" + }, + "ServeParam" : + { + "ipAddress" : "$ip", + "managementIpAddress" : "127.0.0.2", + "port" : 1025, + "managementPort" : 1026, + "maxLinkNum" : 1000, + "httpsEnabled" : false, + "tlsCaPath" : "security/ca/", + "tlsCaFile" : ["ca.pem"], + "tlsCert" : "security/certs/server.pem", + "tlsPk" : "security/keys/server.key.pem", + "tlsPkPwd" : "security/pass/mindie_server_key_pwd.txt", + "tlsCrl" : "security/certs/server_crl.pem", + "managementTlsCaFile" : ["management_ca.pem"], + "managementTlsCert" : 
"security/certs/management_server.pem", + "managementTlsPk" : "security/keys/management_server.key.pem", + "managementTlsPkPwd" : "security/pass/management_mindie_server_key_pwd.txt", + "managementTlsCrl" : "security/certs/management_server_crl.pem", + "kmcKsfMaster" : "tools/pmt/master/ksfa", + "kmcKsfStandby" : "tools/pmt/standby/ksfb", + "multiNodesInferPort" : 1120, + "interNodeTLSEnabled" : true, + "interNodeTlsCaFile" : "security/ca/ca.pem", + "interNodeTlsCert" : "security/certs/server.pem", + "interNodeTlsPk" : "security/keys/server.key.pem", + "interNodeTlsPkPwd" : "security/pass/mindie_server_key_pwd.txt", + "interNodeKmcKsfMaster" : "tools/pmt/master/ksfa", + "interNodeKmcKsfStandby" : "tools/pmt/standby/ksfb" + } + }, + "WorkFlowParam" : + { + "TemplateParam" : + { + "templateType" : "Standard", + "templateName" : "Standard_llama" + } + }, + "ModelDeployParam" : + { + "engineName" : "mindieservice_llm_engine", + "modelInstanceNumber" : 1, + "tokenizerProcessNumber" : 8, + "maxSeqLen" : 2560, + "npuDeviceIds" : [[$npuids]], + "multiNodesInferEnabled" : false, + "ModelParam" : [ + { + "modelName" : "$model_name", + "modelWeightPath" : "$model_weight_path", + "worldSize" : $world_size, + "cpuMemSize" : 5, + "npuMemSize" : $npu_mem_size, + "backendType": "atb", + "pluginParams" : "" + } + ] + }, + "ScheduleParam" : + { + "maxPrefillBatchSize" : 50, + "maxPrefillTokens" : 8192, + "prefillTimeMsPerReq" : 150, + "prefillPolicyType" : 0, + + "decodeTimeMsPerReq" : 50, + "decodePolicyType" : 0, + + "maxBatchSize" : 200, + "maxIterTimes" : 512, + "maxPreemptCount" : 0, + "supportSelectBatch" : true, + "maxQueueDelayMicroseconds" : 5000 + } +} +EOF + +echo "部署参数,$DEPLOYMENT_CONF_PATH" +cat $DEPLOYMENT_CONF_PATH + +source /usr/local/Ascend/ascend-toolkit/set_env.sh +source /usr/local/Ascend/nnal/atb/set_env.sh +source /usr/local/Ascend/mindie/set_env.sh +source /usr/local/Ascend/llm_model/set_env.sh + +export MIES_PYTHON_LOG_TO_FILE=1 +export 
MIES_PYTHON_LOG_TO_STDOUT=1 +export PYTHONPATH=/usr/local/Ascend/llm_model:$PYTHONPATH +cd /usr/local/Ascend/mindie/latest/mindie-service/bin + +./mindieservice_daemon diff --git a/llm-localization/ascend/mindie/mindie-api.md b/llm-localization/ascend/mindie/mindie-api.md index 4c0137e..ee940ac 100644 --- a/llm-localization/ascend/mindie/mindie-api.md +++ b/llm-localization/ascend/mindie/mindie-api.md @@ -73,8 +73,11 @@ curl "http://127.0.0.1:1025/v1/chat/completions" \ }' +# http://127.0.0.1:1025/v1/chat/completions +# +# http://192.168.16.xxx:1725/v1/chat/completions -curl "http://127.0.0.1:1025/v1/chat/completions" \ +curl "http://172.17.0.2:1025/v1/chat/completions" \ -H "Content-Type: application/json" \ -d '{ "model": "qwen1.5-14b", diff --git "a/llm-localization/ascend/mindie/\346\227\245\345\277\227\345\210\206\346\236\220.txt" "b/llm-localization/ascend/mindie/\346\227\245\345\277\227\345\210\206\346\236\220.txt" new file mode 100644 index 0000000..c3fd59e --- /dev/null +++ "b/llm-localization/ascend/mindie/\346\227\245\345\277\227\345\210\206\346\236\220.txt" @@ -0,0 +1,33 @@ + + +tail -100f mindservice.log | grep "COMPLETED REQ ID" + +2024-07-24 16:25:04.777655 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1236, 3 , 272 , 16 , 256 , 20 , 30 , 1 +2024-07-24 16:25:05.234118 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1239, 3 , 271 , 15 , 256 , 20 , 29 , 2 +2024-07-24 16:25:05.360007 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1262, 1 , 99 , 22 , 77 , 20 , 26 , 1 +2024-07-24 16:25:05.571847 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1244, 2 , 234 , 22 , 212 , 20 , 26 , 1 +2024-07-24 16:25:05.705152 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1241, 3 , 281 , 25 , 256 , 20 , 25 , 1 +2024-07-24 16:25:06.538975 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1258, 2 , 145 , 16 , 129 , 20 , 27 , 2 +2024-07-24 16:25:06.901611 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1274, 1 , 41 , 15 , 26 , 20 , 27 , 1 +2024-07-24 16:25:07.724699 
1360 info ibis_request.cc:240] COMPLETED REQ ID: 1253, 2 , 195 , 13 , 182 , 20 , 29 , 1 +2024-07-24 16:25:07.940994 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1277, 1 , 45 , 17 , 28 , 20 , 29 , 1 +2024-07-24 16:25:08.764214 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1257, 2 , 201 , 17 , 184 , 20 , 31 , 1 +2024-07-24 16:25:08.973185 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1280, 1 , 40 , 19 , 21 , 20 , 31 , 1 +2024-07-24 16:25:10.494941 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1282, 1 , 56 , 25 , 31 , 20 , 33 , 1 +2024-07-24 16:25:10.541398 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1250, 3 , 269 , 13 , 256 , 19 , 32 , 1 +2024-07-24 16:25:13.150968 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1284, 1 , 82 , 28 , 54 , 20 , 40 , 1 +2024-07-24 16:25:13.282448 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1259, 3 , 273 , 17 , 256 , 20 , 41 , 2 +2024-07-24 16:25:13.913430 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1261, 3 , 273 , 17 , 256 , 20 , 38 , 1 +2024-07-24 16:25:14.495745 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1263, 3 , 279 , 23 , 256 , 20 , 40 , 1 +2024-07-24 16:25:14.717521 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1264, 3 , 268 , 12 , 256 , 20 , 38 , 2 +2024-07-24 16:25:15.027415 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1266, 3 , 268 , 12 , 256 , 20 , 35 , 1 +2024-07-24 16:25:15.521481 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1287, 1 , 61 , 16 , 45 , 20 , 34 , 1 +2024-07-24 16:25:15.567090 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1267, 3 , 273 , 17 , 256 , 19 , 33 , 1 +2024-07-24 16:25:16.039858 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1268, 3 , 272 , 16 , 256 , 20 , 33 , 1 +2024-07-24 16:25:16.432710 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1269, 3 , 329 , 73 , 256 , 20 , 31 , 1 +2024-07-24 16:25:17.082790 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1270, 3 , 263 , 16 , 247 , 20 , 30 , 1 +2024-07-24 16:25:17.339481 1360 info ibis_request.cc:240] 
COMPLETED REQ ID: 1289, 1 , 72 , 15 , 57 , 20 , 30 , 1 +2024-07-24 16:25:17.993777 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1271, 3 , 270 , 14 , 256 , 20 , 31 , 1 +2024-07-24 16:25:18.121696 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1273, 3 , 271 , 15 , 256 , 20 , 29 , 1 +2024-07-24 16:25:18.248203 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1286, 1 , 116 , 17 , 99 , 20 , 27 , 1 +2024-07-24 16:25:18.458886 1360 info ibis_request.cc:240] COMPLETED REQ ID: 1275, 3 , 280 , 24 , 256 , 20 , 27 , 1 diff --git a/llm-localization/ascend/pytorch/.DS_Store b/llm-localization/ascend/pytorch/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..8a52378d8e620596e5a419fb188e7fb119d1082a GIT binary patch literal 6148 zcmeH~I}XAy42EqU5)um|V-^m;4I)%dzy)}XMGS~NN9XrLNEBv-(C^Cr5+`oee`snE z(art57FmeM3~rQ_g@GyNQ|WOXV3WOMt}^U`@6lGBAx&FiO?i z5X*ZxSaM%YwqTTtX7i!>WVJ2^rco|hFo9`xFi-(1Fi>C|^M#%NTllB`e^A0H6`%rt zrhrbmUAMtY<=Oi7dY0d3*2WDE`gVkup8!nkC|<(duwHBc)?^DJ1LKc?%fLVdzN)|j DGWHW} literal 0 HcmV?d00001 diff --git a/llm-performance/.DS_Store b/llm-performance/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..32e64ca0681c2bcfeec6116e2322a6931c851f80 GIT binary patch literal 8196 zcmeHM&1(}u6o1pkZq`ysq1s-A6%R%1M`}e-FKL=0rRbq47Ex)l*=-WLyR&T41}W9W zTa6(81D?G|j~+b;9t4ki^W?>ge}D(kHy^2+>_$otQgEis`%T`w_h#p}FSC6c03i7- zYX)Ep0MghQW(H9tG&awAto2FFC?Y|BSRW66!W6h*Ejlr4rBDng1{4E|0mXn~;5aaV z&urGxJ)ZkwR!PNxV&K1IfSnHpc7{2Lp+J6gpx`9{VhYu~;W)zvh=#-ycOB+n@yUL0 zBU-=HvguG0XYmU~ejrZ$;*EzGfhv?B01KKNZI}cfY}9N3Md_l}gEH7wz3iBj^0xse z!NEWGz)-#@h%N;C82)8sp)~ifFtm>&3J;?JW0Ov0rdXz~&#zI4n2k#d1-Y~Im**G0 zFI{{;ho%X3)}ieDYHZB`+++>+Z=tl!X5E+Ty)k{KR&QjppGjYT>coJi>Dr)nzgVTs zVx!d9s^?3=UG`}O88h4Cl6kvkH`g*F^VDmUYMvc%hni)h$-UK@XVGS!Zh2On`xeE7 zuBY|2%;?U}rR;<;b9r`m!q~aW##d%%cX##l>GRXqm)ABO*Q0xaDI!l+7>dxxhEK6~ zY$ zB~j9c+seZdMb35+b>`0agLA8`)oUB{)?n-PqjxLM%C8y&>;NchRc&Rc^>Od(ix0g> z?>G$fwx_$)`jqIwzSYs=LuV_pwR(AwR+k6nQ-~_uh0Id*{O5Eip%^$a1O38tjMe`O 
zz2E;IIlgjK3@8SUKLaFHC>HW~o$dbsyDyp5+6MM@?5r`nP#||f!OL+(UXCLk{9uS< Z15KGvPGTq!HQ3aP08WDvih+O1z;Dd%L;?T+ literal 0 HcmV?d00001 diff --git a/llm-performance/mindie/.DS_Store b/llm-performance/mindie/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..f74692ae66648a7ec27de65dec41e310045153bd GIT binary patch literal 10244 zcmeHMU2GIp6h3EK=**PDbZAQy^&=*aNiKs7{3aE)M_+X6sWa5K9c<$WU(m#XN7{ts??mhRO zd(ORczI)Eyo?8IGSWasK2m^pfl~H8@HM0~p&+3dK_#CE(qz|Bj1!*{wDmcS4qyt8P zK!8AiK!8AiK!Cu10Rj5WW<@OLG-v|^0t5mCCJRlbwcqstUQeu0B z`jiKFI$@tieLBXer%G$`>;ZjM^ivGv>NFo_;zXl99pjX%19Ejhe`oYF6!dl{y|5<^ z809o*0|WvDW+K3HcM(iTQs(YX&fim}ouavGsbSM_+(_gG3MHlU=9iU8W#whZ5<_+& z;U?Xj6H8`~uu+$7o2gi{aInfB~# zc4 zwM^R@qaR^q_P+dxBO@jCAnPf?veZYjI?bfjI;WHvdE!u*iXZQSv^l-(3`f+ zT*h(+l*)9{Oy|s`tGAja6L$Js{j|$^haEfXwt0dBijs5mLpsSBc|A_jq^h!&I)`1u zwwxY4>kzGrM#vwiNLLnAhgPrK6xq6SPusqZgX0wo7b%tMV)-z|mE{_b8+x`s>7;c_ zGpxbxf}y!Xy?MhiQl{QzB{RBMiS^QWMa`1hrOQ;63*1pCiuUDF#;Kg)o>A&Vaa>ur zs6tl5t0Yl*`=~}A`G~EtoiuMoRl{pV@d)F$Wp!#fTUIx0B!y&`LniYjkPBptVN zShr|zW?DR~s{M@Ls%g4L7Sz@(sZjT;2Snuy@vt+L%<7#5x~Yo%PEqvElK5<;cFl0@ ziOB?;R17O`&?QJ`!S6z`F1nE)iD!;f#Rn5D`y*#5tcDhN5Dr2=Xi$LX-~wEPOK=6= zg%9C6d=59@CVUOwz<2Ni`~tt@JS;;IB~)=SR$&d+VLdirBksgDjAJ|Q!w&4m9_+<_ zJdT4nga)3(Q+O6f@EksgPvP_U0=|f^;%oRiUcxu<9ef|J<0tqjj^mej6K~;d{1Jb` zp9K&kK^Dq|6~ankz0e>u@^2T4*KGfUEnY|1OJuG=<=;qxIa^SL+8>f1zNbO;)dD|^ zs;XDit#8<}ZP(rjXXCzEbjQhV?s@iBXD8Jk(9Dub;jXa8n~xDU&EX z#c?LRQeG8O=239);Tm~eNSRN8!G{|{n?i~}@yLgbp@^)MP_#*&ZgXgptf0sT56C-Y zrIg}M8eh7kwnh#u-$QW;o?tW-3n?s$#JT(PWw;90-~;%K{P|nB4d27B2sj@V^4}F0 z##Oim*OCuMunC*71$X0KjAD#Dx&t4@PJ9dx;SoGW{(J(H_yn4mK?@6bhP?V5j*?G5 zjnCkDdaxKoKp;RMKp;RMK;S+?poD*G zn7#kE|M~y_?<0$XPYn>{1L-$_T0fMcp4AmN?&z% z_zo@&F;%4d72Q>Ds$VkQ(*R6m&?*8M04X-Xny2^5U17X4Oqdg&bIiN3%)(hGN1L-*Zai+1r0Uvms;T%m?DRB=HpQ0><2GdVY_?E3KZa>Pi#>rbk)k<%+=qS{I+z%yGc zwdqiMRX`O`1;z@<{t&PUh8`n_cI#kguK>g(hpo|#|V8dZThuW(Gs=%xQYwmAG&i~r-`~Pf`{!;~1 zfq$idNmj3`6)wr2tqY5jvo>J6VH1x;yeh63@ Kv{MECRDmA`_MQ^} literal 0 HcmV?d00001 diff --git 
a/llm-train/.DS_Store b/llm-train/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..b0a154d24fde98bef1532a8e60188b77f7a239e8 GIT binary patch literal 6148 zcmeHK!A`fCq_YU*WvHc7kw_=-NA{l2A>%YLbMh zuJz1a`N}7L|nZ na)ZkfbaX4mTx!L2R4o{n+nf7~Hr_xxbJ?))&BamiS zr@3XPIGq64>V5YBECI~vj`;L2HGl3tv75>mk&<$_D^+iuT+VxKqd(ET=8Nvebx;_h9TTG+bK~v! dK8mug`I^sr;g}e7=7Ubu&w%S9lLG&(z&GyF6{G+F literal 0 HcmV?d00001 diff --git a/llm-train/peft/.DS_Store b/llm-train/peft/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..a599417410fd01d14595897b0a361419d6afcf7a GIT binary patch literal 6148 zcmeHKPfNov6i>G4T8Ges!j1v217~#u@lxvi0#@{(GFv*d*fp|s?l1;D>lgBq_<4LU zNyV{w@FL>ggO^|O{xsy5l9w>XxIc*x7;_k77ARuHhGvDJA9YD8#)8PzHxf2vlEo}! z=~6Tq{vrdocU!P)6WFnwec8WgI(~{$8G-*TdP>m7OZkaVkLa%Y;=6|Lpts|6&r2hyh~YUopUIy`bmAl1yz~S{&9|0eT0Df^mh$&lKpWQVg+J dikqNHz%H-@=ou_Df(3*w0*VG|h=E^a-~*AnP2K9+JjVV&I>+6l8W%g*eU z+EnY41k}VAjrTXA#(+jq^i}Z|;{(A4iHRmACSG1Npd=>7|IC?%UZ{x=BGEa`od5i{ zbN2t|`{(o=LI@;^S_2_9gb<1kqq2nEbsE#NXG#-%2~z_46DOZB?F@2fGWjXifgs30 zkbxirK?Z^h1R1y)GJtzFYjm1@pKF6Y$Uu;R|78Z)_aTB0qj8^(a_W6Lu=9=pNGnk6 zH5|)qfHdOMxKBqpMGZFKu1L8n_(u%j?kJD=c5$DMa?0Ha_=gYhR|fxt0^aI4f5bB< z5a%@LgA4>2n3@4rHX&l-UoJB^{`_tlj$2`4uNhP{JHDC{>T-7Z)3AM0~ zaUHg8X3AqVv)9lE({go-ZMmvp=>^Zh&~!BE>oP3O9!%IpOLIKm6jxA@ifOrKc(`Fh zLoBkkaotEPGQ7U2F&1g8ZyFg9>HO&GjqT|}X3nyY@c_X82$0#60y=&z8neG4m9=x@ z?;-p+O{dht?n!9bsa1Gn)>2~!aF4M7zCaAi5 z9lPMRc$0M-I6t?lVr%TmGiPV7UT~0#;>kG+!*{H>bItmD;;mbEwx6FX&#RKEr3GCj z!!a_Z-Z^M!?m&;~WOYk3to~liwB;HH4ZR@FgP7Q1sX6_8Rn4Nsw=GeW$=2mrx{Dd( zaM5sw?s4<2&G0K%-^+MdNjR7>#%FftTZGyq3 z10+v|$Z_%%d5OG5PLsFES@JmoC}5lV0bhTtK16pq3%I1W$3v+x|e058G`I0di68}J^y45dRr`0TUMh+(*N4Z}G*ne;EU z>8Vd&yR!Mh+2-+D?JuZxJDvrk~W9~+a*YxSpi4E%cx(B9pEK1c|m2gUvSF&M;An$>Eb+1h$()aP?4ir?%P0xu*p;Is zOjWF>4ZO_D-OBD{ve#-?MYFv(t%^yz(W;6@tvQ{R`Q3Z>A3p28iTjB>*CaALag^LP zID>cCD6A+^+F0n5OzGeGBqz(VL4%r&Z`11t3e 
zKrEqJ3AX7iK{3LhV=&i<78IsK5p^gtPYkBRQ7??2V=&jK!-1K{2QxD>^Fm>2b{tu!ba|0myn|1SpFo@Kx?@Lw^&N*%A`;*!kWy0kdDYXz3qSV+h( o*Qi3UGS{(c&{e#PMG4vjH4q(xxkl8W*gpb_23uJM{wM?A0QgkSI{*Lx literal 0 HcmV?d00001 diff --git a/pic/llm/train/.DS_Store b/pic/llm/train/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..f4e842aade48d47ef4ed16de51107faa2cf85ec7 GIT binary patch literal 6148 zcmeHK&59F25Ux(5+i?+cP}pOG*MQ0XK)}m5#usoz4=QoSiMxZ7>0xGK4lyuieIY?S z$sPqy;v4uHR&`gFtdoEz5w{DfzwYX){`s2eo&o^j&*LFL4*(=8VXKS97NLI9hP2pn z15ubU3J4*AEIl0t4R2@o7a5?llaNCJQ>d`KIKNkVmTxS$;WN|Yag^$a;@_Qr`||V0 z`TcEU&2;=ne)-uj_-)QMl<*p=Xi?@o)5j=Jld>8NzKd47bM5+`+>`tAMR2AUK^0c> z(hrMg)OxIS8m;SL^dy-sCcRq^bXJ8)HZ4p*5>L_P`O_qe^}^TlEG|uKU^`@2b|<}q z6$t-A8|oxArQP2QxlOux5G3H%C-++BSurMhAVh7Deuui!I#VAIPv9^dkD8hy!+EBqMhOpsiSM298))s9z z2u?l(&n!4Y5oUHAUm150o<&zW1{?#I8ED&PkKX@>yYv6cL9XQ(a18uc42aG+82h*- zep?%xqqkO}zClL3yS*@Ff_QrG4NLz_yzO$ Bh#LR^ literal 0 HcmV?d00001