From b75342c8b35f792efbd7e630c81382747a7d49d4 Mon Sep 17 00:00:00 2001
From: wagga40 <6437862+wagga40@users.noreply.github.com>
Date: Sun, 27 Oct 2024 21:09:06 +0100
Subject: [PATCH] Speed up the flattening and detection process

Refactor a lot of code
Remove forwarding
---
 docs/Zircolite_manual.pdf                  |  Bin 690194 -> 690193 bytes
 tools/zircolite_server/Readme.md           |    5 -
 tools/zircolite_server/jsonl.tmpl          |    2 -
 tools/zircolite_server/requirements.txt    |    3 -
 tools/zircolite_server/zircolite_server.py |   28 -
 zircolite.py                               | 1742 +++++++-------------
 zircolite_dev.py                           | 1325 +++++++--------
 7 files changed, 1193 insertions(+), 1912 deletions(-)
 delete mode 100644 tools/zircolite_server/Readme.md
 delete mode 100644 tools/zircolite_server/jsonl.tmpl
 delete mode 100644 tools/zircolite_server/requirements.txt
 delete mode 100644 tools/zircolite_server/zircolite_server.py

diff --git a/docs/Zircolite_manual.pdf b/docs/Zircolite_manual.pdf
index 089776c9018175beeba492c133a97f079234f55a..e469aaeae9605ed7dd6974c50bb64c77a3f18bb6 100644
Binary files a/docs/Zircolite_manual.pdf and b/docs/Zircolite_manual.pdf differ
zMU@!tBi$5>SosNSHiEC9)OZx1-3J!G^BeoZBG5$g)_=0>Al>`rb!BCQsp99;f$-jN z{!114${!C#{KYoyr4oH{TfTjAS3aBPoLkk26b?J+p&D$ts7(O$%TU|RTie`(^WDeS z=j#LVAd^G4c3kX!-f}o&5G#CCEx^HImN>nsX<>ZaCNra$5ma27T^$ z&Ihf56+p6WniLps4C9x*u;-N`BZYNx4p5Rd6O1P8qe)Ko3X;iBa@%@JPVbQn2=c=h z`Z+H{RlUZQXJ%NDw0W64EpPb!yD5CB2~O%ClJ3>iDXvlLKS&E2!-IZDv>scOo+HF` zLwEt1eb!LzAX%M;nLG=_a@CtwrCj3%W%HLOLGQFN6BnlQSpPbvN40W?n~$SEMgYZt zTtDoYvc=C+J|<@G=L`>T_M|#lBin zujbhde)%tW)gX4$Q?n=S@KhVr1<9IF`H+1Zkz5_}Mb=t4OdVW&k-vyqJ(kigEq}1( z4JqTGH8&AX!R9bk8z!(|vjeXU{31?chpefy30`YYzG=Myd&QVz&3-kI05JDjfam#F zv1-Jijex+3Q&;Z1PjVyrZEkJSrwCKYlpr92WM{++dBzkkAPDE0*|2}&nYN|KeHyDP zYUP@Dt2fIF^60OAS7_dj{X5~+UxRn>qaA^F^d3>$!?$OSfZ%1v?OllXI6iu-&D_A{ zqr17aZV260&p~m;_#KHR2LSyDJkr0vX~`7aNDxiaGa_g9XWq8YidO!%M7c4v=}`u= zK72@>#=MWcmsUu29~&5Ng%5S^j{m{vMEWRpre~{rwj_TZ55bI3Emuv@=!~ihK<+cP z65zoE{bbbT8fGSu%IDbmM`BJpu&53-de&#=)t)|XK!zLa`>q+Fi8Cu|NIS@#8wa!} zpB?L3hX~x|*AKj_Cj|Z_*AJg?g3{d4)sJ1T$LHFE-iXk1B3QP%@BgUFVA@2qCn5HE zn3*MLE@-6_b36*)ihRl8&!6kG&pqRKHLW-jyXl(0V~qV&yL@rt7rO4!3*IN;Z?#HD zI%^jR?$gPQA97_YIugLg{5)*e2reX?iN&9!d+?*nt@Yii?AukyUEh7dnR(mYRu_3^ e2=1.1.2 -jinja2>=2.11.3 -werkzeug>=3.0.3 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/tools/zircolite_server/zircolite_server.py b/tools/zircolite_server/zircolite_server.py deleted file mode 100644 index f1f9f45..0000000 --- a/tools/zircolite_server/zircolite_server.py +++ /dev/null @@ -1,28 +0,0 @@ -#!python3 -# -*- coding: utf-8 -*- - -# Zircolite **example** server -# Make you own if you want to use it in production - -from flask import Flask, request -from jinja2 import Template -import base64 -import json - -app = Flask(__name__) - -tmpl = open("jsonl.tmpl", 'r', encoding='utf-8') -template = Template(tmpl.read()) -tmpl.close() - -@app.route('/logs',methods=['POST']) -def logs(): - try: - with open("results.json", 'a') as f: - f.write(template.render(data=request.get_json())) - except Exception as e: - return {"status": "400"} - return {"status": "200"} - -if __name__ == '__main__': - app.run(host='0.0.0.0', port = 8080, debug=True) diff --git a/zircolite.py b/zircolite.py index db7a0d5..b8c4c5e 100755 --- a/zircolite.py +++ b/zircolite.py @@ -2,20 +2,19 @@ # Standard libs import argparse -import asyncio import base64 import chardet import csv import functools import hashlib import logging +import logging.config import multiprocessing as mp import os import random import re import shutil import signal -import socket import sqlite3 import string import subprocess @@ -30,7 +29,6 @@ import xxhash from colorama import Fore from tqdm import tqdm -from tqdm.asyncio import tqdm as tqdmAsync from RestrictedPython import compile_restricted from RestrictedPython import safe_builtins from RestrictedPython import limited_builtins @@ -39,26 +37,10 @@ from RestrictedPython.Guards import guarded_iter_unpack_sequence # External libs (Optional) -forwardingDisabled = False -try: - import aiohttp - import urllib3 - - urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) -except ImportError: - forwardingDisabled = True - -elasticForwardingDisabled = False -try: - from elasticsearch import AsyncElasticsearch -except ImportError: - elasticForwardingDisabled = True - updateDisabled = False try: import requests except ImportError: - forwardingDisabled = True updateDisabled = True sigmaConversionDisabled = False @@ -95,499 +77,117 @@ def signal_handler(sig, frame): sys.exit(0) -def quitOnError(message, logger=None): +def quitOnError(message): + """Log an error message and exit the program.""" 
+    logger = logging.getLogger(__name__)
     logger.error(message)
     sys.exit(1)

-def checkIfExists(path, errorMessage, logger=None):
+def checkIfExists(path, errorMessage):
     """Test if path provided is a file"""
     if not (Path(path).is_file()):
-        quitOnError(errorMessage, logger)
-
-
-def initLogger(debugMode, logFile=None):
-    fileLogLevel = logging.INFO
-    fileLogFormat = "%(asctime)s %(levelname)-8s %(message)s"
-    if debugMode:
-        fileLogLevel = logging.DEBUG
-        fileLogFormat = (
-            "%(asctime)s %(levelname)-8s %(module)s:%(lineno)s %(funcName)s %(message)s"
-        )
-
-    if logFile is not None:
-        logging.basicConfig(
-            format=fileLogFormat,
-            filename=logFile,
-            level=fileLogLevel,
-            datefmt="%Y-%m-%d %H:%M:%S",
-        )
-        logger = logging.StreamHandler()
-        formatter = logging.Formatter("%(message)s")
-        logger.setFormatter(formatter)
-        logger.setLevel(logging.INFO)
-        logging.getLogger().addHandler(logger)
-    else:
-        logging.basicConfig(
-            format="%(message)s", level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S"
-        )
-
-    return logging.getLogger()
-
-
-class templateEngine:
-    def __init__(self, logger=None, template=[], templateOutput=[], timeField=""):
-        self.logger = logger or logging.getLogger(__name__)
-        self.template = template
-        self.templateOutput = templateOutput
+        quitOnError(errorMessage)
+
+
+def setup_logging(debug_mode, log_file=None):
+    """Set up logging configuration."""
+    log_level = logging.DEBUG if debug_mode else logging.INFO
+
+    # Define a configuration dictionary
+    logging_config = {
+        "version": 1,
+        "disable_existing_loggers": False,
+        "formatters": {
+            "console_formatter": {"format": "%(message)s"},
+            "file_formatter": {
+                "format": "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
+            },
+        },
+        "handlers": {
+            "console": {
+                "class": "logging.StreamHandler",
+                "formatter": "console_formatter",
+                "level": logging.INFO,
+                "stream": "ext://sys.stdout",
+            },
+            "file": {
+                "class": "logging.FileHandler",
+                "formatter": "file_formatter",
+                "level": log_level,
+                "filename": log_file or "zircolite.log",
+                "encoding": "utf-8",
+            },
+        },
+        "root": {
+            "handlers": ["console", "file"] if log_file else ["console"],
+            "level": log_level,
+        },
+    }
+
+    logging.config.dictConfig(logging_config)
+
+
+# Define the authorized BUILTINS for Restricted Python
+def default_guarded_getitem(ob, index):
+    return ob[index]
+
+
+class template_engine:
+    def __init__(self, templates=[], template_outputs=[], timeField=""):
+        self.logger = logging.getLogger(__name__)
         self.timeField = timeField
+        self.compiled_templates = {}
+        # Flatten templates and outputs if they are nested lists
+        self.template_paths = [
+            tpl[0] if isinstance(tpl, list) else tpl for tpl in templates
+        ]
+        self.template_outputs = [
+            out[0] if isinstance(out, list) else out for out in template_outputs
+        ]

-    def generateFromTemplate(self, templateFile, outputFilename, data):
+    def generate_from_template(self, template_file, outputFilename, data):
         """Use Jinja2 to output data in a specific format"""
         try:
-
-            tmpl = open(templateFile, "r", encoding="utf-8")
-            template = Template(tmpl.read())
-
+            with open(template_file, "r", encoding="utf-8") as tmpl:
+                # Use the compiled template if available, otherwise compile it
+                if template_file in self.compiled_templates:
+                    template = self.compiled_templates[template_file]
+                else:
+                    template = Template(tmpl.read())
+                    self.compiled_templates[template_file] = template
+                # Render the template and write to the output file
                 with open(outputFilename, "a", encoding="utf-8") as tpl:
                     tpl.write(template.render(data=data,
timeField=self.timeField)) except Exception as e: self.logger.error( - f"{Fore.RED} [-] Template error, activate debug mode to check for errors{Fore.RESET}" + f"{Fore.RED} [-] Template error, activate debug mode with '--debug' to check for errors{Fore.RESET}" ) self.logger.debug(f" [-] {e}") def run(self, data): - for template, templateOutput in zip(self.template, self.templateOutput): - self.logger.info( - f'[+] Applying template "{template[0]}", outputting to : {templateOutput[0]}' - ) - self.generateFromTemplate(template[0], templateOutput[0], data) - - -class eventForwarder: - """Class for handling event forwarding""" - - def __init__( - self, - remote, - timeField, - token, - logger=None, - index=None, - login="", - password="", - pipeline="", - ): - self.logger = logger or logging.getLogger(__name__) - self.remoteHost = remote - self.token = token - self.localHostname = socket.gethostname() - self.userAgent = "zircolite/2.x" - self.index = index - self.login = login - self.password = password - self.pipeline = pipeline - self.queueSize = 20 - self.connectionFailed = False - self.timeField = timeField - - def send(self, payloads, forwardAll=False): - if payloads: - if self.remoteHost: - try: - # Change EventLoopPolicy on Windows https://stackoverflow.com/questions/45600579/asyncio-event-loop-is-closed-when-getting-loop - if _platform == "win32": - asyncio.set_event_loop_policy( - asyncio.WindowsSelectorEventLoopPolicy() - ) - # Splunk HEC - if self.token: - asyncio.run( - self.sendAllAsyncQueue( - payloads, - timeField=self.timeField, - sigmaEvents=(not forwardAll), - mode="HEC", - ) - ) - # ElasticSearch - elif self.index: - self.disableESDefaultLogging() - asyncio.run( - self.sendAllAsyncQueue( - payloads, - timeField=self.timeField, - sigmaEvents=(not forwardAll), - mode="ES", - ) - ) - # HTTP - else: - asyncio.run( - self.sendAllAsyncQueue( - payloads, - timeField=self.timeField, - sigmaEvents=(not forwardAll), - mode="HTTP", - ) - ) - except Exception as e: - self.logger.debug(f"{Fore.RED} [-] {e}") - - def networkCheck(self): - """Check remote connectivity""" - self.logger.info(f"[+] Check connectivity to {self.remoteHost}") - try: - requests.get( - self.remoteHost, - headers={"user-agent": self.userAgent}, - timeout=10, - verify=False, - ) - except (requests.ConnectionError, requests.Timeout): - return False - return True - - def formatToEpoch(self, timestamp): - try: - return ( - str(time.mktime(time.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%f%z"))) - + timestamp.split(".")[1][:-1] - ) - except ValueError: - try: - return ( - str(time.mktime(time.strptime(timestamp, "%Y-%m-%dT%H:%M:%S%z"))) - + timestamp.split(".")[1][:-1] - ) - except Exception: - self.logger.debug( - f"{Fore.RED} [-] Timestamp error: {timestamp}{Fore.RESET}" - ) - - def disableESDefaultLogging(self): - """By Default Elastic client has a logger set to INFO level""" - es_log = logging.getLogger("elasticsearch") - es_log.setLevel(logging.ERROR) - es_log = logging.getLogger("elastic_transport") - es_log.setLevel(logging.ERROR) - - async def HECWorker(self, session, queue, sigmaEvents): - while True: - if self.index: - providedIndex = f"?index={self.index}" - else: - providedIndex = "" - data = await queue.get() # Pop data from Queue - resp = await session.post( - f"{self.remoteHost}/services/collector/event{providedIndex}", - headers={"Authorization": f"Splunk {self.token}"}, - json=data, - ) # Exec action from Queue - queue.task_done() # Notify Queue action ended - if str(resp.status)[0] in ["4", "5"]: - 
self.logger.error( - f"{Fore.RED} [-] Forwarding failed for event {Fore.RESET}" - ) - - async def ESWorker(self, session, queue, sigmaEvents): - while True: - data = await queue.get() # Pop data from Queue - index = self.index - if sigmaEvents: - index = f"{self.index}-sigma" - else: - if "OriginalLogfile" in data["payload"]: - index = f'{index}-{("".join([char for char in data["payload"]["OriginalLogfile"].split(".")[0] if (char.isalpha() or char == "-")])).lower()}' - try: - await session.index( - index=index, document=data["payload"], id=data["hash"] - ) # Exec action from Queue - except Exception as e: - if "error" in e.body: - if e.body["error"]["type"] == "mapper_parsing_exception": - errField = e.body["error"]["reason"].split("[")[1].split("]")[0] - errType = e.body["error"]["reason"].split("[")[2].split("]")[0] - errValue = ( - e.body["error"]["reason"].split("value: '")[1].split("'")[0] - ) - canInsert = False - - if errType == "long" and errValue.startswith( - "0x" - ): # Hex value in long field - data["payload"][errField] = int( - data["payload"][errField], 16 - ) - canInsert = True - elif errType == "boolean" and errValue.startswith( - "0" - ): # 0 value in bool field - data["payload"][errField] = "false" - canInsert = True - elif errType == "boolean" and errValue.startswith( - "1" - ): # 1 value in bool field - data["payload"][errField] = "true" - canInsert = True - elif ( - errType == "long" - and isinstance((data["payload"][errField]), int) - and data["payload"][errField] > (2**63 - 1) - ): # ES limit - data["payload"][errField] = 2**63 - 1 - canInsert = True - elif ( - errType == "long" - and isinstance((data["payload"][errField]), int) - and data["payload"][errField] < -(2**63) - ): # ES limit - data["payload"][errField] = -(2**63) - canInsert = True - elif errType == "long" and isinstance( - data["payload"][errField], argparse.BooleanOptionalAction - ): - if type(data["payload"][errField]): - data["payload"][errField] = 1 - else: - data["payload"][errField] = 0 - canInsert = True - else: - self.logger.debug( - f"{Fore.RED} [-] ES Mapping parser error : {e}{Fore.RESET}" - ) - if canInsert: - try: - await session.index( - index=index, - document=data["payload"], - id=data["hash"], - ) - except Exception as e: - self.logger.debug( - f"{Fore.RED} [-] ES error : {e}{Fore.RESET}" - ) - elif e.body["error"]["type"] == "illegal_argument_exception": - errField = e.body["error"]["reason"].split("[")[1].split("]")[0] - data["payload"].pop(errField, None) # remove value from payload - try: - await session.index( - index=index, document=data["payload"], id=data["hash"] - ) - except Exception as e: - self.logger.debug( - f"{Fore.RED} [-] ES error : {e}{Fore.RESET}" - ) - else: - self.logger.debug( - f"{Fore.RED} [-] ES error : {e}{Fore.RESET}" - ) - - queue.task_done() # Notify Queue action ended - - async def HTTPWorker(self, session, queue, sigmaEvents): - while True: - data = await queue.get() # Pop data from Queue - resp = await session.post( - self.remoteHost, headers={"user-agent": self.userAgent}, json=data - ) # Exec action from Queue - queue.task_done() # Notify Queue action ended - if str(resp.status)[0] in ["4", "5"]: - self.logger.error( - f"{Fore.RED} [-] Forwarding failed for event {Fore.RESET}" - ) - - def formatEventForES(self, payload, match={}, timeField="", sigmaEvents=False): - if self.pipeline != "": - payload["pipeline"] = self.pipeline - if sigmaEvents: - payload = { - "title": payload["title"], - "id": payload["id"], - "sigmafile": payload["sigmafile"], - 
"description": payload["description"], - "sigma": payload["sigma"], - "rule_level": payload["rule_level"], - "tags": payload["tags"], - "host": self.localHostname, - } - [ - ( - payload.update({key: eval(value)}) - if value in ["False", "True"] - else payload.update({key: value}) - ) - for key, value in match.items() - ] # In detected events boolean are stored as strings - - return {"payload": payload, "hash": xxhash.xxh64_hexdigest(str(payload))} - - def formatEventForSplunk(self, payload, match={}, timeField="", sigmaEvents=False): - if sigmaEvents: - payload = { - "title": payload["title"], - "id": payload["id"], - "sigmafile": payload["sigmafile"], - "description": payload["description"], - "sigma": payload["sigma"], - "rule_level": payload["rule_level"], - "tags": payload["tags"], - } - [payload.update({key: value}) for key, value in match.items()] - if timeField == "": - return {"sourcetype": "_json", "event": payload, "host": self.localHostname} - elif timeField not in payload: - self.logger.error( - f"{Fore.RED} [-] Provided time field was not found {Fore.RESET}" - ) - return {"sourcetype": "_json", "event": payload, "host": self.localHostname} - else: - return { - "sourcetype": "_json", - "event": payload, - "host": self.localHostname, - "time": self.formatToEpoch(payload[timeField]), - } - - def formatEventForHTTTP(self, payload, match={}, timeField="", sigmaEvents=False): - payload.update({"host": self.localHostname}) - return payload - - def initESSession(self): - if self.login == "": - session = AsyncElasticsearch(hosts=[self.remoteHost], verify_certs=False) - else: - session = AsyncElasticsearch( - hosts=[self.remoteHost], - verify_certs=False, - basic_auth=(self.login, self.password), - ) - return session - - async def testESSession(self, session): - try: - await session.info() - except Exception: - self.logger.error(f"{Fore.RED} [-] Connection to ES failed {Fore.RESET}") - await session.close() - self.connectionFailed = True - - async def testSplunkSession(self, session): - data = {"sourcetype": "_json", "event": {}, "host": self.localHostname} - resp = await session.post( - f"{self.remoteHost}/services/collector/event", - headers={"Authorization": f"Splunk {self.token}"}, - json=data, - ) - if str(resp.status)[0] in ["4", "5"]: - await session.close() - self.logger.error( - f"{Fore.RED} [-] Connection to Splunk HEC failed - Forwarding disabled {Fore.RESET}" - ) - self.connectionFailed = True - - async def testHTTPSession(self, session): - resp = await session.post( - self.remoteHost, headers={"user-agent": self.userAgent}, json={} - ) - if str(resp.status)[0] in ["4", "5"]: - await session.close() - self.logger.error( - f"{Fore.RED} [-] Connection to HTTP Server failed - Forwarding disabled {Fore.RESET}" - ) - self.connectionFailed = True - - async def sendAllAsyncQueue( - self, payloads, timeField="", sigmaEvents=False, mode="" - ): - - if self.connectionFailed: - return - - if mode == "ES": - session = self.initESSession() - await self.testESSession(session) - if self.connectionFailed: - return - fnformatEvent = self.formatEventForES - fnWorker = self.ESWorker - elif mode == "HEC": - session = aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) - await self.testSplunkSession(session) - if self.connectionFailed: - return - fnformatEvent = self.formatEventForSplunk - fnWorker = self.HECWorker - elif mode == "HTTP": - session = aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) - await self.testHTTPSession(session) - if self.connectionFailed: - 
return - fnformatEvent = self.formatEventForHTTTP - fnWorker = self.HTTPWorker - else: - return - - # Init queue - queue = asyncio.Queue() - tasks = [] - - if not sigmaEvents: - self.logger.info("[+] Gathering events to forward") - payloads = tqdmAsync(payloads, colour="yellow") - - for payload in payloads: - if sigmaEvents: - for match in payload["matches"]: - queue.put_nowait( - fnformatEvent( - payload=payload, - match=match, - timeField=timeField, - sigmaEvents=sigmaEvents, - ) - ) - else: - queue.put_nowait( - fnformatEvent( - payload=payload, timeField=timeField, sigmaEvents=sigmaEvents - ) - ) - - # Create workers to process Queue - for i in range(20): - task = asyncio.create_task( - fnWorker(session, queue, sigmaEvents=sigmaEvents) - ) - tasks.append(task) - if not sigmaEvents: + for template, template_output in zip( + self.template_paths, self.template_outputs + ): self.logger.info( - f"[+] Forwarding {queue.qsize()} events to {self.remoteHost} {Fore.CYAN}(Don't panic if nothing change for a long time){Fore.RESET}" + f'[+] Applying template "{template}", outputting to : {template_output}' ) - await queue.join() - # Cancel our worker tasks. - for task in tasks: - task.cancel() - # Wait until all worker tasks are cancelled. - await asyncio.gather(*tasks, return_exceptions=True) - await session.close() + self.generate_from_template(template, template_output, data) -class JSONFlattener: +class json_flattener: """Perform JSON Flattening""" def __init__( self, configFile, - logger=None, timeAfter="1970-01-01T00:00:00", timeBefore="9999-12-12T23:59:59", timeField=None, hashes=False, - args_config=None, + input_format=None, ): - self.logger = logger or logging.getLogger(__name__) + self.logger = logging.getLogger(__name__) self.keyDict = {} self.fieldStmt = "" self.valuesStmt = [] @@ -595,20 +195,18 @@ def __init__( self.timeBefore = timeBefore self.timeField = timeField self.hashes = hashes - self.args_config = args_config - self.JSONArray = args_config.json_array_input + self.JSONArray = False + # Initialize the cache for compiled code self.compiled_code_cache = {} - # Convert the argparse.Namespace to a dictionary - args_dict = vars(args_config) - # Find the chosen input format - self.chosen_input = next( - (key for key, value in args_dict.items() if "_input" in key and value), None - ) + self.chosen_input = input_format if self.chosen_input is None: self.chosen_input = "evtx_input" # Since evtx is the default input, we force it no chosen input has been found + if self.chosen_input == "json_array_input": + self.JSONArray = True + with open(configFile, "r", encoding="UTF-8") as fieldMappingsFile: self.fieldMappingsDict = json.loads(fieldMappingsFile.read()) self.fieldExclusions = self.fieldMappingsDict["exclusions"] @@ -619,16 +217,10 @@ def __init__( self.transforms = self.fieldMappingsDict["transforms"] self.transforms_enabled = self.fieldMappingsDict["transforms_enabled"] - # Define the authorized BUILTINS for Resticted Python - def default_guarded_getitem(ob, index): - return ob[index] - - default_guarded_getattr = getattr - self.RestrictedPython_BUILTINS = { "__name__": "script", "_getiter_": default_guarded_getiter, - "_getattr_": default_guarded_getattr, + "_getattr_": getattr, "_getitem_": default_guarded_getitem, "base64": base64, "re": re, @@ -639,7 +231,26 @@ def default_guarded_getitem(ob, index): self.RestrictedPython_BUILTINS.update(limited_builtins) self.RestrictedPython_BUILTINS.update(utility_builtins) - def run(self, file): + def transform_value(self, code, param): + 
try: + # Check if the code has already been compiled + if code in self.compiled_code_cache: + byte_code = self.compiled_code_cache[code] + else: + # Compile the code and store it in the cache + byte_code = compile_restricted( + code, filename="", mode="exec" + ) + self.compiled_code_cache[code] = byte_code + # Prepare the execution environment + TransformFunction = {} + exec(byte_code, self.RestrictedPython_BUILTINS, TransformFunction) + return TransformFunction["transform"](param) + except Exception as e: + self.logger.debug(f"ERROR: Couldn't apply transform: {e}") + return param # Return the original parameter if transform fails + + def process_file(self, file): """ Flatten json object with nested keys into a single level. Returns the flattened json object @@ -649,25 +260,6 @@ def run(self, file): JSONOutput = [] fieldStmt = "" - def transformValue(code, param): - try: - # Check if the code has already been compiled - if code in self.compiled_code_cache: - byte_code = self.compiled_code_cache[code] - else: - # Compile the code and store it in the cache - byte_code = compile_restricted( - code, filename="", mode="exec" - ) - self.compiled_code_cache[code] = byte_code - # Prepare the execution environment - TransformFunction = {} - exec(byte_code, self.RestrictedPython_BUILTINS, TransformFunction) - return TransformFunction["transform"](param) - except Exception as e: - self.logger.debug(f"ERROR: Couldn't apply transform: {e}") - return param # Return the original parameter if transform fails - def flatten(x, name=""): nonlocal fieldStmt # If it is a Dict go deeper @@ -724,9 +316,11 @@ def flatten(x, name=""): ) transformedValuesByKeys[ transform["alias_name"] - ] = transformValue(transformCode, value) + ] = self.transform_value( + transformCode, value + ) else: - value = transformValue( + value = self.transform_value( transformCode, value ) @@ -824,60 +418,72 @@ def flatten(x, name=""): JSONLine = {} return {"dbFields": fieldStmt, "dbValues": JSONOutput} - def runAll(self, EVTXJSONList): - for evtxJSON in tqdm(EVTXJSONList, colour="yellow"): + def save_to_file(self, outputFile): + with open(outputFile, "w", encoding="utf-8") as file: + for JSONLine in tqdm(self.valuesStmt, colour="yellow"): + file.write(f'{json.dumps(JSONLine).decode("utf-8")}\n') + + def run(self, EVTXJSONList): + for evtxJSON in EVTXJSONList: if os.stat(evtxJSON).st_size != 0: - results = self.run(evtxJSON) + results = self.process_file(evtxJSON) self.fieldStmt += results["dbFields"] self.valuesStmt += results["dbValues"] -class zirCore: +class zircore: """Load data into database and apply detection rules""" def __init__( self, - config, - logger=None, noOutput=False, - timeAfter="1970-01-01T00:00:00", - timeBefore="9999-12-12T23:59:59", limit=-1, - csvMode=False, - timeField=None, - hashes=False, - dbLocation=":memory:", + csv_output=False, + db_location=":memory:", delimiter=";", + tmp_directory=".", + tmp_directory_db=".", ): - self.logger = logger or logging.getLogger(__name__) - self.dbConnection = self.createConnection(dbLocation) + self.logger = logging.getLogger(__name__) + + self.tmp_directory = tmp_directory + self.tmp_directory_db = tmp_directory_db + self.db_connection = self.create_connection(db_location) self.fullResults = [] + self.rule_results = [] self.ruleset = {} self.noOutput = noOutput - self.timeAfter = timeAfter - self.timeBefore = timeBefore - self.config = config self.limit = limit - self.csvMode = csvMode - self.timeField = timeField - self.hashes = hashes + self.csv_output = csv_output 
self.delimiter = delimiter + # if not csv_output: + + if not Path(str(tmp_directory)).is_dir(): + os.mkdir(tmp_directory) + if "?mode=memory&cache=shared" in db_location: + tmp_filename = f'{db_location.replace("file:", "").replace("?mode=memory&cache=shared", "")}.json' + else: + tmp_filename = f"{db_location}.json" + self.tmp_file = open(f"{tmp_directory}/{tmp_filename}", "w", encoding="utf-8") + def close(self): - self.dbConnection.close() + self.db_connection.close() - def createConnection(self, db): + def create_connection(self, db): """create a database connection to a SQLite database""" conn = None self.logger.debug(f"CONNECTING TO : {db}") try: - if db == ":memory:": + if "?mode=memory&cache=shared" in db: conn = sqlite3.connect(db, isolation_level=None) conn.execute("PRAGMA journal_mode = MEMORY;") conn.execute("PRAGMA synchronous = OFF;") conn.execute("PRAGMA temp_store = MEMORY;") else: - conn = sqlite3.connect(db) + if not Path(str(self.tmp_directory_db)).is_dir(): + os.mkdir(self.tmp_directory_db) + conn = sqlite3.connect(f"{self.tmp_directory_db}/{db}") conn.row_factory = sqlite3.Row # Allows to get a dict def udf_regex(x, y): @@ -895,42 +501,43 @@ def udf_regex(x, y): self.logger.error(f"{Fore.RED} [-] {e}") return conn - def createDb(self, fieldStmt): + def create_db(self, fieldStmt): createTableStmt = f"CREATE TABLE logs ( row_id INTEGER, {fieldStmt} PRIMARY KEY(row_id AUTOINCREMENT) );" self.logger.debug(f" CREATE : {createTableStmt}") - if not self.executeQuery(createTableStmt): + if not self.execute_simple_query(createTableStmt): self.logger.error(f"{Fore.RED} [-] Unable to create table{Fore.RESET}") sys.exit(1) - def createIndex(self): - self.executeQuery('CREATE INDEX "idx_eventid" ON "logs" ("eventid");') + def create_index(self): + self.execute_simple_query('CREATE INDEX "idx_eventid" ON "logs" ("EventID");') + self.execute_simple_query('CREATE INDEX "idx_channel" ON "logs" ("Channel");') - def executeQuery(self, query): + def execute_simple_query(self, query): """Perform a SQL Query with the provided connection""" - if self.dbConnection is not None: - dbHandle = self.dbConnection.cursor() + if self.db_connection is None: + self.logger.error(f"{Fore.RED} [-] No connection to Db{Fore.RESET}") + return False + else: + dbHandle = self.db_connection.cursor() self.logger.debug(f"EXECUTING : {query}") try: dbHandle.execute(query) - self.dbConnection.commit() - return True + self.db_connection.commit() except Error as e: self.logger.debug(f" [-] {e}") return False - else: - self.logger.error(f"{Fore.RED} [-] No connection to Db{Fore.RESET}") - return False + return True - def executeSelectQuery(self, query): + def execute_select_query(self, query): """ Execute a SELECT SQL query and return the results as a list of dictionaries. 
""" - if self.dbConnection is None: + if self.db_connection is None: self.logger.error(f"{Fore.RED} [-] No connection to Db{Fore.RESET}") return [] try: - cursor = self.dbConnection.cursor() - self.logger.debug(f"Executing SELECT query: {query}") + cursor = self.db_connection.cursor() + self.logger.debug(f"EXECUTING SELECT QUERY: {query}") cursor.execute(query) rows = cursor.fetchall() # Convert rows to list of dictionaries @@ -940,17 +547,17 @@ def executeSelectQuery(self, query): self.logger.debug(f" [-] SQL query error: {e}") return [] - def loadDbInMemory(self, db): + def load_db_in_memory(self, db): """In db only mode it is possible to restore an on disk Db to avoid EVTX extraction and flattening""" - dbfileConnection = self.createConnection(db) - dbfileConnection.backup(self.dbConnection) + dbfileConnection = self.create_connection(db) + dbfileConnection.backup(self.db_connection) dbfileConnection.close() def escape_identifier(self, identifier): """Escape SQL identifiers like table or column names.""" return identifier.replace('"', '""') - def insertData2Db(self, JSONLine): + def insert_data_to_db(self, JSONLine): """Build a parameterized INSERT INTO query and insert data into the database.""" columns = JSONLine.keys() columnsEscaped = ", ".join([self.escape_identifier(col) for col in columns]) @@ -965,31 +572,23 @@ def insertData2Db(self, JSONLine): values.append(value) insertStmt = f"INSERT INTO logs ({columnsEscaped}) VALUES ({placeholders})" try: - self.dbConnection.execute(insertStmt, values) + self.db_connection.execute(insertStmt, values) return True except Exception as e: self.logger.debug(f" [-] {e}") return False - def insertFlattenedJSON2Db(self, flattenedJSON, forwarder=None): - if forwarder: - forwarder.send(flattenedJSON, forwardAll=True) - for JSONLine in tqdm(flattenedJSON, colour="yellow"): - self.insertData2Db(JSONLine) - self.createIndex() + def insert_flat_json_to_db(self, flattenedJSON): + for JSONLine in flattenedJSON: + self.insert_data_to_db(JSONLine) - def saveFlattenedJSON2File(self, flattenedJSON, outputFile): - with open(outputFile, "w", encoding="utf-8") as file: - for JSONLine in tqdm(flattenedJSON, colour="yellow"): - file.write(f'{json.dumps(JSONLine).decode("utf-8")}\n') - - def saveDbToDisk(self, dbFilename): + def save_db_to_disk(self, dbFilename): self.logger.info("[+] Saving working data to disk as a SQLite DB") onDiskDb = sqlite3.connect(dbFilename) - self.dbConnection.backup(onDiskDb) + self.db_connection.backup(onDiskDb) onDiskDb.close() - def executeRule(self, rule): + def execute_rule(self, rule): """ Execute a single Sigma rule against the database and return the results. 
""" @@ -1010,9 +609,9 @@ def executeRule(self, rule): # Process each SQL query in the rule for SQLQuery in sigma_queries: - data = self.executeSelectQuery(SQLQuery) + data = self.execute_select_query(SQLQuery) if data: - if self.csvMode: + if self.csv_output: # Clean values for CSV output cleaned_rows = [ { @@ -1038,7 +637,7 @@ def executeRule(self, rule): "id": rule_id, "description": ( description.replace("\n", "").replace("\r", "") - if self.csvMode + if self.csv_output else description ), "sigmafile": filename, @@ -1048,6 +647,11 @@ def executeRule(self, rule): "count": len(filteredRows), "matches": filteredRows, } + + if not self.csv_output: + json_bytes = json.dumps(results) + self.tmp_file.write(f"{json_bytes.decode('utf-8')}\n") + self.logger.debug( f"DETECTED: {title} - Matches: {len(filteredRows)} events" ) @@ -1055,21 +659,11 @@ def executeRule(self, rule): else: return {} - def loadRulesetFromFile(self, filename, ruleFilters): - try: - with open(filename, encoding="utf-8") as f: - self.ruleset = json.loads(f.read()) - self.applyRulesetFilters(ruleFilters) - except Exception as e: - self.logger.error( - f"{Fore.RED} [-] Loading JSON ruleset failed, are you sure it is a valid JSON file ? : {e}{Fore.RESET}" - ) - - def loadRulesetFromVar(self, ruleset, ruleFilters): + def load_ruleset_from_var(self, ruleset, ruleFilters): self.ruleset = ruleset - self.applyRulesetFilters(ruleFilters) + self.apply_ruleset_filters(ruleFilters) - def applyRulesetFilters(self, ruleFilters=None): + def apply_ruleset_filters(self, ruleFilters=None): # Remove empty rule and remove filtered rules self.ruleset = list(filter(None, self.ruleset)) if ruleFilters is not None: @@ -1079,219 +673,98 @@ def applyRulesetFilters(self, ruleFilters=None): if not any(ruleFilter in rule["title"] for ruleFilter in ruleFilters) ] - def ruleLevelPrintFormatter(self, level, orgFormat=Fore.RESET): - if level == "informational": - return f"{Fore.WHITE}{level}{orgFormat}" - if level == "low": - return f"{Fore.GREEN}{level}{orgFormat}" - if level == "medium": - return f"{Fore.YELLOW}{level}{orgFormat}" - if level == "high": - return f"{Fore.MAGENTA}{level}{orgFormat}" - if level == "critical": - return f"{Fore.RED}{level}{orgFormat}" - - def executeRuleset( - self, - outFile, - writeMode="w", - forwarder=None, - showAll=False, - KeepResults=False, - remote=None, - stream=False, - lastRuleset=False, - ): + def execute_ruleset(self): """ Execute all rules in the ruleset and handle output. 
""" - csvWriter = None - first_json_output = True # To manage commas in JSON output - is_json_mode = not self.csvMode - - # Prepare output file handle if needed - fileHandle = None - if not self.noOutput: - # Open file in text mode since we will write decoded strings - fileHandle = open(outFile, writeMode, encoding="utf-8", newline="") - if is_json_mode and writeMode != "a": - fileHandle.write("[") # Start JSON array - - # Iterate over rules in the ruleset - with tqdm(self.ruleset, colour="yellow") as ruleBar: - for rule in ruleBar: - # Show all rules if showAll is True - if showAll and "title" in rule: - rule_title = rule["title"] - rule_level = rule.get("level", "unknown") - formatted_level = self.ruleLevelPrintFormatter( - rule_level, Fore.BLUE - ) - ruleBar.write( - f"{Fore.BLUE} - {rule_title} [{formatted_level}]{Fore.RESET}" - ) - # Execute the rule - ruleResults = self.executeRule(rule) - if not ruleResults: - continue # No matches, skip to next rule - - # Apply limit if set - if self.limit != -1 and ruleResults["count"] > self.limit: - continue # Exceeds limit, skip this result - - # Write progress message - rule_title = ruleResults["title"] - rule_level = ruleResults.get("rule_level", "unknown") - formatted_level = self.ruleLevelPrintFormatter(rule_level, Fore.CYAN) - rule_count = ruleResults["count"] - ruleBar.write( - f"{Fore.CYAN} - {rule_title} [{formatted_level}] : {rule_count} events{Fore.RESET}" - ) + for rule in self.ruleset: - # Store results if needed - if KeepResults or (remote and not stream): - self.fullResults.append(ruleResults) - - # Forward results if streaming - if stream and forwarder: - forwarder.send([ruleResults], False) - - # Handle output to file - if not self.noOutput: - if self.csvMode: - # Initialize CSV writer if not already done - if csvWriter is None: - fieldnames = [ - "rule_title", - "rule_description", - "rule_level", - "rule_count", - ] + list(ruleResults["matches"][0].keys()) - csvWriter = csv.DictWriter( - fileHandle, - delimiter=self.delimiter, - fieldnames=fieldnames, - ) - csvWriter.writeheader() - # Write matches to CSV - for data in ruleResults["matches"]: - dictCSV = { - "rule_title": ruleResults["title"], - "rule_description": ruleResults["description"], - "rule_level": ruleResults["rule_level"], - "rule_count": ruleResults["count"], - **data, - } - csvWriter.writerow(dictCSV) - else: - # Write results as JSON using orjson - try: - # Handle commas between JSON objects - if not first_json_output: - fileHandle.write(",\n") - else: - first_json_output = False - # Serialize ruleResults to JSON bytes with indentation - json_bytes = json.dumps( - ruleResults, option=json.OPT_INDENT_2 - ) - # Write the decoded JSON string to the file - fileHandle.write(json_bytes.decode("utf-8")) - except Exception as e: - self.logger.error(f"Error saving some results: {e}") + # Execute the rule + ruleResults = self.execute_rule(rule) + if not ruleResults: + continue # No matches, skip to next rule - # Close output file handle if needed - if not self.noOutput: - if is_json_mode and lastRuleset: - fileHandle.write("]") # Close JSON array - fileHandle.close() + # Apply limit if set + if self.limit != -1 and ruleResults["count"] > self.limit: + continue # Exceeds limit, skip this result - def run( - self, - EVTXJSONList, - Insert2Db=True, - saveToFile=False, - forwarder=None, - args_config=None, - ): - self.logger.info("[+] Processing events") - flattener = JSONFlattener( - configFile=self.config, - timeAfter=self.timeAfter, - timeBefore=self.timeBefore, - 
timeField=self.timeField, - hashes=self.hashes, - args_config=args_config, - ) - flattener.runAll(EVTXJSONList) - if saveToFile: - filename = f"flattened_events_{''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(4))}.json" - self.logger.info(f"[+] Saving flattened JSON to : {filename}") - self.saveFlattenedJSON2File(flattener.valuesStmt, filename) - if Insert2Db: - self.logger.info("[+] Creating model") - self.createDb(flattener.fieldStmt) - self.logger.info("[+] Inserting data") - self.insertFlattenedJSON2Db(flattener.valuesStmt, forwarder) - self.logger.info("[+] Cleaning unused objects") - else: - return flattener.keyDict - del flattener + # Store if the rule has matched : title, level, count only + self.rule_results.append( + { + "rule_title": ruleResults["title"], + "rule_level": ruleResults["rule_level"], + "rule_count": ruleResults["count"], + } + ) + + # self.fullResults.append(ruleResults) + + self.tmp_file.close() -class evtxExtractor: +class evtx_extractor: def __init__( self, - logger=None, providedTmpDir=None, - coreCount=None, - useExternalBinaries=True, - binPath=None, - xmlLogs=False, - sysmon4linux=False, - auditdLogs=False, + cores=None, + use_external_binaries=True, + binaries_path=None, encoding=None, - evtxtract=False, - csvInput=False, + input_format=None, ): - self.logger = logger or logging.getLogger(__name__) + self.logger = logging.getLogger(__name__) + if Path(str(providedTmpDir)).is_dir(): - self.tmpDir = f"tmp-{self.randString()}" + self.tmpDir = f"tmp-{self.rand_string()}" self.logger.error( f"{Fore.RED} [-] Provided directory already exists using '{self.tmpDir}' instead{Fore.RESET}" ) else: - self.tmpDir = providedTmpDir or f"tmp-{self.randString()}" + self.tmpDir = providedTmpDir or f"tmp-{self.rand_string()}" os.mkdir(self.tmpDir) - self.cores = coreCount or os.cpu_count() - self.useExternalBinaries = useExternalBinaries - self.sysmon4linux = sysmon4linux - self.xmlLogs = xmlLogs - self.auditdLogs = auditdLogs - self.evtxtract = evtxtract - self.csvInput = csvInput + + self.cores = cores or os.cpu_count() + self.use_external_binaries = use_external_binaries + self.sysmon4linux = False + self.xmlLogs = False + self.csvInput = False + self.auditdLogs = False + self.evtxtract = False + + if input_format == "sysmon_linux_input": + self.sysmon4linux = True + elif input_format == "xml_input": + self.xmlLogs = True + elif input_format == "csv_input": + self.csvInput = True + elif input_format == "auditd_input": + self.auditdLogs = True + elif input_format == "evtxtract_input": + self.evtxtract = True + # Hardcoded hash list of evtx_dump binaries self.validHashList = [ "bbcce464533e0364", "e642f5c23e156deb", "5a7a1005885a1a11", ] + # Sysmon 4 Linux default encoding is ISO-8859-1, Auditd is UTF-8 - if not encoding and sysmon4linux: + if not encoding and self.sysmon4linux: self.encoding = "ISO-8859-1" - elif not encoding and (auditdLogs or evtxtract or xmlLogs): + elif not encoding and (self.auditdLogs or self.evtxtract or self.xmlLogs): self.encoding = "utf-8" else: self.encoding = encoding - self.evtxDumpCmd = self.getOSExternalTools(binPath) + self.evtx_dump_cmd = self.getOSExternalTools(binaries_path) - def randString(self): + def rand_string(self, length=8): return "".join( random.SystemRandom().choice(string.ascii_uppercase + string.digits) - for _ in range(8) + for _ in range(length) ) def getOSExternalTools(self, binPath): @@ -1311,13 +784,13 @@ def runUsingBindings(self, file): Convert EVTX to JSON using evtx_dump 
bindings (slower) Drop resulting JSON files in a tmp folder. """ - if not self.useExternalBinaries: + if not self.use_external_binaries: try: filepath = Path(file) filename = filepath.name parser = PyEvtxParser(str(filepath)) with open( - f"{self.tmpDir}/{str(filename)}-{self.randString()}.json", + f"{self.tmpDir}/{str(filename)}-{self.rand_string()}.json", "w", encoding="utf-8", ) as f: @@ -1513,7 +986,7 @@ def run(self, file): """ self.logger.debug(f"EXTRACTING : {file}") filename = Path(file).name - outputJSONFilename = f"{self.tmpDir}/{str(filename)}-{self.randString()}.json" + outputJSONFilename = f"{self.tmpDir}/{str(filename)}-{self.rand_string()}.json" # Auditd or Sysmon4Linux logs if self.sysmon4linux or self.auditdLogs: # Choose which log backend to use @@ -1556,17 +1029,17 @@ def run(self, file): self.logger.error(f"{Fore.RED} [-] {e}{Fore.RESET}") # EVTX else: - if not self.useExternalBinaries or not Path(self.evtxDumpCmd).is_file(): + if not self.use_external_binaries or not Path(self.evtx_dump_cmd).is_file(): self.logger.debug( " [-] No external binaries args or evtx_dump is missing" ) self.runUsingBindings(file) else: # Check if the binary is valid does not avoid TOCTOU - if self.verifyBinHash(self.evtxDumpCmd): + if self.verifyBinHash(self.evtx_dump_cmd): try: cmd = [ - self.evtxDumpCmd, + self.evtx_dump_cmd, "--no-confirm-overwrite", "-o", "jsonl", @@ -1586,79 +1059,73 @@ def cleanup(self): shutil.rmtree(self.tmpDir) -class zircoGuiGenerator: +class gui_generator: """ Generate the mini GUI """ - def __init__( - self, packageDir, templateFile, logger=None, outputFile=None, timeField="" - ): - self.logger = logger or logging.getLogger(__name__) - self.templateFile = templateFile - self.tmpDir = f"tmp-zircogui-{self.randString()}" - self.tmpFile = f"data-{self.randString()}.js" - self.outputFile = outputFile or f"zircogui-output-{self.randString()}" - self.packageDir = packageDir - self.timeField = timeField + def __init__(self, package_dir, template_file, output_file=None, time_field=""): + self.logger = logging.getLogger(__name__) + self.template_file = template_file + self.tmp_dir = f"tmp-zircogui-{self.rand_string()}" + self.tmp_file = f"data-{self.rand_string()}.js" + self.output_file = output_file or f"zircogui-output-{self.rand_string()}" + self.package_dir = package_dir + self.time_field = time_field - def randString(self): + def rand_string(self, length=4): return "".join( random.SystemRandom().choice(string.ascii_uppercase + string.digits) - for _ in range(4) + for _ in range(length) ) def unzip(self): try: - shutil.unpack_archive(self.packageDir, self.tmpDir, "zip") + shutil.unpack_archive(self.package_dir, self.tmp_dir, "zip") except Exception as e: self.logger.error(f" [-] {e}") def zip(self): try: - shutil.make_archive(self.outputFile, "zip", f"{self.tmpDir}/zircogui") + shutil.make_archive(self.output_file, "zip", f"{self.tmp_dir}/zircogui") except Exception as e: self.logger.error(f" [-] {e}") - def generate(self, data): + def run(self, data): self.unzip() try: self.logger.info( - f"[+] Generating ZircoGui package to : {self.outputFile}.zip" + f"[+] Generating ZircoGui package to : {self.output_file}.zip" ) - exportforzircoguiTmpl = templateEngine( - self.logger, self.templateFile, self.tmpFile, self.timeField - ) - exportforzircoguiTmpl.generateFromTemplate( - exportforzircoguiTmpl.template, - exportforzircoguiTmpl.templateOutput, - data, + exportforzircoguiTmpl = template_engine( + [self.template_file], [self.tmp_file], self.time_field ) + 
exportforzircoguiTmpl.run(data) except Exception as e: self.logger.error(f" [-] {e}") - shutil.move(self.tmpFile, f"{self.tmpDir}/zircogui/data.js") + shutil.move(self.tmp_file, f"{self.tmp_dir}/zircogui/data.js") self.zip() - shutil.rmtree(self.tmpDir) + shutil.rmtree(self.tmp_dir) -class rulesUpdater: +class rules_updater: """ Download rulesets from the https://github.com/wagga40/Zircolite-Rules repository and update if necessary. """ - def __init__(self, logger=None): + def __init__(self): self.url = ( "https://github.com/wagga40/Zircolite-Rules/archive/refs/heads/main.zip" ) - self.logger = logger or logging.getLogger(__name__) - self.tempFile = f"tmp-rules-{self.randString()}.zip" - self.tmpDir = f"tmp-rules-{self.randString()}" + self.logger = logging.getLogger(__name__) + self.tempFile = f"tmp-rules-{self.rand_string()}.zip" + self.tmpDir = f"tmp-rules-{self.rand_string()}" self.updatedRulesets = [] - def randString(self): + def rand_string(self, length=4): return "".join( random.SystemRandom().choice(string.ascii_uppercase + string.digits) - for _ in range(4) + for _ in range(length) ) def download(self): @@ -1716,10 +1183,10 @@ def run(self): self.logger.error(f" [-] {e}") -class rulesetHandler: +class ruleset_handler: - def __init__(self, logger=None, config=None, listPipelineOnly=False): - self.logger = logger or logging.getLogger(__name__) + def __init__(self, config=None, listPipelineOnly=False): + self.logger = logging.getLogger(__name__) self.saveRuleset = config.save_ruleset self.rulesetPathList = config.ruleset self.cores = config.cores or os.cpu_count() @@ -1760,18 +1227,35 @@ def __init__(self, logger=None, config=None, listPipelineOnly=False): self.Rulesets = self.rulesetParsing() # Combining Rulesets - if config.combine_rulesets: - self.Rulesets = [ - item - for subRuleset in self.Rulesets - if subRuleset - for item in subRuleset - ] - self.Rulesets = [ - sorted(self.Rulesets, key=lambda d: d["level"]) - ] # Sorting by level - - if all(not subRuleset for subRuleset in self.Rulesets): + # if config.combine_rulesets: + self.Rulesets = [ + item for subRuleset in self.Rulesets if subRuleset for item in subRuleset + ] + # Remove duplicates based on 'id' or 'title' + unique_rules = [] + seen_keys = set() + for rule in self.Rulesets: + # Use 'id' or 'title' as the unique key + rule_key = rule.get("id") or rule.get("title") + if rule_key and rule_key not in seen_keys: + seen_keys.add(rule_key) + unique_rules.append(rule) + + level_order = { + "critical": 1, + "high": 2, + "medium": 3, + "low": 4, + "informational": 5, + } + self.Rulesets = sorted( + unique_rules, + key=lambda d: level_order.get( + d.get("level", "informational"), float("inf") + ), + ) # Sorting by level + + if len(self.Rulesets) == 0: self.logger.error(f"{Fore.RED} [-] No rules to execute !{Fore.RESET}") def isYAML(self, filepath): @@ -1916,10 +1400,10 @@ def rulesetParsing(self): def selectFiles(pathList, selectFilesList): if selectFilesList is not None: return [ - evtx - for evtx in [str(element) for element in list(pathList)] + logs + for logs in [str(element) for element in list(pathList)] if any( - fileFilters[0].lower() in evtx.lower() + fileFilters[0].lower() in logs.lower() for fileFilters in selectFilesList ) ] @@ -1929,10 +1413,10 @@ def selectFiles(pathList, selectFilesList): def avoidFiles(pathList, avoidFilesList): if avoidFilesList is not None: return [ - evtx - for evtx in [str(element) for element in list(pathList)] + logs + for logs in [str(element) for element in list(pathList)] if all( - 
fileFilters[0].lower() not in evtx.lower() + fileFilters[0].lower() not in logs.lower() for fileFilters in avoidFilesList ) ] @@ -1942,16 +1426,6 @@ def avoidFiles(pathList, avoidFilesList): def ImportErrorHandler(config): importErrorList = [] - if forwardingDisabled: - importErrorList.append( - f"{Fore.LIGHTYELLOW_EX} [i] Cannot import 'aiohttp' or 'urllib3' or 'requests', events forwarding is disabled{Fore.RESET}" - ) - config.remote = None - if elasticForwardingDisabled: - importErrorList.append( - f"{Fore.LIGHTYELLOW_EX} [i] Cannot import 'elasticsearch[async]', events forwarding to Elastic is disabled{Fore.RESET}" - ) - config.index = None if updateDisabled: importErrorList.append( f"{Fore.LIGHTYELLOW_EX} [i] Cannot import 'requests', events update is disabled{Fore.RESET}" @@ -1996,11 +1470,101 @@ def ImportErrorHandler(config): ) +def runner(file, params): + """Runner function to flatten events and apply rules with multiprocessing""" + + flattener = json_flattener( + configFile=params["config"], + timeAfter=params["events_after"], + timeBefore=params["events_before"], + timeField=params["timefield"], + hashes=params["hashes"], + input_format=params["input_format"], + ) + + flattener.run([file]) + + # Save the flattened JSON to a file + if params["keepflat"]: + flattener.save_to_file(f"flattened_events_{rand_string(4)}.json") + + # Initialize zircore + filename = os.path.basename(file) + if params["on_disk_db"]: + db_location = f"{filename}-{rand_string(4)}.db" + else: + db_location = f"file:{filename}?mode=memory&cache=shared" + + zircolite_core = zircore( + limit=params["limit"], + csv_output=params["csv_output"], + db_location=db_location, + delimiter=params["delimiter"], + tmp_directory=params["tmp_directory"], + tmp_directory_db=params["tmp_directory_db"], + ) + + zircolite_core.create_db(flattener.fieldStmt) + zircolite_core.insert_flat_json_to_db(flattener.valuesStmt) + del flattener + zircolite_core.create_index() + + ruleset = params["rulesets"] + zircolite_core.load_ruleset_from_var( + ruleset=ruleset, ruleFilters=params["rulefilter"] + ) + zircolite_core.execute_ruleset() + zircolite_core.close() + + return zircolite_core.fullResults, zircolite_core.rule_results + + +def runner_wrapper(args): + """Helper function to allow TQDM to display a progress bar""" + return runner(*args) + + +def format_rule_level(level, reset=Fore.RESET): + if level == "informational": + return f"{Fore.WHITE}{level}{reset}" + if level == "low": + return f"{Fore.GREEN}{level}{reset}" + if level == "medium": + return f"{Fore.YELLOW}{level}{reset}" + if level == "high": + return f"{Fore.MAGENTA}{level}{reset}" + if level == "critical": + return f"{Fore.RED}{level}{reset}" + return level # Default case + + +def rand_string(length=10): + return "".join( + random.SystemRandom().choice(string.ascii_uppercase + string.digits) + for _ in range(length) + ) + + +def concatenate_files(input_dir, output_file, buffer_size=1024 * 1024): + input_files = list(Path(input_dir).rglob("*.json")) + with open(output_file, "wb") as outfile: + for fname in input_files: + if not os.path.isfile(fname): + print(f"File not found: {fname}") + continue + with open(fname, "rb") as infile: + while True: + buffer = infile.read(buffer_size) + if not buffer: + break + outfile.write(buffer) + + ################################################################ # MAIN() ################################################################ def main(): - version = "2.30.0" + version = "2.50.0" # Init Args handling parser = 
argparse.ArgumentParser() @@ -2010,8 +1574,8 @@ def main(): ) logsInputArgs.add_argument( "-e", - "--evtx", "--events", + "--evtx", help="Log file or directory where log files are stored in supported format", type=str, ) @@ -2080,13 +1644,6 @@ def main(): help="Source logs are in JSON but as an array", action="store_true", ) - eventFormatsArgs.add_argument( - "--db-input", - "-D", - "--dbonly", - help="Directly use a previously saved database file, timerange filters will not work", - action="store_true", - ) eventFormatsArgs.add_argument( "-S", "--sysmon-linux-input", @@ -2135,12 +1692,6 @@ def main(): rulesetsFormatsArgs.add_argument( "-nsc", "--no-sigma-conversion", help=argparse.SUPPRESS, action="store_true" ) - rulesetsFormatsArgs.add_argument( - "-cr", - "--combine-rulesets", - help="Merge all rulesets provided into one", - action="store_true", - ) rulesetsFormatsArgs.add_argument( "-sr", "--save-ruleset", @@ -2229,7 +1780,7 @@ def main(): "-L", "--limit", "--limit-results", - help="Discard results (in output file or forwarded events) that are above the provided limit", + help="Discard results that are above the provided limit", type=int, default=-1, ) @@ -2268,7 +1819,8 @@ def main(): configFormatsArgs.add_argument( "--cores", help="Specify how many cores you want to use, default is all cores, works only for EVTX extraction", - type=str, + default=os.cpu_count(), + type=int, ) configFormatsArgs.add_argument( "--debug", help="Activate debug logging", action="store_true" @@ -2276,22 +1828,11 @@ def main(): configFormatsArgs.add_argument( "--imports", help="Show detailed module import errors", action="store_true" ) - configFormatsArgs.add_argument( - "--showall", - help="Show all events, useful to check what rule takes takes time to execute", - action="store_true", - ) - configFormatsArgs.add_argument( - "-n", - "--nolog", - help="Don't create a log file or a result file (useful when forwarding)", - action="store_true", - ) configFormatsArgs.add_argument( "--ondiskdb", + "--on-disk-db", help="Use an on-disk database instead of the in-memory one (much slower !). 
Use if your system has limited RAM or if your dataset is very large and you cannot split it", - type=str, - default=":memory:", + action="store_true", ) configFormatsArgs.add_argument( "-RE", @@ -2308,41 +1849,13 @@ def main(): configFormatsArgs.add_argument( "-v", "--version", help="Show Zircolite version", action="store_true" ) - # Forwarding options - forwardingFormatsArgs = parser.add_argument_group( - f"{Fore.BLUE}FORWARDING OPTIONS{Fore.RESET}" - ) - forwardingFormatsArgs.add_argument( - "--remote", - help="Forward results to a HTTP/Splunk/Elasticsearch, please provide the full address e.g http[s]://address:port[/uri]", - type=str, - ) - forwardingFormatsArgs.add_argument( - "--token", help="Use this to provide Splunk HEC Token", type=str - ) - forwardingFormatsArgs.add_argument( - "--index", help="Use this to provide ES index", type=str - ) - forwardingFormatsArgs.add_argument( - "--eslogin", help="ES login", type=str, default="" - ) - forwardingFormatsArgs.add_argument( - "--espass", help="ES password", type=str, default="" - ) - forwardingFormatsArgs.add_argument( - "--stream", - help="By default event forwarding is done at the end, this option activate forwarding events when detected", - action="store_true", - ) - forwardingFormatsArgs.add_argument( - "--forwardall", help="Forward all events", action="store_true" - ) - forwardingFormatsArgs.add_argument( + configFormatsArgs.add_argument( "--timefield", - help="Provide time field name for event forwarding, default is 'SystemTime'", + help="Use this option to provide timestamp field name, default is 'SystemTime'", default="SystemTime", action="store_true", ) + # Templating and Mini GUI options templatingFormatsArgs = parser.add_argument_group( f"{Fore.BLUE}TEMPLATING AND MINI GUI OPTIONS{Fore.RESET}" @@ -2369,11 +1882,10 @@ def main(): signal.signal(signal.SIGINT, signal_handler) # Init logging - if args.nolog: - args.logfile = None - consoleLogger = initLogger(args.debug, args.logfile) + setup_logging(args.debug, args.logfile) + logger = logging.getLogger() - consoleLogger.info( + logger.info( """ ███████╗██╗██████╗ ██████╗ ██████╗ ██╗ ██╗████████╗███████╗ ╚══███╔╝██║██╔══██╗██╔════╝██╔═══██╗██║ ██║╚══██╔══╝██╔════╝ @@ -2387,22 +1899,22 @@ def main(): # Print version an quit if args.version: - consoleLogger.info(f"Zircolite - v{version}") + logger.info(f"Zircolite - v{version}") sys.exit(0) # Show imports status importsMessage, args, mustQuit = ImportErrorHandler(args) if importsMessage != "": - consoleLogger.info(f"[+] Modules imports status: \n{importsMessage}") + logger.info(f"[+] Modules imports status: \n{importsMessage}") else: - consoleLogger.info("[+] Modules imports status: OK") + logger.info("[+] Modules imports status: OK") if mustQuit: sys.exit(1) # Update rulesets if args.update_rules: - consoleLogger.info("[+] Updating rules") - updater = rulesUpdater(logger=consoleLogger) + logger.info("[+] Updating rules") + updater = rules_updater() updater.run() sys.exit(0) @@ -2413,56 +1925,30 @@ def main(): args.ruleset = ["rules/rules_windows_generic_pysigma.json"] # Loading rulesets - consoleLogger.info("[+] Loading ruleset(s)") - rulesetsManager = rulesetHandler(consoleLogger, args, args.pipeline_list) + logger.info("[+] Loading ruleset(s)") + rulesetsManager = ruleset_handler(args, args.pipeline_list) if args.pipeline_list: sys.exit(0) # Check mandatory CLI options - if not args.evtx: - consoleLogger.error( + if not args.events: + logger.error( f"{Fore.RED} [-] No events source path provided. 
Use '-e ', '--events '{Fore.RESET}" ), sys.exit(2) - if args.forwardall and args.db_input: - consoleLogger.error( - f"{Fore.RED} [-] Can't forward all events in db only mode {Fore.RESET}" - ), sys.exit(2) if args.csv and len(args.ruleset) > 1: - consoleLogger.error( + logger.error( f"{Fore.RED} [-] Since fields in results can change between rulesets, it is not possible to have CSV output when using multiple rulesets{Fore.RESET}" ), sys.exit(2) - consoleLogger.info("[+] Checking prerequisites") - - # Init Forwarding - forwarder = None - if args.remote is not None: - consoleLogger.info( - f"{Fore.LIGHTRED_EX}[!] Forwarding is not tested anymore and will be removed in the future{Fore.RESET}" - ) - forwarder = eventForwarder( - remote=args.remote, - timeField=args.timefield, - token=args.token, - logger=consoleLogger, - index=args.index, - login=args.eslogin, - password=args.espass, - ) - if not forwarder.networkCheck(): - quitOnError( - f"{Fore.RED} [-] Remote host cannot be reached : {args.remote}{Fore.RESET}", - consoleLogger, - ) + logger.info("[+] Checking prerequisites") # Checking provided timestamps try: - eventsAfter = time.strptime(args.after, "%Y-%m-%dT%H:%M:%S") - eventsBefore = time.strptime(args.before, "%Y-%m-%dT%H:%M:%S") + events_after = time.strptime(args.after, "%Y-%m-%dT%H:%M:%S") + events_before = time.strptime(args.before, "%Y-%m-%dT%H:%M:%S") except Exception: quitOnError( - f"{Fore.RED} [-] Wrong timestamp format. Please use 'AAAA-MM-DDTHH:MM:SS'", - consoleLogger, + f"{Fore.RED} [-] Wrong timestamp format. Please use 'AAAA-MM-DDTHH:MM:SS'" ) # Check templates args @@ -2470,243 +1956,257 @@ def main(): if args.template is not None: if args.csv: quitOnError( - f"{Fore.RED} [-] You cannot use templates in CSV mode{Fore.RESET}", - consoleLogger, + f"{Fore.RED} [-] You cannot use templates in CSV mode{Fore.RESET}" ) if (args.templateOutput is None) or ( len(args.template) != len(args.templateOutput) ): quitOnError( - f"{Fore.RED} [-] Number of templates output must match number of templates{Fore.RESET}", - consoleLogger, + f"{Fore.RED} [-] Number of templates output must match number of templates{Fore.RESET}" ) for template in args.template: checkIfExists( template[0], f"{Fore.RED} [-] Cannot find template : {template[0]}. DEfault templates are available here : https://github.com/wagga40/Zircolite/tree/master/templates{Fore.RESET}", - consoleLogger, ) readyForTemplating = True # Change output filename in CSV mode if args.csv: readyForTemplating = False + # If outfile is not provided, default to 'detected_events.csv' instead of 'detected_events.json' if args.outfile == "detected_events.json": args.outfile = "detected_events.csv" - # If on-disk DB already exists, quit. 
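# Illustrative sketch, not part of the patch: the shape of the --after/--before bounds
# that the time.strptime() checks above expect. parse_time_bound and the sample values
# are hypothetical; the script itself exits with an error instead of falling back.
import time

def parse_time_bound(value, fallback):
    """Return a struct_time, falling back to a permissive default on bad input."""
    try:
        return time.strptime(value, "%Y-%m-%dT%H:%M:%S")
    except ValueError:
        return time.strptime(fallback, "%Y-%m-%dT%H:%M:%S")

events_after = parse_time_bound("2024-01-01T00:00:00", "1970-01-01T00:00:00")
events_before = parse_time_bound("not-a-timestamp", "9999-12-12T23:59:59")
print(events_after, events_before)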
- if args.ondiskdb != ":memory:" and (Path(args.ondiskdb).is_file()): - quitOnError( - f"{Fore.RED} [-] On-disk database already exists{Fore.RESET}", - consoleLogger, - ) - # Start time counting start_time = time.time() - # Initialize zirCore - zircoliteCore = zirCore( - args.config, - logger=consoleLogger, - noOutput=args.nolog, - timeAfter=eventsAfter, - timeBefore=eventsBefore, - limit=args.limit, - csvMode=args.csv, - timeField=args.timefield, - hashes=args.hashes, - dbLocation=args.ondiskdb, - delimiter=args.csv_delimiter, - ) - - # If we are not working directly with the db - if not args.db_input: - # If we are working with json we change the file extension if it is not user-provided - if not args.fileext: - if args.json_input or args.json_array_input: - args.fileext = "json" - elif args.sysmon_linux_input or args.auditd_input: - args.fileext = "log" - elif args.xml_input: - args.fileext = "xml" - elif args.csv_input: - args.fileext = "csv" - else: - args.fileext = "evtx" - - LogPath = Path(args.evtx) - if LogPath.is_dir(): - # Log recursive search in given directory with given file extension or pattern - pattern = f"*.{args.fileext}" - # If a Glob pattern is provided - if args.file_pattern not in [None, ""]: - pattern = args.file_pattern - fnGlob = LogPath.rglob - - if args.no_recursion: - fnGlob = LogPath.glob - LogList = list(fnGlob(pattern)) - elif LogPath.is_file(): - LogList = [LogPath] + # If we are working with json file extension is changed if it is not user-provided + if not args.fileext: + if args.json_input or args.json_array_input: + args.fileext = "json" + elif args.sysmon_linux_input or args.auditd_input: + args.fileext = "log" + elif args.xml_input: + args.fileext = "xml" + elif args.csv_input: + args.fileext = "csv" else: - quitOnError( - f"{Fore.RED} [-] Unable to find events from submitted path{Fore.RESET}", - consoleLogger, - ) - - # Applying file filters in this order : "select" than "avoid" - FileList = avoidFiles(selectFiles(LogList, args.select), args.avoid) - if len(FileList) <= 0: - quitOnError( - f"{Fore.RED} [-] No file found. 
Please verify filters, directory or the extension with '--fileext' or '--file-pattern'{Fore.RESET}", - consoleLogger, - ) - - if not args.json_input and not args.json_array_input: - # Init EVTX extractor object - extractor = evtxExtractor( - logger=consoleLogger, - providedTmpDir=args.tmpdir, - coreCount=args.cores, - useExternalBinaries=(not args.noexternal), - binPath=args.evtx_dump, - xmlLogs=args.xml_input, - sysmon4linux=args.sysmon_linux_input, - auditdLogs=args.auditd_input, - evtxtract=args.evtxtract_input, - encoding=args.logs_encoding, - csvInput=args.csv_input, - ) - consoleLogger.info( - f"[+] Extracting events Using '{extractor.tmpDir}' directory " - ) - for evtx in tqdm(FileList, colour="yellow"): - extractor.run(evtx) - # Set the path for the next step - LogJSONList = list(Path(extractor.tmpDir).rglob("*.json")) - else: - LogJSONList = FileList + args.fileext = "evtx" + + LogPath = Path(args.events) + if LogPath.is_dir(): + # Log recursive search in given directory with given file extension or pattern + pattern = f"*.{args.fileext}" + # If a Glob pattern is provided + if args.file_pattern not in [None, ""]: + pattern = args.file_pattern + fnGlob = LogPath.rglob + # If directory recursion is not wanted + if args.no_recursion: + fnGlob = LogPath.glob + LogList = list(fnGlob(pattern)) + elif LogPath.is_file(): + LogList = [LogPath] + else: + quitOnError( + f"{Fore.RED} [-] Unable to find events from submitted path{Fore.RESET}" + ) - checkIfExists( - args.config, - f"{Fore.RED} [-] Cannot find mapping file, you can get the default one here : https://github.com/wagga40/Zircolite/blob/master/config/fieldMappings.json {Fore.RESET}", - consoleLogger, + # Applying file filters in this order : "select" than "avoid" + FileList = avoidFiles(selectFiles(LogList, args.select), args.avoid) + if len(FileList) <= 0: + quitOnError( + f"{Fore.RED} [-] No file found. Please verify filters, directory or the extension with '--fileext' or '--file-pattern'{Fore.RESET}" ) - if LogJSONList == []: - quitOnError( - f"{Fore.RED} [-] No files containing logs found.{Fore.RESET}", - consoleLogger, - ) - # Print field list and exit - if args.fieldlist: - fields = zircoliteCore.run(LogJSONList, Insert2Db=False, args_config=args) - zircoliteCore.close() - if not args.json_input and not args.json_array_input and not args.keeptmp: - extractor.cleanup() - [ - print(sortedField) - for sortedField in sorted([field for field in fields.values()]) - ] - sys.exit(0) - - # Flatten and insert to Db - if args.forwardall: - zircoliteCore.run( - LogJSONList, - saveToFile=args.keepflat, - forwarder=forwarder, - args_config=args, - ) - else: - zircoliteCore.run(LogJSONList, saveToFile=args.keepflat, args_config=args) - # Unload In memory DB to disk. 
Done here to allow debug in case of ruleset execution error - if args.dbfile is not None: - zircoliteCore.saveDbToDisk(args.dbfile) + args_dict = vars(args) + # Find the chosen input format + chosen_input = next( + (key for key, value in args_dict.items() if "_input" in key and value), None + ) + + if not args.json_input and not args.json_array_input: + # Init EVTX extractor object + extractor = evtx_extractor( + providedTmpDir=args.tmpdir, + cores=args.cores, + use_external_binaries=(not args.noexternal), + binaries_path=args.evtx_dump, + encoding=args.logs_encoding, + input_format=chosen_input, + ) + logger.info(f"[+] Extracting events using '{extractor.tmpDir}' directory ") + for evtx in tqdm(FileList, colour="yellow"): + extractor.run(evtx) + # Set the path for the next step + LogJSONList = list(Path(extractor.tmpDir).rglob("*.json")) else: - consoleLogger.info(f"[+] Creating model from disk : {args.evtx}") - zircoliteCore.loadDbInMemory(args.evtx) + LogJSONList = FileList + + checkIfExists( + args.config, + f"{Fore.RED} [-] Cannot find mapping file, you can get the default one here : https://github.com/wagga40/Zircolite/blob/master/config/fieldMappings.json {Fore.RESET}", + ) + if LogJSONList == []: + quitOnError(f"{Fore.RED} [-] No files containing logs found.{Fore.RESET}") + + # TODO : Add option for already flattened event + logger.info( + f"[+] Processing events and applying {Fore.CYAN}{len(rulesetsManager.Rulesets)}{Fore.RESET} rules" + ) # flatten array of "rulefilter" arguments if args.rulefilter: args.rulefilter = [item for sublist in args.rulefilter for item in sublist] - writeMode = "w" - for ruleset in rulesetsManager.Rulesets: - zircoliteCore.loadRulesetFromVar(ruleset=ruleset, ruleFilters=args.rulefilter) - if args.limit > 0: - consoleLogger.info( - f"[+] Limited mode : detections with more than {args.limit} events will be discarded" - ) - consoleLogger.info( - f"[+] Executing ruleset - {len(zircoliteCore.ruleset)} rules" - ) - zircoliteCore.executeRuleset( - args.outfile, - writeMode=writeMode, - forwarder=forwarder, - showAll=args.showall, - KeepResults=(readyForTemplating or args.package), - remote=args.remote, - stream=args.stream, - lastRuleset=(ruleset == rulesetsManager.Rulesets[-1]), + tmp_directory = f"tmp-output-{rand_string()}" + tmp_directory_db = f"tmp-db-{rand_string()}" if args.ondiskdb else "" + + # Pack the parameters for multiprocessing + param_list = { + "config": args.config, + "events_after": events_after, + "events_before": events_before, + "timefield": args.timefield, + "hashes": args.hashes, + "input_format": chosen_input, + "csv_output": args.csv, + "limit": args.limit, + "on_disk_db": args.ondiskdb, + "delimiter": args.csv_delimiter, + "keepflat": args.keepflat, + "rulefilter": args.rulefilter, + "rulesets": rulesetsManager.Rulesets, + "tmp_directory": tmp_directory, + "tmp_directory_db": tmp_directory_db, + } + + params_map = [] + for file in LogJSONList: + params_map.append((file, param_list)) + + all_full_results = [] + all_rule_results = [] + # Perform the JSON flattening and the detection process with multiprocessing + pool = mp.Pool(args.cores) + with tqdm(total=len(params_map), colour="yellow") as pbar: + for full_results, rule_results in pool.imap_unordered( + runner_wrapper, params_map + ): + all_full_results.extend(full_results) + all_rule_results.extend(rule_results) + pbar.update() + pool.close() + pool.join() + + # Merge the rule results from all processes + aggregated_rules = {} + for rule in all_rule_results: + key = 
rule["rule_title"] + if key in aggregated_rules: + aggregated_rules[key]["rule_count"] += rule["rule_count"] + else: + aggregated_rules[key] = rule.copy() + + level_order = {"critical": 1, "high": 2, "medium": 3, "low": 4, "informational": 5} + + aggregated_rules = sorted( + aggregated_rules.values(), + key=lambda d: level_order.get( + d.get("rule_level", "informational"), float("inf") + ), + ) # Sort by level + for rule in aggregated_rules: + rule_title = rule["rule_title"] + rule_level = rule["rule_level"] + rule_count = rule["rule_count"] + formatted_level = format_rule_level(rule_level, Fore.CYAN) + logger.info( + f"{Fore.CYAN} - {rule_title} [{formatted_level}] : {rule_count} events{Fore.RESET}" ) - writeMode = "a" # Next iterations will append to results file - - consoleLogger.info(f"[+] Results written in : {args.outfile}") - # Forward events - if args.remote is not None and not args.stream: # If not in stream mode - consoleLogger.info(f"[+] Forwarding to : {args.remote}") - forwarder.send(zircoliteCore.fullResults, False) - if args.remote is not None and args.stream: - consoleLogger.info(f"[+] Forwarded to : {args.remote}") + logger.info(f"[+] Writing results to the output file : {args.outfile}") + + concatenate_files(tmp_directory, args.outfile) + # if not keep_tmp_output: + shutil.rmtree(tmp_directory) + # if not keep_tmp_db: + if args.ondiskdb: + shutil.rmtree(tmp_directory_db) + + # if not args.csv: + # with open(args.outfile, 'w', encoding='utf-8') as outfile: + # # Serialize the list of rule results to JSON with indentation + # json_bytes = json.dumps(all_full_results, option=json.OPT_INDENT_2) + # # Write the decoded JSON string to the file + # outfile.write(json_bytes.decode('utf-8')) + # else: + # # For CSV mode, collect all field names + # fieldnames_set = set(["rule_title", "rule_description", "rule_level", "rule_count"]) + + # for rule_result in all_full_results: + # matches = rule_result['matches'] + # if matches: + # for data in matches: + # fieldnames_set.update(data.keys()) + + # # For CSV mode, write matches to CSV + # with open(args.outfile, 'w', encoding='utf-8', newline='') as outfile: + # writer = csv.DictWriter(outfile, delimiter=args.csv_delimiter, fieldnames=fieldnames_set) + # writer.writeheader() + # for rule_result in all_full_results: + # matches = rule_result['matches'] + # if matches: + # for data in matches: + # dictCSV = { + # "rule_title": rule_result["title"], + # "rule_description": rule_result["description"], + # "rule_level": rule_result["rule_level"], + # "rule_count": rule_result["count"], + # **data + # } + # writer.writerow(dictCSV) # Templating - if readyForTemplating and zircoliteCore.fullResults != []: - templateGenerator = templateEngine( - consoleLogger, args.template, args.templateOutput, args.timefield + if readyForTemplating and all_full_results != []: + template_generator = template_engine( + args.template, args.templateOutput, args.timefield ) - templateGenerator.run(zircoliteCore.fullResults) + template_generator.run(all_full_results) # Generate ZircoGui package - if args.package and zircoliteCore.fullResults != []: + if args.package and all_full_results != []: if ( Path("templates/exportForZircoGui.tmpl").is_file() and Path("gui/zircogui.zip").is_file() ): - packager = zircoGuiGenerator( + packager = gui_generator( "gui/zircogui.zip", "templates/exportForZircoGui.tmpl", - consoleLogger, None, args.timefield, ) - packager.generate(zircoliteCore.fullResults) + packager.run(all_full_results) # Remove working directory containing 
logs as json if not args.keeptmp: - consoleLogger.info("[+] Cleaning") + logger.info("[+] Cleaning") try: - if not args.json_input and not args.json_array_input and not args.db_input: + if not args.json_input and not args.json_array_input: extractor.cleanup() except OSError as e: - consoleLogger.error( - f"{Fore.RED} [-] Error during cleanup {e}{Fore.RESET}" - ) + logger.error(f"{Fore.RED} [-] Error during cleanup {e}{Fore.RESET}") # Remove files submitted for analysis if args.remove_events: - for EVTX in LogList: + for logs in LogList: try: - os.remove(EVTX) + os.remove(logs) except OSError as e: - consoleLogger.error( - f"{Fore.RED} [-] Cannot remove files {e}{Fore.RESET}" - ) + logger.error(f"{Fore.RED} [-] Cannot remove files {e}{Fore.RESET}") - zircoliteCore.close() - consoleLogger.info(f"\nFinished in {int((time.time() - start_time))} seconds") + logger.info(f"\nFinished in {int((time.time() - start_time))} seconds") if __name__ == "__main__": diff --git a/zircolite_dev.py b/zircolite_dev.py index 3ea5c47..7413940 100755 --- a/zircolite_dev.py +++ b/zircolite_dev.py @@ -2,20 +2,19 @@ # Standard libs import argparse -import asyncio import base64 import chardet import csv import functools import hashlib import logging +import logging.config import multiprocessing as mp import os import random import re import shutil import signal -import socket import sqlite3 import string import subprocess @@ -30,7 +29,6 @@ import xxhash from colorama import Fore from tqdm import tqdm -from tqdm.asyncio import tqdm as tqdmAsync from RestrictedPython import compile_restricted from RestrictedPython import safe_builtins from RestrictedPython import limited_builtins @@ -39,25 +37,10 @@ from RestrictedPython.Guards import guarded_iter_unpack_sequence # External libs (Optional) -forwardingDisabled = False -try: - import aiohttp - import urllib3 - urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) -except ImportError: - forwardingDisabled = True - -elasticForwardingDisabled = False -try: - from elasticsearch import AsyncElasticsearch -except ImportError: - elasticForwardingDisabled = True - updateDisabled = False try: import requests except ImportError: - forwardingDisabled = True updateDisabled = True sigmaConversionDisabled = False @@ -92,317 +75,96 @@ def signal_handler(sig, frame): print("[-] Execution interrupted !") sys.exit(0) -def quitOnError(message, logger=None): +def quitOnError(message): + """Log an error message and exit the program.""" + logger = logging.getLogger(__name__) logger.error(message) sys.exit(1) -def checkIfExists(path, errorMessage, logger=None): +def checkIfExists(path, errorMessage): """Test if path provided is a file""" if not (Path(path).is_file()): - quitOnError(errorMessage, logger) - -def initLogger(debugMode, logFile=None): - fileLogLevel = logging.INFO - fileLogFormat = "%(asctime)s %(levelname)-8s %(message)s" - if debugMode: - fileLogLevel = logging.DEBUG - fileLogFormat = "%(asctime)s %(levelname)-8s %(module)s:%(lineno)s %(funcName)s %(message)s" - - if logFile is not None: - logging.basicConfig(format=fileLogFormat, filename=logFile, level=fileLogLevel, datefmt='%Y-%m-%d %H:%M:%S') - logger = logging.StreamHandler() - formatter = logging.Formatter('%(message)s') - logger.setFormatter(formatter) - logger.setLevel(logging.INFO) - logging.getLogger().addHandler(logger) - else: - logging.basicConfig(format='%(message)s', level=logging.INFO, datefmt='%Y-%m-%d %H:%M:%S') - - return logging.getLogger() - -class templateEngine: - def __init__(self, 
logger=None, template=[], templateOutput=[], timeField=""): - self.logger = logger or logging.getLogger(__name__) - self.template = template - self.templateOutput = templateOutput + quitOnError(errorMessage) + +def setup_logging(debug_mode, log_file=None): + """Set up logging configuration.""" + log_level = logging.DEBUG if debug_mode else logging.INFO + + # Define a configuration dictionary + logging_config = { + 'version': 1, + 'disable_existing_loggers': False, + 'formatters': { + 'console_formatter': { + 'format': '%(message)s' + }, + 'file_formatter': { + 'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s' + }, + }, + 'handlers': { + 'console': { + 'class': 'logging.StreamHandler', + 'formatter': 'console_formatter', + 'level': logging.INFO, + 'stream': 'ext://sys.stdout', + }, + 'file': { + 'class': 'logging.FileHandler', + 'formatter': 'file_formatter', + 'level': log_level, + 'filename': log_file or 'zircolite.log', + 'encoding': 'utf-8', + }, + }, + 'root': { + 'handlers': ['console', 'file'] if log_file else ['console'], + 'level': log_level, + }, + } + + logging.config.dictConfig(logging_config) + +# Define the authorized BUILTINS for Resticted Python +def default_guarded_getitem(ob, index): + return ob[index] + +class template_engine: + def __init__(self, templates=[], template_outputs=[], timeField=""): + self.logger = logging.getLogger(__name__) self.timeField = timeField + self.compiled_templates = {} + # Flatten templates and outputs if they are nested lists + self.template_paths = [tpl[0] if isinstance(tpl, list) else tpl for tpl in templates] + self.template_outputs = [out[0] if isinstance(out, list) else out for out in template_outputs] - def generateFromTemplate(self, templateFile, outputFilename, data): + def generate_from_template(self, template_file, outputFilename, data): """ Use Jinja2 to output data in a specific format """ try: - - tmpl = open(templateFile, 'r', encoding='utf-8') - template = Template(tmpl.read()) - + with open(template_file, 'r', encoding='utf-8') as tmpl: + # Use the compiled template if available, otherwise compile it + if template_file in self.compiled_templates: + template = self.compiled_templates["templateFile"] + else: + template = Template(tmpl.read()) + self.compiled_templates["templateFile"] = template + # Render the template and write to the output file with open(outputFilename, 'a', encoding='utf-8') as tpl: tpl.write(template.render(data=data, timeField=self.timeField)) except Exception as e: - self.logger.error(f"{Fore.RED} [-] Template error, activate debug mode to check for errors{Fore.RESET}") + self.logger.error(f"{Fore.RED} [-] Template error, activate debug mode with '--debug' to check for errors{Fore.RESET}") self.logger.debug(f" [-] {e}") def run(self, data): - for template, templateOutput in zip(self.template, self.templateOutput): - self.logger.info(f'[+] Applying template "{template[0]}", outputting to : {templateOutput[0]}') - self.generateFromTemplate(template[0], templateOutput[0], data) - -class eventForwarder: - """ Class for handling event forwarding """ - def __init__(self, remote, timeField, token, logger=None, index=None, login='', password='', pipeline=''): - self.logger = logger or logging.getLogger(__name__) - self.remoteHost = remote - self.token = token - self.localHostname = socket.gethostname() - self.userAgent = "zircolite/2.x" - self.index = index - self.login = login - self.password = password - self.pipeline = pipeline - self.queueSize = 20 - self.connectionFailed = False - self.timeField = 
timeField - - def send(self, payloads, forwardAll=False): - if payloads: - if self.remoteHost: - try: - # Change EventLoopPolicy on Windows https://stackoverflow.com/questions/45600579/asyncio-event-loop-is-closed-when-getting-loop - if _platform == "win32": - asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) - # Splunk HEC - if self.token: - asyncio.run(self.sendAllAsyncQueue(payloads, timeField=self.timeField, sigmaEvents=(not forwardAll), mode="HEC")) - # ElasticSearch - elif self.index: - self.disableESDefaultLogging() - asyncio.run(self.sendAllAsyncQueue(payloads, timeField=self.timeField, sigmaEvents=(not forwardAll), mode="ES")) - # HTTP - else: - asyncio.run(self.sendAllAsyncQueue(payloads, timeField=self.timeField, sigmaEvents=(not forwardAll), mode="HTTP")) - except Exception as e: - self.logger.debug(f"{Fore.RED} [-] {e}") - - def networkCheck(self): - """ Check remote connectivity """ - self.logger.info(f'[+] Check connectivity to {self.remoteHost}') - try: - requests.get(self.remoteHost, headers={'user-agent': self.userAgent}, timeout=10, verify=False) - except (requests.ConnectionError, requests.Timeout): - return False - return True - - def formatToEpoch(self, timestamp): - try: - return str(time.mktime(time.strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%f%z'))) + timestamp.split(".")[1][:-1] - except ValueError: - try: - return str(time.mktime(time.strptime(timestamp, '%Y-%m-%dT%H:%M:%S%z'))) + timestamp.split(".")[1][:-1] - except Exception: - self.logger.debug(f"{Fore.RED} [-] Timestamp error: {timestamp}{Fore.RESET}") - - def disableESDefaultLogging(self): - """ By Default Elastic client has a logger set to INFO level """ - es_log = logging.getLogger("elasticsearch") - es_log.setLevel(logging.ERROR) - es_log = logging.getLogger("elastic_transport") - es_log.setLevel(logging.ERROR) - - async def HECWorker(self, session, queue, sigmaEvents): - while True: - if self.index: - providedIndex = f"?index={self.index}" - else: - providedIndex = "" - data = await queue.get() # Pop data from Queue - resp = await session.post(f"{self.remoteHost}/services/collector/event{providedIndex}", headers={'Authorization': f"Splunk {self.token}"}, json=data) # Exec action from Queue - queue.task_done() # Notify Queue action ended - if str(resp.status)[0] in ["4", "5"]: - self.logger.error(f"{Fore.RED} [-] Forwarding failed for event {Fore.RESET}") - - async def ESWorker(self, session, queue, sigmaEvents): - while True: - data = await queue.get() # Pop data from Queue - index = self.index - if sigmaEvents: - index = f'{self.index}-sigma' - else: - if "OriginalLogfile" in data["payload"]: - index = f'{index}-{("".join([char for char in data["payload"]["OriginalLogfile"].split(".")[0] if (char.isalpha() or char == "-")])).lower()}' - try: - await session.index(index=index, document=data["payload"], id=data["hash"]) # Exec action from Queue - except Exception as e: - if "error" in e.body: - if e.body["error"]["type"] == "mapper_parsing_exception": - errField = e.body["error"]["reason"].split("[")[1].split("]")[0] - errType = e.body["error"]["reason"].split("[")[2].split("]")[0] - errValue = e.body["error"]["reason"].split("value: '")[1].split("'")[0] - canInsert = False - - if errType == "long" and errValue.startswith("0x"): # Hex value in long field - data["payload"][errField] = int(data["payload"][errField], 16) - canInsert = True - elif errType == "boolean" and errValue.startswith("0"): # 0 value in bool field - data["payload"][errField] = "false" - canInsert = True - elif 
errType == "boolean" and errValue.startswith("1"): # 1 value in bool field - data["payload"][errField] = "true" - canInsert = True - elif errType == "long" and isinstance((data["payload"][errField]), int) and data["payload"][errField] > (2**63 -1): # ES limit - data["payload"][errField] = 2 ** 63 - 1 - canInsert = True - elif errType == "long" and isinstance((data["payload"][errField]), int) and data["payload"][errField] < -(2**63): # ES limit - data["payload"][errField] = -(2 ** 63) - canInsert = True - elif errType == "long" and isinstance(data["payload"][errField], argparse.BooleanOptionalAction): - if type(data["payload"][errField]): - data["payload"][errField] = 1 - else: - data["payload"][errField] = 0 - canInsert = True - else: - self.logger.debug(f"{Fore.RED} [-] ES Mapping parser error : {e}{Fore.RESET}") - if canInsert: - try: - await session.index(index=index, document=data["payload"], id=data["hash"]) - except Exception as e: - self.logger.debug(f"{Fore.RED} [-] ES error : {e}{Fore.RESET}") - elif e.body["error"]["type"] == "illegal_argument_exception": - errField = e.body["error"]["reason"].split("[")[1].split("]")[0] - data["payload"].pop(errField, None) # remove value from payload - try: - await session.index(index=index, document=data["payload"], id=data["hash"]) - except Exception as e: - self.logger.debug(f"{Fore.RED} [-] ES error : {e}{Fore.RESET}") - else: - self.logger.debug(f"{Fore.RED} [-] ES error : {e}{Fore.RESET}") - - queue.task_done() # Notify Queue action ended - - async def HTTPWorker(self, session, queue, sigmaEvents): - while True: - data = await queue.get() # Pop data from Queue - resp = await session.post(self.remoteHost, headers={"user-agent": self.userAgent}, json=data) # Exec action from Queue - queue.task_done() # Notify Queue action ended - if str(resp.status)[0] in ["4", "5"]: - self.logger.error(f"{Fore.RED} [-] Forwarding failed for event {Fore.RESET}") - - def formatEventForES(self, payload, match={}, timeField="", sigmaEvents=False): - if self.pipeline != "": - payload["pipeline"] = self.pipeline - if sigmaEvents: - payload = {"title": payload["title"], "id": payload["id"],"sigmafile": payload["sigmafile"], "description": payload["description"], "sigma": payload["sigma"], "rule_level": payload["rule_level"], "tags": payload["tags"], "host": self.localHostname} - [payload.update({key: eval(value)}) if value in ["False", "True"] else payload.update({key: value}) for key, value in match.items()] # In detected events boolean are stored as strings - - return {"payload": payload, "hash":xxhash.xxh64_hexdigest(str(payload))} - - def formatEventForSplunk(self, payload, match={}, timeField="", sigmaEvents=False): - if sigmaEvents: - payload = {"title": payload["title"], "id": payload["id"],"sigmafile": payload["sigmafile"], "description": payload["description"], "sigma": payload["sigma"], "rule_level": payload["rule_level"], "tags": payload["tags"]} - [payload.update({key: value}) for key, value in match.items()] - if (timeField == ""): - return {"sourcetype": "_json", "event": payload, "host": self.localHostname } - elif (timeField not in payload): - self.logger.error(f"{Fore.RED} [-] Provided time field was not found {Fore.RESET}") - return {"sourcetype": "_json", "event": payload, "host": self.localHostname } - else: - return {"sourcetype": "_json", "event": payload, "host": self.localHostname, "time": self.formatToEpoch(payload[timeField])} - - def formatEventForHTTTP(self, payload, match={}, timeField="", sigmaEvents=False): - payload.update({"host": 
self.localHostname}) - return payload + for template, template_output in zip(self.template_paths, self.template_outputs): + self.logger.info(f'[+] Applying template "{template}", outputting to : {template_output}') + self.generate_from_template(template, template_output, data) - def initESSession(self): - if self.login == "": - session = AsyncElasticsearch(hosts=[self.remoteHost], verify_certs=False) - else: - session = AsyncElasticsearch(hosts=[self.remoteHost], verify_certs=False, basic_auth=(self.login, self.password)) - return session - - async def testESSession(self, session): - try: - await session.info() - except Exception: - self.logger.error(f"{Fore.RED} [-] Connection to ES failed {Fore.RESET}") - await session.close() - self.connectionFailed = True - - async def testSplunkSession(self, session): - data = {"sourcetype": "_json", "event": {}, "host": self.localHostname } - resp = await session.post(f"{self.remoteHost}/services/collector/event", headers={'Authorization': f"Splunk {self.token}"}, json=data) - if str(resp.status)[0] in ["4", "5"]: - await session.close() - self.logger.error(f"{Fore.RED} [-] Connection to Splunk HEC failed - Forwarding disabled {Fore.RESET}") - self.connectionFailed = True - - async def testHTTPSession(self, session): - resp = await session.post(self.remoteHost, headers={"user-agent": self.userAgent}, json={}) - if str(resp.status)[0] in ["4", "5"]: - await session.close() - self.logger.error(f"{Fore.RED} [-] Connection to HTTP Server failed - Forwarding disabled {Fore.RESET}") - self.connectionFailed = True - - async def sendAllAsyncQueue(self, payloads, timeField="", sigmaEvents=False, mode=""): - - if self.connectionFailed: - return - - if mode == "ES": - session = self.initESSession() - await self.testESSession(session) - if self.connectionFailed: - return - fnformatEvent = self.formatEventForES - fnWorker = self.ESWorker - elif mode == "HEC": - session = aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) - await self.testSplunkSession(session) - if self.connectionFailed: - return - fnformatEvent = self.formatEventForSplunk - fnWorker = self.HECWorker - elif mode == "HTTP": - session = aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) - await self.testHTTPSession(session) - if self.connectionFailed: - return - fnformatEvent = self.formatEventForHTTTP - fnWorker = self.HTTPWorker - else: - return - - # Init queue - queue = asyncio.Queue() - tasks = [] - - if not sigmaEvents: - self.logger.info('[+] Gathering events to forward') - payloads = tqdmAsync(payloads, colour="yellow") - - for payload in payloads: - if sigmaEvents: - for match in payload["matches"]: - queue.put_nowait(fnformatEvent(payload=payload, match=match, timeField=timeField, sigmaEvents=sigmaEvents)) - else: - queue.put_nowait(fnformatEvent(payload=payload, timeField=timeField, sigmaEvents=sigmaEvents)) - - # Create workers to process Queue - for i in range(20): - task = asyncio.create_task(fnWorker(session, queue, sigmaEvents=sigmaEvents)) - tasks.append(task) - if not sigmaEvents: - self.logger.info(f'[+] Forwarding {queue.qsize()} events to {self.remoteHost} {Fore.CYAN}(Don\'t panic if nothing change for a long time){Fore.RESET}') - await queue.join() - # Cancel our worker tasks. - for task in tasks: - task.cancel() - # Wait until all worker tasks are cancelled. 
- await asyncio.gather(*tasks, return_exceptions=True) - await session.close() - -class JSONFlattener: +class json_flattener: """ Perform JSON Flattening """ - def __init__(self, configFile, logger=None, timeAfter="1970-01-01T00:00:00", timeBefore="9999-12-12T23:59:59", timeField=None, hashes=False, args_config=None): - self.logger = logger or logging.getLogger(__name__) + def __init__(self, configFile, timeAfter="1970-01-01T00:00:00", timeBefore="9999-12-12T23:59:59", timeField=None, hashes=False, input_format=None): + self.logger = logging.getLogger(__name__) self.keyDict = {} self.fieldStmt = "" self.valuesStmt = [] @@ -410,18 +172,18 @@ def __init__(self, configFile, logger=None, timeAfter="1970-01-01T00:00:00", tim self.timeBefore = timeBefore self.timeField = timeField self.hashes = hashes - self.args_config = args_config - self.JSONArray = args_config.json_array_input + self.JSONArray = False + # Initialize the cache for compiled code self.compiled_code_cache = {} - # Convert the argparse.Namespace to a dictionary - args_dict = vars(args_config) - # Find the chosen input format - self.chosen_input = next((key for key, value in args_dict.items() if "_input" in key and value), None) + self.chosen_input = input_format if self.chosen_input is None: self.chosen_input = "evtx_input" # Since evtx is the default input, we force it no chosen input has been found + if self.chosen_input == "json_array_input": + self.JSONArray = True + with open(configFile, 'r', encoding='UTF-8') as fieldMappingsFile: self.fieldMappingsDict = json.loads(fieldMappingsFile.read()) self.fieldExclusions = self.fieldMappingsDict["exclusions"] @@ -431,17 +193,11 @@ def __init__(self, configFile, logger=None, timeAfter="1970-01-01T00:00:00", tim self.fieldSplitList = self.fieldMappingsDict["split"] self.transforms = self.fieldMappingsDict["transforms"] self.transforms_enabled = self.fieldMappingsDict["transforms_enabled"] - - # Define the authorized BUILTINS for Resticted Python - def default_guarded_getitem(ob, index): - return ob[index] - default_guarded_getattr = getattr - self.RestrictedPython_BUILTINS = { '__name__': 'script', "_getiter_": default_guarded_getiter, - '_getattr_': default_guarded_getattr, + '_getattr_': getattr, '_getitem_': default_guarded_getitem, 'base64': base64, 're': re, @@ -451,8 +207,25 @@ def default_guarded_getitem(ob, index): self.RestrictedPython_BUILTINS.update(safe_builtins) self.RestrictedPython_BUILTINS.update(limited_builtins) self.RestrictedPython_BUILTINS.update(utility_builtins) + + def transform_value(self, code, param): + try: + # Check if the code has already been compiled + if code in self.compiled_code_cache: + byte_code = self.compiled_code_cache[code] + else: + # Compile the code and store it in the cache + byte_code = compile_restricted(code, filename='', mode='exec') + self.compiled_code_cache[code] = byte_code + # Prepare the execution environment + TransformFunction = {} + exec(byte_code, self.RestrictedPython_BUILTINS, TransformFunction) + return TransformFunction["transform"](param) + except Exception as e: + self.logger.debug(f"ERROR: Couldn't apply transform: {e}") + return param # Return the original parameter if transform fails - def run(self, file): + def process_file(self, file): """ Flatten json object with nested keys into a single level. 
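# Illustrative sketch, not part of the patch: a field "transform" snippet compiled once
# with RestrictedPython and then reused, as transform_value() does with its cache.
# The transform source and the sample value are made up; only the guards this snippet
# actually needs are provided here.
import base64
from RestrictedPython import compile_restricted, safe_builtins

transform_code = (
    "def transform(param):\n"
    "    return base64.b64decode(param).decode('utf-8', errors='ignore')\n"
)

restricted_globals = {"__builtins__": safe_builtins, "_getattr_": getattr, "base64": base64}
byte_code = compile_restricted(transform_code, filename="<transform>", mode="exec")

namespace = {}
exec(byte_code, restricted_globals, namespace)
print(namespace["transform"]("d2hvYW1pLmV4ZQ=="))  # -> whoami.exe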
Returns the flattened json object @@ -462,23 +235,6 @@ def run(self, file): JSONOutput = [] fieldStmt = "" - def transformValue(code, param): - try: - # Check if the code has already been compiled - if code in self.compiled_code_cache: - byte_code = self.compiled_code_cache[code] - else: - # Compile the code and store it in the cache - byte_code = compile_restricted(code, filename='', mode='exec') - self.compiled_code_cache[code] = byte_code - # Prepare the execution environment - TransformFunction = {} - exec(byte_code, self.RestrictedPython_BUILTINS, TransformFunction) - return TransformFunction["transform"](param) - except Exception as e: - self.logger.debug(f"ERROR: Couldn't apply transform: {e}") - return param # Return the original parameter if transform fails - def flatten(x, name=''): nonlocal fieldStmt # If it is a Dict go deeper @@ -523,9 +279,9 @@ def flatten(x, name=''): if transform["alias"]: keys.append(transform["alias_name"]) keysThatNeedTransformedValues.append(transform["alias_name"]) - transformedValuesByKeys[transform["alias_name"]] = transformValue(transformCode, value) + transformedValuesByKeys[transform["alias_name"]] = self.transform_value(transformCode, value) else: - value = transformValue(transformCode, value) + value = self.transform_value(transformCode, value) # Applying field splitting fieldsToSplit = [] @@ -600,46 +356,65 @@ def flatten(x, name=''): JSONLine = {} return {"dbFields": fieldStmt, "dbValues": JSONOutput} - def runAll(self, EVTXJSONList): - for evtxJSON in tqdm(EVTXJSONList, colour="yellow"): + def save_to_file(self, outputFile): + with open(outputFile, 'w', encoding='utf-8') as file: + for JSONLine in tqdm(self.valuesStmt, colour="yellow"): + file.write(f'{json.dumps(JSONLine).decode("utf-8")}\n') + + def run(self, EVTXJSONList): + for evtxJSON in EVTXJSONList: if os.stat(evtxJSON).st_size != 0: - results = self.run(evtxJSON) + results = self.process_file(evtxJSON) self.fieldStmt += results["dbFields"] self.valuesStmt += results["dbValues"] -class zirCore: +class zircore: """ Load data into database and apply detection rules """ - def __init__(self, config, logger=None, noOutput=False, timeAfter="1970-01-01T00:00:00", timeBefore="9999-12-12T23:59:59", limit=-1, csvMode=False, timeField=None, hashes=False, dbLocation=":memory:", delimiter=";"): - self.logger = logger or logging.getLogger(__name__) - self.dbConnection = self.createConnection(dbLocation) + def __init__(self, noOutput=False, limit=-1, csv_output=False, db_location=":memory:", delimiter=";", + tmp_directory=".", + tmp_directory_db="." 
+ ): + self.logger = logging.getLogger(__name__) + + self.tmp_directory = tmp_directory + self.tmp_directory_db = tmp_directory_db + self.db_connection = self.create_connection(db_location) self.fullResults = [] + self.rule_results = [] self.ruleset = {} self.noOutput = noOutput - self.timeAfter = timeAfter - self.timeBefore = timeBefore - self.config = config self.limit = limit - self.csvMode = csvMode - self.timeField = timeField - self.hashes = hashes + self.csv_output = csv_output self.delimiter = delimiter - + + #if not csv_output: + + if not Path(str(tmp_directory)).is_dir(): + os.mkdir(tmp_directory) + if "?mode=memory&cache=shared" in db_location: + tmp_filename = f'{db_location.replace("file:", "").replace("?mode=memory&cache=shared", "")}.json' + else: + tmp_filename = f"{db_location}.json" + self.tmp_file = open(f'{tmp_directory}/{tmp_filename}', 'w', encoding='utf-8') + def close(self): - self.dbConnection.close() + self.db_connection.close() - def createConnection(self, db): + def create_connection(self, db): """ create a database connection to a SQLite database """ conn = None self.logger.debug(f"CONNECTING TO : {db}") try: - if db == ':memory:': + if "?mode=memory&cache=shared" in db: conn = sqlite3.connect(db, isolation_level=None) conn.execute('PRAGMA journal_mode = MEMORY;') conn.execute('PRAGMA synchronous = OFF;') conn.execute('PRAGMA temp_store = MEMORY;') else: - conn = sqlite3.connect(db) + if not Path(str(self.tmp_directory_db)).is_dir(): + os.mkdir(self.tmp_directory_db) + conn = sqlite3.connect(f"{self.tmp_directory_db}/{db}") conn.row_factory = sqlite3.Row # Allows to get a dict def udf_regex(x, y): @@ -655,42 +430,43 @@ def udf_regex(x, y): self.logger.error(f"{Fore.RED} [-] {e}") return conn - def createDb(self, fieldStmt): + def create_db(self, fieldStmt): createTableStmt = f"CREATE TABLE logs ( row_id INTEGER, {fieldStmt} PRIMARY KEY(row_id AUTOINCREMENT) );" self.logger.debug(f" CREATE : {createTableStmt}") - if not self.executeQuery(createTableStmt): + if not self.execute_simple_query(createTableStmt): self.logger.error(f"{Fore.RED} [-] Unable to create table{Fore.RESET}") sys.exit(1) - def createIndex(self): - self.executeQuery('CREATE INDEX "idx_eventid" ON "logs" ("eventid");') + def create_index(self): + self.execute_simple_query('CREATE INDEX "idx_eventid" ON "logs" ("EventID");') + self.execute_simple_query('CREATE INDEX "idx_channel" ON "logs" ("Channel");') - def executeQuery(self, query): + def execute_simple_query(self, query): """ Perform a SQL Query with the provided connection """ - if self.dbConnection is not None: - dbHandle = self.dbConnection.cursor() + if self.db_connection is None: + self.logger.error(f"{Fore.RED} [-] No connection to Db{Fore.RESET}") + return False + else: + dbHandle = self.db_connection.cursor() self.logger.debug(f"EXECUTING : {query}") try: dbHandle.execute(query) - self.dbConnection.commit() - return True + self.db_connection.commit() except Error as e: self.logger.debug(f" [-] {e}") return False - else: - self.logger.error(f"{Fore.RED} [-] No connection to Db{Fore.RESET}") - return False + return True - def executeSelectQuery(self, query): + def execute_select_query(self, query): """ Execute a SELECT SQL query and return the results as a list of dictionaries. 
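# Illustrative sketch, not part of the patch: an in-memory SQLite database with the same
# PRAGMAs and a REGEXP user-defined function, so Sigma-generated SQL such as
# "field REGEXP 'pattern'" can run. The table, columns and sample row are made up.
import re
import sqlite3

conn = sqlite3.connect(":memory:", isolation_level=None)
conn.execute("PRAGMA journal_mode = MEMORY;")
conn.execute("PRAGMA synchronous = OFF;")
conn.execute("PRAGMA temp_store = MEMORY;")
conn.row_factory = sqlite3.Row  # rows become dict-like

def udf_regex(pattern, value):
    # SQLite calls this for the REGEXP operator; NULL values simply do not match
    if value is None:
        return 0
    return 1 if re.search(pattern, value) else 0

conn.create_function("regexp", 2, udf_regex)

conn.execute("CREATE TABLE logs (row_id INTEGER PRIMARY KEY AUTOINCREMENT, EventID INTEGER, CommandLine TEXT);")
conn.execute("CREATE INDEX idx_eventid ON logs (EventID);")
conn.execute("INSERT INTO logs (EventID, CommandLine) VALUES (1, 'powershell.exe -enc AAAA');")

rows = conn.execute("SELECT * FROM logs WHERE CommandLine REGEXP '(?i)-enc'").fetchall()
print([dict(row) for row in rows])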
""" - if self.dbConnection is None: + if self.db_connection is None: self.logger.error(f"{Fore.RED} [-] No connection to Db{Fore.RESET}") return [] try: - cursor = self.dbConnection.cursor() - self.logger.debug(f"Executing SELECT query: {query}") + cursor = self.db_connection.cursor() + self.logger.debug(f"EXECUTING SELECT QUERY: {query}") cursor.execute(query) rows = cursor.fetchall() # Convert rows to list of dictionaries @@ -700,17 +476,17 @@ def executeSelectQuery(self, query): self.logger.debug(f" [-] SQL query error: {e}") return [] - def loadDbInMemory(self, db): + def load_db_in_memory(self, db): """ In db only mode it is possible to restore an on disk Db to avoid EVTX extraction and flattening """ - dbfileConnection = self.createConnection(db) - dbfileConnection.backup(self.dbConnection) + dbfileConnection = self.create_connection(db) + dbfileConnection.backup(self.db_connection) dbfileConnection.close() def escape_identifier(self, identifier): """Escape SQL identifiers like table or column names.""" return identifier.replace("\"", "\"\"") - def insertData2Db(self, JSONLine): + def insert_data_to_db(self, JSONLine): """Build a parameterized INSERT INTO query and insert data into the database.""" columns = JSONLine.keys() columnsEscaped = ', '.join([self.escape_identifier(col) for col in columns]) @@ -725,31 +501,23 @@ def insertData2Db(self, JSONLine): values.append(value) insertStmt = f'INSERT INTO logs ({columnsEscaped}) VALUES ({placeholders})' try: - self.dbConnection.execute(insertStmt, values) + self.db_connection.execute(insertStmt, values) return True except Exception as e: self.logger.debug(f" [-] {e}") return False - def insertFlattenedJSON2Db(self, flattenedJSON, forwarder=None): - if forwarder: - forwarder.send(flattenedJSON, forwardAll=True) - for JSONLine in tqdm(flattenedJSON, colour="yellow"): - self.insertData2Db(JSONLine) - self.createIndex() - - def saveFlattenedJSON2File(self, flattenedJSON, outputFile): - with open(outputFile, 'w', encoding='utf-8') as file: - for JSONLine in tqdm(flattenedJSON, colour="yellow"): - file.write(f'{json.dumps(JSONLine).decode("utf-8")}\n') + def insert_flat_json_to_db(self, flattenedJSON): + for JSONLine in flattenedJSON: + self.insert_data_to_db(JSONLine) - def saveDbToDisk(self, dbFilename): + def save_db_to_disk(self, dbFilename): self.logger.info("[+] Saving working data to disk as a SQLite DB") onDiskDb = sqlite3.connect(dbFilename) - self.dbConnection.backup(onDiskDb) + self.db_connection.backup(onDiskDb) onDiskDb.close() - def executeRule(self, rule): + def execute_rule(self, rule): """ Execute a single Sigma rule against the database and return the results. 
""" @@ -770,9 +538,9 @@ def executeRule(self, rule): # Process each SQL query in the rule for SQLQuery in sigma_queries: - data = self.executeSelectQuery(SQLQuery) + data = self.execute_select_query(SQLQuery) if data: - if self.csvMode: + if self.csv_output: # Clean values for CSV output cleaned_rows = [ {k: str(v).replace("\n", "").replace("\r", "").replace("None", "") for k, v in dict(row).items()} @@ -790,7 +558,7 @@ def executeRule(self, rule): results = { "title": title, "id": rule_id, - "description": description.replace("\n", "").replace("\r", "") if self.csvMode else description, + "description": description.replace("\n", "").replace("\r", "") if self.csv_output else description, "sigmafile": filename, "sigma": sigma_queries, "rule_level": rule_level, @@ -798,180 +566,99 @@ def executeRule(self, rule): "count": len(filteredRows), "matches": filteredRows } + + if not self.csv_output: + json_bytes = json.dumps(results) + self.tmp_file.write(f"{json_bytes.decode('utf-8')}\n") + self.logger.debug(f'DETECTED: {title} - Matches: {len(filteredRows)} events') return results else: return {} - def loadRulesetFromFile(self, filename, ruleFilters): - try: - with open(filename, encoding='utf-8') as f: - self.ruleset = json.loads(f.read()) - self.applyRulesetFilters(ruleFilters) - except Exception as e: - self.logger.error(f"{Fore.RED} [-] Loading JSON ruleset failed, are you sure it is a valid JSON file ? : {e}{Fore.RESET}") - - def loadRulesetFromVar(self, ruleset, ruleFilters): + def load_ruleset_from_var(self, ruleset, ruleFilters): self.ruleset = ruleset - self.applyRulesetFilters(ruleFilters) + self.apply_ruleset_filters(ruleFilters) - def applyRulesetFilters(self, ruleFilters=None): + def apply_ruleset_filters(self, ruleFilters=None): # Remove empty rule and remove filtered rules self.ruleset = list(filter(None, self.ruleset)) if ruleFilters is not None: self.ruleset = [rule for rule in self.ruleset if not any(ruleFilter in rule["title"] for ruleFilter in ruleFilters)] - def ruleLevelPrintFormatter(self, level, orgFormat=Fore.RESET): - if level == "informational": - return f'{Fore.WHITE}{level}{orgFormat}' - if level == "low": - return f'{Fore.GREEN}{level}{orgFormat}' - if level == "medium": - return f'{Fore.YELLOW}{level}{orgFormat}' - if level == "high": - return f'{Fore.MAGENTA}{level}{orgFormat}' - if level == "critical": - return f'{Fore.RED}{level}{orgFormat}' - - def executeRuleset(self, outFile, writeMode='w', forwarder=None, showAll=False, - KeepResults=False, remote=None, stream=False, lastRuleset=False): + def execute_ruleset(self): """ Execute all rules in the ruleset and handle output. 
""" - csvWriter = None - first_json_output = True # To manage commas in JSON output - is_json_mode = not self.csvMode - - # Prepare output file handle if needed - fileHandle = None - if not self.noOutput: - # Open file in text mode since we will write decoded strings - fileHandle = open(outFile, writeMode, encoding='utf-8', newline='') - if is_json_mode and writeMode != 'a': - fileHandle.write('[') # Start JSON array - - # Iterate over rules in the ruleset - with tqdm(self.ruleset, colour="yellow") as ruleBar: - for rule in ruleBar: - # Show all rules if showAll is True - if showAll and "title" in rule: - rule_title = rule["title"] - rule_level = rule.get("level", "unknown") - formatted_level = self.ruleLevelPrintFormatter(rule_level, Fore.BLUE) - ruleBar.write(f'{Fore.BLUE} - {rule_title} [{formatted_level}]{Fore.RESET}') - - # Execute the rule - ruleResults = self.executeRule(rule) - if not ruleResults: - continue # No matches, skip to next rule - - # Apply limit if set - if self.limit != -1 and ruleResults["count"] > self.limit: - continue # Exceeds limit, skip this result - - # Write progress message - rule_title = ruleResults["title"] - rule_level = ruleResults.get("rule_level", "unknown") - formatted_level = self.ruleLevelPrintFormatter(rule_level, Fore.CYAN) - rule_count = ruleResults["count"] - ruleBar.write(f'{Fore.CYAN} - {rule_title} [{formatted_level}] : {rule_count} events{Fore.RESET}') - - # Store results if needed - if KeepResults or (remote and not stream): - self.fullResults.append(ruleResults) - - # Forward results if streaming - if stream and forwarder: - forwarder.send([ruleResults], False) - - # Handle output to file - if not self.noOutput: - if self.csvMode: - # Initialize CSV writer if not already done - if csvWriter is None: - fieldnames = ["rule_title", "rule_description", "rule_level", "rule_count"] + list(ruleResults["matches"][0].keys()) - csvWriter = csv.DictWriter(fileHandle, delimiter=self.delimiter, fieldnames=fieldnames) - csvWriter.writeheader() - # Write matches to CSV - for data in ruleResults["matches"]: - dictCSV = { - "rule_title": ruleResults["title"], - "rule_description": ruleResults["description"], - "rule_level": ruleResults["rule_level"], - "rule_count": ruleResults["count"], - **data - } - csvWriter.writerow(dictCSV) - else: - # Write results as JSON using orjson - try: - # Handle commas between JSON objects - if not first_json_output: - fileHandle.write(',\n') - else: - first_json_output = False - # Serialize ruleResults to JSON bytes with indentation - json_bytes = json.dumps(ruleResults, option=json.OPT_INDENT_2) - # Write the decoded JSON string to the file - fileHandle.write(json_bytes.decode('utf-8')) - except Exception as e: - self.logger.error(f"Error saving some results: {e}") - - # Close output file handle if needed - if not self.noOutput: - if is_json_mode and lastRuleset: - fileHandle.write(']') # Close JSON array - fileHandle.close() - - def run(self, EVTXJSONList, Insert2Db=True, saveToFile=False, forwarder=None, args_config=None): - self.logger.info("[+] Processing events") - flattener = JSONFlattener(configFile=self.config, timeAfter=self.timeAfter, timeBefore=self.timeBefore, timeField=self.timeField, hashes=self.hashes, args_config=args_config) - flattener.runAll(EVTXJSONList) - if saveToFile: - filename = f"flattened_events_{''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(4))}.json" - self.logger.info(f"[+] Saving flattened JSON to : {filename}") - 
self.saveFlattenedJSON2File(flattener.valuesStmt, filename) - if Insert2Db: - self.logger.info("[+] Creating model") - self.createDb(flattener.fieldStmt) - self.logger.info("[+] Inserting data") - self.insertFlattenedJSON2Db(flattener.valuesStmt, forwarder) - self.logger.info("[+] Cleaning unused objects") - else: - return flattener.keyDict - del flattener -class evtxExtractor: + for rule in self.ruleset: + + # Execute the rule + ruleResults = self.execute_rule(rule) + if not ruleResults: + continue # No matches, skip to next rule + + # Apply limit if set + if self.limit != -1 and ruleResults["count"] > self.limit: + continue # Exceeds limit, skip this result + + # Store if the rule has matched : title, level, count only + self.rule_results.append({ + "rule_title": ruleResults["title"], + "rule_level": ruleResults["rule_level"], + "rule_count": ruleResults["count"], + }) + + #self.fullResults.append(ruleResults) + + self.tmp_file.close() + +class evtx_extractor: + + def __init__(self, providedTmpDir=None, cores=None, use_external_binaries=True, binaries_path = None, encoding=None, input_format=None): + self.logger = logging.getLogger(__name__) - def __init__(self, logger=None, providedTmpDir=None, coreCount=None, useExternalBinaries=True, binPath = None, xmlLogs=False, sysmon4linux=False, auditdLogs=False, encoding=None, evtxtract=False, csvInput=False): - self.logger = logger or logging.getLogger(__name__) if Path(str(providedTmpDir)).is_dir(): - self.tmpDir = f"tmp-{self.randString()}" + self.tmpDir = f"tmp-{self.rand_string()}" self.logger.error(f"{Fore.RED} [-] Provided directory already exists using '{self.tmpDir}' instead{Fore.RESET}") else: - self.tmpDir = providedTmpDir or f"tmp-{self.randString()}" + self.tmpDir = providedTmpDir or f"tmp-{self.rand_string()}" os.mkdir(self.tmpDir) - self.cores = coreCount or os.cpu_count() - self.useExternalBinaries = useExternalBinaries - self.sysmon4linux = sysmon4linux - self.xmlLogs = xmlLogs - self.auditdLogs = auditdLogs - self.evtxtract = evtxtract - self.csvInput = csvInput + + self.cores = cores or os.cpu_count() + self.use_external_binaries = use_external_binaries + self.sysmon4linux = False + self.xmlLogs = False + self.csvInput = False + self.auditdLogs = False + self.evtxtract = False + + if input_format == "sysmon_linux_input": + self.sysmon4linux = True + elif input_format == "xml_input": + self.xmlLogs = True + elif input_format == "csv_input": + self.csvInput = True + elif input_format == "auditd_input": + self.auditdLogs = True + elif input_format == "evtxtract_input": + self.evtxtract = True + # Hardcoded hash list of evtx_dump binaries self.validHashList = ["bbcce464533e0364", "e642f5c23e156deb", "5a7a1005885a1a11"] + # Sysmon 4 Linux default encoding is ISO-8859-1, Auditd is UTF-8 - if not encoding and sysmon4linux: + if not encoding and self.sysmon4linux: self.encoding = "ISO-8859-1" - elif not encoding and (auditdLogs or evtxtract or xmlLogs): + elif not encoding and (self.auditdLogs or self.evtxtract or self.xmlLogs): self.encoding = "utf-8" else: self.encoding = encoding - self.evtxDumpCmd = self.getOSExternalTools(binPath) + self.evtx_dump_cmd = self.getOSExternalTools(binaries_path) - def randString(self): - return ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(8)) + def rand_string(self, length=8): + return ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(length)) def getOSExternalTools(self, binPath): """ Determine which binaries to 
run depending on host OS : 32Bits is NOT supported for now since evtx_dump is 64bits only""" @@ -990,12 +677,12 @@ def runUsingBindings(self, file): Convert EVTX to JSON using evtx_dump bindings (slower) Drop resulting JSON files in a tmp folder. """ - if not self.useExternalBinaries: + if not self.use_external_binaries: try: filepath = Path(file) filename = filepath.name parser = PyEvtxParser(str(filepath)) - with open(f"{self.tmpDir}/{str(filename)}-{self.randString()}.json", "w", encoding="utf-8") as f: + with open(f"{self.tmpDir}/{str(filename)}-{self.rand_string()}.json", "w", encoding="utf-8") as f: for record in parser.records_json(): f.write(f'{json.dumps(json.loads(record["data"])).decode("utf-8")}\n') except Exception as e: @@ -1165,7 +852,7 @@ def run(self, file): """ self.logger.debug(f"EXTRACTING : {file}") filename = Path(file).name - outputJSONFilename = f"{self.tmpDir}/{str(filename)}-{self.randString()}.json" + outputJSONFilename = f"{self.tmpDir}/{str(filename)}-{self.rand_string()}.json" # Auditd or Sysmon4Linux logs if self.sysmon4linux or self.auditdLogs: # Choose which log backend to use @@ -1201,14 +888,14 @@ def run(self, file): self.logger.error(f"{Fore.RED} [-] {e}{Fore.RESET}") # EVTX else: - if not self.useExternalBinaries or not Path(self.evtxDumpCmd).is_file(): + if not self.use_external_binaries or not Path(self.evtx_dump_cmd).is_file(): self.logger.debug(" [-] No external binaries args or evtx_dump is missing") self.runUsingBindings(file) else: # Check if the binary is valid does not avoid TOCTOU - if self.verifyBinHash(self.evtxDumpCmd): + if self.verifyBinHash(self.evtx_dump_cmd): try: - cmd = [self.evtxDumpCmd, "--no-confirm-overwrite", "-o", "jsonl", str(file), "-f", outputJSONFilename, "-t", str(self.cores)] + cmd = [self.evtx_dump_cmd, "--no-confirm-overwrite", "-o", "jsonl", str(file), "-f", outputJSONFilename, "-t", str(self.cores)] subprocess.call(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) except Exception as e: self.logger.error(f"{Fore.RED} [-] {e}{Fore.RESET}") @@ -1216,60 +903,60 @@ def run(self, file): def cleanup(self): shutil.rmtree(self.tmpDir) -class zircoGuiGenerator: +class gui_generator: """ Generate the mini GUI """ - def __init__(self, packageDir, templateFile, logger=None, outputFile = None, timeField = ""): - self.logger = logger or logging.getLogger(__name__) - self.templateFile = templateFile - self.tmpDir = f'tmp-zircogui-{self.randString()}' - self.tmpFile = f'data-{self.randString()}.js' - self.outputFile = outputFile or f'zircogui-output-{self.randString()}' - self.packageDir = packageDir - self.timeField = timeField - - def randString(self): - return ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(4)) + def __init__(self, package_dir, template_file, output_file = None, time_field = ""): + self.logger = logging.getLogger(__name__) + self.template_file = template_file + self.tmp_dir = f'tmp-zircogui-{self.rand_string()}' + self.tmp_file = f'data-{self.rand_string()}.js' + self.output_file = output_file or f'zircogui-output-{self.rand_string()}' + self.package_dir = package_dir + self.time_field = time_field + + def rand_string(self, length=4): + return ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(length)) def unzip(self): try: - shutil.unpack_archive(self.packageDir, self.tmpDir, "zip") + shutil.unpack_archive(self.package_dir, self.tmp_dir, "zip") except Exception as e: self.logger.error(f" [-] {e}") def zip(self): try: - 
shutil.make_archive(self.outputFile, 'zip', f"{self.tmpDir}/zircogui") + shutil.make_archive(self.output_file, 'zip', f"{self.tmp_dir}/zircogui") except Exception as e: self.logger.error(f" [-] {e}") - def generate(self, data): + def run(self, data): self.unzip() try: - self.logger.info(f"[+] Generating ZircoGui package to : {self.outputFile}.zip") - exportforzircoguiTmpl = templateEngine(self.logger, self.templateFile, self.tmpFile, self.timeField) - exportforzircoguiTmpl.generateFromTemplate(exportforzircoguiTmpl.template, exportforzircoguiTmpl.templateOutput, data) + self.logger.info(f"[+] Generating ZircoGui package to : {self.output_file}.zip") + exportforzircoguiTmpl = template_engine([self.template_file], [self.tmp_file], self.time_field) + exportforzircoguiTmpl.run(data) except Exception as e: self.logger.error(f" [-] {e}") - shutil.move(self.tmpFile, f'{self.tmpDir}/zircogui/data.js') + shutil.move(self.tmp_file, f'{self.tmp_dir}/zircogui/data.js') self.zip() - shutil.rmtree(self.tmpDir) + shutil.rmtree(self.tmp_dir) -class rulesUpdater: +class rules_updater: """ Download rulesets from the https://github.com/wagga40/Zircolite-Rules repository and update if necessary. """ - def __init__(self, logger=None): + def __init__(self): self.url = "https://github.com/wagga40/Zircolite-Rules/archive/refs/heads/main.zip" - self.logger = logger or logging.getLogger(__name__) - self.tempFile = f'tmp-rules-{self.randString()}.zip' - self.tmpDir = f'tmp-rules-{self.randString()}' + self.logger = logging.getLogger(__name__) + self.tempFile = f'tmp-rules-{self.rand_string()}.zip' + self.tmpDir = f'tmp-rules-{self.rand_string()}' self.updatedRulesets = [] - def randString(self): - return ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(4)) + def rand_string(self, length=4): + return ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(length)) def download(self): resp = requests.get(self.url, stream=True) @@ -1314,10 +1001,10 @@ def run(self): except Exception as e: self.logger.error(f" [-] {e}") -class rulesetHandler: +class ruleset_handler: - def __init__(self, logger=None, config=None, listPipelineOnly=False): - self.logger = logger or logging.getLogger(__name__) + def __init__(self, config=None, listPipelineOnly=False): + self.logger = logging.getLogger(__name__) self.saveRuleset = config.save_ruleset self.rulesetPathList = config.ruleset self.cores = config.cores or os.cpu_count() @@ -1351,11 +1038,28 @@ def __init__(self, logger=None, config=None, listPipelineOnly=False): self.Rulesets = self.rulesetParsing() # Combining Rulesets - if config.combine_rulesets: - self.Rulesets = [item for subRuleset in self.Rulesets if subRuleset for item in subRuleset] - self.Rulesets = [sorted(self.Rulesets, key=lambda d: d['level'])] # Sorting by level - - if all(not subRuleset for subRuleset in self.Rulesets): + #if config.combine_rulesets: + self.Rulesets = [item for subRuleset in self.Rulesets if subRuleset for item in subRuleset] + # Remove duplicates based on 'id' or 'title' + unique_rules = [] + seen_keys = set() + for rule in self.Rulesets: + # Use 'id' or 'title' as the unique key + rule_key = rule.get('id') or rule.get('title') + if rule_key and rule_key not in seen_keys: + seen_keys.add(rule_key) + unique_rules.append(rule) + + level_order = { + "critical": 1, + "high": 2, + "medium": 3, + "low": 4, + "informational": 5 + } + self.Rulesets = sorted(unique_rules, key=lambda d: level_order.get(d.get('level', 
'informational'), float('inf'))) # Sorting by level + + if len(self.Rulesets) == 0: self.logger.error(f"{Fore.RED} [-] No rules to execute !{Fore.RESET}") def isYAML(self, filepath): @@ -1460,23 +1164,17 @@ def rulesetParsing(self): def selectFiles(pathList, selectFilesList): if selectFilesList is not None: - return [evtx for evtx in [str(element) for element in list(pathList)] if any(fileFilters[0].lower() in evtx.lower() for fileFilters in selectFilesList)] + return [logs for logs in [str(element) for element in list(pathList)] if any(fileFilters[0].lower() in logs.lower() for fileFilters in selectFilesList)] return pathList def avoidFiles(pathList, avoidFilesList): if avoidFilesList is not None: - return [evtx for evtx in [str(element) for element in list(pathList)] if all(fileFilters[0].lower() not in evtx.lower() for fileFilters in avoidFilesList)] + return [logs for logs in [str(element) for element in list(pathList)] if all(fileFilters[0].lower() not in logs.lower() for fileFilters in avoidFilesList)] return pathList def ImportErrorHandler(config): importErrorList = [] - if forwardingDisabled: - importErrorList.append(f"{Fore.LIGHTYELLOW_EX} [i] Cannot import 'aiohttp' or 'urllib3' or 'requests', events forwarding is disabled{Fore.RESET}") - config.remote = None - if elasticForwardingDisabled: - importErrorList.append(f"{Fore.LIGHTYELLOW_EX} [i] Cannot import 'elasticsearch[async]', events forwarding to Elastic is disabled{Fore.RESET}") - config.index = None if updateDisabled: importErrorList.append(f"{Fore.LIGHTYELLOW_EX} [i] Cannot import 'requests', events update is disabled{Fore.RESET}") config.update_rules = False @@ -1502,17 +1200,97 @@ def ImportErrorHandler(config): return f"{Fore.LIGHTYELLOW_EX} [i] Import errors, certain functionalities may be disabled ('--imports' for details)\n Supplemental imports can be installed with 'requirements.full.txt'{Fore.RESET}", config, False +def runner(file, params): + """ Runner function to flatten events and apply rules with multiprocessing """ + + flattener = json_flattener( + configFile=params["config"], + timeAfter=params["events_after"], + timeBefore=params["events_before"], + timeField=params["timefield"], + hashes=params["hashes"], + input_format=params["input_format"] + ) + + flattener.run([file]) + + # Save the flattened JSON to a file + if params["keepflat"]: + flattener.save_to_file(f"flattened_events_{rand_string(4)}.json") + + # Initialize zircore + filename = os.path.basename(file) + if params["on_disk_db"]: + db_location = f"{filename}-{rand_string(4)}.db" + else: + db_location = f"file:{filename}?mode=memory&cache=shared" + + zircolite_core = zircore( + limit=params["limit"], + csv_output=params["csv_output"], + db_location=db_location, + delimiter=params["delimiter"], + tmp_directory=params["tmp_directory"], + tmp_directory_db=params["tmp_directory_db"] + ) + + zircolite_core.create_db(flattener.fieldStmt) + zircolite_core.insert_flat_json_to_db(flattener.valuesStmt) + del flattener + zircolite_core.create_index() + + ruleset = params["rulesets"] + zircolite_core.load_ruleset_from_var(ruleset=ruleset, ruleFilters=params["rulefilter"]) + zircolite_core.execute_ruleset() + zircolite_core.close() + + return zircolite_core.fullResults, zircolite_core.rule_results + +def runner_wrapper(args): + """ Helper function to allow TQDM to display a progress bar""" + return runner(*args) + +def format_rule_level(level, reset=Fore.RESET): + if level == "informational": + return f'{Fore.WHITE}{level}{reset}' + if level == "low": + return 
f'{Fore.GREEN}{level}{reset}' + if level == "medium": + return f'{Fore.YELLOW}{level}{reset}' + if level == "high": + return f'{Fore.MAGENTA}{level}{reset}' + if level == "critical": + return f'{Fore.RED}{level}{reset}' + return level # Default case + +def rand_string(length=10): + return ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(length)) + +def concatenate_files(input_dir, output_file, buffer_size=1024*1024): + input_files = list(Path(input_dir).rglob("*.json")) + with open(output_file, 'wb') as outfile: + for fname in input_files: + if not os.path.isfile(fname): + print(f"File not found: {fname}") + continue + with open(fname, 'rb') as infile: + while True: + buffer = infile.read(buffer_size) + if not buffer: + break + outfile.write(buffer) + ################################################################ # MAIN() ################################################################ def main(): - version = "2.30.0" + version = "2.50.0" # Init Args handling parser = argparse.ArgumentParser() # Input files and filtering/selection options logsInputArgs = parser.add_argument_group(f'{Fore.BLUE}INPUT FILES AND FILTERING/SELECTION OPTIONS{Fore.RESET}') - logsInputArgs.add_argument("-e", "--evtx", "--events", help="Log file or directory where log files are stored in supported format", type=str) + logsInputArgs.add_argument("-e", "--events", "--evtx", help="Log file or directory where log files are stored in supported format", type=str) logsInputArgs.add_argument("-s", "--select", help="Only files with filenames containing the provided string will be used. If there is/are exclusion(s) (--avoid) they will be handled after selection", action='append', nargs='+') logsInputArgs.add_argument("-a", "--avoid", help="Files files with filenames containing the provided string will NOT be used", action='append', nargs='+') logsInputArgs.add_argument("-f", "--fileext", help="Extension of the log files", type=str) @@ -1527,7 +1305,6 @@ def main(): eventFormatsArgs = parser.add_mutually_exclusive_group() eventFormatsArgs.add_argument("-j", "--json-input", "--jsononly", "--jsonline", "--jsonl", help="If logs files are already in JSON lines format ('jsonl' in evtx_dump) ", action='store_true') eventFormatsArgs.add_argument("--json-array-input", "--jsonarray", "--json-array", help="Source logs are in JSON but as an array", action='store_true') - eventFormatsArgs.add_argument("--db-input", "-D", "--dbonly", help="Directly use a previously saved database file, timerange filters will not work", action='store_true') eventFormatsArgs.add_argument("-S", "--sysmon-linux-input", "--sysmon4linux", "--sysmon-linux", help="Use this option if your log file is a Sysmon for linux log file, default file extension is '.log'", action='store_true') eventFormatsArgs.add_argument("-AU", "--auditd-input", "--auditd", help="Use this option if your log file is a Auditd log file, default file extension is '.log'", action='store_true') eventFormatsArgs.add_argument("-x", "--xml-input", "--xml", help="Use this option if your log file is a EVTX converted to XML log file, default file extension is '.xml'", action='store_true') @@ -1537,7 +1314,6 @@ def main(): rulesetsFormatsArgs = parser.add_argument_group(f'{Fore.BLUE}RULES AND RULESETS OPTIONS{Fore.RESET}') rulesetsFormatsArgs.add_argument("-r", "--ruleset", help="Sigma ruleset : JSON (Zircolite format) or YAML/Directory containing YAML files (Native Sigma format)", action='append', nargs='+') rulesetsFormatsArgs.add_argument("-nsc", 
"--no-sigma-conversion", help=argparse.SUPPRESS, action='store_true') - rulesetsFormatsArgs.add_argument("-cr", "--combine-rulesets", help="Merge all rulesets provided into one", action='store_true') rulesetsFormatsArgs.add_argument("-sr", "--save-ruleset", help="Save converted ruleset (Sigma to Zircolite format) to disk", action='store_true') rulesetsFormatsArgs.add_argument("-p", "--pipeline", help="For all the native Sigma rulesets (YAML) use this pipeline. Multiple can be used. Examples : 'sysmon', 'windows-logsources', 'windows-audit'. You can list installed pipelines with '--pipeline-list'.", action='append', nargs='+') rulesetsFormatsArgs.add_argument("-pl", "--pipeline-list", help="List installed pysigma pipelines", action='store_true') @@ -1554,7 +1330,7 @@ def main(): outputFormatsArgs.add_argument("-d", "--dbfile", help="Save all logs in a SQLite Db to the specified file", type=str) outputFormatsArgs.add_argument("-l", "--logfile", help="Log file name", default="zircolite.log", type=str) outputFormatsArgs.add_argument("--hashes", help="Add an xxhash64 of the original log event to each event", action='store_true') - outputFormatsArgs.add_argument("-L", "--limit", "--limit-results", help="Discard results (in output file or forwarded events) that are above the provided limit", type=int, default=-1) + outputFormatsArgs.add_argument("-L", "--limit", "--limit-results", help="Discard results that are above the provided limit", type=int, default=-1) # Advanced configuration options configFormatsArgs = parser.add_argument_group(f'{Fore.BLUE}ADVANCED CONFIGURATION OPTIONS{Fore.RESET}') configFormatsArgs.add_argument("-c", "--config", help="JSON File containing field mappings and exclusions", type=str, default="config/fieldMappings.json") @@ -1562,25 +1338,15 @@ def main(): configFormatsArgs.add_argument("--fieldlist", help="Get all events fields", action='store_true') configFormatsArgs.add_argument("--evtx_dump", help="Tell Zircolite to use this binary for EVTX conversion, on Linux and MacOS the path must be valid to launch the binary (eg. './evtx_dump' and not 'evtx_dump')", type=str, default=None) configFormatsArgs.add_argument("--noexternal", "--bindings", help="Don't use evtx_dump external binaries (slower)", action='store_true') - configFormatsArgs.add_argument("--cores", help="Specify how many cores you want to use, default is all cores, works only for EVTX extraction", type=str) + configFormatsArgs.add_argument("--cores", help="Specify how many cores you want to use, default is all cores, works only for EVTX extraction", default=os.cpu_count(), type=int) configFormatsArgs.add_argument("--debug", help="Activate debug logging", action='store_true') configFormatsArgs.add_argument("--imports", help="Show detailed module import errors", action='store_true') - configFormatsArgs.add_argument("--showall", help="Show all events, useful to check what rule takes takes time to execute", action='store_true') - configFormatsArgs.add_argument("-n", "--nolog", help="Don't create a log file or a result file (useful when forwarding)", action='store_true') - configFormatsArgs.add_argument("--ondiskdb", help="Use an on-disk database instead of the in-memory one (much slower !). Use if your system has limited RAM or if your dataset is very large and you cannot split it", type=str, default=":memory:") + configFormatsArgs.add_argument("--ondiskdb", "--on-disk-db", help="Use an on-disk database instead of the in-memory one (much slower !). 
Use if your system has limited RAM or if your dataset is very large and you cannot split it", action='store_true') configFormatsArgs.add_argument("-RE", "--remove-events", help="Zircolite will try to remove events/logs submitted if analysis is successful (use at your own risk)", action='store_true') configFormatsArgs.add_argument("-U", "--update-rules", help="Update rulesets located in the 'rules' directory", action='store_true') configFormatsArgs.add_argument("-v", "--version", help="Show Zircolite version", action='store_true') - # Forwarding options - forwardingFormatsArgs = parser.add_argument_group(f'{Fore.BLUE}FORWARDING OPTIONS{Fore.RESET}') - forwardingFormatsArgs.add_argument("--remote", help="Forward results to a HTTP/Splunk/Elasticsearch, please provide the full address e.g http[s]://address:port[/uri]", type=str) - forwardingFormatsArgs.add_argument("--token", help="Use this to provide Splunk HEC Token", type=str) - forwardingFormatsArgs.add_argument("--index", help="Use this to provide ES index", type=str) - forwardingFormatsArgs.add_argument("--eslogin", help="ES login", type=str, default="") - forwardingFormatsArgs.add_argument("--espass", help="ES password", type=str, default="") - forwardingFormatsArgs.add_argument("--stream", help="By default event forwarding is done at the end, this option activate forwarding events when detected", action="store_true") - forwardingFormatsArgs.add_argument("--forwardall", help="Forward all events", action="store_true") - forwardingFormatsArgs.add_argument("--timefield", help="Provide time field name for event forwarding, default is 'SystemTime'", default="SystemTime", action="store_true") + configFormatsArgs.add_argument("--timefield", help="Use this option to provide timestamp field name, default is 'SystemTime'", default="SystemTime", action="store_true") + # Templating and Mini GUI options templatingFormatsArgs = parser.add_argument_group(f'{Fore.BLUE}TEMPLATING AND MINI GUI OPTIONS{Fore.RESET}') templatingFormatsArgs.add_argument("--template", help="If a Jinja2 template is specified it will be used to generated output", type=str, action='append', nargs='+') @@ -1591,11 +1357,10 @@ def main(): signal.signal(signal.SIGINT, signal_handler) # Init logging - if args.nolog: - args.logfile = None - consoleLogger = initLogger(args.debug, args.logfile) + setup_logging(args.debug, args.logfile) + logger = logging.getLogger() - consoleLogger.info(""" + logger.info(""" ███████╗██╗██████╗ ██████╗ ██████╗ ██╗ ██╗████████╗███████╗ ╚══███╔╝██║██╔══██╗██╔════╝██╔═══██╗██║ ██║╚══██╔══╝██╔════╝ ███╔╝ ██║██████╔╝██║ ██║ ██║██║ ██║ ██║ █████╗ @@ -1607,22 +1372,22 @@ def main(): # Print version an quit if args.version: - consoleLogger.info(f"Zircolite - v{version}") + logger.info(f"Zircolite - v{version}") sys.exit(0) # Show imports status importsMessage, args, mustQuit = ImportErrorHandler(args) if importsMessage != "": - consoleLogger.info(f"[+] Modules imports status: \n{importsMessage}") + logger.info(f"[+] Modules imports status: \n{importsMessage}") else: - consoleLogger.info("[+] Modules imports status: OK") + logger.info("[+] Modules imports status: OK") if mustQuit: sys.exit(1) # Update rulesets if args.update_rules: - consoleLogger.info("[+] Updating rules") - updater = rulesUpdater(logger=consoleLogger) + logger.info("[+] Updating rules") + updater = rules_updater() updater.run() sys.exit(0) @@ -1633,188 +1398,242 @@ def main(): args.ruleset = ["rules/rules_windows_generic_pysigma.json"] # Loading rulesets - consoleLogger.info("[+] Loading 
ruleset(s)") - rulesetsManager = rulesetHandler(consoleLogger, args, args.pipeline_list) + logger.info("[+] Loading ruleset(s)") + rulesetsManager = ruleset_handler(args, args.pipeline_list) if args.pipeline_list: sys.exit(0) # Check mandatory CLI options - if not args.evtx: - consoleLogger.error(f"{Fore.RED} [-] No events source path provided. Use '-e ', '--events '{Fore.RESET}"), sys.exit(2) - if args.forwardall and args.db_input: - consoleLogger.error(f"{Fore.RED} [-] Can't forward all events in db only mode {Fore.RESET}"), sys.exit(2) + if not args.events: + logger.error(f"{Fore.RED} [-] No events source path provided. Use '-e ', '--events '{Fore.RESET}"), sys.exit(2) if args.csv and len(args.ruleset) > 1: - consoleLogger.error(f"{Fore.RED} [-] Since fields in results can change between rulesets, it is not possible to have CSV output when using multiple rulesets{Fore.RESET}"), sys.exit(2) - - consoleLogger.info("[+] Checking prerequisites") - - # Init Forwarding - forwarder = None - if args.remote is not None: - consoleLogger.info(f"{Fore.LIGHTRED_EX}[!] Forwarding is not tested anymore and will be removed in the future{Fore.RESET}") - forwarder = eventForwarder(remote=args.remote, timeField=args.timefield, token=args.token, logger=consoleLogger, index=args.index, login=args.eslogin, password=args.espass) - if not forwarder.networkCheck(): - quitOnError(f"{Fore.RED} [-] Remote host cannot be reached : {args.remote}{Fore.RESET}", consoleLogger) + logger.error(f"{Fore.RED} [-] Since fields in results can change between rulesets, it is not possible to have CSV output when using multiple rulesets{Fore.RESET}"), sys.exit(2) + logger.info("[+] Checking prerequisites") + # Checking provided timestamps try: - eventsAfter = time.strptime(args.after, '%Y-%m-%dT%H:%M:%S') - eventsBefore = time.strptime(args.before, '%Y-%m-%dT%H:%M:%S') + events_after = time.strptime(args.after, '%Y-%m-%dT%H:%M:%S') + events_before = time.strptime(args.before, '%Y-%m-%dT%H:%M:%S') except Exception: - quitOnError(f"{Fore.RED} [-] Wrong timestamp format. Please use 'AAAA-MM-DDTHH:MM:SS'", consoleLogger) + quitOnError(f"{Fore.RED} [-] Wrong timestamp format. Please use 'AAAA-MM-DDTHH:MM:SS'") # Check templates args readyForTemplating = False if (args.template is not None): if args.csv: - quitOnError(f"{Fore.RED} [-] You cannot use templates in CSV mode{Fore.RESET}", consoleLogger) + quitOnError(f"{Fore.RED} [-] You cannot use templates in CSV mode{Fore.RESET}") if (args.templateOutput is None) or (len(args.template) != len(args.templateOutput)): - quitOnError(f"{Fore.RED} [-] Number of templates output must match number of templates{Fore.RESET}", consoleLogger) + quitOnError(f"{Fore.RED} [-] Number of templates output must match number of templates{Fore.RESET}") for template in args.template: - checkIfExists(template[0], f"{Fore.RED} [-] Cannot find template : {template[0]}. DEfault templates are available here : https://github.com/wagga40/Zircolite/tree/master/templates{Fore.RESET}", consoleLogger) + checkIfExists(template[0], f"{Fore.RED} [-] Cannot find template : {template[0]}. 
DEfault templates are available here : https://github.com/wagga40/Zircolite/tree/master/templates{Fore.RESET}") readyForTemplating = True # Change output filename in CSV mode if args.csv: readyForTemplating = False - if args.outfile == "detected_events.json": + # If outfile is not provided, default to 'detected_events.csv' instead of 'detected_events.json' + if args.outfile == "detected_events.json": args.outfile = "detected_events.csv" - # If on-disk DB already exists, quit. - if args.ondiskdb != ":memory:" and (Path(args.ondiskdb).is_file()): - quitOnError(f"{Fore.RED} [-] On-disk database already exists{Fore.RESET}", consoleLogger) - # Start time counting start_time = time.time() - # Initialize zirCore - zircoliteCore = zirCore(args.config, logger=consoleLogger, noOutput=args.nolog, timeAfter=eventsAfter, timeBefore=eventsBefore, limit=args.limit, csvMode=args.csv, timeField=args.timefield, hashes=args.hashes, dbLocation=args.ondiskdb, delimiter=args.csv_delimiter) - - # If we are not working directly with the db - if not args.db_input: - # If we are working with json we change the file extension if it is not user-provided - if not args.fileext: - if args.json_input or args.json_array_input: - args.fileext = "json" - elif (args.sysmon_linux_input or args.auditd_input): - args.fileext = "log" - elif args.xml_input: - args.fileext = "xml" - elif args.csv_input: - args.fileext = "csv" - else: - args.fileext = "evtx" - - LogPath = Path(args.evtx) - if LogPath.is_dir(): - # Log recursive search in given directory with given file extension or pattern - pattern = f"*.{args.fileext}" - # If a Glob pattern is provided - if args.file_pattern not in [None, ""]: - pattern = args.file_pattern - fnGlob = LogPath.rglob - - if args.no_recursion: - fnGlob = LogPath.glob - LogList = list(fnGlob(pattern)) - elif LogPath.is_file(): - LogList = [LogPath] - else: - quitOnError(f"{Fore.RED} [-] Unable to find events from submitted path{Fore.RESET}", consoleLogger) - - # Applying file filters in this order : "select" than "avoid" - FileList = avoidFiles(selectFiles(LogList, args.select), args.avoid) - if len(FileList) <= 0: - quitOnError(f"{Fore.RED} [-] No file found. 
Please verify filters, directory or the extension with '--fileext' or '--file-pattern'{Fore.RESET}", consoleLogger) - - if not args.json_input and not args.json_array_input: - # Init EVTX extractor object - extractor = evtxExtractor(logger=consoleLogger, providedTmpDir=args.tmpdir, coreCount=args.cores, useExternalBinaries=(not args.noexternal), binPath=args.evtx_dump, xmlLogs=args.xml_input, sysmon4linux=args.sysmon_linux_input, auditdLogs=args.auditd_input, evtxtract=args.evtxtract_input, encoding=args.logs_encoding, csvInput=args.csv_input) - consoleLogger.info(f"[+] Extracting events Using '{extractor.tmpDir}' directory ") - for evtx in tqdm(FileList, colour="yellow"): - extractor.run(evtx) - # Set the path for the next step - LogJSONList = list(Path(extractor.tmpDir).rglob("*.json")) + # If we are working with json file extension is changed if it is not user-provided + if not args.fileext: + if args.json_input or args.json_array_input: + args.fileext = "json" + elif (args.sysmon_linux_input or args.auditd_input): + args.fileext = "log" + elif args.xml_input: + args.fileext = "xml" + elif args.csv_input: + args.fileext = "csv" else: - LogJSONList = FileList + args.fileext = "evtx" + + LogPath = Path(args.events) + if LogPath.is_dir(): + # Log recursive search in given directory with given file extension or pattern + pattern = f"*.{args.fileext}" + # If a Glob pattern is provided + if args.file_pattern not in [None, ""]: + pattern = args.file_pattern + fnGlob = LogPath.rglob + # If directory recursion is not wanted + if args.no_recursion: + fnGlob = LogPath.glob + LogList = list(fnGlob(pattern)) + elif LogPath.is_file(): + LogList = [LogPath] + else: + quitOnError(f"{Fore.RED} [-] Unable to find events from submitted path{Fore.RESET}") + + # Applying file filters in this order : "select" than "avoid" + FileList = avoidFiles(selectFiles(LogList, args.select), args.avoid) + if len(FileList) <= 0: + quitOnError(f"{Fore.RED} [-] No file found. 
Please verify filters, directory or the extension with '--fileext' or '--file-pattern'{Fore.RESET}") + + args_dict = vars(args) + # Find the chosen input format + chosen_input = next((key for key, value in args_dict.items() if "_input" in key and value), None) + + if not args.json_input and not args.json_array_input: + # Init EVTX extractor object + extractor = evtx_extractor(providedTmpDir=args.tmpdir, cores=args.cores, use_external_binaries=(not args.noexternal), binaries_path=args.evtx_dump, encoding=args.logs_encoding, input_format=chosen_input) + logger.info(f"[+] Extracting events using '{extractor.tmpDir}' directory ") + for evtx in tqdm(FileList, colour="yellow"): + extractor.run(evtx) + # Set the path for the next step + LogJSONList = list(Path(extractor.tmpDir).rglob("*.json")) + else: + LogJSONList = FileList - checkIfExists(args.config, f"{Fore.RED} [-] Cannot find mapping file, you can get the default one here : https://github.com/wagga40/Zircolite/blob/master/config/fieldMappings.json {Fore.RESET}", consoleLogger) - if LogJSONList == []: - quitOnError(f"{Fore.RED} [-] No files containing logs found.{Fore.RESET}", consoleLogger) + checkIfExists(args.config, f"{Fore.RED} [-] Cannot find mapping file, you can get the default one here : https://github.com/wagga40/Zircolite/blob/master/config/fieldMappings.json {Fore.RESET}") + if LogJSONList == []: + quitOnError(f"{Fore.RED} [-] No files containing logs found.{Fore.RESET}") - # Print field list and exit - if args.fieldlist: - fields = zircoliteCore.run(LogJSONList, Insert2Db=False, args_config=args) - zircoliteCore.close() - if not args.json_input and not args.json_array_input and not args.keeptmp: - extractor.cleanup() - [print(sortedField) for sortedField in sorted([field for field in fields.values()])] - sys.exit(0) - - # Flatten and insert to Db - if args.forwardall: - zircoliteCore.run(LogJSONList, saveToFile=args.keepflat, forwarder=forwarder, args_config=args) - else: - zircoliteCore.run(LogJSONList, saveToFile=args.keepflat, args_config=args) - # Unload In memory DB to disk. 
Done here to allow debug in case of ruleset execution error - if args.dbfile is not None: - zircoliteCore.saveDbToDisk(args.dbfile) - else: - consoleLogger.info(f"[+] Creating model from disk : {args.evtx}") - zircoliteCore.loadDbInMemory(args.evtx) + # TODO : Add option for already flattened event + logger.info(f"[+] Processing events and applying {Fore.CYAN}{len(rulesetsManager.Rulesets)}{Fore.RESET} rules") # flatten array of "rulefilter" arguments if args.rulefilter: args.rulefilter = [item for sublist in args.rulefilter for item in sublist] - writeMode = "w" - for ruleset in rulesetsManager.Rulesets: - zircoliteCore.loadRulesetFromVar(ruleset=ruleset, ruleFilters=args.rulefilter) - if args.limit > 0: - consoleLogger.info(f"[+] Limited mode : detections with more than {args.limit} events will be discarded") - consoleLogger.info(f"[+] Executing ruleset - {len(zircoliteCore.ruleset)} rules") - zircoliteCore.executeRuleset(args.outfile, writeMode=writeMode, forwarder=forwarder, showAll=args.showall, KeepResults=(readyForTemplating or args.package), remote=args.remote, stream=args.stream, lastRuleset=(ruleset == rulesetsManager.Rulesets[-1])) - writeMode = "a" # Next iterations will append to results file - - consoleLogger.info(f"[+] Results written in : {args.outfile}") - - # Forward events - if args.remote is not None and not args.stream: # If not in stream mode - consoleLogger.info(f"[+] Forwarding to : {args.remote}") - forwarder.send(zircoliteCore.fullResults, False) - if args.remote is not None and args.stream: - consoleLogger.info(f"[+] Forwarded to : {args.remote}") + tmp_directory = f'tmp-output-{rand_string()}' + tmp_directory_db = f'tmp-db-{rand_string()}' if args.ondiskdb else "" + + # Pack the parameters for multiprocessing + param_list = { + "config": args.config, + "events_after": events_after, + "events_before": events_before, + "timefield": args.timefield, + "hashes": args.hashes, + "input_format": chosen_input, + "csv_output": args.csv, + "limit": args.limit, + "on_disk_db": args.ondiskdb, + "delimiter": args.csv_delimiter, + "keepflat": args.keepflat, + "rulefilter": args.rulefilter, + "rulesets": rulesetsManager.Rulesets, + "tmp_directory": tmp_directory, + "tmp_directory_db": tmp_directory_db + } + + params_map = [] + for file in LogJSONList: + params_map.append((file, param_list)) + + all_full_results = [] + all_rule_results = [] + # Perform the JSON flattening and the detection process with multiprocessing + pool = mp.Pool(args.cores) + with tqdm(total=len(params_map), colour='yellow') as pbar: + for full_results, rule_results in pool.imap_unordered(runner_wrapper, params_map): + all_full_results.extend(full_results) + all_rule_results.extend(rule_results) + pbar.update() + pool.close() + pool.join() + + # Merge the rule results from all processes + aggregated_rules = {} + for rule in all_rule_results: + key = rule['rule_title'] + if key in aggregated_rules: + aggregated_rules[key]['rule_count'] += rule['rule_count'] + else: + aggregated_rules[key] = rule.copy() + + level_order = { + "critical": 1, + "high": 2, + "medium": 3, + "low": 4, + "informational": 5 + } + + aggregated_rules = sorted(aggregated_rules.values(), key=lambda d: level_order.get(d.get('rule_level', 'informational'), float('inf'))) # Sort by level + for rule in aggregated_rules: + rule_title = rule['rule_title'] + rule_level = rule['rule_level'] + rule_count = rule['rule_count'] + formatted_level = format_rule_level(rule_level, Fore.CYAN) + logger.info(f'{Fore.CYAN} - {rule_title} 
[{formatted_level}] : {rule_count} events{Fore.RESET}') + + logger.info(f"[+] Writing results to the output file : {args.outfile}") + + concatenate_files(tmp_directory, args.outfile) + #if not keep_tmp_output: + shutil.rmtree(tmp_directory) + #if not keep_tmp_db: + if args.ondiskdb: + shutil.rmtree(tmp_directory_db) + + # if not args.csv: + # with open(args.outfile, 'w', encoding='utf-8') as outfile: + # # Serialize the list of rule results to JSON with indentation + # json_bytes = json.dumps(all_full_results, option=json.OPT_INDENT_2) + # # Write the decoded JSON string to the file + # outfile.write(json_bytes.decode('utf-8')) + # else: + # # For CSV mode, collect all field names + # fieldnames_set = set(["rule_title", "rule_description", "rule_level", "rule_count"]) + + # for rule_result in all_full_results: + # matches = rule_result['matches'] + # if matches: + # for data in matches: + # fieldnames_set.update(data.keys()) + + # # For CSV mode, write matches to CSV + # with open(args.outfile, 'w', encoding='utf-8', newline='') as outfile: + # writer = csv.DictWriter(outfile, delimiter=args.csv_delimiter, fieldnames=fieldnames_set) + # writer.writeheader() + # for rule_result in all_full_results: + # matches = rule_result['matches'] + # if matches: + # for data in matches: + # dictCSV = { + # "rule_title": rule_result["title"], + # "rule_description": rule_result["description"], + # "rule_level": rule_result["rule_level"], + # "rule_count": rule_result["count"], + # **data + # } + # writer.writerow(dictCSV) # Templating - if readyForTemplating and zircoliteCore.fullResults != []: - templateGenerator = templateEngine(consoleLogger, args.template, args.templateOutput, args.timefield) - templateGenerator.run(zircoliteCore.fullResults) + if readyForTemplating and all_full_results != []: + template_generator = template_engine(args.template, args.templateOutput, args.timefield) + template_generator.run(all_full_results) # Generate ZircoGui package - if args.package and zircoliteCore.fullResults != []: - if Path("templates/exportForZircoGui.tmpl").is_file() and Path("gui/zircogui.zip").is_file(): - packager = zircoGuiGenerator("gui/zircogui.zip", "templates/exportForZircoGui.tmpl", consoleLogger, None, args.timefield) - packager.generate(zircoliteCore.fullResults) + if args.package and all_full_results != []: + if Path("templates/exportForZircoGui.tmpl").is_file() and Path("gui/zircogui.zip").is_file(): + packager = gui_generator("gui/zircogui.zip", "templates/exportForZircoGui.tmpl", None, args.timefield) + packager.run(all_full_results) # Remove working directory containing logs as json if not args.keeptmp: - consoleLogger.info("[+] Cleaning") + logger.info("[+] Cleaning") try: - if not args.json_input and not args.json_array_input and not args.db_input: + if not args.json_input and not args.json_array_input: extractor.cleanup() except OSError as e: - consoleLogger.error(f"{Fore.RED} [-] Error during cleanup {e}{Fore.RESET}") + logger.error(f"{Fore.RED} [-] Error during cleanup {e}{Fore.RESET}") # Remove files submitted for analysis if args.remove_events: - for EVTX in LogList: + for logs in LogList: try: - os.remove(EVTX) + os.remove(logs) except OSError as e: - consoleLogger.error(f"{Fore.RED} [-] Cannot remove files {e}{Fore.RESET}") + logger.error(f"{Fore.RED} [-] Cannot remove files {e}{Fore.RESET}") - zircoliteCore.close() - consoleLogger.info(f"\nFinished in {int((time.time() - start_time))} seconds") + logger.info(f"\nFinished in {int((time.time() - start_time))} seconds") if __name__ 
== "__main__": main() \ No newline at end of file