From a4351449d1417465a6b5950bf6d8a6b2148763dd Mon Sep 17 00:00:00 2001 From: Ian Norris Date: Sun, 30 Jul 2023 07:46:34 -0700 Subject: [PATCH] [32] allow access to header inspired from https://github.com/ruby-docx/docx/pull/73 but stripped down to just the header to see if that might be more amenable to get in. Also because of the TODO note in the update function, only supports reading these files, not updating them. --- lib/docx/document.rb | 12 +++++++++++- spec/docx/document_spec.rb | 12 ++++++++++++ spec/fixtures/multi_doc.docx | Bin 0 -> 6282 bytes 3 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 spec/fixtures/multi_doc.docx diff --git a/lib/docx/document.rb b/lib/docx/document.rb index f4ae3f0..566fad6 100755 --- a/lib/docx/document.rb +++ b/lib/docx/document.rb @@ -18,7 +18,7 @@ module Docx # puts d.text # end class Document - attr_reader :xml, :doc, :zip, :styles + attr_reader :xml, :doc, :zip, :styles, :headers def initialize(path_or_io, options = {}) @replace = {} @@ -37,6 +37,7 @@ def initialize(path_or_io, options = {}) @document_xml = document.get_input_stream.read @doc = Nokogiri::XML(@document_xml) load_styles + load_headers yield(self) if block_given? ensure @zip.close unless @zip.nil? @@ -170,6 +171,15 @@ def replace_entry(entry_path, file_contents) private + def load_headers + header_files = @zip.glob("word/header*.xml").map{|h| h.name} + filename_and_contents_pairs = header_files.map do |file| + simple_file_name = file.sub(/^word\//, "").sub(/\.xml$/, "") + [simple_file_name, Nokogiri::XML(@zip.read(file))] + end + @headers = Hash[filename_and_contents_pairs] + end + def load_styles @styles_xml = @zip.read('word/styles.xml') @styles = Nokogiri::XML(@styles_xml) diff --git a/spec/docx/document_spec.rb b/spec/docx/document_spec.rb index 00bd63f..29a5e7d 100755 --- a/spec/docx/document_spec.rb +++ b/spec/docx/document_spec.rb @@ -54,6 +54,18 @@ end end + describe 'read headers' do + before do + @doc = Docx::Document.open(@fixtures_path + '/multi_doc.docx') + end + + it 'can extract headers' do + expect(@doc.headers).to_not be_nil + expect(@doc.headers.keys).to eq ["header1"] + expect(@doc.headers["header1"].text).to eq "Hello from the header." + end + end + describe 'read tables' do before do @doc = Docx::Document.open(@fixtures_path + '/tables.docx') diff --git a/spec/fixtures/multi_doc.docx b/spec/fixtures/multi_doc.docx new file mode 100644 index 0000000000000000000000000000000000000000..008d06eadeff3b85691cf1d98670c7273e4fb0e0 GIT binary patch literal 6282 zcmbVQ1yodBy9NfNduXLW5G16fq#I$79J*uZZb@lDx}>{PK*=GcL6DG^5D*FJx`Y4s zDZ1|OUF*KHX4dR8=Q(@!+wY?!2S5bEp`f6^K~t5U!rc;d*mpe#6Nn=#%gtxmD+M`# z2uAR?O9HW-IS5lo@6Zvos4ev&a$xfI&d3llheg@-ya-y8QdnlsJNJX#yJOZvSDA`w z;o&0CVha&_w$UawOSf4;Rto)b)+TkK=nt~V+7F871G5eAWL}1J$!`~NvfCG?Qkgyl z^#~YJ*A&Fq&($9%`98|bV4s+@>;=C?s5_Uw;NkWltfmd5H`UG8uBhQAz~^F6<6hTL)v-|ER^-*2vk~#Kwum%^LC_6~*>j zLfLR6j!)6p&$V1;fH0+iBP(UxydnZ#22j^@j2erVyR>x~%i z9P9utgfM30V99Ayw)E%2XD8q9oLj2+EHW_aqFyc{gxkvFjNmuA4h3u~_eX2KpnqLE zX(gk47R6uGV*J7@b)A1Fkz8rcy;_B)Eh7a%!`u0U!gjVkpd&z)C^DFw(k*ivIe(*1J(%Lo@NCJ(yyW0Dn^*?!T&YG;wmW zurYJIQJO@Q999P#hWK$B^*%XX^oeTjb_1N<^AXa*xG7}_gPli5*y_e$#JtV5?5IFh zZ`)@BAxHh_$2-VP?z|aT$V4V)@Kh+IQ%0dWTs)Jqu025G zZjz#ErbJ+Ew?hq0q7c!JQm&#(E&93|caUFZSt7-naW}oum+0C}=6h2V8+)YW%1@UZ zo(YkPKH=7C-^K-=~SVMvt%}rQ4hCixeTlq9o@EXm4S+=L~OkI~S;Q^#BK1#8K=;uK71`*_1H=EkBuD5r_V&D1f zg14#puaI6vVW)V(-&i8%(@>@?JRICA;@>Rs)7x%`=!Dv=P3jXI?{Dgv-cR9rB0<2k z#6neaQi;fOcy#8<&3R7#Df`xznmZA((gcxt4Dk;uT!#meIiPTk-Uqp=wYZCUTzqoF z_=Kn`GqNf4X2Ku!hRZcR$E1caS&0w1bkw9y6l__mO;?Y1=!;`?n{k`nOBzTl8~K!B zq3M5E-(54dIWSqr+*MiG4se*HZeor?E^yomq$;h?e`4XV?`^+ZZ9j5S9D! z&Dd#-^}(AHvNFlHU(*V~=u|KmN7WJ~&_y1P4swfW`w16zMx!4$wuckq`Ug)Kx6F53apcm2Z9ECp3 z9$U32)}7q8_R8{gN}l8DMqmZbp@;*KoFNj&q6Q^88gp0c@JIbLJ8>cP)4?Tab1zB> z!T|{Fl&>e-;dKpWG1Rxz&O%xQ9z4!N40`|?Ga`(rga@ z5L|Q8yOa^g!de8#npv zI#dHfmsaZR(&?g`J>fc^ z_IE6J@;tJ=KQlQdUm)Bl(@{$4H?ruB_B}J8HO=!PkJWg(X zj*%~@Man zyZ60~$>}XC0?$|1gtk2ui#=b)Z>93fdXO<2w`)cO11JvJ~W#TKD zsnV8+=Zr#aOe@OYsHlx05k4+2c%$#o6getebok-GxT%9P2GG(if9}9yp8Y*X!voAx zUAM@X05StDBee+cnS#zo z^EZu<=tIXER*NO=@3d|9w! zF>)d}QEhNVVb;h`pAf237cRxC=d2J;+s;m%lc*WsQ>jK^8P5(GP4sM9?96R3sWy=| z>DOWNmJdG2rB#?aZ!YeAde+P}df2%3Wcazyz?WSw#p{a`)V41pFCn?4$Bd=qG4}fP z)?S??Y*2VO=z^~Zn*gaMN%<$DKD!NBJz~k0_@(#W?CA!dWzA56@ySZMy<@IttC-TM zh{4Dy_Ik8XeD%j-IGP2i_ImPW4Gwu6fqu@}B*zk0wlwOG155|r?|$wKJ)$5!(&CBm z)fM)St)L)LL{Et}TX?^<)E0A&JW$sb#`U}n&5`wa%!XPm6^a&4Bo<9#nYk3DxybiN z5fOFS_rx-TMyuFfjfJ@&_!Ek&boBBw%`oBAQl4Q+x@e9{;+Pva5k(+YXL|!+KLgSk zr8;6)QC2WMx@fMQwinl?X$i@JiH4#`%%Qe^!cTR0H9XXd7WKdAthWT@x!zmGN|<-1 z=GXA}xVH(rXqI<#(MvJz$|;PUqj_qN=iMZOmBGIJ=h)iC)!MdKge_@=8@)`k2Z}C&M%{nW;jGj7ul#*lVSe|9!>#jYoFp z<9tVYk?=V3puE&=n>o55vE{BA`slGc&W7%Ez#1l1M?~}>u)Pv0P-3tPxRKiyr#6&K zi*a8WPSQMs368+=3cuej4&uinYfdyTHEI+h`e?murkMLk-0FIB;emVcR)W)4ILCmt zpgJF9*(h&zbk~_9F;T9q`?@ zEkOEPl<5^o(vlmLYI{x@ZH(?=EaMzJuOH_&#}rYZV;Z7c{1#qfGYX?05GAZZHrj9z z(L0n3U8>XWOg+gPaAO?uA#_STSYhq@Ya92|IOZk>#(yymLEQhxIG65+;-Sv+>+=4D zjDl!#JkxVca>aK;iL6qe$dY{sU5!^7D1SDH%M7&X5}{K9auKOy#v-U_@tM71OB07D z&hjpYdp2*YS@K_yS026^|^6VZ1 z-;rwRq?!EPhr|u*X#}P+>vuJeb5?C>X4(1hzY^?2Xm+{tzhVN1!*kH*TMQ)!YWKNk z&_#_DIlB_&ONf2A^B0o#csvSCzD7Y8Yl=>mxYZN+&nYUdeSuGPz?4CE@)qUX$w zQp~x8>tXLKHu9`jk!|()NN)o`^{+RNJ&HT|qnwwQ`K?yrs^%kKmuc4O_U=0_gUaVWF66pWZH#|yNRDj-+T~u zt}9i$Vo&HuG3VCD6%A$tNr`oXdiT$A^CSxQ;;Jz=`!&rTIfvR$=)9Y& znbB?Rd1d0jhd@~SdCORp?xXNaGAEDw?Mp6s&mDufbj?d0)L&vNR3j>+dwmM0vJ(+w zCD``75In-`IcGZin1b7vhxKrA?0qX!-u*fmfj&QX!=icG7UoR*hKd^17+ISXyg=;i zT?w(slo#`QZ3=ySb%Tz8c^@s6S^gxz7MfMDjlV6k&9Y)2sRK^xSMDptDzdUjQ-mEd z>8h;hbn}CT%wg`u<6R$`nhC{YVNh{?9b$=`)~T#P$Gb=nT0qQWS7)6pH>~f_Z%-=1 z=RNf*+4#%l2>3bz$!fL3+s(eatyV6vz|j!RG0Aj|kmaSJo|QNWQ(Rf9hm<%yw~Vme zowHpz&rz6rWJHGPURjo?KsG^tRIo*Cu!=FlK(~0FWL;Iu7(cdqy+lZ?gH2uo9U1C4 z#~k$HCQA-L-j(W03W*TnDb&c3qFI*iXtY37X|R@t_u2@WqKfY-Dtb+os-&#oO=p1d zeHhv7YUbnsk9INRe%KoaPVna=uGNbRtBQwoSnKyfldIzDgObs%gp*Ni*Y}|oCHocr^+~Ij42oeJ%HK z2)hiDsCdCRcTYs;{H`J!9#3D3HX46CPaZzYzWZ{Kl%5NOj+oP0m5>m1$Oa#Z=IWz< zB2#3q|DlPM%2q7L;9%A-K?Lu!$i<1`w5cmEdDwPR-aNCt)5{YCQ{_ouTcWaTf4%uw z+EJYowGkY&IiBir)#1t;c;Ni28E?9b!Ow?J;bSy5+T0=7T7iR^J z(_WuXKzx);YLT~$^UOkM8N;3&$V%%BjP!fpE@bz*=OZyOVRASjzF0d|=*C=)4>ALj zROq{gvSQbW ztq(KQz7|$}7%b+^6yWC)ZA4lV5pRF)vg)zse6ic0o9Pwz>TOakdvbYSAfD*`FP_1^ z&9)eC8>-!*uSG(Tcy03bmV)n{(%{f zc4>^&Q_}Dgs+ZuIR}UK#%G~;hghK&&u`@_C)YedcPb$4Ggp)%%H)viBE@Z%H zUw>sk)J}w{H=nvw${^G5g6>K9J87o=9asO((xly%36S{v9(6VE>{U+?$i}L2l)R$w z`QA4fTb4pWMZQK(#0P~J3nM7(AV+o)-yTQjGCHTM*i>vq1nSdKOst`1Tuc9<&UZlv zW*eoRVqTCdcMhlflIF9iP?o{F2#muNxivz|654@mO`V>!E4n@aQJuh6jvb1y28$&; znySM~)Tc8RcI139R|7A&7zql~MBPx7Df$vv(%8aZT;DHh#Y3iB@R?b0y@?bs8`hLG z%n&O`H)#qU0SNbteZ9>>-mtH~%PlVUPvC71?uMlNC3vu{{zh5;`J~%S*G;(o5(Sv& z{CLuB2JcV!ZF=Tr!2Kocu)gvK{5Nv!PyFrC`Se5 z-Jjsw`S