diff --git a/lib/docx/document.rb b/lib/docx/document.rb
index f4ae3f0..566fad6 100755
--- a/lib/docx/document.rb
+++ b/lib/docx/document.rb
@@ -18,7 +18,7 @@ module Docx
   #     puts d.text
   #   end
   class Document
-    attr_reader :xml, :doc, :zip, :styles
+    attr_reader :xml, :doc, :zip, :styles, :headers
 
     def initialize(path_or_io, options = {})
       @replace = {}
@@ -37,6 +37,7 @@ def initialize(path_or_io, options = {})
       @document_xml = document.get_input_stream.read
       @doc = Nokogiri::XML(@document_xml)
       load_styles
+      load_headers
       yield(self) if block_given?
     ensure
       @zip.close unless @zip.nil?
@@ -170,6 +171,15 @@ def replace_entry(entry_path, file_contents)
 
     private
 
+    # Parses every archive entry matching word/header*.xml with Nokogiri and
+    # stores the results in @headers, keyed by the entry's file name without
+    # its directory or ".xml" extension (e.g. "header1").
+    def load_headers
+      @headers = @zip.glob('word/header*.xml').each_with_object({}) do |entry, headers|
+        headers[File.basename(entry.name, '.xml')] = Nokogiri::XML(@zip.read(entry.name))
+      end
+    end
+
     def load_styles
       @styles_xml = @zip.read('word/styles.xml')
       @styles = Nokogiri::XML(@styles_xml)
diff --git a/spec/docx/document_spec.rb b/spec/docx/document_spec.rb
index 00bd63f..29a5e7d 100755
--- a/spec/docx/document_spec.rb
+++ b/spec/docx/document_spec.rb
@@ -54,6 +54,18 @@
     end
   end
 
+  describe 'read headers' do
+    before do
+      @doc = Docx::Document.open(@fixtures_path + '/multi_doc.docx')
+    end
+
+    it 'can extract headers' do
+      expect(@doc.headers).to_not be_nil
+      expect(@doc.headers.keys).to eq ["header1"]
+      expect(@doc.headers["header1"].text).to eq "Hello from the header."
+    end
+  end
+
   describe 'read tables' do
     before do
       @doc = Docx::Document.open(@fixtures_path + '/tables.docx')
diff --git a/spec/fixtures/multi_doc.docx b/spec/fixtures/multi_doc.docx
new file mode 100644
index 0000000..008d06e
Binary files /dev/null and b/spec/fixtures/multi_doc.docx differ