-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocess_monograph_epub
235 lines (203 loc) · 9.39 KB
/
process_monograph_epub
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
#!/usr/bin/env ruby
# frozen_string_literal: true
require 'optparse'
require 'ostruct'
# Locate the code base: this script's directory is resolved to an absolute
# path and its parent is treated as the root of the code base.
script_path = File.dirname(__FILE__)
script_dir = File.expand_path(script_path)
root_dir = File.dirname(script_dir)

# Load the project's logger library from <root>/lib and create the
# script-level logger, which writes to STDOUT.
logger_lib = File.join(root_dir, 'lib', 'logger')
require_relative logger_lib
script_logger = UMPTG::Logger.create(logger_fp: STDOUT)
# Process the script parameters.
#
# +options+ holds the defaults; each command-line switch below overrides one
# of them. The CSS style and the default actions are stored as Symbols so
# they compare equal to the Symbol defaults set here.
#
# Every switch declares its argument in brackets, which OptionParser treats
# as optional: when the value is omitted the handler receives nil. Handlers
# therefore only overwrite a default when a value was actually supplied.
options = OpenStruct.new
#options.publisher_dir = Dir.pwd
options.publisher_dir = "s:/Information\ Management/Fulcrum/UMP"
#options.publisher_dir = "c:/Users/tbelc/Documents/Fulcrum/UMP/"
options.manifest_file = nil
options.fulcrum_host = nil
options.resource_css = :default
options.default_actions = {
  keywords: :disable,
  resources: :embed
}
option_parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [-c default|enhanced] [-f production|preview|staging] [-d <publisher_dir>] [-m <manifest_file>] [-r disable|embed|link|none|update_alt] [-k disable|link|none] <monograph_id> [<monograph_id>..]"
  opts.on('-c', '--resource_css [default|enhanced]', 'Resource CSS styling') do |type|
    # Convert to a Symbol: the default is :default, so a raw String such as
    # "default" would never compare equal to it.
    options.resource_css = type.to_sym if type
  end
  opts.on('-d', '--publisher_directory [DIRECTORY]', 'Publisher site directory') do |publisher_dir|
    # Keep the default directory when the switch is given without a value.
    options.publisher_dir = publisher_dir if publisher_dir
  end
  opts.on('-f', '--fulcrum_host [production|preview|staging]', 'Fulcrum environment') do |fulcrum_host|
    options.fulcrum_host = fulcrum_host
  end
  opts.on('-k', '--keywords [disable|link|none]', 'Default keywords processing action') do |action|
    options.default_actions[:keywords] = action.to_sym if action
  end
  opts.on('-m', '--manifest_file [FILE]', 'Manifest file path') do |manifest_file|
    options.manifest_file = manifest_file
  end
  opts.on('-r', '--resources [disable|embed|link|none|update_alt]', 'Default resources processing action') do |action|
    options.default_actions[:resources] = action.to_sym if action
  end
  opts.on_tail('-h', '--help', 'Print this help message') do
    puts opts
    exit 0
  end
end
# parse! removes the recognized switches from ARGV, leaving the monograph IDs.
option_parser.parse!(ARGV)
# At least one monograph ID is required. With none left on the command line,
# print the usage text and stop the script.
if ARGV.empty?
  puts option_parser.help
  return
end

# Whatever remains in ARGV is the list of monograph IDs to process.
monograph_id_list = ARGV
require_relative File.join(root_dir, 'lib', 'fulcrum')
require_relative File.join(root_dir, 'lib', 'services')
# Confirm that the publisher (source) directory exists before doing any work.
publisher_dir = File.expand_path(options.publisher_dir)
if !File.exist?(publisher_dir)
  script_logger.error("source directory #{publisher_dir} does not exist.")
  exit 1
end

# A manifest file is optional, but when one was specified it must exist.
manifest_path = options.manifest_file
if !manifest_path.nil? && !File.exist?(manifest_path)
  script_logger.error("manifest file #{manifest_path} does not exist.")
  exit 1
end
# Validate the default keywords/resources actions against the values listed
# by UMPTG::Fulcrum::EPUBProcessor.DEFAULT_ACTIONS. An invalid action is
# logged as fatal and the script exits with status 1, matching the other
# start-up checks, rather than continuing with an unusable setting.
valid_actions = UMPTG::Fulcrum::EPUBProcessor.DEFAULT_ACTIONS
%i[keywords resources].each do |kind|
  action = options.default_actions[kind]
  next if valid_actions[kind].any? { |candidate| candidate == action }

  script_logger.fatal("Error: invalid default #{kind} action #{action}.")
  exit 1
end

# Report each default action that is in effect (anything but :disable).
%i[keywords resources].each do |kind|
  action = options.default_actions[kind]
  script_logger.info("Processing #{kind} #{action}") unless action == :disable
end
# Create the Heliotrope service for the selected Fulcrum host.
# NOTE(review): the only visible use of this service is in the disabled
# NOID/manifest lookup inside the processing loop — confirm it is still needed.
service = UMPTG::Services::Heliotrope.new(fulcrum_host: options.fulcrum_host)
# Select the stylesheet file under <root>/lib/css that is passed to the EPUB
# processor. The comparison is made on the string form because the option
# may hold either the Symbol default (:default) or a value taken from the
# command line; anything other than "default" selects the enhanced stylesheet.
fulcrum_css_name = options.resource_css.to_s == 'default' ? 'fulcrum_default.css' : 'fulcrum_enhanced.css'
fulcrum_css_file = File.join(root_dir, 'lib', 'css', fulcrum_css_name)
# FileUtils is used below to remove a stale processed EPUB. Require it
# explicitly rather than relying on another library having loaded it.
require 'fileutils'

# Per-monograph loggers keyed by monograph ID. Each one writes to a log file
# inside that monograph's resource_processing directory.
monograph_loggers = {}

# Process each monograph: load its manifest, locate its source directory and
# EPUB, check the expected inputs, then write a processed copy of the EPUB
# into the resource_processing directory. Any failed check is logged and the
# monograph is skipped.
monograph_id_list.each do |monograph_id|
  script_logger.info("*" * 10 + " #{monograph_id} " + "*" * 10)

  # Disabled: explicit NOID lookup and manifest export through the service.
  #   monograph_noid_list = service.monograph_noid(identifier: monograph_id)
  #   if monograph_noid_list[monograph_id].nil? or monograph_noid_list[monograph_id].empty?
  #     script_logger.error("Error: no NOID found for monograph ID #{monograph_id}")
  #     next
  #   end
  #   monograph_noid = monograph_noid_list[monograph_id].first
  #   # Use the monograph NOID to retrieve the monograph manifest from Fulcrum.
  #   csv_body = service.monograph_export(noid: monograph_noid)
  #   if csv_body.nil? or csv_body.empty?
  #     script_logger.error("Error: no manifest found for id #{monograph_id}")
  #     next
  #   end
  #   manifest = UMPTG::Fulcrum::Manifest::Document.new(
  #     :csv_body => csv_body
  #   )

  # Build the manifest from the monograph ID and Fulcrum host, unless a
  # manifest file was given on the command line.
  manifest =
    if options.manifest_file.nil?
      UMPTG::Fulcrum::Manifest::Document.new(
        fulcrum_host: options.fulcrum_host,
        monograph_id: monograph_id
      )
    else
      UMPTG::Fulcrum::Manifest::Document.new(
        csv_file: options.manifest_file
      )
    end
  monograph_noid = manifest.monograph_row['noid']

  # From the manifest, determine the ebook ISBN, preferring the
  # "open access" entry over the "ebook" entry. Fall back to the monograph
  # ID when neither is present.
  ebook_isbn = manifest.isbn["open access"] || manifest.isbn["ebook"]
  if ebook_isbn.nil?
    script_logger.warn("Warning: no ebook ISBN found. Using ID #{monograph_id}.")
    ebook_isbn = monograph_id
  end

  # Find the ebook source folder. First look for a directory
  # using the monograph id. If not found, then look for one
  # whose name starts with the ISBN (dashes removed) and an underscore.
  monograph_dir_list = Dir.glob(File.join(publisher_dir, monograph_id))
  if monograph_dir_list.empty?
    script_logger.warn("Warning: ebook source directory using id #{monograph_id} not found. Using ISBN #{ebook_isbn}.")
    ebook_isbn = ebook_isbn.strip.gsub('-', '')
    monograph_dir_list = Dir.glob(File.join(publisher_dir, "#{ebook_isbn}_*"))
    if monograph_dir_list.empty?
      script_logger.error("Error: ebook source directory not found for id #{monograph_id}.")
      next
    end
  end

  # The list is non-empty here; the first match is used.
  monograph_dir = monograph_dir_list.first
  if monograph_dir_list.count > 1
    script_logger.warn("Multiple ebook source directories found for id #{monograph_id}. Using #{monograph_dir}")
  else
    script_logger.info("Using directory #{monograph_dir}")
  end

  # Verify that the resource_processing directory exists.
  process_dir = File.join(monograph_dir, "resource_processing")
  unless File.directory?(process_dir)
    script_logger.error("Error: directory #{File.basename(process_dir)} is not a valid directory.")
    next
  end

  # Create the log file in the resource_processing directory, truncating
  # any existing one. From here on, messages go to this per-monograph log.
  monograph_log_file = File.join(process_dir, File.basename(__FILE__) + ".log")
  monograph_log = File.open(monograph_log_file, File::WRONLY | File::TRUNC | File::CREAT)
  logger = Logger.new(monograph_log)
  logger.formatter = proc do |severity, _datetime, _progname, msg|
    "#{severity}: #{msg}\n"
  end
  # Registered so the logger is closed after all monographs are processed.
  monograph_loggers[monograph_id] = logger
  logger.info("*" * 10 + " #{monograph_id} " + "*" * 10)

  # Determine if the resources directory exists.
  resources_dir = File.join(monograph_dir, "resources")
  unless File.directory?(resources_dir)
    logger.error("Error: #{File.basename(resources_dir)} is not a valid directory.")
    next
  end

  # Find the epub file name in the manifest.
  epub_row = manifest.representative_row(kind: "epub")
  if epub_row.nil?
    logger.error("Error: no EPUB row found in manifest for id #{monograph_id}")
    next
  end

  # The manifest names the file; it must also exist in the source directory.
  epub_file_name = epub_row['file_name']
  epub_file = File.join(monograph_dir, epub_file_name)
  unless File.exist?(epub_file)
    logger.error("Error: #{epub_file_name} not found in #{monograph_dir} for id #{monograph_id}")
    next
  end
  logger.info("#{epub_file_name} found in manifest for id #{monograph_id}")

  # Determine if the resources directory contains any files.
  resource_file_list = Dir.glob(File.join(resources_dir, "*"))
  if resource_file_list.empty?
    logger.error("Error: #{File.basename(resources_dir)} contains no resources.")
    next
  end
  logger.info("Resources directory exists containing #{resource_file_list.count} files.")

  # Determine if the resource map file exists.
  resource_map_file = File.join(process_dir, "resource_map.xml")
  unless File.exist?(resource_map_file)
    logger.error("Error: resource map file #{File.basename(resource_map_file)} must exist.")
    next
  end

  # Create a new EPUB with the resource references detailed
  # in the resource map processed.
  processed_epub = UMPTG::Fulcrum::EPUBProcessor.process(
    epub_file: epub_file,
    default_actions: options.default_actions,
    resource_metadata: manifest,
    resource_map_file: resource_map_file,
    fulcrum_css_file: fulcrum_css_file,
    monograph_noid: monograph_noid,
    logger: logger
  )

  # Remove the old processed EPUB if it exists (force: errors are ignored).
  processed_epub_file = File.join(process_dir, File.basename(epub_file))
  FileUtils.remove_file(processed_epub_file, true)

  # Save the processed EPUB in the processing directory.
  processed_epub.save(epub_file: processed_epub_file)
end
# Close every per-monograph logger now that all processing is finished.
monograph_loggers.each_value(&:close)