-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjnl_construct
291 lines (247 loc) · 9.38 KB
/
jnl_construct
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
#!/usr/bin/env ruby
# frozen_string_literal: true
require 'optparse'
require 'ostruct'
# Locate the code base root (the parent of the directory holding this
# script), then load the project logger from it and log to standard output.
script_dir = File.expand_path("..", __FILE__)
root_dir = File.expand_path("..", script_dir)
require_relative File.join(root_dir, "lib", "logger")
script_logger = UMPTG::Logger.create(logger_fp: STDOUT)
# Process the script parameters.
#
# Defaults: no Fulcrum host, the current working directory as the journal
# directory, no XHTML generation and no article type override. Arguments
# left in ARGV after parsing are treated as journal names by the caller.
options = OpenStruct.new
options.fulcrum_host = nil
options.journal_dir = Dir.pwd
options.generate_xhtml = false
options.article_type = nil
option_parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [-f production|preview|staging] [-d journal_dir] [-t article_type] [-x] [journal_name...]"
  opts.on('-d', '--directory [DIRECTORY]', 'Journal directory') do |dir|
    # The argument is optional, so a bare "-d" yields nil here. Keep the
    # default directory in that case instead of overwriting it with nil,
    # which would later make File.expand_path raise.
    options.journal_dir = dir unless dir.nil?
  end
  opts.on('-f', '--fulcrum_host host', 'Fulcrum environment') do |fulcrum_host|
    options.fulcrum_host = fulcrum_host
  end
  opts.on('-t', '--article_type type', 'JATS Article Type') do |article_type|
    options.article_type = article_type
  end
  opts.on('-x', '--xhtml', 'Generate XHTML') do
    options.generate_xhtml = true
  end
  opts.on_tail('-h', '--help', 'Print this help message') do
    script_logger.info(opts)
    exit 0
  end
end
option_parser.parse!(ARGV)
# Journal directory exist?
journal_dir = File.expand_path(options.journal_dir)
unless File.directory?(journal_dir)
  script_logger.error("journal directory not found \"#{journal_dir}\".")
  exit 1
end

# Journal directory valid? It must contain the "articles", "dlxs" and
# "resources" sub directories.
articles_dir = File.join(journal_dir, "articles")
dlxs_dir = File.join(journal_dir, "dlxs")
resources_dir = File.join(journal_dir, "resources")
unless [articles_dir, dlxs_dir, resources_dir].all? { |dir| File.directory?(dir) }
  script_logger.error("journal directory invalid \"#{File.basename(journal_dir)}\".")
  exit 1
end
# Journals to process: the names remaining on the command line or, when
# none were given, the base name of every XML file in the DLXS directory.
journal_name_list = ARGV
if journal_name_list.empty?
  journal_name_list = Dir.glob(File.join(dlxs_dir, "*.xml")).map do |dlxs_path|
    File.basename(dlxs_path, ".*")
  end
end
require_relative File.join(root_dir, 'lib', 'xslt')
require_relative File.join(root_dir, "lib", "fulcrum")
require_relative File.join(root_dir, "lib", "services")
# Create the service for retrieving the manifest.
service = UMPTG::Services::Heliotrope.new(fulcrum_host: options.fulcrum_host)
require 'fileutils'
require 'nokogiri'
require 'htmlentities'
require 'uri'
# XSLT stylesheets used by the conversions below.
xsl_dir = File.join(root_dir, "lib", "journal", "xsl")
dlxs2jats_xsl_file = File.join(xsl_dir, "dlxs2jats.xsl")
# The Janeway stylesheet is the one in effect for JATS ==> HTML.
# Alternatives that are currently not used:
#jats2html_xsl_file = File.join(root_dir, "lib", "journal", "xsl", "jats-html.xsl")
#jats2html_xsl_file = File.join(root_dir, "..", "janeway", "src", "transform", "xsl", "default.xsl")
jats2html_xsl_file = File.join(xsl_dir, "janeway", "janeway.xsl")
# Separators for the flat resource parameter string: after the key (KSP),
# after each record (RSP), between fields (FSP) and between a field name
# and its value (VSP).
KSP = '#'
RSP = ';'
FSP = '^'
VSP = '='

# Stylesheet URL template: scheme/host, then NOID. Example:
#https://heliotrope-preview.hydra.lib.umich.edu/downloads/wh246t40n?file=embed_css
LINK_HREF_MARKUP = "%s/downloads/%s?file=embed_css"

# <link> element referencing the embed stylesheet. Argument: href.
LINK_MARKUP = <<~LMARKUP
  <link href="%s" rel="stylesheet" type="text/css"></link>
LMARKUP

# Embed iframe without full screen support. Arguments: NOID, src, title.
AUDIO_IFRAME_MARKUP = <<~AMARKUP
  <iframe id="fulcrum-embed-iframe-%s" src="%s" title="%s"></iframe>
AMARKUP

# Embed iframe with full screen support. Arguments: NOID, src, title.
VIDEO_IFRAME_MARKUP = <<~VMARKUP
  <iframe id="fulcrum-embed-iframe-%s" src="%s&fs=1" title="%s" allowfullscreen></iframe>
VMARKUP

# Wrapper around the iframe markup. Arguments: NOID, NOID, iframe markup.
MEDIA_MARKUP = <<~MMARKUP
  <div id="fulcrum-embed-outer-%s">
  <div id="fulcrum-embed-inner-%s">
  %s
  </div>
  </div>
MMARKUP

# One <resource> element of the resources XML document. Arguments, in order:
# entity, file_name, file_type, noid, link, embed_link, css_link, doi,
# doi_noprefix, title, caption, embed_code, css_stylesheet.
RESOURCE_MARKUP = <<~RMARKUP
  <resource entity="%s" file_name="%s" file_type="%s" noid="%s"
  link="%s" embed_link="%s" css_link="%s" doi="%s" doi_noprefix="%s">
  <title>%s</title>
  <caption>%s</caption>
  <embed_code>%s</embed_code>
  <css_stylesheet>%s</css_stylesheet>
  </resource>
RMARKUP
# Determine a short file type token for a resource file.
#
# The file name extension decides when there is one. A file without an
# extension is identified by looking for a few well-known signatures in
# the first 50 bytes of its content.
#
# @param path [String] path of the file to classify
# @return [String] the extension without its leading dot, or a sniffed
#   type ("gif", "png", "jpg", "mp4", "pdf"), or "" if the type is unknown
def detect(path)
  type = File.extname(path).delete_prefix(".")
  return type unless type.empty?

  # Only regular files can be sniffed (not missing paths or directories).
  return "" unless File.file?(path)

  # binread returns nil for an empty file when a length is requested.
  header = File.binread(path, 50).to_s.downcase
  case
  when header[0..2] == 'gif'
    'gif'
  when header[1..3] == 'png'
    'png'
  when header[6..9] == 'jfif'
    # SOI (2 bytes) + APP0 marker (2) + segment length (2) precede "JFIF".
    'jpg'
  when header[28..30] == 'mp4'
    'mp4'
  when header[1..3] == 'pdf'
    'pdf'
  else
    ""
  end
end
# Report whether an article type override was requested.
article_type_message =
  if options.article_type.nil?
    "no article type assigned"
  else
    "assigning article type #{options.article_type}"
  end
script_logger.info(article_type_message)

# Encoder applied to the embed markup placed in the resources XML.
encoder = HTMLEntities.new
# Convert each journal: DLXS XML ==> JATS XML and, when requested, HTML.
#
# For every journal name this
#   1. selects the resource files whose names start with the journal name,
#   2. retrieves the Fulcrum manifest (if any) through the service,
#   3. copies the resources into the article directory and describes them
#      in a temporary resources XML file handed to the XSLT as "image_list",
#   4. runs the DLXS ==> JATS transform and optionally JATS ==> HTML.
journal_name_list.each do |journal_name|
  dlxs_file = File.join(dlxs_dir, journal_name + ".xml")
  unless File.exist?(dlxs_file)
    script_logger.error("#{journal_name}: no DLXS file.")
    next
  end

  article_dir = File.join(articles_dir, journal_name)
  FileUtils.mkdir_p article_dir

  jats_file = File.join(article_dir, journal_name + "_jats.xml")
  script_logger.info("#{journal_name}: #{File.basename(dlxs_file)} ==> #{File.basename(jats_file)}")

  # Index the resource files by base name (extension removed). Hi-res
  # variants (base name ending in "-lg") are skipped, and the first file
  # found for a base name wins.
  resource_file_table = {}
  Dir.glob(File.join(resources_dir, "#{journal_name}*")).each do |resource_file|
    key = File.basename(resource_file, '.*')
    next if key.end_with?("-lg")

    resource_file_table[key] ||= resource_file
  end

  # If a monograph exists, retrieve its manifest
  manifest = nil
  id2csv_body_list = service.monograph_export(identifier: "#{journal_name}")
  csv_bodies = id2csv_body_list[journal_name]
  if csv_bodies.nil? || csv_bodies.empty?
    script_logger.warn("no manifest found for id #{journal_name}")
  else
    manifest = UMPTG::Fulcrum::Manifest::Document.new(
      csv_body: csv_bodies.first
    )
    if manifest.nil?
      script_logger.error("generating manifest CSV for #{journal_name}")
    else
      script_logger.info("manifest found for #{journal_name}")
    end
  end

  # Copy the resource files to be used and build one <resource> element
  # per file for the XSLT parameter document.
  resource_list = resource_file_table.map do |key, resource_file|
    resource_name = File.basename(resource_file)
    script_logger.info("Copying resource #{resource_name}")
    res_file = File.join(article_dir, resource_name)
    FileUtils.cp(resource_file, article_dir) unless File.exist?(res_file)

    file_type = detect(resource_file)

    # Values used when the manifest has no usable entry for this resource.
    link = title = caption = embed_link = doi = embed_code = ""
    noid = css_link = doi_noprefix = css_embed_code = ""

    fileset = manifest.nil? ? nil : manifest.fileset(resource_name)
    unless fileset.nil? || fileset['noid'].to_s.strip.empty?
      # NOTE(review): the slice presumably unwraps a spreadsheet style
      # =HYPERLINK("...") cell value -- confirm against the manifest format.
      link = fileset['link'][12..-3]
      link_uri = URI(link)
      link_scheme_host = link_uri.scheme + "://" + link_uri.host

      embed_code = fileset['embed_code']
      unless embed_code.nil? || embed_code.empty?
        embed_doc = Nokogiri::XML::DocumentFragment.parse(embed_code)
        iframe_node = embed_doc.xpath("descendant-or-self::*[local-name()='iframe']").first
        # Embed code without an iframe leaves the embed link empty
        # instead of raising on nil.
        embed_link = iframe_node['src'] unless iframe_node.nil?
      end

      title = fileset['title']
      caption = fileset['caption']
      doi = fileset['doi']
      doi_noprefix = doi.nil? ? "" : doi.delete_prefix("https://doi.org/")
      noid = fileset['noid']
      css_link = sprintf(LINK_HREF_MARKUP, link_scheme_host, noid)

      # Video and image resources share the full screen iframe markup, a
      # missing resource type gets no iframe, anything else gets the
      # plain iframe markup.
      iframe_markup =
        case fileset['resource_type'].to_s.downcase
        when "video", "image"
          sprintf(VIDEO_IFRAME_MARKUP, noid, embed_link, title)
        when ""
          ""
        else
          sprintf(AUDIO_IFRAME_MARKUP, noid, embed_link, title)
        end
      css_embed_code = sprintf(LINK_MARKUP, css_link) +
                       sprintf(MEDIA_MARKUP, noid, noid, iframe_markup)
    end

    sprintf(RESOURCE_MARKUP, key,
            resource_name, file_type, noid, link, embed_link, css_link,
            doi, doi_noprefix, title, caption,
            encoder.encode(embed_code), encoder.encode(css_embed_code))
  end

  resources_xml = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<resources>" + resource_list.join + "</resources>"
  rfile = File.expand_path(File.basename(__FILE__) + "_resources.xml")

  params = {
    "image_list" => URI(rfile)
  }
  params["article_type"] = options.article_type unless options.article_type.nil?

  begin
    File.write(rfile, resources_xml)
    rc = UMPTG::XSLT.transform(
      xslpath: dlxs2jats_xsl_file,
      srcpath: dlxs_file,
      destpath: jats_file,
      logger: script_logger,
      parameters: params
    )
  ensure
    # Remove the temporary resources file even when the transform raises.
    FileUtils.rm_f(rfile)
  end

  if rc && options.generate_xhtml
    html_file = File.join(File.dirname(jats_file), File.basename(jats_file, ".*") + ".html")
    UMPTG::XSLT.transform(
      xslpath: jats2html_xsl_file,
      srcpath: jats_file,
      destpath: html_file,
      logger: script_logger
    )
  end
end