-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmonograph_resource_metadata
113 lines (93 loc) · 3.45 KB
/
monograph_resource_metadata
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/env ruby
# frozen_string_literal: true
# New version of process_monograph_resource_metadata
require 'optparse'
require 'ostruct'
require 'os'
# Determine the root directory of the code base (the parent of the directory
# holding this script) so the project libraries can be loaded relative to it.
script_dir = File.expand_path(File.dirname(__FILE__))
root_dir = File.dirname(script_dir)
require_relative File.join(root_dir, "lib", "logger")

# Logger for script-level messages (usage text, per-directory progress),
# created on STDOUT.
script_logger = UMPTG::Logger.create(logger_fp: STDOUT)

# Process the script parameters. The only option is -h/--help; every
# remaining argument is treated as a monograph directory.
option_parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} monograph_dir [monograph_dir...]"
  opts.on_tail('-h', '--help', 'Print this help message') do
    # Log the rendered help text rather than the parser object itself, so the
    # output does not depend on how the logger formatter stringifies
    # non-String messages. This matches the no-argument path below.
    script_logger.info(opts.help)
    exit 0
  end
end
option_parser.parse!(ARGV)

# At least one monograph directory is required; otherwise show the usage
# text and stop.
if ARGV.empty?
  script_logger.info(option_parser.help)
  return
end
# Everything left in ARGV after option parsing is a monograph directory.
monograph_dir_list = ARGV

# Load the project libraries used for EPUB and FMSL processing.
lib_dir = File.join(root_dir, "lib")
[
  ["epub"],
  ["fulcrum", "monographdir"],
  ["fulcrum", "metadata"]
].each { |path_parts| require_relative File.join(lib_dir, *path_parts) }

# Build the metadata XML processor and hand it to the EPUB processor that
# drives it.
xml_processor = UMPTG::Fulcrum::Metadata::Processor.new
epub_processor = UMPTG::EPUB::XProcessor.new(xml_processor: xml_processor)
# Process each monograph directory given on the command line: run its EPUB
# through the EPUB processor, pass the resulting entry actions to the XML
# processor to update the directory's FMSL, and write the updated FMSL into
# the directory's processing directory. Directories that are invalid, or that
# lack an FMSL or EPUB file, are reported and skipped.
#
# NOTE(review): FileUtils and CSV are used below without a visible require in
# this script; presumably the project libraries load them - confirm.
monograph_dir_list.each do |mdir|
  monograph_dir = UMPTG::Fulcrum::MonographDir.new(monograph_dir: mdir)
  if monograph_dir.monograph_dir.nil?
    script_logger.error("invalid monograph directory #{mdir}")
    next
  end
  script_logger.info("processing monograph directory #{monograph_dir.monograph_dir}")

  # Create the monograph processing directory.
  FileUtils.mkdir_p(monograph_dir.processing_dir)

  # Create the processing log file for this monograph directory and point
  # both processors at it.
  logger_file = File.join(
    monograph_dir.processing_dir,
    "#{File.basename(__FILE__)}.log"
  )
  logger = UMPTG::Logger.create(logger_file: logger_file)
  xml_processor.logger = logger
  epub_processor.logger = logger

  epub_processor.logger.info("*** Processing Monograph directory #{File.basename(monograph_dir.monograph_dir)} ***")

  if monograph_dir.fmsl_file.nil?
    epub_processor.logger.warn("no FMSL file loaded.")
    next
  end

  # Skip a directory with no EPUB the same way a missing FMSL is skipped.
  # Without this guard a nil path reaches File.basename below and raises
  # TypeError, aborting the remaining directories.
  if monograph_dir.epub_file.nil?
    epub_processor.logger.warn("no EPUB file found.")
    next
  end

  epub = UMPTG::EPUB(epub_file: monograph_dir.epub_file)
  epub_processor.logger.info("Using EPUB #{File.basename(monograph_dir.epub_file)}.")

  # The options are passed as an explicit positional Hash, as in the
  # original call.
  entry_actions = epub_processor.run(
    epub,
    {
      normalize: true
    }
  )

  epub_processor.logger.info("Using FMSL #{File.basename(monograph_dir.fmsl_file)}.")
  # NOTE(review): this warning is logged unconditionally for every directory;
  # presumably it accounts for a placeholder row in the FMSL - confirm it is
  # intentional.
  epub_processor.logger.warn("No alt text/caption found for resource \"***row left intentionally blank***\".")

  fmsl_csv = xml_processor.update_fmsl(
    fmsl_file: monograph_dir.fmsl_file,
    entry_actions: entry_actions
  )

  # Add new columns to the CSV headers if needed.
  new_fmsl_headers = fmsl_csv.headers
  ["Caption", "Alternative Text"].each do |header|
    new_fmsl_headers << header unless new_fmsl_headers.include?(header)
  end

  # Save the updated FMSL in the resource processing directory.
  new_fmsl_file = File.join(
    monograph_dir.processing_dir,
    File.basename(monograph_dir.fmsl_file)
  )
  CSV.open(
    new_fmsl_file,
    "w",
    write_headers: true,
    headers: new_fmsl_headers
  ) do |csv|
    fmsl_csv.each do |fmsl_row|
      # Copy only the non-nil fields, stripped of surrounding whitespace and
      # tagged as UTF-8. Keys left out of the Hash are filled in by CSV from
      # the header list.
      new_row = {}
      fmsl_row.each do |key, value|
        new_row[key] = value.strip.force_encoding("UTF-8") unless value.nil?
      end
      csv << new_row
    end
  end
end