-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbtaa_gen_monograph_manifest
160 lines (136 loc) · 4.61 KB
/
btaa_gen_monograph_manifest
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/env ruby
# frozen_string_literal: true
# Script inputs a Fulcrum monograph CSV file and a
# list of monograph directories
# and performs the following:
# 1. Uses the monograph directory name (ISBN)
# and looks up the metadata in the CSV.
# 2. Creates the monograph manifest.csv used
# for creating the monograph on Fulcrum.
require 'optparse'
require 'ostruct'
require 'os'
# Locate the code base root: the parent of the directory that holds
# this script. The project libraries are loaded relative to it.
script_dir = File.expand_path(File.dirname(__FILE__))
root_dir = File.dirname(script_dir)
require_relative File.join(root_dir, "lib", "logger")
script_logger = UMPTG::Logger.create(logger_fp: STDOUT)

# Command line handling. The only switch is -h/--help; everything else
# is positional: the CSV file followed by the monograph directories.
options = OpenStruct.new(write_mono_file: false)
cli_parser = OptionParser.new do |parser|
  parser.banner = "Usage: #{File.basename(__FILE__)} <csv_file> [<monograph_dir>..]"
  parser.on_tail('-h', '--help', 'Print this help message') do
    script_logger.info(parser)
    exit 0
  end
end
cli_parser.parse!(ARGV)

# At least the CSV file and one monograph directory are required;
# otherwise the usage text is logged and the script stops.
unless ARGV.count >= 2
  script_logger.info(cli_parser.help)
  exit 0
end

# First positional argument is the CSV file, the remainder are directories.
csv_file, *monograph_dir_list = ARGV
require 'csv'
require_relative File.join(root_dir, 'lib', 'fulcrum', 'manifest')

# Header list supplied by the project's collection schema. It is used
# further down as the column list of every generated manifest.
MONOGRAPH_HEADERS = UMPTG::Fulcrum::Manifest::Validation::CollectionSchema.headers
monograph_headers = MONOGRAPH_HEADERS
=begin
monograph_headers = MONOGRAPH_HEADERS \
- [ "File Name" ] - [ "Representative Kind" ] - [ "Resource Type" ] \
- [ "Alternative Text" ] - [ "Caption" ] - [ "Content Type" ] \
- [ "Exclusive to Fulcrum" ] - [ "External Resource URL" ] - [ "NOID" ] \
- [ "Section" ] - [ "Redirect to" ] - [ "Transcript" ] \
- [ "Translation" ] - [ "Link" ] - [ "Sort Date" ]
=end

# Validate the CSV path before reading it. A directory passes
# File.exist? but makes File.read raise, so it is rejected here with
# the same error message and exit status as a missing path.
csv_file = File.expand_path(csv_file)
csv_path_usable = File.exist?(csv_file) && !File.directory?(csv_file)
unless csv_path_usable
  script_logger.error("invalid CSV file path #{csv_file}.")
  exit 1
end
script_logger.info("*** processing #{File.basename(csv_file)} ***")
STDOUT.flush
=begin
# This should be removed once the metadata
# is updated.
isbn_map = {
"9780299316983" => "9780299316990",
"9780299150839" => "9780299150891",
"9780299311186" => "9780299311193"
}
=end

# Parse the whole CSV into a table keyed by its header row. The header
# row itself is not returned as a data row.
fulcrum_body = File.read(csv_file)
fulcrum_csv = CSV.parse(
  fulcrum_body,
  headers: true,
  return_headers: false
)
# For every monograph directory given on the command line: find its
# metadata row in the CSV, list the representative files found in the
# directory, and write a manifest.csv into that directory.
# Directories that do not exist, or that match zero or several CSV rows,
# are reported and skipped.
monograph_dir_list.each do |monograph_dir|
  monograph_dir = File.expand_path(monograph_dir)
  dir_name = File.basename(monograph_dir)
  unless File.directory?(monograph_dir)
    script_logger.error("directory \"#{dir_name}\" does not exist.")
    next
  end
  script_logger.info("processing directory \"#{dir_name}\"")

  # The search key is the first 13 characters of the directory name
  # (the directory is expected to be named after the ISBN).
  isbn = dir_name[0..12]
  #isbn = isbn_map[isbn] if isbn_map.include?(isbn)

  # Search the CSV for the monograph metadata. A row matches when its
  # ISBN_SEARCH field is present and contains the key as a substring.
  fm_row_list = fulcrum_csv.select do |row|
    #(!row['ISBN(s)'].nil? and row['ISBN(s)'].gsub(/\-/,'').include?(isbn))
    row['ISBN_SEARCH']&.include?(isbn)
  end
  if fm_row_list.empty?
    script_logger.warn("no CSV row found for #{isbn}. Skipping.")
    next
  end
  if fm_row_list.count > 1
    script_logger.warn("multiple CSV rows found for #{isbn}. Skipping.")
    next
  end
  script_logger.info("Found CSV row for #{isbn}.")
  monograph_row = fm_row_list.first

  # Build one manifest row per representative file. Entries are sorted
  # so the manifest order does not depend on the file system.
  rep_row_list = []
  Dir.glob(File.join(monograph_dir, "*")).sort.each do |fl|
    # Only regular files can be listed; a subdirectory whose name happens
    # to end in a known extension must not become a manifest entry.
    next unless File.file?(fl)

    file_name = File.basename(fl)
    ext = File.extname(file_name).strip.downcase
    next if ext.empty?

    # Map the file extension to the representative kind.
    rep_kind =
      case ext
      when '.jpg', '.png', '.bmp', '.jpeg', '.tif'
        'cover'
      when '.epub'
        'epub'
      when '.pdf'
        'pdf_ebook'
      end
    if rep_kind.nil?
      script_logger.warn("skipping file #{file_name}")
      next
    end

    rep_row_list << {
      'File Name' => file_name,
      'Title' => file_name,
      'Representative Kind' => rep_kind
    }
  end

  # Copy the manifest columns out of the matched CSV row. The monograph
  # marker is set on the copy so the parsed CSV table, which is shared by
  # all iterations, is left unmodified.
  new_monograph_row = monograph_headers.each_with_object({}) do |h, copy|
    copy[h] = (h == 'File Name' ? '://:MONOGRAPH://:' : monograph_row[h])
  end

  # Manifest layout: header row, a placeholder row, the representative
  # file rows, then the monograph metadata row.
  manifest_body = CSV.generate(
    headers: monograph_headers,
    write_headers: true
  ) do |csv|
    csv << { "File Name" => "***row left intentionally blank***" }
    rep_row_list.each { |rep_row| csv << rep_row }
    #csv << monograph_row
    csv << new_monograph_row
  end

  # Save the Fulcrum metadata CSV file.
  fulcrum_file = File.join(monograph_dir, "manifest.csv")
  script_logger.info("Creating metadata file #{fulcrum_file}")
  File.write(fulcrum_file, manifest_body)
end