-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbtaa_gen_monograph_csv
170 lines (149 loc) · 5.28 KB
/
btaa_gen_monograph_csv
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#!/usr/bin/env ruby
# frozen_string_literal: true
# Script inputs a BTAA TMM metadata CSV file
# containing monograph metadata and
# generates a Fulcrum metadata CSV file with
# each row containing Fulcrum monograph metadata.
require 'optparse'
require 'ostruct'
require 'os'
# Locate the root of the code base: the parent of the
# directory that holds this script.
script_dir = File.expand_path(File.dirname(__FILE__))
root_dir = File.expand_path('..', script_dir)
# Load the project logger and create the script logger,
# passing STDOUT as its logger_fp.
require_relative File.join(root_dir, 'lib', 'logger')
script_logger = UMPTG::Logger.create(logger_fp: STDOUT)
# Process the script parameters.
#
# The only option is -h/--help. Every remaining argument is
# taken as the path of a BTAA TMM CSV file to convert.
options = OpenStruct.new
# NOTE(review): write_mono_file is not read anywhere in the
# visible script - confirm before removing.
options.write_mono_file = false
option_parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} <btaa_tmm_csv_file> [<btaa_tmm_csv_file>..]"
  opts.on_tail('-h', '--help', 'Print this help message') do
    # Log the rendered help text (a String), exactly as the
    # no-argument branch below does, rather than the parser object.
    script_logger.info(opts.help)
    exit 0
  end
end
# parse! removes the options it recognizes from ARGV, so only
# the input file paths remain afterwards.
option_parser.parse!(ARGV)
# Without any input file there is nothing to do: show the usage.
if ARGV.empty?
  script_logger.info(option_parser.help)
  exit 0
end
btaa_tmm_csv_file_list = ARGV
require 'csv'
require_relative File.join(root_dir, 'lib', 'fulcrum', 'manifest')
# Column headers used for the generated Fulcrum CSV, as returned
# by the manifest collection schema. The schema is presumably
# provided by the manifest library required above; the actual
# header list is not visible from this script.
MONOGRAPH_HEADERS = UMPTG::Fulcrum::Manifest::Validation::CollectionSchema.headers()
btaa_tmm_csv_file_list.each do |btaa_tmm_csv_file|
# Work with the absolute path of the input file from here on.
btaa_tmm_csv_file = File.expand_path(btaa_tmm_csv_file)
# Report a missing input file and move on to the next one.
unless File.exist?(btaa_tmm_csv_file)
  script_logger.error("invalid CSV file path #{btaa_tmm_csv_file}.")
  next
end
script_logger.info("*** processing #{File.basename(btaa_tmm_csv_file)} ***")
STDOUT.flush
# Load the whole TMM CSV. The first line supplies the column
# names and is not returned as a data row.
tmm_csv_text = File.read(btaa_tmm_csv_file)
fm_csv = CSV.parse(tmm_csv_text, headers: true, return_headers: false)
# Each title may have multiple rows. The extra
# rows may contain additional ISBNs. Traverse
# the rows and condense the ISBNs of a title into
# two new columns on the title's first row:
#   ISBN(s)      "<hyphenated isbn> (<format>);" per ISBN
#   ISBN_SEARCH  "<isbn without hyphens>;" per ISBN
# fm_row_list collects the first row of every title.
fm_row_list = []
frow = nil
fm_csv.each do |fm_row|
  title = fm_row['Title']
  unless title.nil? || title.strip.empty?
    # A non-blank title starts a new monograph. Following rows
    # with a blank title add their ISBNs to this row.
    frow = fm_row
    frow['ISBN(s)'] = ""
    frow['ISBN_SEARCH'] = ""
    fm_row_list << frow
  end

  # Empty CSV cells are parsed as nil, so normalize to a String
  # before stripping. Hyphens are removed and the value is cut
  # to at most 13 characters.
  isbn = fm_row['ISBN13'].to_s.strip.delete('-')[0..12]
  # A row without an ISBN has nothing to contribute.
  next if isbn.empty?

  if frow.nil?
    # An ISBN row seen before any titled row has no monograph
    # to be attached to.
    script_logger.error("ISBN #{isbn} found before the first title row, skipping.")
    next
  end

  frow['ISBN_SEARCH'] += "#{isbn};"
  # Fixed 3-1-5-3-1 grouping of the (up to) 13 characters.
  # NOTE(review): values shorter than 13 characters end up with
  # empty groups - confirm whether such input can occur.
  hyphenated = "#{isbn[0..2]}-#{isbn[3..3]}-#{isbn[4..8]}-#{isbn[9..11]}-#{isbn[12..12]}"
  format = fm_row['Format'].to_s.strip.downcase
  frow['ISBN(s)'] += "#{hyphenated} (#{format});"
end
# Map the TMM columns to Fulcrum import columns.
#
# Builds monograph_row_list: one Hash per title row, keyed by
# Fulcrum column name.

# TMM column => Fulcrum column, for columns that are only renamed.
# When two TMM columns map to the same Fulcrum column, the one
# that comes later in the input row wins.
tmm_to_fulcrum = {
  'Publisher' => 'Publisher',
  'Pub City 1' => 'Pub Location',
  'Publication Location' => 'Pub Location',
  'Book Description Marketing' => 'Description',
  'Book Description' => 'Description',
  'Open Access Funder' => 'Funder',
  'Open Access Avail' => 'Open Access?',
  'Subjects' => 'Subject',
  'Copyright Holder' => 'Rightsholder',
  'Copyright Year' => 'Pub Year'
}
# TMM columns that are not copied. Title and Subtitle are
# merged into the Fulcrum Title separately.
skipped_columns = [
  'Handle Prefix', 'ISBN13',
  'Pub Format', 'Pub Format 2',
  'Pub Date 2', 'Pub Date 3',
  'Title', 'Subtitle'
]

monograph_row_list = fm_row_list.map do |fm_row|
  monograph_row = {}
  # Make sure required fields are filled in:
  # Title, Publisher (citation), Creator(s), Pub Year,
  # Pub Location, Handle, Identifier(s), ISBN(s)
  next monograph_row if fm_row.nil?

  # Title is "<Title>: <Subtitle>" when a subtitle is present.
  title = fm_row['Title']
  subtitle = fm_row['Subtitle']
  title = "#{title}: #{subtitle}" unless subtitle.nil? || subtitle.empty?
  monograph_row['Title'] = title

  # Up to four authors, each rendered as
  # "Last, First (role)|orcid" with the optional parts left
  # out when blank, joined with ';'.
  author_list = (1..4).map do |ndx|
    lname = fm_row["Author (#{ndx}) Last Name"]
    next if lname.nil? || lname.empty?

    fname = fm_row["Author (#{ndx}) First Name"]
    role = fm_row["Author (#{ndx}) Type"]
    # "ORCHID(s)" is the column name as spelled in the input.
    orcid = fm_row["Author (#{ndx}) ORCHID(s)"]
    # No dangling ", " when there is no first name.
    author = fname.nil? || fname.strip.empty? ? lname : "#{lname}, #{fname}"
    author += " (#{role})" unless role.nil? || role.strip.empty?
    author += "|#{orcid}" unless orcid.nil? || orcid.strip.empty?
    author
  end.compact
  monograph_row['Creator(s)'] = author_list.join(';')

  fm_row.each do |key, val|
    next if key.nil? || key.strip.empty?
    next if skipped_columns.include?(key)

    if key == 'ID'
      # The ID cell may be empty (nil), hence the safe navigation.
      monograph_row['Identifier(s)'] = val&.downcase
    else
      # Renamed when listed in the table, otherwise copied
      # under its own name.
      monograph_row[tmm_to_fulcrum.fetch(key, key)] = val
    end
  end
  monograph_row
end
# Generate a new CSV listing Fulcrum metadata
# for each title. The columns are the Fulcrum monograph
# headers followed by ISBN_SEARCH. Each row is a Hash, so
# its values are written under the headers matching its keys.
fulcrum_headers = MONOGRAPH_HEADERS + ['ISBN_SEARCH']
fulcrum_body = CSV.generate(headers: fulcrum_headers, write_headers: true) do |csv|
  monograph_row_list.each { |monograph_row| csv << monograph_row }
end
# Save the Fulcrum metadata CSV file in the directory of the
# input file, named <input name>_mono<input extension>.
mono_name = "#{File.basename(btaa_tmm_csv_file, '.*')}_mono#{File.extname(btaa_tmm_csv_file)}"
fulcrum_file = File.join(File.dirname(btaa_tmm_csv_file), mono_name)
script_logger.info("Creating metadata file #{fulcrum_file}")
File.write(fulcrum_file, fulcrum_body)
end