-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrossref_check
188 lines (160 loc) · 5.03 KB
/
crossref_check
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#!/usr/bin/env ruby
# frozen_string_literal: true
# Script compares the proposed Crossref submission
# with current DOI registrations
require 'optparse'
require 'ostruct'
require 'os'
# Locate the code base root: the parent of the directory holding this script.
script_dir = File.expand_path(File.dirname(__FILE__))
root_dir = File.dirname(script_dir)
require_relative File.join(root_dir, "lib", "logger")

# Everything the script reports goes through this logger, bound to STDOUT.
script_logger = UMPTG::Logger.create(logger_fp: STDOUT)

# Script options. The message type is set to :works here and no command
# line switch changes it.
options = OpenStruct.new(message_type: :works)

option_parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} submission_file [doi...]"
  opts.on_tail('-h', '--help', 'Print this help message') do
    script_logger.info(opts)
    exit 0
  end
end
option_parser.parse!(ARGV)

# The submission file is mandatory; without it, log the usage text and stop.
if ARGV.empty?
  script_logger.info(option_parser.help)
  return
end

# First positional argument is the submission file; any remaining
# arguments are the DOIs to check.
submission_file = ARGV.first
doi_list = ARGV.drop(1)
# XPath selecting the book_metadata child of every book element whose
# book_type attribute is "monograph". Elements are matched by local-name(),
# i.e. by their name without regard to namespace.
DOI_DATA_XPATH = <<~SXPATH
  //*[
  local-name()='book' and @book_type='monograph'
  ]/*[
  local-name()='book_metadata'
  ]
SXPATH
require 'nokogiri'
require 'serrano'
require 'uri'
# Values read from one metadata node of a submission document, together
# with the URL that is actually registered (filled in later by the caller).
class DOIRecord
  attr_reader :doi_data_node, :doi, :print_isbn, :pub_year, :doi_prefixed, :url_proposed
  attr_accessor :url_actual

  # args - Hash; args[:doi_data_node] is the node to read. It must respond
  #        to #xpath and return a collection whose first element responds
  #        to #text.
  #
  # Raises NoMethodError when an expected child element is absent, because
  # the first match is then nil.
  def initialize(args = {})
    @doi_data_node = args[:doi_data_node]
    @pub_year = first_text("./*[local-name()='publication_date']/*[local-name()='year']")
    @print_isbn = first_text("./*[local-name()='isbn' and @media_type='print']")
    @doi = first_text("./*[local-name()='doi_data']/*[local-name()='doi']")
    @doi_prefixed = "http://doi.org/" + @doi
    @url_proposed = first_text("./*[local-name()='doi_data']/*[local-name()='resource']")
    # Empty until a registered URL is assigned through url_actual=.
    @url_actual = ""
  end

  private

  # Text of the first node matching +path+, relative to the metadata node.
  def first_text(path)
    @doi_data_node.xpath(path).first.text
  end
end
# Resolve the submission file to an absolute path and make sure it exists.
submission_file = File.expand_path(submission_file)
unless File.exist?(submission_file)
  script_logger.error("Submission file doesn't exist, #{submission_file}")
  exit(1)
end

# Parse the submission. The block form of File.open closes the file handle
# as soon as the document has been read, instead of leaving it open for the
# rest of the run.
submission_doc = File.open(submission_file) { |file| Nokogiri::XML(file) }

# Configure the Crossref REST API client used for the lookups below.
Serrano.configuration do |config|
  config.base_url = "https://api.crossref.org"
  # NOTE(review): this value looks like a redacted placeholder - confirm a
  # real contact address is configured.
  config.mailto = "[email protected]"
end
# Logs a DOI whose registered URL and proposed URL point at different hosts.
#
# script_logger - object responding to #warn and #info
# record        - record responding to #doi, #print_isbn, #url_actual and
#                 #url_proposed, or nil when no record exists for the DOI
# doi           - optional DOI string, used only in the warning emitted
#                 when record is nil
#
# Nothing is logged when both hosts are equal after a leading "www." is
# removed. Raises URI::InvalidURIError if either URL cannot be parsed.
def log_doi(script_logger, record, doi = nil)
  if record.nil?
    # The original message interpolated an undefined local and raised
    # NameError; the DOI is now an explicit, optional argument.
    missing = "2,no DOI record found"
    missing = "#{missing} #{doi}" unless doi.nil?
    script_logger.warn(missing)
    return
  end

  # host is nil for an empty or relative URL, hence the safe navigation.
  # The non-bang delete_prefix avoids mutating the URI's own host string.
  actual_host = URI(record.url_actual).host&.delete_prefix('www.')
  proposed_host = URI(record.url_proposed).host&.delete_prefix('www.')
  return if actual_host == proposed_host

  script_logger.info("#{record.doi}:#{record.print_isbn},#{record.url_actual},#{record.url_proposed}")
end
# DOIs excluded from the comparison. The list is currently empty; the
# entries below are kept, commented out, for reference. Frozen so the
# constant cannot be modified by accident.
DOI_SKIP_LIST = [
  # Currently not registered due to
  # no UMP page exists as they have
  # not passed Eloquence verification.
  # "10.3998/mpub.11871089",
  # "10.3998/mpub.12912253",
  # "10.3998/mpub.12838586",
  # "10.3998/mpub.12783158",
  # "10.3998/mpub.12823887",
  # "10.3998/mpub.14415745",
  # "10.3998/mpub.12901912",
  # "10.3998/mpub.14406207",
  # "10.3998/mpub.12793897",

  # Currently produce false negatives.
  # Crossref API returns UMP site, but
  # DOI is correctly registered for Fulcrum.

  # Need to be registered
].freeze

# Print ISBNs to exclude. Currently empty, and the only check that would
# consult it is commented out in this script.
ISBN_SKIP_LIST = [].freeze
# Build a DOI => DOIRecord lookup from every node DOI_DATA_XPATH selects in
# the submission, leaving out DOIs named in DOI_SKIP_LIST.
# (Skipping by print ISBN via ISBN_SKIP_LIST is not enabled.)
doi_data_node_list = submission_doc.xpath(DOI_DATA_XPATH)
doi_records = doi_data_node_list.each_with_object({}) do |metadata_node, records|
  candidate = DOIRecord.new(doi_data_node: metadata_node)
  records[candidate.doi] = candidate unless DOI_SKIP_LIST.include?(candidate.doi)
end
# When no DOIs were given on the command line, check every DOI found in the
# submission. Publication year is not used as a filter: every record is
# included regardless of its year.
doi_list.concat(doi_records.values.map(&:doi)) if doi_list.empty?
# Query the Crossref API for every DOI in doi_list, using the request that
# matches options.message_type. A StandardError raised by either request
# type is logged and ends the script with status 1; an unsupported message
# type does the same. (exit raises SystemExit, which the rescue below does
# not intercept.)
response_list =
  begin
    case options.message_type
    when :funders
      Serrano.funders(ids: doi_list)
    when :works
      Serrano.works(ids: doi_list)
    else
      script_logger.error("message type #{options.message_type} not supported.")
      exit(1)
    end
  rescue StandardError => e
    script_logger.error(e.message)
    exit(1)
  end
# Copy the registered URL from each API response onto the matching
# DOIRecord. Responses that failed or lack the needed fields are logged and
# skipped, leaving the record's url_actual at its initial value.
response_list.each do |response|
  # to_s keeps a response without a "status" entry from raising.
  unless response["status"].to_s.downcase == "ok"
    script_logger.error("#{options.message_type} request failed,#{response}")
    next
  end

  message = response["message"]
  doi = message["DOI"]
  if doi.nil?
    script_logger.warn("no DOI")
    next
  end

  resource = message["resource"]
  if resource.nil?
    script_logger.warn("no resource")
    next
  end

  # dig returns nil instead of raising when "primary" is absent.
  url = resource.dig("primary", "URL")
  if url.nil?
    script_logger.warn("no resource URL #{doi}")
    next
  end

  # DOI names are case-insensitive, so fall back to a case-insensitive
  # match when the exact key is not present.
  doi_record = doi_records[doi] ||
               doi_records.find { |known_doi, _record| known_doi.casecmp?(doi) }&.last
  if doi_record.nil?
    script_logger.warn("no DOI record found #{doi}")
    next
  end

  doi_record.url_actual = url
end
# Report every requested DOI whose registered host differs from the proposed
# one. A DOI with no record in the submission (possible when DOIs are given
# on the command line) is reported here, so log_doi is never handed a nil
# record.
doi_list.each do |doi|
  record = doi_records[doi]
  if record.nil?
    script_logger.warn("2,no DOI record found #{doi}")
    next
  end
  log_doi(script_logger, record)
end