Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 9 additions & 11 deletions lib/integrations/crossref.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def query_by_doi(doi:)
nil
end

def query_by_preprint_doi(doi:)
def query_by_preprint_doi(resource:, doi:)
return nil unless doi.present?

bare = bare_doi(doi_string: doi)
Expand All @@ -27,7 +27,8 @@ def query_by_preprint_doi(doi:)
return nil unless id.present? && id['id-type'] == 'doi'

b = bare_doi(doi_string: id['id'])
query_by_doi(doi: b)
item = query_by_doi(doi: b)
crossref_item_scoring(resource, item)&.last
rescue Serrano::NotFound, Serrano::BadGateway, Serrano::Error, Serrano::GatewayTimeout, Serrano::InternalServerError,
Serrano::ServiceUnavailable
nil
Expand Down Expand Up @@ -87,30 +88,27 @@ def match_resource_with_crossref_record(resource:, response:)
return nil unless resource.present? && response.present? && resource.title.present?

scores = []
names = resource.authors.map do |author|
{ first: author.author_first_name&.downcase, last: author.author_last_name&.downcase }
end
orcids = resource.authors.map { |author| author.author_orcid&.downcase }

response['items'].each do |item|
next unless item['title'].present?
next if exclude_dois(resource).include?(item['DOI'])

scores << crossref_item_scoring(resource, item, names, orcids)
scores << crossref_item_scoring(resource, item)
end
# Sort by the score and return the one with the highest score
scores.max_by { |a| a[0] }
end

def crossref_item_scoring(resource, item, names, orcids)
return 0.0 unless resource.present? && resource.title.present? && item.present? && item['title'].present?
def crossref_item_scoring(resource, item)
return [0.0, item] unless resource&.title&.present? && item&.[]('title')&.present?

# Compare the titles using the Amatch NLP library
amatch = item['title'].first.pair_distance_similar(resource.title)
# quarter weight for matching journal title
if resource.journal.present? && item['container-title']&.first&.present?
amatch += 0.25 * resource.journal.title.pair_distance_similar(item['container-title'].first)
end
names = resource.authors.map { |a| { first: a.author_first_name, last: a.author_last_name } }
orcids = resource.authors.map(&:author_orcid).reject(&:blank?).map(&:downcase)
# If authors are available compare them as well, for half weight
amatch += 0.5 * crossref_author_scoring(names, orcids, item['author']) if item['author'].present? && (names.present? || orcids.present?)
item['provenance_score'] = item['score']
Expand All @@ -130,7 +128,7 @@ def crossref_author_scoring(names, orcids, authors)
end
next unless author['family'].present?

names_to_compare = names.select { |h| h[:last]&.include?(author['family']&.downcase) }
names_to_compare = names.select { |h| h[:last]&.downcase&.include?(author['family']&.downcase) }
next if names_to_compare.empty?

scores = names_to_compare.map do |name|
Expand Down
2 changes: 1 addition & 1 deletion lib/tasks/publication_updater.rake
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ namespace :publication_updater do

begin
# Hit Crossref for info
cr = Integrations::Crossref.query_by_preprint_doi(doi: preprint)
cr = Integrations::Crossref.query_by_preprint_doi(resource: resource, doi: preprint)
rescue URI::InvalidURIError, MultiJson::ParseError => e
# If the URI is invalid, just skip to the next record
# MultiJson::ParseError is for current Serrano redirect bug
Expand Down
16 changes: 8 additions & 8 deletions spec/lib/integrations/crossref_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -137,11 +137,11 @@ module Integrations

describe '#query_by_preprint_doi' do
it 'returns nil if the DOI is nil' do
expect(Crossref.send(:query_by_preprint_doi, doi: nil)).to eql(nil)
expect(Crossref.send(:query_by_preprint_doi, resource: @resource, doi: nil)).to eql(nil)
end

it 'returns a parsed json response' do
expect(Crossref.send(:query_by_preprint_doi, doi: '10.123/12345').is_a?(Hash)).to eql(true)
expect(Crossref.send(:query_by_preprint_doi, resource: @resource, doi: '10.123/12345').is_a?(Hash)).to eql(true)
end
end

Expand Down Expand Up @@ -212,34 +212,34 @@ module Integrations

describe '#crossref_item_scoring' do
it 'returns zero id the resource is nil' do
expect(Crossref.send(:crossref_item_scoring, nil, { 'title' => 'ABC' }, nil, nil)).to eql(0.0)
expect(Crossref.send(:crossref_item_scoring, nil, { 'title' => 'ABC' }).first).to eql(0.0)
end

it 'returns zero id the resource has no title' do
@resource.title = nil
expect(Crossref.send(:crossref_item_scoring, @resource, {}, nil, nil)).to eql(0.0)
expect(Crossref.send(:crossref_item_scoring, @resource, {}).first).to eql(0.0)
end

it 'returns zero id the Crossref response does not have a title' do
expect(Crossref.send(:crossref_item_scoring, @resource, {}, nil, nil)).to eql(0.0)
expect(Crossref.send(:crossref_item_scoring, @resource, {}).first).to eql(0.0)
end

it 'returns a high score when the titles are close' do
item = { 'title' => ['Testing Item Scoring'] }
@resource.title = 'Data from: Testing Scoring'
expect(Crossref.send(:crossref_item_scoring, @resource, item, nil, nil).first >= 0.5).to eql(true)
expect(Crossref.send(:crossref_item_scoring, @resource, item).first >= 0.5).to eql(true)
end

it 'returns a low score when the titles are dissimilar' do
item = { 'title' => ['Testing Item Scoring'] }
@resource.title = 'A completely different scoring title'
expect(Crossref.send(:crossref_item_scoring, @resource, item, nil, nil).first < 0.5).to eql(true)
expect(Crossref.send(:crossref_item_scoring, @resource, item).first < 0.5).to eql(true)
end

it 'sets the item[`score`] and item[`provenance_score`]' do
item = { 'title' => ['Testing Item Scoring'], 'score' => 12.3 }
@resource.title = 'A completely different scoring title'
item = Crossref.send(:crossref_item_scoring, @resource, item, nil, nil).last
item = Crossref.send(:crossref_item_scoring, @resource, item).last
expect(item['score'].present?).to eql(true)
expect(item['provenance_score'].present?).to eql(true)
expect(item['provenance_score']).to eql(12.3)
Expand Down