Skip to content

Commit e07f753

Browse files
authored
Merge pull request #2416 from seek4science/publication-abstract-trim
Strip leading/trailing whitespace from publication abstracts
2 parents 5489256 + 467c49f commit e07f753

File tree

3 files changed

+65
-2
lines changed

3 files changed

+65
-2
lines changed

app/models/publication.rb

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ class Publication < ApplicationRecord
4949
has_one :content_blob, ->(r) { where('content_blobs.asset_version =? AND deleted=?', r.version, false) }, as: :asset, foreign_key: :asset_id
5050

5151
explicit_versioning(:version_column => "version", sync_ignore_columns: ['license','other_creators']) do
52-
acts_as_versioned_resource
52+
acts_as_versioned_resource
5353
has_one :content_blob, -> (r) { where('content_blobs.asset_version =? AND content_blobs.asset_type =?', r.version, r.parent.class.name) },
5454
:primary_key => :publication_id,:foreign_key => :asset_id
5555

@@ -58,7 +58,7 @@ def doi_uri
5858
end
5959

6060
alias_method :doi_identifier, :doi_uri
61-
end
61+
end
6262

6363
belongs_to :publication_type
6464

@@ -78,6 +78,8 @@ def doi_uri
7878

7979
accepts_nested_attributes_for :publication_authors
8080

81+
auto_strip_attributes :abstract
82+
8183
# Types of registration
8284
REGISTRATION_BY_PUBMED = 1
8385
REGISTRATION_BY_DOI = 2

lib/tasks/seek_upgrades.rake

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ namespace :seek do
1919
update_rdf
2020
update_morpheus_model
2121
db:seed:018_discipline_vocab
22+
strip_publication_abstracts
2223
]
2324

2425
# these are the tasks that are executed for each upgrade as standard, and rarely change
@@ -99,6 +100,21 @@ namespace :seek do
99100
end
100101
end
101102

103+
# One-off upgrade task: removes leading/trailing whitespace from the abstract of
# every stored publication and reports how many rows were changed.
#
# Rows are written with update_column, so only the abstract column is touched.
# Abstracts that are nil, or already free of surrounding whitespace, are skipped.
# An abstract consisting solely of whitespace is stored as nil.
# NOTE(review): storing nil is meant to match what `auto_strip_attributes :abstract`
# does on a normal save (its `nullify` option defaults to true per the gem README) -
# confirm against the gem version in use.
task(strip_publication_abstracts: [:environment]) do
  puts 'Stripping publication abstracts...'
  updated_count = 0
  # Only id and abstract are loaded; id is needed for update_column to target the row.
  Publication.select(:id, :abstract).find_each do |publication|
    original = publication.abstract
    next if original.nil?

    stripped = original.strip
    next if stripped == original

    # A whitespace-only abstract strips to '', which is persisted as nil.
    publication.update_column(:abstract, stripped.empty? ? nil : stripped)
    updated_count += 1
  end
  puts "... updated #{updated_count} publications"
end
117+
102118
private
103119

104120
##

test/unit/publication_test.rb

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,4 +539,49 @@ class PublicationTest < ActiveSupport::TestCase
539539
assert_includes publication.related_models, assay_model
540540
assert_includes publication.related_models, model
541541
end
542+
543+
# Saving a publication whose abstract is padded with spaces and newlines
# should leave only the inner text in the abstract attribute.
test 'strips leading and trailing whitespace from abstract on save' do
  padded_abstract = " \n This is an abstract with leading and trailing whitespace \n "
  pub = FactoryBot.build(:publication, abstract: padded_abstract)

  # The save is performed as the publication's own contributor.
  User.with_current_user(pub.contributor) { pub.save! }

  assert_equal 'This is an abstract with leading and trailing whitespace', pub.abstract
end
553+
554+
# A publication with no abstract must still save, and the abstract stays nil.
test 'handles nil abstract gracefully' do
  pub = FactoryBot.build(:publication, abstract: nil)

  # The save is performed as the publication's own contributor.
  User.with_current_user(pub.contributor) { pub.save! }

  assert_nil pub.abstract
end
563+
564+
# Metadata pulled in through extract_pubmed_metadata may carry a padded abstract;
# after saving, the publication's abstract should hold only the trimmed text.
test 'strips whitespace from abstract when extracted from pubmed' do
  reference = Bio::Reference.new(
    'title' => 'SEEK publication',
    'abstract' => " An investigation into blalblabla \n ",
    'journal' => 'The testing journal',
    'pubmed' => '12345',
    'doi' => nil
  )

  # Created in the same order as before: project first, then publication type.
  project = FactoryBot.create(:project)
  journal_type = FactoryBot.create(:journal)
  pub = Publication.new(title: 'Test', projects: [project], publication_type: journal_type)

  pub.extract_pubmed_metadata(reference)

  saving_user = FactoryBot.create(:person).user
  User.with_current_user(saving_user) { pub.save! }

  assert_equal 'An investigation into blalblabla', pub.abstract
end
542587
end

0 commit comments

Comments
 (0)