Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 48 additions & 22 deletions spec/lib/llt/segmenter_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,18 @@ def load_fixture(filename)
sentences[2].to_s.should == "Marcus Antonius!"
end

# Segmenting without options is expected to number the sentences 1, 2, 3.
it "creates indices by default" do
  ids = segmenter.segment("Cicero est; quis Caesar est? Marcus Antonius!").map(&:id)
  ids.should == [1, 2, 3]
end
describe "with options (indices)" do
# Default behaviour: the three segmented sentences carry the ids 1, 2 and 3.
it "creates indices by default" do
  input = "Cicero est; quis Caesar est? Marcus Antonius!"
  segmented = segmenter.segment(input)
  ids = segmented.map(&:id)
  ids.should == [1, 2, 3]
end

it "indices can be turned off" do
txt = "Cicero est; quis Caesar est? Marcus Antonius!"
sentences = segmenter.segment(txt, indexing: false)
sentences.map(&:id).should == [nil, nil, nil]
# Passing indexing: false is expected to leave the id of every sentence nil.
it "indices can be turned off" do
  input = "Cicero est; quis Caesar est? Marcus Antonius!"
  ids = segmenter.segment(input, indexing: false).map(&:id)
  ids.should == [nil, nil, nil]
end
end

it "handles abbreviated names" do
Expand All @@ -50,16 +52,18 @@ def load_fixture(filename)
sentences[1].to_s.should == "M. Tullius Cicero est."
end

# The abbreviations inside the date and the names must not split the text:
# exactly one sentence is expected.
it "handles abbreviated dates" do
  result = segmenter.segment("Is dies erat a. d. V Kal. Apr. L. Pisone, A. Gabinio consulibus.")
  result.should have(1).item
end
describe "handles dates" do
# A date written with abbreviations (a. d., Kal., Apr.) followed by
# abbreviated names is expected to stay a single sentence.
it "with abbreviations" do
  input = "Is dies erat a. d. V Kal. Apr. L. Pisone, A. Gabinio consulibus."
  result = segmenter.segment(input)
  result.should have(1).item
end

it "handles more dates" do
txt = "Is dies erat a. d. V Ian. Non. Feb. Octob. L. App. Pisone ."
sentences = segmenter.segment(txt)
sentences.should have(1).item
# Further abbreviations (Ian., Non., Feb., Octob., App.) are expected to be
# handled the same way: the whole text is one sentence.
it "with more (and alternative) abbreviations" do
  input = "Is dies erat a. d. V Ian. Non. Feb. Octob. L. App. Pisone ."
  result = segmenter.segment(input)
  result.should have(1).item
end
end

it "are only triggered when they have a leading word boundary" do
Expand Down Expand Up @@ -108,10 +112,32 @@ def load_fixture(filename)
end
end

it "handles 'cos' abbreviations" do
txt = "Coss. cos. Pisone Cicerone aliquae fiunt. Conss. aliud verbum est."
sentences = segmenter.segment(txt)
sentences.should have(2).item
# Examples for abbreviations that occur mostly in inscriptions. Each one hands
# a short text to the segmenter and checks how many sentences come back.
describe "handles abbreviations used mostly in inscriptions" do
  it "cos" do
    result = segmenter.segment("Coss. cos. Pisone Cicerone aliquae fiunt. Conss. aliud verbum est.")
    result.should have(2).item
  end

  it "res p." do
    result = segmenter.segment("Haec res p. subiecta est.")
    result.should have(1).item
  end

  it "f." do
    result = segmenter.segment("Marcus Cn. f. est.")
    result.should have(1).item
  end

  describe "at the end of a sentence" do
    # NOTE(review): the group title and the input ("... tr. pl. Est homo.")
    # read like two sentences, yet a single item is expected - confirm that
    # this expectation is intended.
    it "tr. pl." do
      result = segmenter.segment("Erat tr. pl. Est homo.")
      result.should have(1).item
    end
  end
end

it "splits at :" do
Expand Down