Skip to content

Commit 7d85860

Browse files
committed
optional name detection by annotation (close #143)
1 parent e3123c9 commit 7d85860

File tree

2 files changed

+13
-5
lines changed

2 files changed

+13
-5
lines changed

pkg/ent/token/features.go

+5
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ type Features struct {
2525
// Abbr feature: token ends with a period.
2626
Abbr bool
2727

28+
// HasAnnotation is true if there is an apparent nomenclatural annotation
29+
// attached to a name candidate. This feature can only be set for the
30+
// first token of a candidate.
31+
HasAnnotation bool
32+
2833
// PotentialBinomialGenus feature: the token might be a genus of name.
2934
PotentialBinomialGenus bool
3035

pkg/ent/token/token.go

+8-5
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ import (
1313
"github.com/gnames/gnfinder/pkg/io/dict"
1414
)
1515

16-
// tokenSN represents a word separated by spaces in a text. Words that are
17-
// split by new lines are concatenated.
16+
// tokenSN (a token for a 'Scientific Name') represents a word separated by
17+
// spaces in a text. Words that are split by new lines are concatenated.
1818
type tokenSN struct {
1919
gner.TokenNER
2020

@@ -218,10 +218,13 @@ func checkRank(t TokenSN, d *dict.Dictionary) bool {
218218

219219
// UpperIndex takes an index of a token and length of the tokens slice and
220220
// returns an upper index of what could be a slice of a name. We expect that
221-
// that most of the names will fit into 5 words. Other cases would require
222-
// more thorough algorithims that we can run later as plugins.
221+
// most of the names will fit into 5 words. We also expect that an annotation
222+
// will be located no further than 10 words away.
223+
// Other cases would require more thorough algorithms that we can run
224+
// later as plugins.
223225
func UpperIndex(i int, l int) int {
224-
upperIndex := i + 5
226+
// 10 here is 5 tokens for name, 5 tokens for annotation
227+
upperIndex := i + 10
225228
if l < upperIndex {
226229
upperIndex = l
227230
}

0 commit comments

Comments
 (0)