-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathspeciesGenusList.pl
executable file
·37 lines (31 loc) · 1 KB
/
speciesGenusList.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/env perl
# speciesGenusList.pl by Amory Meltzer
# Licensed under the WTFPL http://www.wtfpl.net/
# Parse a list of species for those with identical genus and species names
# Data from Wikispecies: http://dumps.wikimedia.org/backup-index.html
# Use all titles from mainspace
# Consider redirecting output (>) to genus_species_date after piping through uniq, due to some weird dupes I didn't fix
use 5.006;
use strict;
use warnings;
use English qw(-no_match_vars); # Avoid regex speed penalty in perl <=5.16
use lib q{./lib};
use Wikispecies::GenusSpecies;
# Require exactly one argument: the path to the species list.
# A usage error is reported on STDERR with a non-zero exit status, so the
# message is never mixed into redirected output and callers can detect the
# failure.
if (@ARGV != 1) {
    print {*STDERR} "Usage: $PROGRAM_NAME <species list>\n";
    exit 1;
}
# Read the species list line by line, clean each title with the noVars,
# noParens and rmOdds helpers (presumably exported by
# Wikispecies::GenusSpecies -- they are not defined in this file), and print
# "<first>_<second>" for every title for which compareGP returns a
# non-empty list.
my $input = $ARGV[0];
open my $list, '<', $input
    or die "Cannot open '$input' for reading: $ERRNO";
while (my $species = <$list>) {
    chomp $species;

    # Cleanup titles before checking
    $species = noVars($species);
    # NOTE(review): the comment below says noParens must run after rmOdds,
    # yet it is called before rmOdds here. The call order is left untouched;
    # confirm which of the two (comment or code) reflects the intent.
    $species = noParens($species); # Must be after rmOdds for the time being
    $species = rmOdds($species);

    # An empty list from compareGP means this title is skipped.
    my @words = compareGP($species);
    if (@words) {
        print "$words[0]_$words[1]\n";
    }
}
close $list or die "Cannot close '$input': $ERRNO";