-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathextract_loadset
More file actions
executable file
·97 lines (83 loc) · 2.89 KB
/
extract_loadset
File metadata and controls
executable file
·97 lines (83 loc) · 2.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/perl
# Copyright 2009-2012, Equinox Software, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
use strict;
use warnings;
use open ':utf8';
use Getopt::Long;
my $conf = {}; # configuration hashref
initialize($conf);

# Build the exclusion hash from the matchset file, which is the first
# positional argument left in @ARGV after option parsing.  Each line is
# split on tabs and only the second column is used; values below
# --lowerbound are not added to the hash.
my $matchset_file = shift @ARGV;
open my $matchset_fh, '<', $matchset_file
    or die "Can't open matchset file: $!\n";
my %exclude = ();
while (my $entry = <$matchset_fh>) {
    chomp $entry;
    my (undef, $sub) = split /\t/, $entry;
    next unless defined $sub;   # blank line or no second column
    $sub =~ s/\s//g;            # any whitespace is extraneous
    next if $sub eq '';         # nothing left to compare numerically
    $exclude{$sub} = 1 unless ($sub < $conf->{lowerbound});
}
close $matchset_fh;

# Strip exclusions from the MARCXML file.  The input is processed one line
# at a time, so the tag and subfield must appear on the same line for a
# line to be recognised; lines where the pattern does not match are not
# copied to the output at all.
my $id_pattern
    = qr/tag="$conf->{tag}".+?<subfield code="$conf->{subfield}">(\d+)</;

open my $marc_in,  '<', $conf->{input}
    or die "Can't open input file: $!\n";
open my $marc_out, '>', $conf->{output}
    or die "Can't open output file: $!\n";

while (my $record = <$marc_in>) {
    # Test the match itself: a failed match leaves $1 as it was, so
    # checking $1 afterwards is not a reliable "did it match" test.
    next unless $record =~ $id_pattern;
    my $id = $1;

    my $in_matchset = $exclude{$id};
    if ($conf->{reverse}) {
        # --reverse: emit only the records listed in the matchset
        print {$marc_out} $record if $in_matchset;
    } else {
        # default: emit everything except the records in the matchset
        print {$marc_out} $record unless $in_matchset;
    }
}

close $marc_in;
# Buffered write errors (e.g. disk full) only surface at close time.
close $marc_out or die "Can't close output file: $!\n";
# Parse the command line into the supplied configuration hashref and
# validate it.
#
# Arguments:
#   $c - hashref that receives the parsed options (modified in place)
#
# Defaults: 'tag' becomes 903 and 'subfield' becomes 'a' when not given.
#
# Calls show_help() when option parsing fails, when --help is given, when
# no positional argument remains in @ARGV after parsing, or when any of
# the required options (output, lowerbound, input) was not supplied.
sub initialize {
    my ($c) = @_;

    # set mode on existing filehandles
    binmode(STDIN, ':utf8');

    # NOTE(review): 'tag' is declared as an integer option, so a tag typed
    # with leading zeros may not be kept exactly as typed -- confirm
    # against Getopt::Long before relying on such tags.
    my $rc = GetOptions( $c,
        'lowerbound|l=i',
        'input|i=s',
        'output|o=s',
        'tag|t=i',
        'subfield|s=s',
        'reverse|r',
        'help|h',
    );
    show_help() unless $rc;
    show_help() if ($c->{help});

    # Apply defaults through $c, the hashref we were handed, so the sub
    # does not depend on any variable outside its own scope.
    $c->{tag} ||= 903;
    # A plain truth test would discard the subfield code '0', so only
    # fall back to 'a' when the option is absent or empty.
    $c->{subfield} = 'a'
        unless (defined $c->{subfield} and length $c->{subfield});

    # The hash always holds at least the defaults set above, so the only
    # thing left to verify here is that a positional argument remains.
    show_help() unless @ARGV;

    # Use defined/length rather than truth so that a legitimate value of
    # 0 (e.g. --lowerbound 0) is not reported as missing.
    my @missing = grep { !defined $c->{$_} || $c->{$_} eq '' }
                  ('output', 'lowerbound', 'input');
    if (@missing) {
        print "Required option: ", join(', ', @missing), " missing!\n";
        show_help();
    }

    return;
}
# Print the usage summary to the currently selected output handle and
# terminate the program.  Takes no arguments and does not return; exit is
# called without an explicit status.
sub show_help {
    my $usage = <<HELP;
Usage is: extract_loadset -l BOUND -i INPUTXML -o OUTPUTXML MATCHSET
--lowerbound -l Lowest record ID which will be included in the loadset
--input -i MARCXML input file
--output -o MARCXML output file
--tag -t MARC tag to use as identifier (default: 903)
--subfield -s Subfield of --tag argument (default: 'a')
--reverse -r Output subordinate bibs rather than lead bibs
HELP
    print $usage;
    exit;
}