diff --git a/category_def.yaml b/category_def.yaml
index 0119b4f..b7e4106 100644
--- a/category_def.yaml
+++ b/category_def.yaml
@@ -7,12 +7,15 @@ geo:
detail:
subject:
title:
- en: Subject archives folders
- de: Sacharchiv Mappen
+ en: Subject archives
+ de: Sacharchiv
ware:
title:
- en: Commodities/wares archives folders
- de: Warenarchiv Mappen
+ en: Commodities/wares archives
+ de: Warenarchiv
+ ordered_by:
+ en: by wares
+ de: nach Waren
subject:
prov: hwwa
title:
@@ -22,8 +25,11 @@ subject:
detail:
geo:
title:
- en: Countries-subject archives folders
- de: Länder-Sacharchiv Mappen
+ en: Countries-subject archives
+ de: Länder-Sacharchiv
+ ordered_by:
+ en: by countries
+ de: nach Ländern
ware:
prov: hwwa
title:
@@ -33,5 +39,5 @@ ware:
detail:
geo:
title:
- en: Commodities/wares archives folders
- de: Warenarchiv Mappen
+ en: Commodities/wares archives
+ de: Warenarchiv
diff --git a/count_film_images.pl b/count_film_images.pl
index caeb076..5808a0e 100644
--- a/count_film_images.pl
+++ b/count_film_images.pl
@@ -8,7 +8,8 @@
use strict;
use warnings;
-use utf8;
+use autodie;
+use utf8::all;
use Data::Dumper;
##use Data::Dumper::Names;
@@ -49,7 +50,8 @@
# findbuch input
my $findbuch_file =
$filmdata_root->child( $set . '_' . $collection . '.json' );
- my $findbuch_data = from_json( $findbuch_file->slurp ) || die "not found";
+ my $findbuch_data = decode_json( $findbuch_file->slurp_raw )
+ || die "not found";
my $last_film_id = 0;
foreach my $entry ( @{$findbuch_data} ) {
diff --git a/create_category_pages.pl b/create_category_pages.pl
index 1969e3e..edbedc7 100644
--- a/create_category_pages.pl
+++ b/create_category_pages.pl
@@ -13,9 +13,8 @@
use strict;
use warnings;
-use utf8;
-
-use lib './lib';
+use autodie;
+use utf8::all;
use Carp;
use Data::Dumper;
@@ -30,8 +29,6 @@
use ZBW::PM20x::Folder;
use ZBW::PM20x::Vocab;
-binmode( STDOUT, ":encoding(UTF-8)" );
-
##Readonly my $WEB_ROOT => path('/tmp/category');
Readonly my $WEB_ROOT => path('../web/category');
Readonly my $KLASSDATA_ROOT => path('../data/klassdata');
@@ -162,8 +159,11 @@
my %filmsections;
foreach my $filming (qw/ 1 2/) {
- $filmsections{$filming} =
- [ $master_voc->filmsectionlist( $category_id, $filming ) ];
+ $filmsections{$filming} = [
+ $master_voc->filmsectionlist(
+ $category_id, $filming, $detail_type
+ )
+ ];
}
my $folder_count =
@@ -184,7 +184,8 @@
$category_type eq 'ware'
? $firstletter
: $master_voc->subheading( $lang, $firstletter );
- push( @lines, '', "### $subhead ", '' );
+ push( @lines, '', "#### $subhead ",
+ '' );
push( @tabs, { startchar => $firstletter } );
$firstletter_old = $firstletter;
}
@@ -317,6 +318,19 @@
#
###########################
+# data structure %category_data:
+
+# category_type
+# category_id # defines page
+# detail_type
+# folder
+# lines
+# de|en
+# filming_loop
+# de|en
+# filming
+# filmsection_loop
+
my %category_data;
print "\nCollect data for folders\n";
@@ -460,7 +474,7 @@
# prepend subheading
my $subheading =
$detail_voc->subheading( $lang, $firstletter ) || $firstletter;
- $text .= "\n\n### $subheading\n\n";
+ $text .= "\n\n#### $subheading\n\n";
# all text line for this subheading
$text .= join( "\n", @{ $lines_ref->{$firstletter} } );
@@ -479,12 +493,13 @@
print "\n\nCollect data for film sections\n";
-# only top level for the country-subject and ware archives
-foreach my $category_type (qw/ geo ware /) {
+# now for all top level pages
+foreach my $category_type (qw/ geo subject ware /) {
print "\nfilm sections category_type: $category_type\n";
# master vocabulary reference
$master_voc = ZBW::PM20x::Vocab->new($category_type);
+ my $master_type = $master_voc->vocab_name;
foreach my $lang (@LANGUAGES) {
print " lang: $lang\n";
@@ -493,9 +508,9 @@
my @detail_types =
sort keys %{ $definitions_ref->{$category_type}{detail} };
foreach my $detail_type (@detail_types) {
- next if $category_type eq 'geo' and $detail_type eq 'ware';
print " detail_type: $detail_type\n";
+ my $detail_voc = ZBW::PM20x::Vocab->new($detail_type);
my $def_ref = $definitions_ref->{$category_type}->{detail}{$detail_type};
my $detail_title = $def_ref->{title}{$lang};
@@ -507,10 +522,13 @@
foreach my $filming (qw/ 1 2 /) {
my $filming_ref = $filming_def_ref->{$filming};
+ # filmsections for the master / detail combination (works in either
+ # normal or inversed hierarchical order)
my @filmsectionlist =
- $master_voc->filmsectionlist( $category_id, $filming );
+ $master_voc->filmsectionlist( $category_id, $filming,
+ $detail_type );
- # how to deal deal wth mission information depends ...
+ # how to deal deal with missing information depends ...
if ( not scalar(@filmsectionlist) > 0 ) {
if ( $filming eq '1'
and $category_data{$category_type}{$category_id}{$detail_type}
@@ -518,6 +536,7 @@
{
## is ok
} else {
+ ## in which cases should a warning be issued?
## warn "no film data for $category_id in filming $filming\n";
}
next;
@@ -525,19 +544,35 @@
my @filmsection_loop;
foreach my $section (@filmsectionlist) {
- ## TODO is this correct? includes position and R/L!
- my $film_id = substr( $section->{'@id'}, 25 );
- my $entry = {
+ my $section_id = substr( $section->{'@id'}, 25 );
+
+ my $section_label =
+ $section->label( $lang, $detail_voc ) || $section->{title};
+
+ my $entry = {
"is_$lang" => 1,
filmviewer_url => $section->{'@id'},
- film_id => $film_id,
- first_img => $section->{title},
+ section_id => $section_id,
+ section_label => $section_label,
+ image_count => $section->img_count,
};
+ if ( $section->is_filmstartonly ) {
+ $entry->{is_filmstartonly} = 1;
+ }
push( @filmsection_loop, $entry );
}
+ # sort ware entries alphabetically
+ if ( $detail_type eq 'ware' ) {
+ my $uc = Unicode::Collate->new();
+ @filmsection_loop =
+ sort { $uc->cmp( $a->{'section_label'}, $b->{'section_label'} ) }
+ @filmsection_loop;
+ }
+
my %filming_data = (
"is_$lang" => 1,
+ detail_title => $detail_title,
filming_title => $filming_ref->{title}{$lang},
legal => $filming_ref->{legal}{$lang},
filmsection_loop => \@filmsection_loop,
@@ -545,19 +580,44 @@
$master_voc->film_img_count( $category_id, $filming ),
);
+ # remove image count for ware section on geo pages
+ # or geo sections on subject pages
+ if ( ( $master_type eq 'geo' and $detail_type eq 'ware' )
+ or ( $master_type eq 'subject' and $detail_type eq 'geo' ) )
+ {
+ delete $filming_data{total_number_of_images};
+ }
+
push( @filmings, \%filming_data );
} # $filming
if ( scalar(@filmings) ) {
$category_data{$category_type}{$category_id}{$detail_type}
{filming_loop}{$lang} = \@filmings;
- }
+
+ # add data for special text about secondary categories
+ if ( ( $master_type eq 'geo' and $detail_type eq 'ware' )
+ or ( $master_type eq 'subject' and $detail_type eq 'geo' ) )
+ {
+ my $collection = $detail_type eq 'ware' ? 'wa' : 'sh';
+ my %suppl = (
+ label => $master_voc->label( $lang, $category_id ),
+ detail_title => $detail_title,
+ ordered_by => $def_ref->{ordered_by}{$lang},
+ filmlist1 => "/film/h1_$collection.de.html",
+ filmlist2 => "/film/h2_$collection.de.html",
+ );
+ $category_data{$category_type}{$category_id}{$detail_type}
+ {secondary_category}{$lang} = \%suppl;
+ } # secondary_category
+ } # scalar(@filmings)
} # $category_id
} # $detail_type
} # $lang
}
###print "\n## size inc. film: ", total_size(\%category_data) / (1024*1024), "\n";
+###path('/tmp/category.dat')->spew(Dumper \%category_data); exit;
print "\n\nOutput of individual category pages\n\n";
@@ -593,8 +653,19 @@
if ( defined $filming_loop_ref ) {
$data{filming_loop} = $filming_loop_ref;
}
- push( @detail_data, \%data );
+ # supplemental data for secondary category
+ ## TODO fix ugly construct
+ if ( defined $category_ref->{$detail_type}{secondary_category} ) {
+ $data{is_secondary_category} = 1;
+ foreach
+ my $key (qw[ label ordered_by detail_title filmlist1 filmlist2 ])
+ {
+ $data{$key} =
+ $category_ref->{$detail_type}{secondary_category}{$lang}{$key};
+ }
+ }
+ push( @detail_data, \%data );
} # $detail_type
# actual output
@@ -617,6 +688,7 @@ sub output_category_page {
$PROV{ $definitions_ref->{$category_type}{prov} }{name}{$lang};
my $signature = $master_voc->signature($id);
my $label = $master_voc->label( $lang, $id );
+ $label =~ s/"/\\"/g;
my $backlinktitle =
$lang eq 'en'
? 'Category Overview'
diff --git a/create_euipo.pl b/create_euipo.pl
index 763c80f..a3f6399 100644
--- a/create_euipo.pl
+++ b/create_euipo.pl
@@ -7,7 +7,8 @@
use strict;
use warnings;
-use utf8;
+use autodie;
+use utf8::all;
use Data::Dumper;
use JSON;
diff --git a/create_filmlists.pl b/create_filmlists.pl
index 00d5577..38bc6c0 100644
--- a/create_filmlists.pl
+++ b/create_filmlists.pl
@@ -5,7 +5,8 @@
use strict;
use warnings;
-use utf8;
+use autodie;
+use utf8::all;
use Data::Dumper;
use JSON;
@@ -17,7 +18,7 @@
my $filmdata_root = path('../data/filmdata');
my $img_file = $filmdata_root->child('img_count.json');
my $ip_hints =
- path('../web/templates/fragments/ip_hints.de.md.frag')->slurp_utf8;
+ path('../web/templates/fragments/ip_hints.de.md.frag')->slurp;
my %page = (
h => {
@@ -72,7 +73,7 @@
);
# TEMPORARY: remove path
-my $img_count = decode_json( $img_file->slurp );
+my $img_count = decode_json( $img_file->slurp_raw );
my %img_cnt;
foreach my $key ( keys %{$img_count} ) {
my $shortkey = substr( $key, 18 );
@@ -90,7 +91,7 @@
my $zotero_file = $filmdata_root->child("zotero.$page_name.json");
my %zotero_film;
if ( -f $zotero_file ) {
- %zotero_film = %{ decode_json( $zotero_file->slurp ) };
+ %zotero_film = %{ decode_json( $zotero_file->slurp_raw ) };
}
# some header information for the page
@@ -119,7 +120,7 @@
$filmfile = $filmdata_root->child( $page_name . '.json' );
}
my @film_sections =
- @{ decode_json( $filmfile->slurp ) };
+ @{ decode_json( $filmfile->slurp_raw ) };
# iterate through the list of film sections (from the excel file)
foreach my $film_section (@film_sections) {
diff --git a/create_filmviewer_links.pl b/create_filmviewer_links.pl
index 2249f0b..8353cc8 100644
--- a/create_filmviewer_links.pl
+++ b/create_filmviewer_links.pl
@@ -8,9 +8,8 @@
use strict;
use warnings;
-use utf8;
-
-use lib './lib';
+use autodie;
+use utf8::all;
use Data::Dumper;
use JSON;
diff --git a/create_folder_list.pl b/create_folder_list.pl
index 5eb7a7b..fa8439d 100644
--- a/create_folder_list.pl
+++ b/create_folder_list.pl
@@ -9,9 +9,8 @@
use strict;
use warnings;
-use utf8;
-
-use lib './lib';
+use autodie;
+use utf8::all;
use Data::Dumper;
use HTML::Template;
@@ -22,7 +21,6 @@
use YAML;
use ZBW::PM20x::Folder;
-binmode( STDOUT, ":utf8" );
$Data::Dumper::Sortkeys = 1;
Readonly my $FOLDER_DATA => path('/pm20/data/rdf/pm20.extended.jsonld');
@@ -268,7 +266,7 @@ sub load_ids {
my $coll_id_ref = shift or die "param missing";
# create a list of numerical keys for each collection
- my $data = decode_json( $FOLDER_DATA->slurp );
+ my $data = decode_json( $FOLDER_DATA->slurp_raw );
foreach my $entry ( @{ $data->{'@graph'} } ) {
$entry->{identifier} =~ m/^(co|pe|sh|wa)\/(\d{6}(?:,\d{6})?)$/;
push( @{ $coll_id_ref->{$1} }, $2 );
diff --git a/create_folder_pages.pl b/create_folder_pages.pl
index 9158caf..fa52634 100644
--- a/create_folder_pages.pl
+++ b/create_folder_pages.pl
@@ -10,12 +10,10 @@
use strict;
use warnings;
-use utf8;
-
-use lib './lib';
+use autodie;
+use utf8::all;
use Data::Dumper;
-use Encode;
use HTML::Template;
use JSON;
use Path::Tiny;
@@ -400,6 +398,7 @@ sub mk_folder {
}
}
}
+
# film sections, do not exist for persons
if ( $collection eq 'co' ) {
my $company_id = "co/$folder_nk";
@@ -433,12 +432,12 @@ sub mk_folder {
my @filmsection_loop;
foreach my $section ( sort @filmsectionlist ) {
- my $film_id = substr( $section->{'@id'}, 25 );
- my $entry = {
+ my $section_id = substr( $section->{'@id'}, 25 );
+ my $entry = {
"is_$lang" => 1,
- filmviewer_url => "https://pm20.zbw.eu/film/$film_id",
- film_id => $film_id,
- first_img => $section->{title},
+ filmviewer_url => "https://pm20.zbw.eu/film/$section_id",
+ section_id => $section_id,
+ section_label => $section->{title},
};
push( @filmsection_loop, $entry );
}
@@ -491,7 +490,7 @@ sub load_ids {
my $folder_id = shift;
# create a list of numerical keys for each collection
- my $data = decode_json( $FOLDER_DATA->slurp );
+ my $data = decode_json( $FOLDER_DATA->slurp_raw );
foreach my $entry ( @{ $data->{'@graph'} } ) {
$entry->{identifier} =~ m/^(co|pe|sh|wa)\/(\d{6}(?:,\d{6})?)$/;
push( @{ $coll_id->{$1} }, $2 );
@@ -793,8 +792,7 @@ sub add_schema_jsonld {
'@graph' => [$schema_data_ref],
};
- # will be utf8-encoded later in template
- return decode( 'UTF-8', encode_json($schema_ld) );
+ return $schema_ld;
}
sub add_jsonld {
@@ -805,8 +803,7 @@ sub add_jsonld {
'@graph' => [$folderdata_raw],
};
- # will be utf8-encoded later in template
- return decode( 'UTF-8', encode_json($ld) );
+ return $ld;
}
sub get_wd_uri {
diff --git a/create_folder_tree.pl b/create_folder_tree.pl
index 0a808db..d44b274 100644
--- a/create_folder_tree.pl
+++ b/create_folder_tree.pl
@@ -11,8 +11,8 @@
use strict;
use warnings;
-
-use lib './lib';
+use autodie;
+use utf8::all;
use Data::Dumper;
use JSON;
@@ -116,7 +116,7 @@ sub load_files {
my $collection = shift || die "param missing";
$imagedata_file = $IMAGEDATA_ROOT->child("${collection}_image.json");
- $imagedata_ref = decode_json( $imagedata_file->slurp );
+ $imagedata_ref = decode_json( $imagedata_file->slurp_raw );
}
sub usage {
diff --git a/create_iiif_img.pl b/create_iiif_img.pl
index 637e215..6eb808e 100644
--- a/create_iiif_img.pl
+++ b/create_iiif_img.pl
@@ -8,11 +8,10 @@
use strict;
use warnings;
-
-use lib './lib';
+use autodie;
+use utf8::all;
use Data::Dumper;
-use Encode;
use HTML::Entities;
use HTML::Template;
use Image::Thumbnail;
@@ -150,9 +149,9 @@ sub load_files {
my $collection = shift || die "param missing";
$imagedata_file = $IMAGEDATA_ROOT->child("${collection}_image.json");
- $imagedata_ref = decode_json( $imagedata_file->slurp );
+ $imagedata_ref = decode_json( $imagedata_file->slurp_raw );
$imagesize_file = $IMAGEDATA_ROOT->child("${collection}_size.json");
- $imagesize_ref = decode_json( $imagesize_file->slurp );
+ $imagesize_ref = decode_json( $imagesize_file->slurp_raw );
}
sub get_max_image_fn {
diff --git a/create_iiif_manifest.pl b/create_iiif_manifest.pl
index 9b78f35..c973059 100644
--- a/create_iiif_manifest.pl
+++ b/create_iiif_manifest.pl
@@ -10,12 +10,10 @@
use strict;
use warnings;
-use utf8;
-
-use lib './lib';
+use autodie;
+use utf8::all;
use Data::Dumper;
-use Encode;
use HTML::Entities;
use HTML::Template;
use JSON;
@@ -180,13 +178,13 @@ sub load_files {
my $collection = shift || die "param missing";
$docdata_file = $DOCDATA_ROOT->child("${collection}_docdata.json");
- $docdata_ref = decode_json( $docdata_file->slurp );
+ $docdata_ref = decode_json( $docdata_file->slurp_raw );
$imagedata_file = $IMAGEDATA_ROOT->child("${collection}_image.json");
- $imagedata_ref = decode_json( $imagedata_file->slurp );
+ $imagedata_ref = decode_json( $imagedata_file->slurp_raw );
$imagesize_file = $IMAGEDATA_ROOT->child("${collection}_size.json");
- $imagesize_ref = decode_json( $imagesize_file->slurp );
+ $imagesize_ref = decode_json( $imagesize_file->slurp_raw );
$folderdata_file = $FOLDERDATA_ROOT->child("${collection}_label.json");
- $folderdata_ref = decode_json( $folderdata_file->slurp );
+ $folderdata_ref = decode_json( $folderdata_file->slurp_raw );
}
sub get_max_image_fn {
diff --git a/create_mets.pl b/create_mets.pl
index 55e48bc..6b1f20d 100644
--- a/create_mets.pl
+++ b/create_mets.pl
@@ -13,12 +13,10 @@
use strict;
use warnings;
-use utf8;
-
-use lib './lib';
+use autodie;
+use utf8::all;
use Data::Dumper;
-use Encode;
use HTML::Entities qw(encode_entities_numeric);
use HTML::Template;
use JSON;
@@ -151,7 +149,7 @@ sub mk_folder {
sub load_files {
my $collection = shift || die "param missing";
$imagedata_file = $IMAGEDATA_ROOT->child("${collection}_image.json");
- $imagedata_ref = decode_json( $imagedata_file->slurp );
+ $imagedata_ref = decode_json( $imagedata_file->slurp_raw );
}
sub build_file_grp {
diff --git a/create_prev_next_film_inc.pl b/create_prev_next_film_inc.pl
index a2ec67b..729dd65 100644
--- a/create_prev_next_film_inc.pl
+++ b/create_prev_next_film_inc.pl
@@ -5,6 +5,8 @@
use strict;
use warnings;
+use autodie;
+use utf8::all;
use Path::Tiny;
diff --git a/create_sitemap.pl b/create_sitemap.pl
index 4ba311e..b496e13 100644
--- a/create_sitemap.pl
+++ b/create_sitemap.pl
@@ -5,11 +5,9 @@
use strict;
use warnings;
-
+use autodie;
use utf8;
-use lib './lib';
-
use Data::Dumper;
use List::MoreUtils qw/uniq/;
use Path::Tiny;
diff --git a/film_iiif_img.pl b/film_iiif_img.pl
index e5f9299..8b2e2aa 100644
--- a/film_iiif_img.pl
+++ b/film_iiif_img.pl
@@ -8,11 +8,10 @@
use strict;
use warnings;
-
-use lib './lib';
+use autodie;
+use utf8::all;
use Data::Dumper;
-use Encode;
use HTML::Entities;
use HTML::Template;
use Image::Size;
diff --git a/film_mets.pl b/film_mets.pl
index f1c4d6d..9d5d903 100644
--- a/film_mets.pl
+++ b/film_mets.pl
@@ -13,12 +13,10 @@
use strict;
use warnings;
-use utf8;
-
-use lib './lib';
+use autodie;
+use utf8::all;
use Data::Dumper;
-use Encode;
use HTML::Entities qw(encode_entities_numeric);
use HTML::Template;
use JSON;
@@ -139,7 +137,7 @@ sub mk_folder {
sub load_files {
my $subset = shift || die "param missing";
$imagedata_file = $IMAGEDATA_ROOT->child("${subset}_image.json");
- $imagedata_ref = decode_json( $imagedata_file->slurp );
+ $imagedata_ref = decode_json( $imagedata_file->slurp_raw );
}
sub build_file_grp {
diff --git a/filming_def.yaml b/filming_def.yaml
index 98ee5de..47afa08 100644
--- a/filming_def.yaml
+++ b/filming_def.yaml
@@ -1,14 +1,14 @@
1:
title:
- en: Sections of digitized microfilms (1st filming 1908-1949)
- de: Abschnitte von digitalisierten Mikrofilmen (1. Verfilmung 1908-1949)
+ en: 1st filming 1908-1949
+ de: 1. Verfilmung 1908-1949
legal:
en: For intellectual property law reasons accessible only on the web from the European Union legal area and from the ZBW reading room.
de: Aus urheberrechtlichen Gründen im Web nur aus dem EU-Rechtsraum und im ZBW-Lesesaal zugänglich.
2:
title:
- en: Sections of digitized microfilms (2nd filming 1950-1960)
- de: Abschnitte von digitalisierten Mikrofilmen (2. Verfilmung 1950-1960)
+ en: 2nd filming 1950-1960
+ de: 2. Verfilmung 1950-1960
legal:
en: For intellectual property law reasons accessible only from ZBW reading room.
de: Aus urheberrechtlichen Gründen nur im ZBW-Lesesaal zugänglich.
diff --git a/folder2pdf.pl b/folder2pdf.pl
index 08d1bd2..6bb2449 100755
--- a/folder2pdf.pl
+++ b/folder2pdf.pl
@@ -14,7 +14,8 @@
use strict;
use warnings;
-use utf8;
+use autodie;
+use utf8::all;
use Data::Dumper;
use File::Temp;
diff --git a/html_tmpl/category.md.tmpl b/html_tmpl/category.md.tmpl
index 74e0822..f41701a 100644
--- a/html_tmpl/category.md.tmpl
+++ b/html_tmpl/category.md.tmpl
@@ -17,25 +17,27 @@ Scope Note:
-[Sacharchiv](#sacharchiv-mappen) [Warenarchiv](#warenarchiv-mappen)
-[Subject archives](#subject-archives-folders) [Commodities/wares archives](#commoditieswares-archives-folders)
+[Sacharchiv](#sacharchiv) [Warenarchiv](#warenarchiv)
+[Subject archives](#subject-archives) [Commodities/wares archives](#commoditieswares-archives)
##
+### MappenFolders
+
-Insgesamt Mappen, Dokumente - Mappen bis 1949 komplettunvollständig. Weiteres Material auf digitalisiertem Mikrofilm [siehe unten](#filmsections).
+Insgesamt Mappen, Dokumente - Mappen bis 1949 komplettunvollständig. Weiteres Material auf digitalisiertem Mikrofilm [siehe unten](#geo_filmsections).
_Klicken Sie "(xy Dokumente)" für die Dokumentanzeige im Viewer und "(Mappe)" für Mappeninformation._
-In total folders, documents - folders complete until 1949incomplete. Further material on digitized microfilm [see below](#filmsections).
+In total folders, documents - folders complete until 1949incomplete. For further material on digitized microfilm [see below](#geo_filmsections).
_For direct access to the documents, click the "(xy documents)" link, for folder information use the "(folder)" link._
@@ -59,7 +61,7 @@ _For direct access to the documents, click the "(xy documents)" link, for folder
Insgesamt Mappen, Dokumente - Mappen bis 1949 komplettunvollständig.
-Nicht als Mappe aufbereitetes Material finden Sie unter [digitalisierte Microfilme](/film/h1_sh.de.html).
+Nicht als Mappe aufbereitetes Material finden Sie unter [digitalisierte Mikrofilme](/film/h1_sh.de.html).
_Klicken Sie "(xy Dokumente)" für die Dokumentanzeige im Viewer und "(Mappe)" für Mappeninformation._
@@ -85,6 +87,8 @@ _For direct access to the documents, click the "(xy documents)" link, for folder
+
+
Insgesamt Mappen (bis 1949), Dokumente
- Mappen unvollständig. Nicht als Mappe aufbereitetes Material finden Sie
@@ -94,16 +98,23 @@ und der [2. Verfilmung](/film/h2_wa.de.html).
_Klicken Sie "(xy Dokumente)" für die Dokumentanzeige im Viewer und "(Mappe)" für Mappeninformation._
-In total folders (until 1949), documents
-- folders incomplete. For material not published as folders, please check the
-digitized micro-films of the [1st filming](/film/h1_wa.de.html) and [2nd
-filming](/film/h2_wa.de.html) (in German).
+In total folders (until 1949), documents - folders incomplete. For further material on
+digitized microfilm [see below](#ware_filmsections).
_For direct access to the documents, click the "(xy documents)" link, for folder information use the "(folder)" link._
-
+
+
+Kein als Mappe erschlossenes Material.No material prepared as folder.
+
+
+
+
+
+
diff --git a/html_tmpl/filmsections.md.inc b/html_tmpl/filmsections.md.inc
index 5626964..8f7ef77 100644
--- a/html_tmpl/filmsections.md.inc
+++ b/html_tmpl/filmsections.md.inc
@@ -1,23 +1,46 @@
-
+
+
+
+### Abschnitte von digitalisierten Mikrofilmen
+
+Die Erschließung der digitalisierten Mikrofilme ist work in progress. Sie wird, Film für Film, von Mitgliedern des [Wikipedia Projekt Pressearchiv](https://de.wikipedia.org/wiki/Wikipedia:Projekt_Pressearchiv) in Zusammenarbeit mit der ZBW geleistet. Einige Filme sind vollständig erschlossen, einige nur auf oberster Ebene, andere gar nicht. Die folgende Liste von Filmabschnitten ist daher unvollständig. TODO: für unterschiedliche Fälle präzisieren!
+
+Weiteres Material über "" ist verstreut über die Filmlisten, die primär geordnet sind. ([1. Verfilmung](), [2. Verfilmung]()).
+
+[Ausgegraute Abschnittsnamen]{.gray} zeigen lediglich einen zufälligen Einschnitt, den Beginn eines neuen Films, an. Das entsprechende Thema kann bereits auf dem vorigen Film begonnen haben, und die angezeigte Anzahl Aufnahmen umfasst meist nicht nur das genannte Thema.
+
+
+
+### Sections of digitized microfilms
+
+The indexing of the unprocessed digitized microfilms is work in progress. It is conducted, film by film, by members of the [Wikipedia Projekt Pressearchiv](https://de.wikipedia.org/wiki/Wikipedia:Projekt_Pressearchiv) in cooperation with ZBW. Some films are fully indexed, some only top-level, some not at all. The list of film sections below is therefore uncomplete. TODO: put more precisely for different cases.
+
+Further material about "" is dispersed across the film lists, which are primarily ordered ([1st filming](), [2nd filming]()).
+
+[Grayed-out section labels]{.gray} indicates just an arbitrary cut at the beginning of an new film. The topic may have already started on the previous film, and the number of images may comprise much more than the given topic.
+
+
-##
+#### -
+
Gesamtzahl der Mikrofilmaufnahmen: unbekannt - derzeit ggf. auffindbar nur über die [Filmliste]().
Total number of images: unknown - currently only findable via the [film list]().
+
::: {.sectiontable}
|
-----|-------
-Bilder auf Film Images on film |
+--|-------
+ | _[[(Aufnahmenimages)]{.hint}]{.gray}_
:::
diff --git a/imagesize.pl b/imagesize.pl
index 17f1eef..138431f 100644
--- a/imagesize.pl
+++ b/imagesize.pl
@@ -7,6 +7,8 @@
use strict;
use warnings;
+use autodie;
+use utf8::all;
use Data::Dumper;
use Devel::Size qw/ total_size /;
diff --git a/img_to_public.pl b/img_to_public.pl
index a215097..3263188 100755
--- a/img_to_public.pl
+++ b/img_to_public.pl
@@ -16,8 +16,8 @@
use strict;
use warnings;
-use utf8;
-use lib './lib';
+use autodie;
+use utf8::all;
use Data::Dumper;
use JSON;
@@ -25,8 +25,6 @@
use YAML::Tiny;
use ZBW::PM20x::Vocab;
-binmode( STDOUT, ":utf8" );
-
my $film_root = path('/pm20/film/');
my $pub_film_root = path('/pm20/web/film/');
my $klassdata_root = path('/pm20/data/klassdata/');
diff --git a/lib/ZBW/PM20x/Film.pm b/lib/ZBW/PM20x/Film.pm
index 380d1d7..db1cc6a 100644
--- a/lib/ZBW/PM20x/Film.pm
+++ b/lib/ZBW/PM20x/Film.pm
@@ -4,9 +4,10 @@ package ZBW::PM20x::Film;
use strict;
use warnings;
-use utf8;
+use autodie;
+use utf8::all;
-use Carp qw/ cluck confess croak /;
+use Carp;
use Data::Dumper;
use JSON;
use Path::Tiny;
@@ -18,9 +19,10 @@ Readonly my $IMG_COUNT => _init_img_count();
# items in a collection are primarily grouped by $type, identified by zotero
# or filmlist properties
+# CAUTION: for geo categories, subject, ware and company categories are related!
Readonly my %GROUPING_PROPERTY => (
co => {
- ## ignore countries for now!
+ ## ignore countries for now! (logically primary category for companies?)
primary_group => {
type => 'company',
zotero => 'pm20Id',
@@ -43,6 +45,7 @@ Readonly my %GROUPING_PROPERTY => (
type => 'geo',
zotero => 'geo_id',
filmlist => 'start_company_id',
+ jsonld => 'country',
rdf_pred => 'zbwext:country',
rdf_prefix => 'pm20geo',
},
@@ -59,6 +62,7 @@ Readonly my %GROUPING_PROPERTY => (
secondary_group => {
type => 'subject',
zotero => 'subject_id',
+ jsonld => 'subject',
rdf_pred => 'zbwext:subject',
rdf_prefix => 'pm20subject',
},
@@ -69,9 +73,10 @@ Readonly my %GROUPING_PROPERTY => (
# $SECTION = { $section_uri => { img_count, ...} }
# $FOLDER = { $collection => { $folder_nk => { $filming => [ $section_uri, ... ] } } }
# $CATEGORY = { $category_type => { $category_id => { $filming => [ $section_uri ... ] } } }
+# $CATEGORY_INV = { $type => { $secondary_category_id => { $filming => [ $section_uri ... ] } } }
# DOES NOT WORK WITH Readonly!
##Readonly my ( $FILM, $SECTION, $FOLDER, $CATEGORY ) => _load_filmdata();
-my ( $FILM, $SECTION, $FOLDER, $CATEGORY ) = _load_filmdata();
+my ( $FILM, $SECTION ) = _load_filmdata();
=encoding utf8
@@ -85,7 +90,6 @@ ZBW::PM20x::Film - Functions for PM20 microfilms
use ZBW::PM20x::Film;
my $film = ZBW::PM20x::Film->new('h1/sh/S0073H_1');
my @films = ZBW::PM20x::Film->films('h1_sh');
- my @folder_sections = ZBW::PM20x::Film->foldersections('co/004711', 1);
my $film_name = $film->name(); # S0073H_1
my $logical_name = $film->logigcal_name(); # S0073H
@@ -105,7 +109,7 @@ called logical film.
=item new ($film_id)
-Return a new film object for the film id.
+Return a new film object for the film id (e.g., 'h1/wa/W0186H').
=cut
@@ -113,24 +117,23 @@ sub new {
my $class = shift or croak('param missing');
my $film_id = shift or croak('param missing');
- my ( $set, $collection, $film_name );
-
- # TODO check/extend for Kiel films
- # NB a film named "S0901aH" exists!
- if ( $film_id =~ m;^(h[12])/(co|wa|sh)/([AFSW]\d{4}a?H(_[12])?$)$; ) {
- $set = $1;
- $collection = $2;
- $film_name = $3;
- } else {
+ if ( not $class->valid($film_id) ) {
confess "Invalid film id $film_id";
}
+ $film_id =~ m;^(h[12])/(co|wa|sh)/([AFSW]\d{4}a?H(_[12])?$)$;;
+ my $set = $1;
+ my $collection = $2;
+ my $film_name = $3;
+ my $uri = $FILM_ROOT_URI . $film_id;
+
my $self = {
film_id => $film_id,
set => $set,
collection => $collection,
film_name => $film_name,
- uri => $FILM_ROOT_URI . $film_id,
+ uri => $uri,
+ status => $FILM->{$uri}{status},
};
bless $self, $class;
@@ -156,19 +159,6 @@ sub new_from_location {
return $class->new($film_id);
}
-=item get_grouping_properties ($collection)
-
-Return metadata structure about the grouping properties for a collection.
-
-=cut
-
-sub get_grouping_properties {
- my $class = shift or croak('param missing');
- my $collection = shift or croak('param missing');
-
- return $GROUPING_PROPERTY{$collection};
-}
-
=item films ($subset)
Return a list of films sorted by film id for a subset (e.g. "h1_sh"). (Films
@@ -185,9 +175,13 @@ sub films {
my $subset_path = $subset =~ s/_/\//r;
- foreach my $film_id ( keys %{$IMG_COUNT} ) {
+ foreach my $film_id ( sort keys %{$IMG_COUNT} ) {
next unless $film_id =~ m/^$subset_path\//;
+ # skip film image sets which are already online as folders
+ # (and therefore not part of film dataset)
+ next unless $FILM->{"$FILM_ROOT_URI$film_id"};
+
# fix error with redundant _1/_2 and full films (e.g. A0023H)
next
if (defined $IMG_COUNT->{ ${film_id} . '_1' }
@@ -208,51 +202,41 @@ sub films {
return @films;
}
-=item foldersections ($folder_id, $filming)
+=item valid ($film_id)
-Return a list of film sections for the folder, for a certain filming (1|2).
-Currently, only for collection 'co'.
+Returns 1 if a $film_id is valid (id is formally valid and film is not empty or
+already online), undef otherwise.
=cut
-sub foldersections {
- my $class = shift or croak('param missing');
- my $folder_id = shift or croak('param missing');
- my $filming = shift or croak('param missing');
-
- my @sectionlist;
- my ( $collection, $folder_nk ) = $folder_id =~ m;^(co)/(\d{6})$;;
- foreach my $section_uri ( @{ $FOLDER->{$collection}{$folder_nk}{$filming} } )
- {
- my %entry = ( $section_uri => $SECTION->{$section_uri}, );
- push( @sectionlist, $SECTION->{$section_uri} );
- }
- return @sectionlist;
-}
-
-=item categorysections ($category_type, $category_id, $filming)
-
-Return a list of film sections for the category, for a certain filming (1|2).
-Currently, only works for the primary category. (geo for sh, ware for wa)
+sub valid {
+ my $class = shift or croak('param missing');
+ my $film_id = shift or croak('param missing');
-=cut
+ # formally valid film id
+ my ( $set, $collection, $film_name, $uri );
-sub categorysections {
- my $class = shift or croak('param missing');
- my $category_type = shift or croak('param missing');
- my $category_id = shift or croak('param missing');
- my $filming = shift or croak('param missing');
+ # TODO check/extend for Kiel films
+ # NB a film named "S0901aH" exists!
+ if ( $film_id =~ m;^(h[12])/(co|wa|sh)/([AFSW]\d{4}a?H(_[12])?$)$; ) {
+ $set = $1;
+ $collection = $2;
+ $film_name = $3;
+ $uri = $FILM_ROOT_URI . $film_id;
+ } else {
+ carp("Invalid film id $film_id");
+ return;
+ }
- my @sectionlist;
+ # TODO check with collection-specific regex
-# $CATEGORY = { $category_type => { $category_id => { $filming => [ $section_uri ... ] } } }
- foreach
- my $section_uri ( @{ $CATEGORY->{$category_type}{$category_id}{$filming} } )
- {
- my %entry = ( $section_uri => $SECTION->{$section_uri}, );
- push( @sectionlist, $SECTION->{$section_uri} );
+ # do not accept ids for films which are not in the film dataset
+ # (may be non-existing or already online as folder)
+ if ( not defined $FILM->{$uri} ) {
+ return;
}
- return @sectionlist;
+
+ return 1;
}
=back
@@ -261,6 +245,20 @@ sub categorysections {
=over 2
+=item id ()
+
+Return the film identifier (e.g., h1/sh/S0073H_1).
+
+=cut
+
+sub id {
+ my $self = shift or croak('param missing');
+
+ my $id = $self->{film_id};
+
+ return $id;
+}
+
=item name ()
Return the actual name of the film (e.g., S0073H_1).
@@ -292,7 +290,7 @@ sub logical_name {
=item sections ()
-Return a list of film sections for a film.
+Return a list of film sections for a film.
=cut
@@ -301,7 +299,7 @@ sub sections {
my @section_uris = ();
if ( not defined $FILM->{ $self->{uri} }{sections} ) {
- warn "No sections for ", Dumper $self;
+ carp "No sections for ", Dumper $self;
} else {
@section_uris = @{ $FILM->{ $self->{uri} }{sections} };
}
@@ -314,7 +312,7 @@ sub sections {
=item img_count ()
-Return the numer of images files under the film directory.
+Return the numer of image files under the film directory.
=cut
@@ -326,6 +324,24 @@ sub img_count {
return $img_count;
}
+=item status ()
+
+Returns one of the following processing stati:
+
+- indexed - film is completly indexed
+
+- unindexed - film is not indexed (only country start entries for sh)
+
+=cut
+
+sub status {
+ my $self = shift or croak('param missing');
+
+ my $status = $self->{status};
+
+ return $status;
+}
+
=back
=cut
@@ -354,10 +370,12 @@ sub _init_img_count {
sub _load_filmdata {
- my ( $FILM, $SECTION, $FOLDER, $CATEGORY );
+ my ( $FILM, $SECTION );
- my $film_file = path('../data/rdf/film.jsonld');
- my @filmdata = @{ decode_json( $film_file->slurp )->{'@graph'} };
+ # opening _raw is necessary to avoid "Wide character ..." problem with
+ # decode_json (slurp_utf8 does not work!)
+ my $film_file = path('/pm20/data/rdf/film.jsonld');
+ my @filmdata = @{ decode_json( $film_file->slurp_raw )->{'@graph'} };
foreach my $filmdata_ref (@filmdata) {
my $type = $filmdata_ref->{'@type'};
@@ -372,47 +390,20 @@ sub _load_filmdata {
}
}
- # films, folders and categories
+ # add sections to films
foreach my $section_uri ( sort keys %{$SECTION} ) {
- $section_uri =~ m;/film/h(1|2)/(co|wa|sh)/(.+)?/(\d+)(?:/(R|L))?$;;
- my $filming = $1;
- my $collection = $2;
- my $film_name = $3;
- my $img_nr = $4;
- my $rl = $5;
- my $section_ref = $SECTION->{$section_uri};
-
- # films
- ( my $film_uri = $section_uri ) =~ s/^((?:.+)?\/$film_name).+/$1/;
+ ( my $film_uri ) =
+ $section_uri =~ m;^(.+?/film/[hk][12]/(?:co|sh|wa)/.+?)/.+$;;
push( @{ $FILM->{$film_uri}{sections} }, $section_uri );
+ }
- # folders (currently only for co)
- if ( my $pm20_uri = $section_ref->{about}{'@id'} ) {
- $pm20_uri =~ m;folder/co/(\d{6});;
- my $folder_nk = $1;
- push( @{ $FOLDER->{$collection}{$folder_nk}{$filming} }, $section_uri );
- }
+ return $FILM, $SECTION;
+}
- # categories
- else {
- my $grp_prop_ref = ZBW::PM20x::Film->get_grouping_properties($collection);
- my $category_type = $grp_prop_ref->{primary_group}{type};
- my $category_prop = $grp_prop_ref->{primary_group}{jsonld};
-
- if ( $section_ref->{$category_prop}
- and my $category_uri = $section_ref->{$category_prop}{'@id'} )
- {
- $category_uri =~ m;category/$category_type/i/(\d{6});;
- my $category_id = $1;
- push(
- @{ $CATEGORY->{$category_type}{$category_id}{$filming} },
- $section_uri
- );
- }
- }
- }
- return $FILM, $SECTION, $FOLDER, $CATEGORY;
+# use only to transmit the pointer to Film::Section
+sub _SECTION() {
+ return $SECTION;
}
1;
diff --git a/lib/ZBW/PM20x/Film/Section.pm b/lib/ZBW/PM20x/Film/Section.pm
new file mode 100644
index 0000000..f6b1456
--- /dev/null
+++ b/lib/ZBW/PM20x/Film/Section.pm
@@ -0,0 +1,696 @@
+# nbt, 2025-11-15
+
+package ZBW::PM20x::Film::Section;
+
+use strict;
+use warnings;
+use autodie;
+use utf8::all;
+
+use Carp;
+use Data::Dumper;
+use JSON;
+use Path::Tiny;
+use Readonly;
+use ZBW::PM20x::Film;
+use ZBW::PM20x::Vocab;
+
+Readonly my $FILM_ROOT_URI => 'https://pm20.zbw.eu/film/';
+
+# items in a collection are primarily grouped by $type, identified by zotero
+# or filmlist properties
+# CAUTION: for geo categories, subject, ware and company categories are related!
+Readonly my %GROUPING_PROPERTY => (
+ co => {
+ ## ignore countries for now! (logically primary category for companies?)
+ primary_group => {
+ type => 'company',
+ zotero => 'pm20_id',
+ filmlist => 'start_company_id',
+ jsonld => 'about',
+ rdf_pred => 'schema:about',
+ rdf_prefix => 'pm20co',
+ },
+ },
+ wa => {
+ primary_group => {
+ type => 'ware',
+ zotero => 'ware_id',
+ filmlist => 'start_ware_id',
+ jsonld => 'ware',
+ rdf_pred => 'zbwext:ware',
+ rdf_prefix => 'pm20ware',
+ },
+ secondary_group => {
+ type => 'geo',
+ zotero => 'geo_id',
+ filmlist => 'start_geo_id',
+ jsonld => 'country',
+ rdf_pred => 'zbwext:country',
+ rdf_prefix => 'pm20geo',
+ },
+ },
+ sh => {
+ primary_group => {
+ type => 'geo',
+ zotero => 'geo_id',
+ filmlist => 'start_geo_id',
+ jsonld => 'country',
+ rdf_pred => 'zbwext:country',
+ rdf_prefix => 'pm20geo',
+ },
+ secondary_group => {
+ type => 'subject',
+ zotero => 'subject_id',
+ filmlist => 'start_subject_id',
+ jsonld => 'subject',
+ rdf_pred => 'zbwext:subject',
+ rdf_prefix => 'pm20subject',
+ },
+ },
+);
+
+# Film structures
+
+# $FILM = { $film_id => { total_image_count, ... }, sections => [ $section_uri, ... ] }
+# $SECTION = { $section_uri => { img_count, ...} }
+
+# Film::Section structures
+
+# $SECTION = { $section_uri => $section, ... }
+# $FOLDER = { $collection => { $folder_nk => { $filming => [ $section_uri, ... ] } } }
+# $CATEGORY = { $category_type => { $category_id => { $filming => [ $section_uri ... ] } } }
+# $CATEGORY_INV = { $type => { $secondary_category_id => { $filming => [ $section_uri ... ] } } }
+
+my ( $SECTION, $FOLDER, $CATEGORY, $CATEGORY_INV );
+( $SECTION, $FOLDER, $CATEGORY, $CATEGORY_INV ) = _init_data();
+
+my %vocab = (
+ subject => undef,
+ geo => undef,
+ ware => undef,
+);
+
+=encoding utf8
+
+=head1 NAME
+
+ZBW::PM20x::Film::Section - Functions for sections of PM20 microfilms
+
+=head1 SYNOPSIS
+
+ use ZBW::PM20x::Film::Section;
+ my $section = ZBW::PM20x::Film::Section->init_from_uri('https://pm20.zbw.eu/film/h1/sh/S0373H/0002');
+ my $section = ZBW::PM20x::Film::Section->init_from_id('h1/sh/S0373H/1115/R');
+
+ my @folder_sections = ZBW::PM20x::Film::Section->foldersections('co/004711', 1);
+
+=head1 DESCRIPTION
+
+A film section is defined by the id of the (physical) film and the sequential
+number of the image with which the section starts, plus optionally /(L|R) for
+the left or right page on the image. The sequential number is derived from the
+image file name.
+
+Currently, a section may span both parts of a split film (_1/_2).
+
+=head1 Class methods
+
+=over 2
+
+=item init_from_uri ($uri)
+
+Return a filmsection object for the filmsection uri from film data.
+
+=cut
+
+sub init_from_uri {
+ my $class = shift or croak('param missing');
+ my $section_uri = shift or croak('param missing');
+
+ my ( $section_id, $film_id, $img_nr, $img_pos );
+
+ # TODO check/extend for Kiel films
+ # NB a film named "S0901aH" exists!
+ if ( $section_uri =~
+m;^${FILM_ROOT_URI}((h[12]/(?:co|wa|sh)/[AFSW]\d{4}a?H(?:_[12])?)/(\d{4})(?:/([LR]))?)$;
+ )
+ {
+ $section_id = $1;
+ $film_id = $2;
+ $img_nr = $3;
+ $img_pos = $4;
+ } else {
+ confess "Invalid film section uri $section_uri";
+ }
+
+ my $self = $SECTION->{$section_uri};
+ if ( not $self ) {
+ confess "Invalid film section uri $section_uri";
+ }
+
+ bless $self, $class;
+
+ return $self;
+}
+
+=item init_from_id ($section_id)
+
+Return a filmsection object for the filmsection id.
+
+=cut
+
+sub init_from_id {
+ my $class = shift or croak('param missing');
+ my $section_id = shift or croak('param missing');
+
+ my $uri = $FILM_ROOT_URI . $section_id;
+
+ my $self = $class->init_from_uri($uri);
+
+ return $self;
+}
+
+=item foldersections ($folder_id, $filming)
+
+Return a list of film sections for the folder, for a certain filming (1|2).
+Currently, only for collection 'co'.
+
+=cut
+
+sub foldersections {
+ my $class = shift or croak('param missing');
+ my $folder_id = shift or croak('param missing');
+ my $filming = shift or croak('param missing');
+
+ my @sectionlist;
+ my ( $collection, $folder_nk ) = $folder_id =~ m;^(co)/(\d{6})$;;
+
+ foreach my $section_uri ( @{ $FOLDER->{$collection}{$folder_nk}{$filming} } )
+ {
+ my %entry = ( $section_uri => $SECTION->{$section_uri}, );
+ push( @sectionlist, $SECTION->{$section_uri} );
+ }
+ return @sectionlist;
+}
+
+=item categorysections ($category_type, $category_id, $filming)
+
+Return a list of film sections of type secondary for a certain primary category, for a
+certain filming (1|2).
+
+Valid $category_type are:
+
+=over 2
+
+=item *
+
+geo - retrieves a list of subject entries for this geo
+
+=item *
+
+ware - retrieves a list of geo entries for this ware
+
+=back
+
+=cut
+
+sub categorysections {
+ my $class = shift or croak('param missing');
+ my $category_type = shift or croak('param missing');
+ my $category_id = shift or croak('param missing');
+ my $filming = shift or croak('param missing');
+
+ croak("wrong category type $category_type")
+ unless $category_type =~ m/^(geo|ware)$/;
+
+ return unless $CATEGORY->{$category_type}{$category_id}{$filming};
+
+ my @sectionlist = @{ $CATEGORY->{$category_type}{$category_id}{$filming} };
+
+ return @sectionlist;
+}
+
+=item categorysections_inv ($category_type, $category_id, $filming)
+
+Inversely, return a list of film sections of type primary for a certain
+secondary category, for a certain filming (1|2).
+
+Valid $category_type are:
+
+=over 4
+
+=item *
+
+geo - retrieves a list of ware entries for this geo
+
+=item *
+
+subject - retrieves a list of geo entries for this subject
+
+=back
+
+=cut
+
+sub categorysections_inv {
+ my $class = shift or croak('param missing');
+ my $category_type = shift or croak('param missing');
+ my $category_id = shift or croak('param missing');
+ my $filming = shift or croak('param missing');
+
+ croak("wrong category type $category_type")
+ unless $category_type =~ m/^(geo|subject)$/;
+
+ return unless $CATEGORY_INV->{$category_type}{$category_id}{$filming};
+
+ my @sectionlist =
+ @{ $CATEGORY_INV->{$category_type}{$category_id}{$filming} };
+
+ return @sectionlist;
+}
+
+=item get_grouping_properties ($collection)
+
+Return metadata structure about the grouping properties for a collection.
+
+=cut
+
+sub get_grouping_properties {
+ my $class = shift or croak('param missing');
+ my $collection = shift or croak('param missing');
+
+ return $GROUPING_PROPERTY{$collection};
+}
+
+=item is_valid_section_uri ($uri)
+
+Returns 1 if the film section URI is valid.
+
+=cut
+
+sub is_valid_section_uri {
+ my $class = shift or croak('param missing');
+ my $uri = shift or croak('param missing');
+
+ my $is_valid;
+ if ( $uri =~
+ m;^${FILM_ROOT_URI}h[12]/(co|wa|sh)/[AFSW]\d{4}a?H(_[12])?/\d{4}(/[LR])?$; )
+ {
+ $is_valid = 1;
+ }
+
+ return $is_valid;
+}
+
+=back
+
+=head1 Instance methods
+
+=over 2
+
+=item id ()
+
+Return the id of the section (e.g., h1/sh/S0373H/1115/R).
+
+=cut
+
+sub id {
+ my $self = shift or croak('param missing');
+
+ my $uri = $self->{'@id'};
+ my ($id) = $uri =~
+m;^${FILM_ROOT_URI}(h[12]/(?:co|wa|sh)/[AFSW]\d{4}a?H(?:_[12])?/\d{4}(?:/[LR])?)$;;
+
+ return $id;
+}
+
+=item uri ()
+
+Return the URI of the section (e.g., https://pm20.zbw.eu/film/h1/sh/S0373H/1115/R).
+
+=cut
+
+sub uri {
+ my $self = shift or croak('param missing');
+
+ my $uri = $self->{'@id'};
+
+ return $uri;
+}
+
+=item collection ()
+
+Return the collection for the section.
+
+=cut
+
+sub collection {
+ my $self = shift or croak('param missing');
+
+ # extract from uri
+ my ($collection) = $self->{'@id'} =~ m;/(co|wa|sh)/;;
+
+ return $collection;
+}
+
+=item filming ()
+
+Return the filming for the section.
+
+=cut
+
+sub filming {
+ my $self = shift or croak('param missing');
+
+ # extract from uri
+ my ($filming) = $self->{'@id'} =~ m;/[hk]([12])/(?:co|wa|sh)/;;
+
+ return $filming;
+}
+
+=item film ()
+
+Return the film in which the section is located.
+
+=cut
+
+sub film {
+ my $self = shift or croak('param missing');
+
+ # extract from uri
+ my ($film_id) = $self->{'@id'} =~ m;/([hk][12]/(?:co|wa|sh)/.+?)/;;
+
+ my $film = ZBW::PM20x::Film->new($film_id);
+
+ return $film;
+}
+
+=item title ()
+
+Returns the full section title, as captured in Zotero or in the film lists,
+always in German.
+
+=cut
+
+sub title {
+ my $self = shift or croak('param missing');
+
+ my $title = $self->{'title'};
+
+ return $title;
+}
+
+=item label ( $lang, $detail_voc )
+
+Returns the partial category label from $detail_voc, in the according language.
+
+The full title may include additional information (e.g., individual diseases,
+or political relations to ...). For now, these keywords are added in German.
+
+For entries from the filmlists, currently no label can be generated (because of
+missing ids), and the full German title is returned.
+
+=cut
+
+sub label {
+ my $self = shift or croak('param missing');
+ my $lang = shift or croak('param missing');
+ my $detail_voc = shift or croak('param missing');
+
+ my $section_id = $self->id;
+ my $collection = $self->collection;
+ my $vocab_name = $detail_voc->vocab_name;
+ my $term_id;
+
+ # lazy load
+ if ( not defined $vocab{$vocab_name} ) {
+ __PACKAGE__->_load_vocab($vocab_name);
+ }
+
+ # sh
+ if ( $collection eq 'sh' ) {
+ if ( $vocab_name eq 'geo' ) {
+ if ( $self->{country}{'@id'} =~ m;.+/i/(\d{6})$; ) {
+ $term_id = $1;
+ }
+ } elsif ( $vocab_name eq 'subject' ) {
+ if ( $self->{subject} && $self->{subject}{'@id'} =~ m;.+/i/(\d{6})$; ) {
+ $term_id = $1;
+ } else {
+ return;
+ }
+ }
+ }
+
+ # wa
+ elsif ( $collection eq 'wa' ) {
+ if ( $vocab_name eq 'ware' ) {
+ if ( $self->{ware}{'@id'} =~ m;.+/i/(\d{6})$; ) {
+ $term_id = $1;
+ }
+ } elsif ( $vocab_name eq 'geo' ) {
+ if ( $self->{country} && $self->{country}{'@id'} =~ m;.+/i/(\d{6})$; ) {
+ $term_id = $1;
+ } else {
+ return;
+ }
+ }
+ }
+
+ # vocab lookup
+ my $label = $vocab{$vocab_name}->label( $lang, $term_id );
+ carp "$section_id: Term $term_id not found in $vocab_name\n" unless $label;
+
+ # TODO handle labels with keys/translations
+ # for ware, we for now have labels combined with keywords
+ if ( $collection eq 'sh' ) {
+ if ( $self->{keywords} ) {
+ $label = "$label - " . $self->{keywords}[0];
+ }
+ }
+ return $label;
+}
+
+=item img_count ()
+
+Returns the number of images in this section, or undef, if section or count is
+not defined.
+
+If a section starts on film_1 and runs up to film_2, the img_count() returns
+the sum of images on both film parts.
+
+See read_zotero.pl add_number_of_images() for the computation of
+number_of_images.
+
+=cut
+
+sub img_count {
+ my $self = shift or croak('param missing');
+
+ return $self->{totalImageCount}{'@value'};
+}
+
+=item is_filmstartonly ()
+
+Returns 1 if this is the first section on a film and the film is not indexed.
+
+TODO: additional condition: the section title is the same as in the prior
+section (otherwise, accidentally new content started at film start).
+
+=cut
+
+sub is_filmstartonly {
+ my $self = shift or croak('param missing');
+
+ my $filmstartonly;
+
+ my $film = $self->film;
+
+ my @sections = $film->sections;
+ if ( $self eq $sections[0]
+ and scalar( split( / : /, $self->title ) ) == 2 )
+ {
+ if ( $film->status eq 'unindexed' ) {
+ $filmstartonly = 1;
+ }
+ }
+ return $filmstartonly;
+}
+
+##### helper procedures - only internally used?
+
+sub is_known_variant {
+ my $vocab_name = shift or croak('param missing');
+ my $string = shift or croak('param missing');
+
+ # prepare lookup
+ my %variant;
+ my $list_str = << 'EOF';
+Neufundland
+Nigeria
+Osmanisches Reich
+Protektorat
+Saarland
+Sowjetunion
+Vereinigte Staaten
+Jugoslawien
+Palästina
+Türkei
+Südwestafrika
+Danzig
+Russland
+Deutschland (bis 1945)
+Nordische Länder
+Böhmen und Mähren (Reichsprotektorat)
+Französisch-Nordafrika
+Ostpreussen
+Memel
+Elsass-Lothringen
+Neu Kaledonien
+Posen
+EOF
+ my @list = split( "\n", $list_str );
+ foreach my $key (@list) {
+ $variant{$key} = 1;
+ }
+
+ my $is_variant = $variant{$string};
+
+ return $is_variant;
+}
+
+sub has_known_subdivisions {
+ my $title = shift or croak('param missing');
+
+ my $list_str = << 'EOF';
+Außenpolitik und politische Beziehungen zum Ausland
+Wahlen für parlamentarische Körperschaften
+Fremdländische Kapitalanlagen, privatwirtschaftliche Interessen, Angehöriger
+Geschichtliche Vorgänge in einzelnen Staaten, Provinzen und Städten
+Einwanderer aus
+Schiffsverkehr mit
+Politische Beziehungen zu
+Wirtschaftspolitische Beziehungen zu
+Handelsbeziehungen zu
+Minderheiten aus einzelnen Ländern
+Verhandlungen parlamentarischer Körperschaften einzelner Regionen
+Staatsgrenzen gegenüber einzelnen Ländern
+Landeskunde, Landschaften, Beschreibung einzelner Orte und Gegenden
+Nationale Angehörige im Ausland, in einzelnen Ländern
+Geheimbünde, Einzelne
+Bevölkerungsbewegung und Bevölkerungsstatistik einzelner Provinzen, Bundesstaaten und Städte
+Alliierte und assoziierte Mächte, Ministerkonferenzen und Botschafterkonferenzen
+Völkerbundsversammlung (Verhandlungen)
+Völkerbundsrat (Verhandlungen)
+Ständige Organisation der Arbeit, Hauptversammlungen
+einzelne
+Einzelne
+EOF
+ my @list = split( "\n", $list_str );
+
+ my $has_subdivisions;
+ foreach my $key (@list) {
+ if ( $title =~ m/$key/ ) {
+ $has_subdivisions = 1;
+ }
+ }
+
+ return $has_subdivisions;
+}
+
+=back
+
+=cut
+
+############ internal
+
+sub _init_data {
+
+ # use the unblessed section data loaded in Film.pm
+ my $SECTION_FROM_FILM = ZBW::PM20x::Film::_SECTION();
+
+ # populate $SECTION
+ foreach my $section_uri ( sort keys %{$SECTION_FROM_FILM} ) {
+
+ if ( not __PACKAGE__->is_valid_section_uri($section_uri) ) {
+ confess "Invalid film section uri $section_uri";
+ }
+
+ my $section_data = $SECTION_FROM_FILM->{$section_uri};
+ my $section = bless( $section_data, __PACKAGE__ );
+
+ $SECTION->{$section_uri} = $section;
+ }
+
+ # folders and categories
+ foreach my $section_uri ( sort keys %{$SECTION} ) {
+ my $section = $SECTION->{$section_uri};
+ my $filming = $section->filming;
+ my $section_id = $section->id;
+
+ # folders (currently only for co)
+ # TODO add folder and section objects
+ if ( my $pm20_uri = $section->{about}{'@id'} ) {
+ $pm20_uri =~ m;folder/co/(\d{6});;
+ my $folder_nk = $1;
+ push( @{ $FOLDER->{co}{$folder_nk}{$filming} }, $section->uri );
+ }
+
+ # categories
+ else {
+ # primary group
+ my $grp_prop_ref =
+ __PACKAGE__->get_grouping_properties( $section->collection );
+ my $category_type = $grp_prop_ref->{primary_group}{type};
+ my $category_prop = $grp_prop_ref->{primary_group}{jsonld};
+
+ if ( $section->{$category_prop}
+ and my $category_uri = $section->{$category_prop}{'@id'} )
+ {
+ my ($category_id) =
+ $category_uri =~ m;category/$category_type/i/(\d{6});;
+ push(
+ @{ $CATEGORY->{$category_type}{$category_id}{$filming} },
+ $section
+ );
+ }
+
+ next unless $grp_prop_ref->{secondary_group};
+
+ # secondary group
+ my $secondary_category_type = $grp_prop_ref->{secondary_group}{type};
+ my $secondary_category_prop = $grp_prop_ref->{secondary_group}{jsonld};
+
+ if ( $section->{$secondary_category_prop}
+ and my $category_uri = $section->{$secondary_category_prop}{'@id'} )
+ {
+ if ( $category_uri =~ m;category/$secondary_category_type/i/(\d{6}); ) {
+ my $secondary_category_id = $1;
+ push(
+ @{
+ $CATEGORY_INV->{$secondary_category_type}{$secondary_category_id}
+ {$filming}
+ },
+ $section
+ );
+ } else {
+
+ # particularly, the case of known ... {vocab}/i/nomatch
+ # no additional warning necessary here
+ ##carp "$section_id: no id for $category_uri\n";
+ }
+ }
+ }
+ }
+ return $SECTION, $FOLDER, $CATEGORY, $CATEGORY_INV;
+}
+
+sub _load_vocab {
+ my $class = shift or confess('class missing');
+ my $vocab_name = shift or confess('param missing');
+
+ $vocab{$vocab_name} = ZBW::PM20x::Vocab->new($vocab_name) || croak;
+}
+
+1;
+
diff --git a/lib/ZBW/PM20x/Folder.pm b/lib/ZBW/PM20x/Folder.pm
index cd4456e..3068060 100644
--- a/lib/ZBW/PM20x/Folder.pm
+++ b/lib/ZBW/PM20x/Folder.pm
@@ -4,9 +4,8 @@ package ZBW::PM20x::Folder;
use strict;
use warnings;
-
-use lib './lib/';
-use utf8;
+use autodie;
+use utf8::all;
use Carp;
use Data::Dumper;
@@ -17,6 +16,7 @@ use Readonly;
use Scalar::Util qw(looks_like_number reftype);
use ZBW::PM20x::Vocab;
use ZBW::PM20x::Film;
+use ZBW::PM20x::Film::Section;
Readonly my $FOLDER_URI_ROOT => 'https://pm20.zbw.eu/folder/';
Readonly our $FOLDER_ROOT => path('/pm20/folder');
@@ -710,7 +710,7 @@ sub get_filmsectionlist {
my $filming = shift or croak('param missing');
my @filmsectionlist =
- ZBW::PM20x::Film->foldersections( $self->get_folder_id, $filming );
+ ZBW::PM20x::Film::Section->foldersections( $self->get_folder_id, $filming );
return @filmsectionlist;
}
@@ -760,7 +760,7 @@ sub _load_docdata {
my $collection = shift or croak('param missing');
my $docdata_file = $DOCDATA_ROOT->child("${collection}_docdata.json");
- my $docdata_ref = decode_json( $docdata_file->slurp );
+ my $docdata_ref = decode_json( $docdata_file->slurp_raw );
$data{$collection}{docdata} = $docdata_ref;
}
diff --git a/lib/ZBW/PM20x/Vocab.pm b/lib/ZBW/PM20x/Vocab.pm
index 0e50a67..6cc918f 100644
--- a/lib/ZBW/PM20x/Vocab.pm
+++ b/lib/ZBW/PM20x/Vocab.pm
@@ -4,7 +4,8 @@ package ZBW::PM20x::Vocab;
use strict;
use warnings;
-use utf8;
+use autodie;
+use utf8::all;
use Carp qw/ cluck confess croak /;
use Data::Dumper;
@@ -14,7 +15,7 @@ use Path::Tiny;
use Readonly;
use Scalar::Util qw(looks_like_number reftype);
use Unicode::Collate;
-use ZBW::PM20x::Film;
+use ZBW::PM20x::Film::Section;
# exported package constants
our @ISA = qw/ Exporter /;
@@ -115,8 +116,11 @@ sub new {
my ( %cat, %lookup, $modified );
my $file = path("$RDF_ROOT/$vocab_name.skos.extended.jsonld");
foreach my $lang (qw/ en de /) {
+
+ # opening _raw is necessary to avoid "Wide character ..." problem with
+ # decode_json (slurp_utf8 does not work!)
my @categories =
- @{ decode_json( $file->slurp )->{'@graph'} };
+ @{ decode_json( $file->slurp_raw )->{'@graph'} };
# read jsonld graph
foreach my $category (@categories) {
@@ -304,6 +308,18 @@ sub lookup_ware_name {
return $term_id;
}
+=item vocab_name ()
+
+Return the name of the vocabulary.
+
+=cut
+
+sub vocab_name {
+ my $self = shift or confess('param missing');
+
+ return $self->{vocab_name};
+}
+
=back
=head2 Methods for an individual term/category
@@ -356,7 +372,7 @@ sub category_uri {
my $self = shift or croak('param missing');
my $term_id = shift or croak('param missing');
- my $uri = $URI_STUB . $self->{vocab_name} . "/i/$term_id";
+ my $uri = $URI_STUB . $self->vocab_name . "/i/$term_id";
return $uri;
}
@@ -525,16 +541,17 @@ sub folder_count {
my $detail_type = shift or croak('param missing');
# get from extended vocab data
- my $category_type = $self->{vocab_name};
+ my $category_type = $self->vocab_name;
my $prop = $COUNT_PROPERTY{$category_type}{$detail_type};
my $folder_count = $self->{id}{$term_id}{$prop}{'@value'};
return $folder_count;
}
-=item folderlist ( $lang, $term_id, $detail_type )
+=item folderlist ( $lang, $term_id, $detail_vocab )
-Returns a list of folders, sorted by long signature or ware name of the detail type.
+Returns a list of folders, sorted by long signature or ware name of the detail
+vocabulary.
=cut
@@ -546,8 +563,8 @@ sub folderlist {
my @folderlist;
- my $master_type = $self->{vocab_name};
- my $detail_type = $detail_voc->{vocab_name};
+ my $master_type = $self->vocab_name;
+ my $detail_type = $detail_voc->vocab_name;
my @detail_category_ids = $detail_voc->category_ids($lang);
foreach my $detail_id (@detail_category_ids) {
@@ -568,7 +585,12 @@ sub folderlist {
croak("Strange combination of master $master_type $term_id "
. "and detail $detail_id" );
}
+
+ # create a folder from hypotetical combination of terms
my $folder = ZBW::PM20x::Folder->new( $collection, $folder_nk );
+
+ # filters for actually existing folders
+ # (film sections cannot interfere here)
if ( $folder->get_doc_count ) {
push( @folderlist, $folder );
}
@@ -612,25 +634,54 @@ sub start_sig {
}
}
-=item filmsectionlist( $term_id, $filming )
+=item filmsectionlist( $term_id, $filming, $detail_type )
-Return a sorted list of film sections for a category (leaves out sections
-already published as folders and not manually indexed) for either filming 1 or
-2. Currently datastructure works only for primary category. For category geo,
-only sections for sh are returned.
+Return a (currently unsorted) list of film sections of detail category type $detail_type
+(defaults see below) for a term of the given (main) category type for either
+filming 1 or 2. Leaves out sections already published as folders and not
+manually indexed.
-TODO: Extend to secondary categories.
+Default detail types are subject for geo, geo for ware, or geo for subject.
=cut
sub filmsectionlist {
- my $self = shift or croak('param missing');
- my $term_id = shift or croak('param missing');
- my $filming = shift or croak('param missing');
+ my $self = shift or croak('param missing');
+ my $term_id = shift or croak('param missing');
+ my $filming = shift or croak('param missing');
+ my $detail_type = shift;
+
+ my $master_type = $self->vocab_name;
+
+ # set default detail type, if omitted by the caller
+ if ( not $detail_type ) {
+ if ( $master_type eq 'ware' ) {
+ $detail_type = 'geo';
+ } elsif ( $master_type eq 'geo' ) {
+ $detail_type = 'subject';
+ } elsif ( $master_type eq 'subject' ) {
+ $detail_type = 'geo';
+ }
+ }
- my @filmsectionlist =
- ZBW::PM20x::Film->categorysections( $self->{vocab_name}, $term_id,
- $filming );
+ my @filmsectionlist;
+
+ # only certain combinations of master/detail categories are valid!
+ if ( ( $master_type eq 'geo' and $detail_type eq 'subject' )
+ or ( $master_type eq 'ware' and $detail_type eq 'geo' ) )
+ {
+ @filmsectionlist =
+ ZBW::PM20x::Film::Section->categorysections( $master_type, $term_id,
+ $filming );
+ } elsif ( $master_type eq 'geo' and $detail_type eq 'ware'
+ or $master_type eq 'subject' and $detail_type eq 'geo' )
+ {
+ @filmsectionlist =
+ ZBW::PM20x::Film::Section->categorysections_inv( $master_type, $term_id,
+ $filming );
+ } else {
+ croak("Invalid combination of master $master_type and detail $detail_type");
+ }
return @filmsectionlist;
}
@@ -704,7 +755,7 @@ sub _as_array {
sub _add_subheadings {
my $self = shift or croak('param missing');
- if ( $self->{vocab_name} eq 'geo' ) {
+ if ( $self->vocab_name eq 'geo' ) {
$self->{subhead} = {
A => {
de => 'Europa',
@@ -747,7 +798,7 @@ sub _add_subheadings {
en => 'Tropics',
},
};
- } elsif ( $self->{vocab_name} eq 'subject' ) {
+ } elsif ( $self->vocab_name eq 'subject' ) {
foreach my $id ( keys %{ $self->{id} } ) {
my %terminfo = %{ $self->{id}{$id} };
my $signature = $terminfo{notation};
@@ -762,7 +813,7 @@ sub _add_subheadings {
$self->{subhead}{$signature}{$lang} = $label;
}
}
- } elsif ( $self->{vocab_name} eq 'ware' ) {
+ } elsif ( $self->vocab_name eq 'ware' ) {
# here we have no signature, but only start chars
foreach my $id ( keys %{ $self->{id} } ) {
@@ -806,7 +857,7 @@ sub _init_ware_name {
}
}
-# init the sorted id lists
+# init the sorted id lists (returns hash of arrays, keyed by language)
sub _init_sorted_ids {
my $self = shift or croak('param missing');
@@ -815,7 +866,7 @@ sub _init_sorted_ids {
my %cat_id = %{ $self->{id} };
foreach my $lang (@LANGUAGES) {
my @category_ids;
- if ( $self->{vocab_name} eq 'ware' ) {
+ if ( $self->vocab_name eq 'ware' ) {
my $uc = Unicode::Collate->new();
@category_ids = sort {
$uc->cmp( $cat_id{$a}{'prefLabel'}{$lang},
diff --git a/mk_sparql_results.pl b/mk_sparql_results.pl
index 1a08a22..58a7c3c 100755
--- a/mk_sparql_results.pl
+++ b/mk_sparql_results.pl
@@ -15,7 +15,8 @@
use strict;
use warnings;
-use utf8;
+use autodie;
+use utf8::all;
use Data::Dumper;
use Path::Tiny;
@@ -26,9 +27,6 @@
use URL::Encode qw/url_encode/;
use YAML;
-# create utf8 output
-binmode( STDOUT, ":utf8" );
-
Readonly my $DEFINITIONS_FILE => path('sparql_results.yaml');
my $definition_ref = YAML::LoadFile($DEFINITIONS_FILE);
diff --git a/check_film_notation.pl b/old/check_film_notation.pl
similarity index 100%
rename from check_film_notation.pl
rename to old/check_film_notation.pl
diff --git a/cp_reports.pl b/old/cp_reports.pl
similarity index 100%
rename from cp_reports.pl
rename to old/cp_reports.pl
diff --git a/expand_signatures.pl b/old/expand_signatures.pl
similarity index 100%
rename from expand_signatures.pl
rename to old/expand_signatures.pl
diff --git a/read_hypothesis.pl b/old/read_hypothesis.pl
similarity index 100%
rename from read_hypothesis.pl
rename to old/read_hypothesis.pl
diff --git a/rebuild_all_data.sh b/old/rebuild_all_data.sh
similarity index 100%
rename from rebuild_all_data.sh
rename to old/rebuild_all_data.sh
diff --git a/recreate_pm20_endpoint.sh b/old/recreate_pm20_endpoint.sh
similarity index 100%
rename from recreate_pm20_endpoint.sh
rename to old/recreate_pm20_endpoint.sh
diff --git a/parse_doc_attrib.pl b/parse_doc_attrib.pl
index 47554c9..e03265f 100755
--- a/parse_doc_attrib.pl
+++ b/parse_doc_attrib.pl
@@ -6,7 +6,8 @@
use strict;
use warnings;
-use utf8;
+use autodie;
+use utf8::all;
use Data::Dumper;
use JSON;
@@ -14,8 +15,6 @@
use Readonly;
use YAML::Tiny;
-binmode( STDIN, ":encoding(iso-8859-1)" );
-binmode( STDOUT, ":utf8" );
$Data::Dumper::Sortkeys = 1;
Readonly my $DOCATTRIB_ROOT => path('/pm20/data/DocAttribute/');
diff --git a/parse_docdata.pl b/parse_docdata.pl
index 70b85ff..32b1c04 100644
--- a/parse_docdata.pl
+++ b/parse_docdata.pl
@@ -15,9 +15,8 @@
use strict;
use warnings;
-use utf8;
-
-use lib './lib';
+use autodie;
+use utf8::all;
use Data::Dumper;
use JSON;
@@ -26,8 +25,6 @@
use Readonly;
use ZBW::Logutil;
-binmode( STDOUT, ":utf8" );
-
# logging
my $log = ZBW::Logutil->get_logger('./log_conf/parse_docdata.conf');
$log->level($INFO);
@@ -56,11 +53,13 @@
# read image data file
my $img_ref =
- decode_json( $IMAGEDATA_ROOT->child("${collection}_image.json")->slurp );
+ decode_json(
+ $IMAGEDATA_ROOT->child("${collection}_image.json")->slurp_raw );
# read doc attribute data file
my $docattr_ref =
- decode_json( $DOCDATA_ROOT->child("${collection}_docattr.json")->slurp );
+ decode_json(
+ $DOCDATA_ROOT->child("${collection}_docattr.json")->slurp_raw );
my %docdata;
foreach my $folder ( sort keys %{$img_ref} ) {
@@ -102,7 +101,7 @@
# consolidated document information
my $field_ref =
consolidate_info( $folder, $doc, $docdata{$folder}{info}{$doc},
- $docs{$doc} );
+ $docs{$doc} );
$docdata{$folder}{info}{$doc}{con} = $field_ref;
##if ($docdata{$folder}{info}{$doc}{_txt}{TIT}) {
diff --git a/parse_imagelists.pl b/parse_imagelists.pl
index 5e73fe4..5901758 100644
--- a/parse_imagelists.pl
+++ b/parse_imagelists.pl
@@ -5,7 +5,8 @@
use strict;
use warnings;
-use utf8;
+use autodie;
+use utf8::all;
use Data::Dumper;
use File::Basename;
diff --git a/recreate_document_locks.pl b/recreate_document_locks.pl
index 1828116..d44a688 100644
--- a/recreate_document_locks.pl
+++ b/recreate_document_locks.pl
@@ -7,9 +7,8 @@
use strict;
use warnings;
-use utf8;
-
-use lib './lib';
+use autodie;
+use utf8::all;
use Data::Dumper;
use Log::Log4perl::Level;
diff --git a/results2markdown.pl b/results2markdown.pl
index 43dab79..bca0552 100644
--- a/results2markdown.pl
+++ b/results2markdown.pl
@@ -9,9 +9,8 @@
use strict;
use warnings;
-use utf8;
-
-use lib './lib';
+use autodie;
+use utf8::all;
use Data::Dumper;
use JSON;
@@ -20,8 +19,6 @@
use YAML;
use ZBW::PM20x::Folder;
-binmode( STDOUT, ":utf8" );
-
Readonly my $DEFINITIONS_FILE => 'sparql_results.yaml';
Readonly my $CONFIGURATION_FILE => 'reports.yaml';
Readonly my $REPORT_ROOT => path('/pm20/web/report');
@@ -45,7 +42,7 @@
# read input
( my $input_dir = $definition{$report}{output_dir} ) =~ s|/var/|/data/|;
my $input_file = path("$input_dir/$report.$lang.json");
- my $input = decode_json( $input_file->slurp );
+ my $input = decode_json( $input_file->slurp_raw );
# collect output lines, starting with page head
my @lines;
diff --git a/testing/Film_Section_categorysections.t b/testing/Film_Section_categorysections.t
new file mode 100644
index 0000000..438623e
--- /dev/null
+++ b/testing/Film_Section_categorysections.t
@@ -0,0 +1,70 @@
+# 03.11.2025
+
+use strict;
+use warnings;
+use autodie;
+use utf8::all;
+
+use Data::Dumper;
+use Test::More;
+
+my $class = 'ZBW::PM20x::Film::Section';
+
+use_ok($class) or die "Could not load $class\n";
+
+my $struct = $class->get_grouping_properties('wa');
+
+ok( $struct, 'get grouping wa' );
+
+#warn(Dumper $struct);
+
+my ( $ware_id, $geo_id, $subject_id, $filming, @waresections, @geosections,
+ @subjectsections );
+
+# Tests for secondary sections
+
+# testcase film/h1/wa/W0087H/0002 (Eisenwaren : Österreich)
+# film.jsonld comprises
+# - is arbitrary (beginning of new film, not beginning of geo
+# (as indicated by start date 1932))
+
+$ware_id = 142275;
+$geo_id = 141731;
+$filming = 1;
+
+@waresections = $class->categorysections( 'ware', $ware_id, $filming );
+##diag Dumper \@waresections;
+ok( @waresections, "ware $ware_id has sections in filming $filming" );
+
+my $section1 = $waresections[0];
+##diag Dumper $section1;
+ok( $section1->isa('ZBW::PM20x::Film::Section'), "section is of class film section");
+
+#warn(Dumper \@waresections);
+
+@geosections = $class->categorysections_inv( 'geo', $geo_id, $filming );
+ok( @geosections, "geo $geo_id has ware sections in filming $filming" );
+##diag Dumper \@geosections;
+
+# create a lookup hash of ware ids for the geo (just for testing)
+my %ware = map { $_->{ware}{'@id'} =~ m/\/(\d+)$/ => 1 } @geosections;
+
+# TODO inverse logic, when sections with start date are excluded
+ok( $ware{$ware_id}, "section for ware id $ware_id in result" );
+
+#warn(Dumper \%ware_id);
+
+# test case film/h1/sh/S0234H/0173/L (Polen : Seeschiffahrt)
+$subject_id = 145567;
+$geo_id = 140962;
+
+@subjectsections =
+ $class->categorysections_inv( 'subject', $subject_id, $filming );
+ok( @subjectsections,
+ "subject $subject_id has geo sections in filming $filming" );
+
+# create a lookup hash of geo ids for the subject (just for testing)
+my %geo = map { $_->{country}{'@id'} =~ m/\/(\d+)$/ => 1 } @subjectsections;
+ok( $geo{$geo_id}, "section for geo id $geo_id in result" );
+
+done_testing;
diff --git a/testing/Film_Section_foldersections.t b/testing/Film_Section_foldersections.t
new file mode 100644
index 0000000..7f57794
--- /dev/null
+++ b/testing/Film_Section_foldersections.t
@@ -0,0 +1,25 @@
+# 07.11.2025
+
+# verify that lists of sections for company folders are generated
+
+use strict;
+use warnings;
+use autodie;
+use utf8::all;
+
+use Data::Dumper;
+use Test::More;
+
+my $class = 'ZBW::PM20x::Film::Section';
+
+use_ok($class) or die "Could not load $class\n";
+can_ok($class, "foldersections");
+
+my $collection = 'co';
+my $folder_nk = "041389" . "";
+
+my @list;
+ok(@list = $class->foldersections("$collection/$folder_nk", 2), "foldersections");
+#diag Dumper @list;
+
+done_testing;
diff --git a/testing/Film_Section_img_count.t b/testing/Film_Section_img_count.t
new file mode 100644
index 0000000..273dd7a
--- /dev/null
+++ b/testing/Film_Section_img_count.t
@@ -0,0 +1,60 @@
+# 2025-12-05
+
+use strict;
+use warnings;
+use autodie;
+use utf8::all;
+
+use Data::Dumper;
+use Test::More;
+
+my $class = 'ZBW::PM20x::Film::Section';
+
+use_ok($class) or die "Could not load $class\n";
+
+my $_ref = [
+ {
+ title => '',
+ id => '',
+ expected => {
+ img_count => '',
+ },
+ diag => 1,
+ },
+];
+
+my @cases = (
+ {
+ title => 'Glutamat',
+ id => 'h2/wa/W2087H/0987/L',
+ expected => {
+ img_count => '15',
+ },
+ diag => 0,
+ },
+ {
+ title =>
+'Sachsen (Pr.) : Geschichtliche Vorgänge in einzelnen Staaten, Provinzen und Städten',
+ id => 'h1/sh/S0043H_1/0562/R',
+ expected => {
+ img_count => '59',
+ },
+ diag => 0,
+ comment => 'section spans S0043H_1 and S0043H_2',
+ },
+);
+
+my ( $section, $img_count );
+
+foreach my $case_ref (@cases) {
+ ok( my $section = ZBW::PM20x::Film::Section->init_from_id( $case_ref->{id} ),
+ 'init ' . $case_ref->{title} );
+ is(
+ $section->img_count,
+ $case_ref->{expected}{img_count},
+ 'img_count ' . $section->img_count
+ );
+ diag Dumper $case_ref, $section if $case_ref->{diag};
+}
+
+done_testing;
diff --git a/testing/Film_Section_init.t b/testing/Film_Section_init.t
new file mode 100644
index 0000000..ba454d7
--- /dev/null
+++ b/testing/Film_Section_init.t
@@ -0,0 +1,31 @@
+# 15.11.2025
+
+use strict;
+use warnings;
+use autodie;
+use utf8::all;
+
+use Data::Dumper;
+use Test::More;
+
+my $class = 'ZBW::PM20x::Film::Section';
+
+use_ok($class) or die "Could not load $class\n";
+
+my $uri = 'https://pm20.zbw.eu/film/h1/sh/S0373H/1115';
+ok( my $section1 = $class->init_from_uri($uri), "created from uri" );
+##diag Dumper $section1;
+
+ok( my $section2 = $class->init_from_id('h1/sh/S0373H/1115'),
+ "created from id" );
+##diag Dumper $section2;
+
+is_deeply( $section1, $section2, "same result with init from uri and id" );
+
+is( $section1->id, 'h1/sh/S0373H/1115', "id" );
+
+is( $section1->collection, 'sh', "collection" );
+
+is( $section1->filming, '1', "filming" );
+
+done_testing;
diff --git a/testing/Film_Section_is_filmstartonly.t b/testing/Film_Section_is_filmstartonly.t
new file mode 100644
index 0000000..56c2609
--- /dev/null
+++ b/testing/Film_Section_is_filmstartonly.t
@@ -0,0 +1,50 @@
+# 2025-12-14
+
+# also tests $section->film()
+
+use strict;
+use warnings;
+use autodie;
+use utf8::all;
+
+use Data::Dumper;
+use Test::More;
+
+my $class = 'ZBW::PM20x::Film::Section';
+
+use_ok($class) or die "Could not load $class\n";
+
+my $_ref = [
+ {
+ id => '',
+ expected => {
+ film => '',
+ filmstartonly => undef,
+ },
+ diag => 0,
+ },
+];
+
+my @cases = (
+ {
+ id => 'h1/wa/W0087H/0002',
+ expected => {
+ film => 'W0087H',
+ filmstartonly => 1,
+ },
+ diag => 0,
+ },
+);
+
+foreach my $case_ref (@cases) {
+ ok( my $section = ZBW::PM20x::Film::Section->init_from_id( $case_ref->{id} ),
+ "init from id" );
+ is( $section->film->name, $case_ref->{expected}{film}, 'film from section' );
+ is(
+ $section->is_filmstartonly,
+ $case_ref->{expected}{filmstartonly},
+ $section->id . ' is filmstartonly: ' . $section->is_filmstartonly
+ );
+}
+
+done_testing;
diff --git a/testing/Film_Section_label_sh.t b/testing/Film_Section_label_sh.t
new file mode 100644
index 0000000..2fa1913
--- /dev/null
+++ b/testing/Film_Section_label_sh.t
@@ -0,0 +1,148 @@
+# 15.11.2025
+
+use strict;
+use warnings;
+use autodie;
+use utf8::all;
+
+use Data::Dumper;
+use Test::More;
+use ZBW::PM20x::Vocab;
+
+my $class = 'ZBW::PM20x::Film::Section';
+
+my %vocab = (
+ 'geo' => ZBW::PM20x::Vocab->new('geo'),
+ 'subject' => ZBW::PM20x::Vocab->new('subject'),
+ ## 'ware' => ZBW::PM20x::Vocab->new('ware'),
+);
+
+use_ok($class) or die "Could not load $class\n";
+
+my $_ref = [
+ {
+ title => '',
+ id => '',
+ signature_string => '',
+ vocab_name => '',
+ lang => '',
+ expected => {
+ label => '',
+ },
+ diag => 1,
+ },
+];
+
+my @cases = (
+ {
+ title => 'Polen : Seeschiffahrt',
+ id => 'h1/sh/S0234H/0173/L',
+ signature_string => 'A12 n32',
+ vocab_name => 'geo',
+ lang => 'en',
+ expected => {
+ label => 'Poland',
+ },
+ diag => 0,
+ },
+ {
+ title => 'Nyassaland',
+ id => 'h1/sh/S0824H/1180',
+ signature_string => 'C99',
+ vocab_name => 'geo',
+ lang => 'en',
+ expected => {
+ label => 'Nyasaland',
+ },
+ diag => 0,
+ },
+ {
+ title => 'Nyassaland',
+ id => 'h1/sh/S0824H/1180',
+ signature_string => 'C99',
+ vocab_name => 'subject',
+ lang => 'en',
+ expected => {
+ label => undef,
+ },
+ diag => 0,
+ },
+ {
+ title => 'Nepal : Politische Beziehungen zu einzelnen Ländern - Tibet',
+ id => 'h1/sh/S0693H/1237/R',
+ signature_string => 'B55 g1 - Tibet',
+ vocab_name => 'geo',
+ lang => 'de',
+ expected => {
+ label => 'Nepal - Tibet',
+ },
+ diag => 0,
+ },
+ {
+ title => 'Nepal : Politische Beziehungen zu einzelnen Ländern - Tibet',
+ id => 'h1/sh/S0693H/1237/R',
+ signature_string => 'B55 g1 - Tibet',
+ vocab_name => 'subject',
+ lang => 'de',
+ expected => {
+ label => 'Politische Beziehungen zu einzelnen Ländern - Tibet',
+ },
+ diag => 0,
+ },
+ {
+ title =>
+'Weichsel : Europa : Einzelne Binnenschiffahrtsstrassen u. Seekanäle, Verwaltung',
+ id => 'h1/sh/S0006H/0817/R',
+ signature_string => 'A1 n33a Sm3 - Weichsel',
+ vocab_name => 'geo',
+ lang => 'en',
+ expected => {
+ label => 'Europe - Weichsel',
+ },
+ diag => 0,
+ },
+ {
+ title =>
+'Weichsel : Europa : Einzelne Binnenschiffahrtsstrassen u. Seekanäle, Verwaltung',
+ id => 'h1/sh/S0006H/0817/R',
+ signature_string => 'A1 n33a Sm3 - Weichsel',
+ vocab_name => 'subject',
+ lang => 'en',
+ expected => {
+ label => 'Individual inland waterways and sea canals, administration - Weichsel',
+ },
+ diag => 0,
+ },
+);
+
+foreach my $case_ref (@cases) {
+ ok( my $section = ZBW::PM20x::Film::Section->init_from_id( $case_ref->{id} ),
+ "init from id" );
+ my $title = $case_ref->{title};
+ my $lang = $case_ref->{lang};
+ my $vocab_name = $case_ref->{vocab_name};
+ my $vocab = $vocab{$vocab_name};
+ $case_ref->{label} = $section->label( $lang, $vocab );
+ foreach my $field ( keys %{ $case_ref->{expected} } ) {
+ if ( my $expected = $case_ref->{expected}{$field} ) {
+ is( $case_ref->{$field}, $expected, "$title {$lang, $vocab_name}" );
+ }
+ }
+ if ( $case_ref->{diag} ) {
+ diag Dumper $case_ref, $section;
+ last;
+ }
+}
+
+done_testing;
+__DATA__
+ $class->parse_sh($case_ref);
+ (
+ is( $case_ref->{subject_id}, $case_ref->{expected}{subject_id}, $title )
+ && is( $case_ref->{geo_id}, $case_ref->{expected}{geo_id}, $title )
+ && is(
+ $case_ref->{keyword_string}, $case_ref->{expected}{keyword_string},
+ $title
+ )
+ ) || diag Dumper $case_ref;
+
diff --git a/testing/Film_Section_label_wa.t b/testing/Film_Section_label_wa.t
new file mode 100644
index 0000000..78de33d
--- /dev/null
+++ b/testing/Film_Section_label_wa.t
@@ -0,0 +1,163 @@
+# 03.11.2025
+
+use strict;
+use warnings;
+use autodie;
+use utf8::all;
+
+use Data::Dumper;
+use Test::More;
+use ZBW::PM20x::Vocab;
+
+my %vocab = (
+ 'geo' => ZBW::PM20x::Vocab->new('geo'),
+ ##'subject' => ZBW::PM20x::Vocab->new('subject'),
+ 'ware' => ZBW::PM20x::Vocab->new('ware'),
+);
+
+my $class = 'ZBW::PM20x::Film::Section';
+
+use_ok($class) or die "Could not load $class\n";
+
+my $struct = $class->get_grouping_properties('wa');
+
+my $class2 = 'ZBW::PM20x::Vocab';
+use_ok($class2) or die "Could not load $class2\n";
+
+ok( $struct, 'get grouping wa' );
+
+my ( $ware_id, $geo_id, $subject_id, $filming, @waresections, @geosections,
+ @subjectsections );
+
+# Tests for secondary sections
+
+# testcase h1/wa/W0087H/0002 (Eisenwaren : Österreich)
+# film.jsonld comprises
+# - is arbitrary (beginning of new film, not beginning of geo
+# (as indicated by start date 1932))
+
+$ware_id = 142275;
+$geo_id = 141731;
+$filming = 1;
+
+@waresections = $class->categorysections( 'ware', $ware_id, $filming );
+##diag Dumper \@waresections;
+ok( @waresections, "ware $ware_id has sections in filming $filming" );
+is( $waresections[1]->label( 'de', $vocab{geo} ), 'Österreich', "utf8 string" );
+##diag Dumper $waresections[1];
+
+foreach my $section (@waresections) {
+
+ #diag $section->title, "\n";
+ #diag $section->label( 'en', $vocab{geo} ), "\n";
+}
+
+##@geosections = $class2->filmsectionlist($geo_id, $filming, 'ware');
+@geosections = $class->categorysections_inv( 'geo', $geo_id, $filming );
+foreach my $section (@geosections) {
+ ##diag $section->title, "\n";
+ ##diag $section->id, " ", $section->label( 'en', $ware_vocab ), "\n";
+}
+
+my $_ref = [
+ {
+ title => '',
+ id => '',
+ signature_string => '',
+ vocab_name => '',
+ lang => 'en',
+ expected => {
+ label => '',
+ },
+ diag => 0,
+ },
+];
+
+my @cases = (
+ {
+ title => 'Eisenwaren : Österreich',
+ id => 'h1/wa/W0087H/0002',
+ signature_string => '',
+ vocab_name => 'ware',
+ lang => 'en',
+ expected => {
+ label => '',
+ },
+ diag => 0,
+ },
+);
+
+foreach my $case_ref (@cases) {
+ ok( my $section = ZBW::PM20x::Film::Section->init_from_id( $case_ref->{id} ),
+ "init from id" );
+ my $title = $case_ref->{title};
+ my $lang = $case_ref->{lang};
+ my $vocab_name = $case_ref->{vocab_name};
+ my $vocab = $vocab{$vocab_name};
+ $case_ref->{label} = $section->label( $lang, $vocab );
+ foreach my $field ( keys %{ $case_ref->{expected} } ) {
+ if ( my $expected = $case_ref->{expected}{$field} ) {
+ is( $case_ref->{$field}, $expected, "$title {$lang, $vocab_name}" );
+ }
+ }
+ if ( $case_ref->{diag} ) {
+ diag Dumper $case_ref, $section;
+ last;
+ }
+}
+
+# Tests for secondary sections
+
+# testcase film/h1/wa/W0087H/0002 (Eisenwaren : Österreich)
+# film.jsonld comprises
+# - is arbitrary (beginning of new film, not beginning of geo
+# (as indicated by start date 1932))
+
+$ware_id = 142275;
+$geo_id = 141731;
+$filming = 1;
+
+@waresections = $class->categorysections( 'ware', $ware_id, $filming );
+##diag Dumper \@waresections;
+ok( @waresections, "ware $ware_id has sections in filming $filming" );
+is( $waresections[1]->label( 'de', $vocab{geo} ), 'Österreich', "utf8 string" );
+
+#diag Dumper $waresections[1];
+
+foreach my $section (@waresections) {
+ ##diag $section->title, "\n";
+ #diag $section->label( 'en', $geo_vocab ), "\n";
+}
+##@geosections = $class2->filmsectionlist($geo_id, $filming, 'ware');
+@geosections = $class->categorysections_inv( 'geo', $geo_id, $filming );
+foreach my $section (@geosections) {
+ ##diag $section->title, "\n";
+ ##diag $section->id, " ", $section->label( 'en', $ware_vocab ), "\n";
+}
+
+@geosections = $class->categorysections_inv( 'geo', $geo_id, $filming );
+ok( @geosections, "geo $geo_id has ware sections in filming $filming" );
+##diag Dumper \@geosections;
+
+# create a lookup hash of ware ids for the geo (just for testing)
+my %ware = map { $_->{ware}{'@id'} =~ m/\/(\d+)$/ => 1 } @geosections;
+
+# TODO inverse logic, when sections with start date are excluded
+ok( $ware{$ware_id}, "section for ware id $ware_id in result" );
+
+#warn(Dumper \%ware_id);
+
+# test case film/h1/sh/S0234H/0173/L (Polen : Seeschiffahrt)
+$subject_id = 145567;
+$geo_id = 140962;
+
+@subjectsections =
+ $class->categorysections_inv( 'subject', $subject_id, $filming );
+ok( @subjectsections,
+ "subject $subject_id has geo sections in filming $filming" );
+
+# create a lookup hash of geo ids for the subject (just for testing)
+my %geo = map { $_->{country}{'@id'} =~ m/\/(\d+)$/ => 1 } @subjectsections;
+ok( $geo{$geo_id}, "section for geo id $geo_id in result" );
+
+done_testing;
diff --git a/testing/Film_films.t b/testing/Film_films.t
new file mode 100644
index 0000000..c5df25d
--- /dev/null
+++ b/testing/Film_films.t
@@ -0,0 +1,24 @@
+# 2025-12-09
+
+use strict;
+use warnings;
+use autodie;
+use utf8::all;
+
+use Data::Dumper;
+use Test::More;
+
+my $class = 'ZBW::PM20x::Film';
+
+use_ok($class) or die "Could not load $class\n";
+
+ok( my @films = $class->films('h1_sh'), "load film list" );
+
+#diag Dumper \@films;
+
+ok(
+ !( grep { $_->name() eq 'S0001H' } @films ),
+ 'does not contain S0001H (online with folder)'
+);
+
+done_testing;
diff --git a/testing/Film_id.t b/testing/Film_id.t
new file mode 100644
index 0000000..89e0288
--- /dev/null
+++ b/testing/Film_id.t
@@ -0,0 +1,20 @@
+#
+
+use strict;
+use warnings;
+use autodie;
+use utf8::all;
+
+use Data::Dumper;
+use Test::More;
+
+my $class = 'ZBW::PM20x::Film';
+
+use_ok($class) or die "Could not load $class\n";
+
+my $film_id = 'h1/sh/S0073H_1';
+my $film = $class->new($film_id);
+
+is($film->id, $film_id, 'id() works');
+
+done_testing;
diff --git a/testing/Film_sections.t b/testing/Film_sections.t
new file mode 100644
index 0000000..e2d96b7
--- /dev/null
+++ b/testing/Film_sections.t
@@ -0,0 +1,29 @@
+# 2025-12-14
+
+use strict;
+use warnings;
+use autodie;
+use utf8::all;
+
+use Data::Dumper;
+use Test::More;
+
+my $class = 'ZBW::PM20x::Film';
+
+use_ok($class) or die "Could not load $class\n";
+
+my $section_id = 'h1/wa/W0087H/0002';
+ok( my ($film_id) = $section_id =~ m;(.+?)/\d{4}$;, "film_id" );
+is( $film_id, 'h1/wa/W0087H', 'new film' );
+my $film = ZBW::PM20x::Film->new($film_id);
+my @sections = $film->sections;
+
+my $section = $sections[0];
+
+#diag Dumper $section;
+
+# TODO change to blessed Film::Section!!
+my $retrieved_section_id = substr( $section->{'@id'}, 25 );
+is( $retrieved_section_id, $section_id, 'first section id' );
+
+done_testing;
diff --git a/testing/Film_status.t b/testing/Film_status.t
new file mode 100644
index 0000000..72c18f7
--- /dev/null
+++ b/testing/Film_status.t
@@ -0,0 +1,26 @@
+# 12.11.2025
+
+use strict;
+use warnings;
+use autodie;
+use utf8::all;
+
+use Data::Dumper;
+use Test::More;
+
+my $class = 'ZBW::PM20x::Film';
+
+use_ok($class) or die "Could not load $class\n";
+
+my ( $film_id, $film );
+
+$film_id = 'h1/wa/W0186H';
+ok( $film = ZBW::PM20x::Film->new($film_id), "Film $film_id" );
+is( $film->status, 'indexed', "film $film_id is indexed" );
+
+$film_id = 'h1/wa/W0086H';
+ok( $film = ZBW::PM20x::Film->new($film_id), "Film $film_id" );
+is( $film->status, 'unindexed', "film $film_id is not yet indexed" );
+##diag(Dumper $film->sections);
+
+done_testing;
diff --git a/testing/Film_valid.t b/testing/Film_valid.t
new file mode 100644
index 0000000..0c2bf80
--- /dev/null
+++ b/testing/Film_valid.t
@@ -0,0 +1,48 @@
+# 2025-12-09
+
+use strict;
+use warnings;
+use autodie;
+use utf8::all;
+
+use Data::Dumper;
+use Test::More;
+
+my $class = 'ZBW::PM20x::Film';
+
+use_ok($class) or die "Could not load $class\n";
+
+my @cases = (
+ {
+ id => 'h1/sh/S0001H',
+ valid => undef,
+ },
+ {
+ id => 'h1/sh/S0006H',
+ valid => 1,
+ },
+ {
+ id => 'h1/sh/S0370H',
+ valid => 1,
+ },
+ {
+ id => 'h1/sh/S0374H_1',
+ valid => 1,
+ },
+ {
+ id => 'h1/sh/S0220H_1',
+ valid => undef,
+ },
+ {
+ id => 'h1/sh/S0220H_2',
+ valid => 1,
+ },
+);
+
+foreach my $case_ref (@cases) {
+ my $id = $case_ref->{id};
+ is( ZBW::PM20x::Film->valid($id),
+ $case_ref->{valid}, "Film $id -> " . ( $case_ref->{valid} || "0" ) );
+}
+
+done_testing;
diff --git a/testing/Folder_filmsectionlist.t b/testing/Folder_filmsectionlist.t
new file mode 100644
index 0000000..f4823c3
--- /dev/null
+++ b/testing/Folder_filmsectionlist.t
@@ -0,0 +1,23 @@
+# 07.11.2025
+
+# verify that lists of sections for company folders are generated
+
+use strict;
+use warnings;
+use autodie;
+use utf8::all;
+
+use Data::Dumper;
+use Test::More;
+
+my $class = 'ZBW::PM20x::Folder';
+
+use_ok($class) or die "Could not load $class\n";
+
+my $collection = 'co';
+my $folder_nk = "041389" . "";
+
+ok( my $folder = $class->new( $collection, $folder_nk ), "new folder" );
+ok( $folder->get_filmsectionlist(2), "get filmsectionlist" );
+
+done_testing;
diff --git a/testing/Folder_get_doc_count.t b/testing/Folder_get_doc_count.t
new file mode 100644
index 0000000..779d0e9
--- /dev/null
+++ b/testing/Folder_get_doc_count.t
@@ -0,0 +1,29 @@
+# 2025-11-29
+
+use strict;
+use warnings;
+use autodie;
+use utf8::all;
+
+use Data::Dumper;
+use Test::More;
+
+my $class = 'ZBW::PM20x::Folder';
+
+use_ok($class);
+
+my $folder;
+
+# existing folder (Akim Ltd)
+ok( $folder = $class->new( 'co', '047022' ), 'new ok' );
+##diag Dumper $folder;
+is( $folder->get_doc_count, 28, "document count" );
+
+# non-existing folder (this is used in Vocab->folderlist!)
+ok( $folder = $class->new( 'sh', '666666,777777' ), 'new ok' );
+##diag Dumper $folder;
+is( $folder->get_doc_count, undef, "undef document count" );
+
+done_testing;
+
+1;
diff --git a/testing/Vocab_filmsectionlist.t b/testing/Vocab_filmsectionlist.t
new file mode 100644
index 0000000..c76418e
--- /dev/null
+++ b/testing/Vocab_filmsectionlist.t
@@ -0,0 +1,81 @@
+# 10.11.2025
+
+use strict;
+use warnings;
+use autodie;
+use utf8::all;
+
+use Data::Dumper;
+use Test::More;
+
+my $class = 'ZBW::PM20x::Vocab';
+
+use_ok($class) or die "Could not load $class\n";
+
+my ( $ware_id, $geo_id, $subject_id, $filming, @waresections, @geosections,
+ @subjectsections );
+my $ware_vocab = $class->new('ware');
+my $geo_vocab = $class->new('geo');
+my $subject_vocab = $class->new('subject');
+
+# testcase film/h1/wa/W0087H/0002 (Eisenwaren : Österreich)
+# film.jsonld comprises
+# - is arbitrary (beginning of new film, not beginning of geo
+# (as indicated by start date 1932))
+
+$ware_id = 142275;
+$geo_id = 141731;
+$filming = 1;
+
+@waresections = $ware_vocab->filmsectionlist( $ware_id, $filming, 'geo' );
+ok( @waresections, "ware $ware_id has geo sections in filming $filming" );
+
+#diag Dumper \@waresections;
+
+@geosections = $geo_vocab->filmsectionlist( $geo_id, $filming, 'ware' );
+ok( @geosections, "geo $geo_id has ware sections in filming $filming" );
+
+my @section_uris = map { $_->{'@id'} } @geosections;
+
+#diag Dumper \@section_uris;
+
+my @sorted_section_uris = sort @section_uris;
+is_deeply \@section_uris, \@sorted_section_uris,
+ "list is strictly sorted by section uri";
+
+# create a lookup hash of ware ids for the geo (just for testing)
+my %ware = map { $_->{ware}{'@id'} =~ m/\/(\d+)$/ => 1 } @geosections;
+
+# TODO inverse logic, when sections with start date are excluded
+ok( $ware{$ware_id}, "section for ware id $ware_id in result" );
+
+#warn(Dumper \%ware_id);
+
+# test case film/h1/sh/S0234H/0173/L (Polen : Seeschiffahrt)
+$subject_id = 145567;
+$geo_id = 140962;
+
+@geosections = $geo_vocab->filmsectionlist( $geo_id, $filming, 'subject' );
+ok( @geosections, "geo $geo_id has subject sections in filming $filming" );
+
+@subjectsections =
+ $subject_vocab->filmsectionlist( $subject_id, $filming, 'geo' );
+ok( @subjectsections,
+ "subject $subject_id has geo sections in filming $filming" );
+
+# create a lookup hash of geo ids for the subject (just for testing)
+my %geo = map { $_->{country}{'@id'} =~ m/\/(\d+)$/ => 1 } @subjectsections;
+ok( $geo{$geo_id}, "section for geo id $geo_id in result" );
+
+#test case: Kautschuk : Brasilien
+
+$ware_id = 143085;
+$geo_id = 141697;
+$filming = 1;
+
+@geosections = $geo_vocab->filmsectionlist( $geo_id, $filming, 'ware' );
+ok( @geosections, "geo $geo_id has ware sections in filming $filming" );
+
+#warn Dumper \@geosections;
+
+done_testing;
diff --git a/testing/Vocab_folderlist.t b/testing/Vocab_folderlist.t
new file mode 100644
index 0000000..6b6d86b
--- /dev/null
+++ b/testing/Vocab_folderlist.t
@@ -0,0 +1,30 @@
+# 27.11.2025
+
+use strict;
+use warnings;
+use autodie;
+use utf8::all;
+
+use Data::Dumper;
+use Test::More;
+use ZBW::PM20x::Folder;
+
+my $class = 'ZBW::PM20x::Vocab';
+
+use_ok($class) or die "Could not load $class\n";
+
+my ( $ware_id, $geo_id, $subject_id, @warefolders );
+my $ware_vocab = $class->new('ware');
+my $geo_vocab = $class->new('geo');
+#my $subject_vocab = $class->new('subject');
+
+# Achat
+ok( @warefolders = $ware_vocab->folderlist( 'de', '141944', $geo_vocab ),
+ 'foldersections (Achat)' );
+
+#diag Dumper $warefolders[0];
+#diag Dumper \@warefolders;
+
+done_testing;
+
+1;
diff --git a/testing/Vocab_has_material.t b/testing/Vocab_has_material.t
new file mode 100644
index 0000000..3baec1b
--- /dev/null
+++ b/testing/Vocab_has_material.t
@@ -0,0 +1,23 @@
+# 10.11.2025
+
+use strict;
+use warnings;
+use autodie;
+use utf8::all;
+
+use Data::Dumper;
+use Test::More;
+
+my $class = 'ZBW::PM20x::Vocab';
+
+use_ok($class) or die "Could not load $class\n";
+
+my ( $ware_id, $geo_id, $subject_id, $filming );
+my $ware_vocab = $class->new('ware');
+
+$ware_id = 142275;
+
+# TODO activate actual test
+##ok( $ware_vocab->has_material($ware_id), "ware $ware_id has material" );
+
+done_testing;
diff --git a/testing/Vocab_vocab_name.t b/testing/Vocab_vocab_name.t
new file mode 100644
index 0000000..9c525ad
--- /dev/null
+++ b/testing/Vocab_vocab_name.t
@@ -0,0 +1,22 @@
+# 14.11.2025
+
+use strict;
+use warnings;
+use autodie;
+use utf8::all;
+
+use Data::Dumper;
+use Test::More;
+
+my $class = 'ZBW::PM20x::Vocab';
+
+use_ok($class) or die "Could not load $class\n";
+
+my $name = 'geo';
+
+my $vocab = $class->new($name);
+
+is($vocab->vocab_name, $name, "name returned correctly");
+
+
+done_testing;
diff --git a/testing/_template b/testing/_template
new file mode 100644
index 0000000..7e0b6d5
--- /dev/null
+++ b/testing/_template
@@ -0,0 +1,18 @@
+#
+
+use strict;
+use warnings;
+use autodie;
+use utf8::all;
+
+use Data::Dumper;
+use Test::More;
+
+my $class = 'ZBW::PM20x::Vocab';
+
+use_ok($class) or die "Could not load $class\n";
+
+
+
+
+done_testing;
diff --git a/urlalias.pl b/urlalias.pl
index 42bb481..6295f16 100644
--- a/urlalias.pl
+++ b/urlalias.pl
@@ -5,9 +5,8 @@
use strict;
use warnings;
-use utf8;
-
-use lib './lib';
+use autodie;
+use utf8::all;
use Data::Dumper;
use JSON;
@@ -56,7 +55,7 @@
# load input file
my $imagedata_file = $IMAGEDATA_ROOT->child("${collection}_image.json");
- my $imagedata_ref = decode_json( $imagedata_file->slurp );
+ my $imagedata_ref = decode_json( $imagedata_file->slurp_raw );
foreach my $folder_nk ( sort keys %{$imagedata_ref} ) {
my $folder = ZBW::PM20x::Folder->new( $collection, $folder_nk );
@@ -70,7 +69,7 @@
# categories
foreach my $vocab (qw/ geo subject /) {
my $klassdata_file = $KLASSDATA_ROOT->child("${vocab}_by_signature.de.json");
- my $klassdata_ref = decode_json( $klassdata_file->slurp );
+ my $klassdata_ref = decode_json( $klassdata_file->slurp_raw );
foreach my $entry ( @{ $klassdata_ref->{results}{bindings} } ) {
my $uri =
defined $entry->{category}
diff --git a/web_make_all.sh b/web_make_all.sh
index 8cc8783..12de439 100755
--- a/web_make_all.sh
+++ b/web_make_all.sh
@@ -4,10 +4,10 @@
# (execute different find command for parts of the website - find everything
# from root is too large)
-make -C /pm20/web
-make -C /pm20/web SET=category
-make -C /pm20/web SET=pe
-make -C /pm20/web SET=co
-make -C /pm20/web SET=sh
-make -C /pm20/web SET=wa
+make -s -C /pm20/web
+make -s -C /pm20/web SET=category
+make -s -C /pm20/web SET=pe
+make -s -C /pm20/web SET=co
+make -s -C /pm20/web SET=sh
+make -s -C /pm20/web SET=wa