diff --git a/extract_one_lang_from_rosettacode_tasks_xml.pl b/extract_one_lang_from_rosettacode_tasks_xml.pl index 6b02273..d8d8582 100755 --- a/extract_one_lang_from_rosettacode_tasks_xml.pl +++ b/extract_one_lang_from_rosettacode_tasks_xml.pl @@ -1,9 +1,10 @@ #!/usr/bin/perl + use strict; use warnings; use Data::Dumper; $Data::Dumper::Useqq = 1; -my $wanted_language = 'Perl 6'; +my $wanted_language = 'C++'; # extract_one_lang_from_rosettacode_tasks_xml.pl # by Util of Perlmonks @@ -88,17 +89,30 @@ #print Dumper \%lang_lines; last; } +use File::Path qw(make_path); +use File::Basename; + + for my $title ( sort keys %temp ) { - print "$title\n"; for (@{ $temp{$title} }) { s{<}{<}g; s{>}{>}g; s{"}{"}g; s{&}{&}g; s{}{}; - s{}{}; + s{}{}; + } + my $savefile= $title . ".cpp"; + my $dir = dirname($savefile); + print $dir, " ",$savefile,"\n"; + eval { make_path($dir) }; + if ($@) { + print "Couldn't create $dir: $@"; } - print "\t$_\n" for @{ $temp{$title} }; + open my $fh, '>', $savefile or die "Could not open file '$savefile' $!"; + print $fh "\t$_\n" for @{ $temp{$title} }; + close $fh; + } #print Dumper \@temp; diff --git a/get_all_rosettacode_tasks.pl b/get_all_rosettacode_tasks.pl index 7493a32..1bb1645 100755 --- a/get_all_rosettacode_tasks.pl +++ b/get_all_rosettacode_tasks.pl @@ -11,7 +11,7 @@ # Note that it is currently caching, so `rm pt_75_*.{xml,json}` to update. my $batch_size = 75; -my $base_url_json = "http://rosettacode.org/mw/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&format=json&cmlimit=$batch_size"; +my $base_url_json = "http://rosettacode.org/mw/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&format=json&cmlimit=$batch_size&rawcontinue"; my $base_url_xml = "http://rosettacode.org/mw/api.php?action=query&generator=categorymembers&gcmtitle=Category:Programming_Tasks&gcmlimit=$batch_size&export&exportnowrap"; sub run { @@ -32,14 +32,11 @@ sub get { # Kludge to remove the need for JSON::XS module. my $continue_re = qr{ - ,"query-continue":\{ + "query-continue":\{ "categorymembers":\{ "cmcontinue":"(page\|[a-fA-F0-9]+\|\d+)" \} \} - \} - \s* - \z }msx; my $n = 0;