-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Conversion utility for the database of language orthographies rewritten…
… in Perl.
- Loading branch information
Showing
262 changed files
with
690 additions
and
1,114 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,323 @@ | ||
#!/usr/bin/perl

# Converts the FontConfig fc-lang orthography database (*.orth files) into C:
#   * one "<lang>_orth.c" fragment per orthography file, written by
#     processFile() into $fc_lang_conv_dir;
#   * a catalog header ($cat_hdrfile) and catalog source ($cat_srcfile) that
#     include every successfully converted fragment and expose lookup helpers.

my $fc_lang_dir      = "../fc-lang";
my $fc_lang_conv_dir = "files";

my $cat_srcfile = "fc-lang-data.c";
my $cat_hdrfile = "fc-lang-data.h";

# Marker that parseFile() writes in front of every (first, last) range pair.
# By definition this is an invalid code point.
use constant CODE_IN_RANGE => 0xF0F0FFFF;

sub processFile($$$);
sub parseFile($$$);

# Collect the names of all orthography files, sorted so the generated
# catalog has a stable order.
opendir(my $dirh, $fc_lang_dir)
    or die "Failed to open directory!\n";
my @orth_files = sort grep { m/^.*\.orth$/ } readdir($dirh);
closedir($dirh);

# Convert every orthography file; only files that produced at least one
# entry (processFile() returned true) become part of the catalog.
my $count          = 0;
my @good_src_files = ();
foreach my $fname (@orth_files) {
    next unless $fname =~ m/^(.*)\.orth$/;
    my $langtag    = $1;
    my $c_src_name = "${langtag}_orth.c";
    if (processFile($langtag, "${fc_lang_dir}/${fname}", "${fc_lang_conv_dir}/${c_src_name}")) {
        push @good_src_files, $c_src_name;
        $count++;
    }
}

# Create catalog header file
open(my $hdr_fh, '>', $cat_hdrfile)
    or die "Unable to open $cat_hdrfile: $!";
print {$hdr_fh} <<EOF;
// FontConfig database of language orthographies.
// License: Public Domain.
// This file is autogenerated from fc-lang database.
// https://www.freedesktop.org/wiki/Software/fontconfig/
// https://gitlab.freedesktop.org/fontconfig/fontconfig/tree/master/fc-lang
// by convert utility from https://github.com/virxkane/freetype_textdraw
#ifndef FC_LANG_DATA_H
#define FC_LANG_DATA_H
#ifdef __cplusplus
extern "C" {
#endif
#define FC_LANG_DATA_SZ $count
struct fc_lang_rec
{
const char* lang_code;
const unsigned int char_set_sz;
const unsigned int* char_set;
};
/**
* \@brief Return pointer to FontConfig database of language orthographies
* \@return array of fc_lang_rec records.
*/
const struct fc_lang_rec* get_fc_lang_data();
/**
* \@brief Get count of records in the FontConfig database of language orthographies.
* \@return Count of records in array.
*/
unsigned int get_fc_lang_data_size();
/**
* \@brief Find language in database by code
* \@param lang_code language code is exactly as it appears in the fc_lang catalog.
* \@return Pointer to fc_lang_rec instance if language found, NULL otherwise.
*/
const struct fc_lang_rec* fc_lang_find(const char* lang_code);
#ifdef __cplusplus
}
#endif
#endif // FC_LANG_DATA_H
EOF
# Buffered write errors only surface at close time, so check it.
close($hdr_fh)
    or die "Unable to write $cat_hdrfile: $!";

# Create catalog source file
open(my $src_fh, '>', $cat_srcfile)
    or die "Unable to open $cat_srcfile: $!";
print {$src_fh} <<EOF;
// FontConfig database of language orthographies.
// License: Public Domain.
// This file is autogenerated from fc-lang database.
// https://www.freedesktop.org/wiki/Software/fontconfig/
// https://gitlab.freedesktop.org/fontconfig/fontconfig/tree/master/fc-lang
// by convert utility from https://github.com/virxkane/freetype_textdraw
#include <string.h>
#include "$cat_hdrfile"
EOF

# Pull in every generated per-language fragment.
foreach my $c_src_name (@good_src_files) {
    print {$src_fh} "#include \"${fc_lang_conv_dir}/${c_src_name}\"\n";
}
print {$src_fh} "\n";

# One table row per language: code, size macro and character array, using
# the same names processFile() writes into the fragment.
print {$src_fh} "static const struct fc_lang_rec fc_lang_data[] = {\n";
foreach my $c_src_name (@good_src_files) {
    # Skip anything that does not look like a generated fragment name
    # instead of reusing a stale capture from an earlier match.
    next unless $c_src_name =~ m/^(.*)_orth\.c$/;
    my $langtag_lc = lc($1);
    my $langtag_uc = uc($1);
    print {$src_fh} " \"${langtag_lc}\", ${langtag_uc}_LANG_ORTH_SZ, ${langtag_lc}_lang_orth_chars,\n";
}
print {$src_fh} "};\n";

# Accessors. The search loop in fc_lang_find() advances lang_ptr together
# with the index so that every record is compared, not only the first one.
print {$src_fh} <<EOF;
const struct fc_lang_rec* get_fc_lang_data() {
return &fc_lang_data[0];
}
unsigned int get_fc_lang_data_size() {
return FC_LANG_DATA_SZ;
}
const struct fc_lang_rec* fc_lang_find(const char* lang_code) {
const struct fc_lang_rec* lang_ptr = fc_lang_data;
int i;
int found = 0;
for (i = 0; i < FC_LANG_DATA_SZ; i++, lang_ptr++) {
if (strcmp(lang_ptr->lang_code, lang_code) == 0)
{
found = 1;
break;
}
}
if (found)
return lang_ptr;
return 0;
}
EOF

close($src_fh)
    or die "Unable to write $cat_srcfile: $!";

1;

# functions

# Convert one orthography file into a C source fragment.
#
# Arguments:
#   $langtag    - language tag; its lower-case form names the generated
#                 array and its upper-case form names the size macro.
#   $orth_file  - path of the .orth file to read.
#   $c_src_name - path of the C fragment to write.
#
# The fragment contains the array "<tag>_lang_orth_chars" filled by
# parseFile() and the macro "<TAG>_LANG_ORTH_SZ" holding its element count.
#
# Returns 1 when at least one array element was written, undef otherwise
# (including when either file cannot be opened or the output cannot be
# flushed).
sub processFile($$$) {
    my ($langtag, $orth_file, $c_src_name) = @_;
    #print "langtag=${langtag}; orth_file=$orth_file; c_src_name=$c_src_name\n";

    my $fin;
    if (!open($fin, '<', $orth_file)) {
        print STDERR "Can't open file \"${orth_file}\" for reading!\n";
        return undef;
    }
    my $fout;
    if (!open($fout, '>', $c_src_name)) {
        print STDERR "Can't open file \"${c_src_name}\" for writing!\n";
        close($fin);
        return undef;
    }

    # Included files are resolved relative to the directory of the
    # orthography file; fall back to the current directory.
    my $dirname = ".";
    if ($orth_file =~ m/^(.*)\/[a-zA-Z_0-9]+\.orth$/) {
        $dirname = $1;
    }

    print "processing orth-file for language tag \"${langtag}\"\n";
    my $langtag_uc = uc($langtag);
    my $langtag_lc = lc($langtag);

    print {$fout} <<EOF;
// This file is autogenerated from fc-lang database.
// https://www.freedesktop.org/wiki/Software/fontconfig/
// https://gitlab.freedesktop.org/fontconfig/fontconfig/tree/master/fc-lang
// by convert utility from https://github.com/virxkane/freetype_textdraw
const unsigned int ${langtag_lc}_lang_orth_chars[] = {
EOF
    my $count = parseFile($dirname, $fin, $fout);
    print {$fout} "};\n";
    print {$fout} "#define ${langtag_uc}_LANG_ORTH_SZ $count\n";

    close($fin);
    # Buffered write errors only surface at close time.
    if (!close($fout)) {
        print STDERR "Failed to write file \"${c_src_name}\": $!\n";
        return undef;
    }
    return $count > 0 ? 1 : undef;
}

# Parse an orthography stream and write its code points as C array elements.
#
# Arguments:
#   $dirname - directory used to resolve "include <file>" directives.
#   $fin     - input file handle positioned at the start of the data.
#   $fout    - output file handle receiving the array elements.
#
# Recognised lines (after removing '#' comments and surrounding whitespace):
#   include NAME         - parse ${dirname}/NAME recursively
#   HEX                  - single code point, optional "0x" prefix
#   HEX - HEX, HEX..HEX  - inclusive range
# Any of the numeric forms may be followed by whitespace and free text.
# A line that starts with "- HEX" is joined to the previous line, so a range
# may be split over two lines (see mni.orth).
#
# A single code point is written as one element; a range is written as three
# elements: CODE_IN_RANGE, first, last.
#
# Returns the number of array elements written, included files counted.
sub parseFile($$$) {
    my ($dirname, $fin, $fout) = @_;

    my $count = 0;

    # Pass 1: drop comments, surrounding whitespace and empty lines.
    my @cleaned = ();
    while (defined(my $text = <$fin>)) {
        chomp($text);
        $text =~ s/#.*$//;     # whole-line or trailing comment
        $text =~ s/^\s+//;
        $text =~ s/\s+$//;
        push @cleaned, $text if length($text) > 0;
    }

    # Pass 2: combine 2 lines into one
    # See file mni.orth
    # 1: 0964
    # 2: - 09c4
    my @entries = ();
    foreach my $text (@cleaned) {
        if ($text =~ m/^\s*-\s*[0-9a-fA-F]+.*$/) {
            # A continuation with nothing before it is dropped.
            $entries[-1] .= $text if @entries;
        } else {
            push @entries, $text;
        }
    }

    # Pass 3: interpret every entry.
    foreach my $entry (@entries) {
        my $ok;
        if ($entry =~ m/^include\s+(.*)$/) {
            my $incFileName = "${dirname}/$1";
            if (open(my $newfin, '<', $incFileName)) {
                print "process included file: \"${incFileName}\"...\n";
                my $inc_count = parseFile($dirname, $newfin, $fout);
                close($newfin);
                if ($inc_count > 0) {
                    $ok = 1;
                    $count += $inc_count;
                }
            } else {
                print STDERR "Unable to open ${incFileName}\n";
            }
        } else {
            my ($first, $second);
            # Ranges must be tested before single numbers: otherwise
            # "0041 - 005a" would match "number followed by free text"
            # and lose its upper bound.
            if ($entry =~ m/^(?:0x)*([0-9a-fA-F]+)\s*(?:-|\.\.)\s*(?:0x)*([0-9a-fA-F]+)(?:\s+.*)?$/) {
                # line contains range, optionally with comment without symbol '#'
                $first  = hex($1);
                $second = hex($2);
                $ok     = 1;
            } elsif ($entry =~ m/^(?:0x)*([0-9a-fA-F]+)(?:\s+.*)?$/) {
                # line contains one number, optionally with comment without symbol '#'
                $first  = hex($1);
                $second = 0;
                $ok     = 1;
            }
            # anything else is just a comment without symbol '#'

            if ($ok) {
                if (0 == $second) {
                    # A zero upper bound is treated as "no range".
                    printf $fout ("\t0x%04x,\n", $first);
                    $count++;
                } else {
                    printf $fout ("\t0x%08x, 0x%04x, 0x%04x, // range\n", CODE_IN_RANGE, $first, $second);
                    $count += 3;
                }
            }
        }
        if (!$ok) {
            print STDERR "Failed to parse line: ${entry}\n";
        }
    }
    return $count;
}
Oops, something went wrong.