Skip to content

Commit 99d1204

Browse files
kio-watanabe, yutaro-sakamoto, unknown, ytr-sakamoto
authored
Modified to support COBOL source code written in UTF-8. (#582)
* fix: convert SJIS string literals to ordinary Java string literals * test: add test suites * test: add new tests cobol_utf8 * ci: run tests of --enable-utf8 tests * ci: fix workflows for push events * ci: disable `fail-fast` of some jobs temporarily * ci: change pull_request.yml to run utf8 tests * ci: fix push.yml and pull-request.yml slightly * test: convert some misc tests to UTF8 versions and add them to cobol_utf8 * test: change utf8 tests * ci: run m4 when installing the compiler * fix: dispatch encodings of javac * wip: fix slightly * wip: wip * fix: update libcobj to encode Strings to SJIS * fix: add -Wno-parentheses * test: fix tests * fix: disable size checks of UTF-8 data * fix: value clause * test: fix pic-n.at and pic-x * fix: typeck.c to suppress checks against inspect statement * [WIP] fix: Remove the character encoding specification for 'new String' * [WIP] fix: Fix the byte size calculation for Japanese variable names. * fix: Fix the code related to the byte size of UTF-8. * fix: Fix the code related to the byte size of UTF-8. * fix: Fix the code related to the byte size of UTF-8. * fix: Remove unnecessary codes * fix: Minor adjustments related to code generation * fix: Fix the code generation for strings that are SJIS multibyte. * fix: Fix the bug about MOVE of literals when UTF-8 * fix: Remove unnecessary comments. * fix: Fix regarding UTF8 multi byte strings * fix: Fix regarding UTF8 multi byte strings * fix: Fix regarding PROGRAM-ID written in UTF8 multibyte * fix: Fix test scripts * fix: Clean up codes * Clean up codes * fix: Clean up codes * fix: Minor fix * fix: fieldToString * ci: install wget in alma and amazon linux * fix: Fix a bug regarding PROGRAM-ID with multibyte chars * Fix: Convert cb_zero_utf8 to cb_zen_zero --------- Co-authored-by: Yutaro Sakamoto <[email protected]> Co-authored-by: unknown <[email protected]> Co-authored-by: Yutaro Sakamoto <[email protected]> Co-authored-by: Yutaro Sakamoto <[email protected]>
1 parent 04208c5 commit 99d1204

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+9101
-4806
lines changed

.github/workflows/build.yml

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ on:
66
os:
77
required: true
88
type: string
9+
configure-args:
10+
required: false
11+
type: string
12+
default: ""
913

1014
permissions:
1115
contents: read
@@ -32,26 +36,27 @@ jobs:
3236
if: inputs.os == 'ubuntu:24.04'
3337
run: |
3438
apt-get update -y
35-
apt-get install -y build-essential gettext autoconf
39+
apt-get install -y build-essential gettext autoconf bison flex
3640
3741
- name: Install dependencies on AlmaLinux 9
3842
if: inputs.os == 'almalinux:9'
3943
run: |
4044
dnf -y update
41-
dnf install -y gcc make autoconf diffutils gettext
45+
dnf install -y gcc make autoconf diffutils gettext bison flex
4246
4347
- name: Install dependencies on Amazon Linux 2023
4448
if: inputs.os == 'amazonlinux:2023'
4549
run: |
4650
dnf -y update
47-
dnf install -y java-21-amazon-corretto-devel gcc make autoconf diffutils gettext tar gzip
51+
dnf install -y java-21-amazon-corretto-devel gcc make autoconf diffutils gettext tar gzip bison flex
4852
4953
- name: Checkout opensource COBOL 4J
5054
uses: actions/checkout@v4
5155

5256
- name: Install opensource COBOL 4J
5357
run: |
54-
./configure --prefix=/usr/ CFLAGS=-Werror
58+
./configure --prefix=/usr/ CFLAGS=-Werror ${{ inputs.configure-args }}
59+
touch cobj/*.m4
5560
make
5661
echo "ARTIFACT_NAME=${{ inputs.os }}" | sed 's/:/-/g' >> "$GITHUB_ENV"
5762
@@ -64,5 +69,5 @@ jobs:
6469
- name: Upload an artifact
6570
uses: actions/upload-artifact@v4
6671
with:
67-
name: opensourcecobol4j-${{ env.ARTIFACT_NAME }}
72+
name: opensourcecobol4j-${{ env.ARTIFACT_NAME }}-opt_${{ inputs.configure-args }}
6873
path: opensourcecobol4j.tar.gz

.github/workflows/pull-request.yml

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,16 @@ jobs:
3131
with:
3232
os: ${{ matrix.os }}
3333

34+
build-utf8:
35+
needs: check-workflows
36+
strategy:
37+
matrix:
38+
os: ["ubuntu:24.04", "almalinux:9", "amazonlinux:2023"]
39+
uses: ./.github/workflows/build.yml
40+
with:
41+
os: ${{ matrix.os }}
42+
configure-args: --enable-utf8
43+
3444
run-test-other:
3545
needs: build
3646
strategy:
@@ -50,6 +60,28 @@ jobs:
5060
test-name: ${{ matrix.test_name }}
5161
os: ${{ matrix.os }}
5262

63+
run-test-other-utf8:
64+
needs: build-utf8
65+
strategy:
66+
fail-fast: false
67+
matrix:
68+
test_name:
69+
- "command-line-options"
70+
- "data-rep"
71+
- "cobol_utf8"
72+
#- "i18n_utf8"
73+
- "jp-compat"
74+
- "run"
75+
- "syntax"
76+
- "cobj-idx"
77+
#- "misc"
78+
os: ["ubuntu:24.04", "almalinux:9", "amazonlinux:2023"]
79+
uses: ./.github/workflows/test-other.yml
80+
with:
81+
test-name: ${{ matrix.test_name }}
82+
os: ${{ matrix.os }}
83+
configure-args: --enable-utf8
84+
5385
run-test-cobj-api:
5486
needs: build
5587
strategy:
@@ -71,6 +103,20 @@ jobs:
71103
check-result: true
72104
os: ${{ matrix.os }}
73105

106+
run-test-nist-utf8:
107+
needs: build-utf8
108+
strategy:
109+
fail-fast: false
110+
matrix:
111+
test_name: ["IC", "IF", "IX", "NC", "OB", "RL", "SG", "SM", "SQ", "ST"]
112+
os: ["ubuntu:24.04", "almalinux:9", "amazonlinux:2023"]
113+
uses: ./.github/workflows/test-nist.yml
114+
with:
115+
test-name: ${{ matrix.test_name }}
116+
check-result: true
117+
os: ${{ matrix.os }}
118+
configure-args: --enable-utf8
119+
74120
run-test-nist-extra:
75121
needs: build
76122
strategy:

.github/workflows/push.yml

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,20 @@ jobs:
3030
with:
3131
os: ${{ matrix.os }}
3232

33+
build-utf8:
34+
needs: check-workflows
35+
strategy:
36+
matrix:
37+
os: ["ubuntu:24.04"]
38+
uses: ./.github/workflows/build.yml
39+
with:
40+
os: ${{ matrix.os }}
41+
configure-args: --enable-utf8
42+
3343
run-test-other:
3444
needs: build
3545
strategy:
46+
fail-fast: false
3647
matrix:
3748
test_name:
3849
- "command-line-options"
@@ -48,6 +59,28 @@ jobs:
4859
with:
4960
test-name: ${{ matrix.test_name }}
5061
os: ${{ matrix.os }}
62+
63+
run-test-other-utf8:
64+
needs: build-utf8
65+
strategy:
66+
fail-fast: false
67+
matrix:
68+
test_name:
69+
- "command-line-options"
70+
- "data-rep"
71+
- "cobol_utf8"
72+
#- "i18n_utf8"
73+
- "jp-compat"
74+
- "run"
75+
- "syntax"
76+
- "cobj-idx"
77+
#- "misc"
78+
os: ["ubuntu:24.04"]
79+
uses: ./.github/workflows/test-other.yml
80+
with:
81+
test-name: ${{ matrix.test_name }}
82+
os: ${{ matrix.os }}
83+
configure-args: --enable-utf8
5184

5285
run-test-cobj-api:
5386
needs: build
@@ -61,6 +94,20 @@ jobs:
6194
run-test-nist:
6295
needs: build
6396
strategy:
97+
fail-fast: false
98+
matrix:
99+
test_name: ["IC", "IF", "IX", "NC", "OB", "RL", "SG", "SM", "SQ", "ST"]
100+
os: ["ubuntu:24.04"]
101+
uses: ./.github/workflows/test-nist.yml
102+
with:
103+
test-name: ${{ matrix.test_name }}
104+
check-result: true
105+
os: ${{ matrix.os }}
106+
107+
run-test-nist-utf8:
108+
needs: build-utf8
109+
strategy:
110+
fail-fast: false
64111
matrix:
65112
test_name: ["IC", "IF", "IX", "NC", "OB", "RL", "SG", "SM", "SQ", "ST"]
66113
os: ["ubuntu:24.04"]
@@ -69,6 +116,7 @@ jobs:
69116
test-name: ${{ matrix.test_name }}
70117
check-result: true
71118
os: ${{ matrix.os }}
119+
configure-args: --enable-utf8
72120

73121
run-test-nist-extra:
74122
needs: build

.github/workflows/test-cobj-api.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ on:
66
os:
77
required: true
88
type: string
9+
configure-args:
10+
required: false
11+
type: string
12+
default: ""
913

1014
permissions:
1115
contents: read
@@ -24,7 +28,7 @@ jobs:
2428

2529
- uses: actions/download-artifact@v4
2630
with:
27-
name: opensourcecobol4j-${{ env.ARTIFACT_NAME }}
31+
name: opensourcecobol4j-${{ env.ARTIFACT_NAME }}-opt_${{ inputs.configure-args }}
2832

2933
- uses: actions/setup-java@v4
3034
if: inputs.os != 'amazonlinux:2023'

.github/workflows/test-nist.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ on:
1212
os:
1313
required: true
1414
type: string
15+
configure-args:
16+
required: false
17+
type: string
18+
default: ""
1519

1620
permissions:
1721
contents: read
@@ -30,7 +34,7 @@ jobs:
3034

3135
- uses: actions/download-artifact@v4
3236
with:
33-
name: opensourcecobol4j-${{ env.ARTIFACT_NAME }}
37+
name: opensourcecobol4j-${{ env.ARTIFACT_NAME }}-opt_${{ inputs.configure-args }}
3438

3539
- name: Install Java
3640
uses: actions/setup-java@v4

.github/workflows/test-other.yml

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ on:
99
os:
1010
required: true
1111
type: string
12+
configure-args:
13+
required: false
14+
type: string
15+
default: ""
1216

1317
permissions:
1418
contents: read
@@ -27,7 +31,7 @@ jobs:
2731

2832
- uses: actions/download-artifact@v4
2933
with:
30-
name: opensourcecobol4j-${{ env.ARTIFACT_NAME }}
34+
name: opensourcecobol4j-${{ env.ARTIFACT_NAME }}-opt_${{ inputs.configure-args }}
3135

3236
- uses: actions/setup-java@v4
3337
if: inputs.os != 'amazonlinux:2023'
@@ -39,19 +43,29 @@ jobs:
3943
if: inputs.os == 'ubuntu:24.04'
4044
run: |
4145
apt-get update -y
42-
apt-get install -y build-essential unzip
46+
apt-get install -y build-essential unzip nkf
4347
4448
- name: Install dependencies on AlmaLinux 9
4549
if: inputs.os == 'almalinux:9'
4650
run: |
4751
dnf -y update
48-
dnf install -y gcc make diffutils glibc-gconv-extra unzip
52+
dnf install -y gcc make diffutils glibc-gconv-extra unzip wget
53+
wget "http://sourceforge.jp/frs/redir.php?m=jaist&f=%2Fnkf%2F59912%2Fnkf-2.1.3.tar.gz" -O nkf-2.1.3.tar.gz --no-check-certificate
54+
tar zxf nkf-2.1.3.tar.gz
55+
cd nkf-2.1.3
56+
make
57+
make install
4958
5059
- name: Install dependencies on Amazon Linux 2023
5160
if: inputs.os == 'amazonlinux:2023'
5261
run: |
5362
dnf -y update
54-
dnf install -y gcc make diffutils tar gzip unzip
63+
dnf install -y gcc make diffutils tar gzip unzip wget
64+
wget "http://sourceforge.jp/frs/redir.php?m=jaist&f=%2Fnkf%2F59912%2Fnkf-2.1.3.tar.gz" -O nkf-2.1.3.tar.gz --no-check-certificate
65+
tar zxf nkf-2.1.3.tar.gz
66+
cd nkf-2.1.3
67+
make
68+
make install
5569
5670
- name: Install Java
5771
if: inputs.os == 'amazonlinux:2023'

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ tests/command-line-options
4949
tests/data-rep
5050
tests/i18n_sjis
5151
tests/i18n_utf8
52+
tests/cobol_utf8
5253
tests/jp-compat
5354
tests/cobj-idx
5455
tests/misc

cobj/cobj.c

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,12 @@ static const struct option long_options[] = {
329329
#undef CB_WARNDEF
330330
{NULL, 0, NULL, 0}};
331331

332+
#ifdef I18N_UTF8
333+
static const char *JAVAC_ENCODING = "UTF-8";
334+
#else
335+
static const char *JAVAC_ENCODING = "SJIS";
336+
#endif
337+
332338
static const char *cob_cc; /* gcc */
333339
static char cob_java_flags[COB_SMALL_BUFF]; /* -I... */
334340
static char cob_libs[COB_MEDIUM_BUFF]; /* -L... -lcob */
@@ -664,6 +670,43 @@ void sjis_spc_to_ascii(char *str) {
664670
}
665671
#endif /*I18N_UTF8*/
666672

673+
#ifdef I18N_UTF8
674+
/*
 * Walk the byte range [p, p + len) one character at a time and return a
 * weighted size: each character whose COB_U8BYTE_1() size is 1 counts as 1,
 * each 3-byte character accepted by utf8_hankaku_kana() counts as 1, and
 * every other character counts as 2.
 * Judging by the name, the result is presumably the byte length the same
 * text would occupy in Shift_JIS -- TODO confirm against callers.
 *
 * p:   first byte of the data to measure.
 * len: number of bytes to examine, starting at p.
 * Returns the accumulated size (0 when len <= 0, since the loop never runs).
 */
size_t utf8_calc_sjis_size(const unsigned char *p, int len) {
675+
/* One-past-the-end sentinel for the scan. */
const unsigned char *ub = p + len;
676+
int char_size = 0;
677+
size_t name_size = 0;
678+
while (p < ub) {
679+
/* COB_U8BYTE_1 is defined elsewhere; presumably it maps a UTF-8 lead byte
 * to the length of its sequence -- verify.
 * NOTE(review): if it can yield 0 for an unexpected byte, the final else
 * branch below would not advance p and this loop would not terminate --
 * confirm. */
char_size = COB_U8BYTE_1(*p);
680+
if (char_size == 1) {
681+
name_size += 1;
682+
p++;
683+
/* NOTE(review): utf8_hankaku_kana() may read p[1] and p[2]; when fewer than
 * 3 bytes remain before ub this can look past the requested range --
 * confirm inputs are always complete sequences. */
} else if (char_size == 3 && utf8_hankaku_kana(p)) {
684+
name_size += 1;
685+
p += char_size;
686+
} else {
687+
/* Any other character is counted as two units. */
name_size += 2;
688+
p += char_size;
689+
}
690+
}
691+
return name_size;
692+
}
693+
694+
/*
 * Test whether the three bytes at p are one of the sequences
 * EF BD A1..BF or EF BE 80..9F, i.e. the UTF-8 encodings of
 * U+FF61..U+FF9F (the half-width katakana range).
 *
 * p: pointer to the first byte of the candidate sequence. p[1] is read only
 *    when p[0] == 0xef, and p[2] only when p[1] is 0xbd or 0xbe.
 * Returns 1 on a match, 0 otherwise.
 */
int utf8_hankaku_kana(const unsigned char *p) {
695+
if (p[0] == 0xef) {
696+
if (p[1] == 0xbd) {
697+
/* EF BD A1..BF encodes U+FF61..U+FF7F. */
if (p[2] >= 0xa1 && p[2] <= 0xbf) {
698+
return 1;
699+
}
700+
} else if (p[1] == 0xbe) {
701+
/* EF BE 80..9F encodes U+FF80..U+FF9F. */
if (p[2] >= 0x80 && p[2] <= 0x9f) {
702+
return 1;
703+
}
704+
}
705+
}
706+
return 0;
707+
}
708+
#endif /*I18N_UTF8*/
709+
667710
/*
668711
* Local functions
669712
*/
@@ -1772,8 +1815,9 @@ static int process_compile(struct filename *fn) {
17721815

17731816
char **program_id;
17741817
for (program_id = program_id_list; *program_id; ++program_id) {
1775-
snprintf(buff, COB_MEDIUM_BUFF, "javac %s -encoding SJIS -d %s %s/%s.java",
1776-
cob_java_flags, output_name_a, java_source_dir_a, *program_id);
1818+
snprintf(buff, COB_MEDIUM_BUFF, "javac %s -encoding %s -d %s %s/%s.java",
1819+
cob_java_flags, JAVAC_ENCODING, output_name_a, java_source_dir_a,
1820+
*program_id);
17771821
ret = process(buff);
17781822

17791823
if (ret) {
@@ -2044,8 +2088,8 @@ static int process_build_single_jar() {
20442088
#else
20452089
char remove_cmd[] = "rm -f";
20462090
#endif
2047-
sprintf(buff, "javac %s -encoding SJIS -d %s %s/*.java", cob_java_flags,
2048-
output_name_a, java_source_dir_a);
2091+
sprintf(buff, "javac %s -encoding %s -d %s %s/*.java", cob_java_flags,
2092+
JAVAC_ENCODING, output_name_a, java_source_dir_a);
20492093

20502094
ret = process(buff);
20512095
if (ret) {

cobj/cobj.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,8 @@ extern size_t utf8_strlen(const unsigned char *p);
198198
extern int utf8_casecmp(const char *s1, const char *s2);
199199
extern void utf8_spc_to_ascii(char *);
200200
extern int utf8_national_length(const unsigned char *str, int len);
201+
extern size_t utf8_calc_sjis_size(const unsigned char *data, int len);
202+
extern int utf8_hankaku_kana(const unsigned char *data);
201203
#else /*!I18N_UTF8*/
202204
extern const unsigned char *sjis_pick(const unsigned char *);
203205
extern size_t sjis_strlen(const unsigned char *);

0 commit comments

Comments
 (0)