Skip to content

Commit f0412f2

Browse files
authored
Fix a bug with lines exceeding column 72 when they contain a UTF-8 Japanese literal (#621)
Fixed a bug with lines that exceed column 72 when they contain a UTF-8 Japanese literal. Detail: when UTF-8 Japanese strings are used, '\n' is inserted at column 73. Because the number of bytes differs between SJIS and UTF-8, the '\n' may be inserted at a different position than it would be in SJIS, resulting in an error.
1 parent c731e44 commit f0412f2

File tree

4 files changed

+48
-0
lines changed

4 files changed

+48
-0
lines changed

cobj/cobj.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -705,6 +705,27 @@ int utf8_hankaku_kana(const unsigned char *p) {
705705
}
706706
return 0;
707707
}
708+
709+
/*
 * Translate a column count into a byte offset within the UTF-8 text p.
 *
 * Columns are charged per character: one column when COB_U8BYTE_1() reports
 * a single-byte character, or a 3-byte character for which
 * utf8_hankaku_kana() returns non-zero; two columns for every other
 * sequence.
 *
 * p      : NUL-terminated UTF-8 text to scan.
 * column : number of columns to consume.
 *
 * Returns the number of bytes of p consumed when `column` columns have been
 * reached or the terminating NUL is hit, whichever comes first.
 */
int utf8_calc_sjis_column(const unsigned char *p, int column) {
  const unsigned char *start = p;
  int char_size = 0;
  int remaining = 0;
  int i = 0;

  while (i < column && *p != '\0') {
    char_size = COB_U8BYTE_1(*p);
    if (char_size == 1) {
      i++;
    } else if (char_size == 3 && utf8_hankaku_kana(p)) {
      i++;
    } else {
      i += 2;
    }
    /*
     * Step over the character one byte at a time and stop at the NUL: a
     * multi-byte sequence cut short by the end of the string must not move
     * p past the terminator, otherwise the next loop test reads out of
     * bounds and the caller receives an offset beyond the string.
     */
    for (remaining = char_size; remaining > 0 && *p != '\0'; remaining--) {
      p++;
    }
  }
  return (int) (p - start);
}
708729
#endif /*I18N_UTF8*/
709730

710731
/*

cobj/cobj.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ extern void utf8_spc_to_ascii(char *);
200200
extern int utf8_national_length(const unsigned char *str, int len);
201201
extern size_t utf8_calc_sjis_size(const unsigned char *data, int len);
202202
extern int utf8_hankaku_kana(const unsigned char *data);
203+
/* Byte offset in UTF-8 text p reached after consuming `column` columns
 * (1 per single-byte or hankaku-kana character, 2 per other sequence). */
extern int utf8_calc_sjis_column(const unsigned char *p, int column);
203204
#else /*!I18N_UTF8*/
204205
extern const unsigned char *sjis_pick(const unsigned char *);
205206
extern size_t sjis_strlen(const unsigned char *);

cobj/pplex.l.m4

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1179,7 +1179,12 @@ start:
11791179
}
11801180

11811181
/* remove it */
1182+
#ifdef I18N_UTF8
1183+
p = (unsigned char *) buff;
1184+
strcpy (buff + utf8_calc_sjis_column(p, cb_text_column), "\n");
1185+
#else /*!I18N_UTF8*/
11821186
strcpy (buff + cb_text_column, "\n");
1187+
#endif /*I18N_UTF8*/
11831188
last_line_2 = cb_source_line;
11841189
n = cb_text_column + 1;
11851190
}

tests/cobol_utf8.src/pic-x.at

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,27 @@ AT_CHECK([COB_TERMINAL_ENCODING=UTF-8 java prog], [0],
614614

615615
AT_CLEANUP
616616

617+
# Regression test: compile and run a program that DISPLAYs a Japanese
# literal, then compare its output with the same text converted from
# UTF-8 to Shift_JIS by nkf.
AT_SETUP([Exceed column 72 with Japanese literal])
618+
export LC_ALL=''
619+
620+
AT_DATA([prog.cob], [
621+
IDENTIFICATION DIVISION.
622+
PROGRAM-ID. prog.
623+
DATA DIVISION.
624+
WORKING-STORAGE SECTION.
625+
01 str PIC X(5) VALUE 'アイウエオ'.
626+
PROCEDURE DIVISION.
627+
DISPLAY str.
628+
])
629+
630+
AT_CHECK([cobj prog.cob])
631+
AT_CHECK([java prog > out1.txt])
632+
AT_CHECK([echo -n 'アイウエオ
633+
' | nkf -x --ic=UTF-8 --oc=Shift_JIS > out2.txt])
634+
AT_CHECK([diff out1.txt out2.txt])
635+
636+
AT_CLEANUP
637+
617638
#AT_SETUP([Readable string literals])
618639
#export LC_ALL=''
619640
## Older compilers converts string literals "日本語" in COBOL source code

0 commit comments

Comments
 (0)