Skip to content

Commit 1552dfc

Browse files
xtaixefaho
authored and committed
Add Java implementation
1 parent 7dccb16 commit 1552dfc

9 files changed

+1716
-15
lines changed

Makefile

+2-2
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
test: tester rust
22
./tester
33

4-
widechar_width.h widechar_width.js widechar_width.rs: generate.py
4+
widechar_width.h widechar_width.js widechar_width.rs widechar_width.py widechar_width.java: generate.py
55
./generate.py
66

77
wcwidth9.h:
@@ -16,4 +16,4 @@ tester: test.cpp widechar_width.h | wcwidth9.h
1616
clang++ -std=c++11 test.cpp -o tester
1717

1818
clean:
19-
rm -f UnicodeData.txt emoji-data.txt EastAsianWidth.txt widechar_width.h widechar_width.js widechar_width.rs widechar_width.py tester
19+
rm -f UnicodeData.txt emoji-data.txt EastAsianWidth.txt widechar_width.h widechar_width.js widechar_width.rs widechar_width.py widechar_width.java tester

README.md

+30-9
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ widecharwidth is a Python script that outputs implementations of `wcwidth()`, by
44
- JavaScript
55
- Python
66
- Rust
7+
- Java
78

89
## C++ Usage
910

@@ -13,15 +14,15 @@ This header contains a single public function `widechar_wcwidth()`. This returns
1314

1415
If you aren't sure how to handle negative return values, try this table:
1516

16-
| return value | width |
17-
|-------------------------|---|
18-
| `widechar_nonprint` | 0 |
19-
| `widechar_combining` | 0 |
20-
| `widechar_ambiguous` | 1 |
21-
| `widechar_private_use` | 1 |
22-
| `widechar_unassigned` | 0 |
23-
| `widechar_non_character`| 0 |
24-
| `widechar_widened_in_9` | 2 (or maybe 1, renderer dependent) |
17+
| return value | width |
18+
|--------------------------|------------------------------------|
19+
| `widechar_nonprint` | 0 |
20+
| `widechar_combining` | 0 |
21+
| `widechar_ambiguous` | 1 |
22+
| `widechar_private_use` | 1 |
23+
| `widechar_unassigned` | 0 |
24+
| `widechar_non_character` | 0 |
25+
| `widechar_widened_in_9` | 2 (or maybe 1, renderer dependent) |
2526

2627
## JavaScript usage
2728

@@ -64,6 +65,26 @@ match WcWidth::from_char(c) {
6465
}
6566
```
6667

68+
## Java usage
69+
70+
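For Java 10+ (the generated code uses `var`), the file `widechar_width.java` contains the `WcWidth` class definition; save it as `WcWidth.java` in your project, since it declares a public class. You can then use it as follows: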
71+
72+
```java
73+
int width = WcWidth.Type.of(codePoint).defaultWidth();
74+
```
75+
76+
The default widths follow the recommendations in the table in the C++ usage section above.
77+
If you need a different width for some types, create your own wrapper method using something like:
78+
79+
```java
80+
final WcWidth.Type type = WcWidth.Type.of(codePoint);
81+
switch (type) {
82+
case WIDENED_IN_9: return 1;
83+
...
84+
default: return type.defaultWidth();
85+
}
86+
```
87+
6788
## Regenerating the sources
6889

6990
To regenerate the generated sources, run `make`. This will download and parse the relevant files, and run tests.

generate.py

+1
Original file line number | Diff line number | Diff line change
@@ -391,6 +391,7 @@ def gitobjecthash(data):
391391
".js": ("[]",),
392392
".py": ("()", " " * 2, True),
393393
".rs": ("()",),
394+
".java": ("{}", " " * 2),
394395
}
395396
for suffix, settings in langs.items():
396397
with open("templates/template" + suffix) as templatefile:

templates/template.java

+144
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,144 @@
1+
import java.util.stream.Stream;
2+
3+
import static java.lang.String.format;
4+
5+
/**
6+
* {filename} for Unicode {unicode_version}
7+
* See <a href="https://github.com/ridiculousfish/widecharwidth/">https://github.com/ridiculousfish/widecharwidth/</a>
8+
*
9+
* <p>SHA1 file hashes:
10+
* (
11+
* the hashes for generate.py and the template are git object hashes,
12+
* use `git log --all --find-object=hash` in the widecharwidth repository
13+
* to see which commit they correspond to,
14+
* or run `git hash-object` on the file to compare.
15+
* The other hashes are simple `sha1sum` style hashes.
16+
* )
17+
*
18+
* <ul>
19+
* <li>generate.py: {generate_hash}</li>
20+
* <li>template.java: {template_hash}</li>
21+
* <li>UnicodeData.txt: {unicode_hash}</li>
22+
* <li>EastAsianWidth.txt: {eaw_hash}</li>
23+
* <li>emoji-data.txt: {emoji_hash}</li>
24+
* </ul>
25+
*/
26+
public class WcWidth {{
27+
28+
private WcWidth() {{
29+
}}
30+
31+
public enum Type {{
32+
ONE(1, ASCII_TABLE), // The character is single-width
33+
PRIVATE_USE(1, PRIVATE_TABLE), // The character is for private use.
34+
NON_PRINT(0, NON_PRINT_TABLE), // The character is not printable
35+
NON_CHARACTER(0, NON_CHARACTER_TABLE), // The character is a non-character.
36+
COMBINING(0, COMBINING_TABLE, COMBINING_LETTERS_TABLE), // The character is a zero-width combiner
37+
TWO(2, DOUBLE_WIDE_TABLE), // The character is double-width
38+
AMBIGUOUS(1, AMBIGUOUS_TABLE), // The character is East-Asian ambiguous width.
39+
UNASSIGNED(0, UNASSIGNED_TABLE), // The character is unassigned.
40+
WIDENED_IN_9(2, WIDENED_IN_9_TABLE); // Width is 1 in Unicode 8, 2 in Unicode 9+.
41+
42+
private final int defaultWidth;
43+
private final int[][][] tables;
44+
45+
Type(int defaultWidth, int[][]... tables) {{
46+
this.defaultWidth = defaultWidth;
47+
this.tables = tables;
48+
}}
49+
50+
public int defaultWidth() {{
51+
return defaultWidth;
52+
}}
53+
54+
private boolean contains(int c) {{
55+
return Stream.of(tables).anyMatch(table -> contains(c, table));
56+
}}
57+
58+
private boolean contains(int c, int[][] table) {{
59+
var min = 0;
60+
var max = table.length - 1;
61+
62+
if (c < table[0][0] || c > table[max][1]) {{
63+
return false;
64+
}}
65+
66+
while (max >= min) {{
67+
var mid = (min + max) / 2;
68+
69+
if (c > table[mid][1]) {{
70+
min = mid + 1;
71+
}} else if (c < table[mid][0]) {{
72+
max = mid - 1;
73+
}} else {{
74+
return true;
75+
}}
76+
}}
77+
78+
return false;
79+
}}
80+
81+
public static Type of(int c) {{
82+
if (c < 0 || c > 0x10FFFF) {{
83+
throw new IllegalArgumentException(format("'0x%X' is not a Unicode code point", c));
84+
}}
85+
86+
return Stream.of(Type.values())
87+
.filter(type -> type.contains(c))
88+
.findFirst()
89+
.orElse(ONE);
90+
}}
91+
92+
}}
93+
94+
// Simple ASCII characters - used a lot, so we check them first.
95+
private static final int[][] ASCII_TABLE = {{
96+
{ascii}
97+
}};
98+
99+
// Private usage range.
100+
private static final int[][] PRIVATE_TABLE = {{
101+
{private}
102+
}};
103+
104+
// Nonprinting characters.
105+
private static final int[][] NON_PRINT_TABLE = {{
106+
{nonprint}
107+
}};
108+
109+
// Width 0 combining marks.
110+
private static final int[][] COMBINING_TABLE = {{
111+
{combining}
112+
}};
113+
114+
// Width 0 combining letters.
115+
private static final int[][] COMBINING_LETTERS_TABLE = {{
116+
{combiningletters}
117+
}};
118+
119+
// Width 2 characters.
120+
private static final int[][] DOUBLE_WIDE_TABLE = {{
121+
{doublewide}
122+
}};
123+
124+
// Ambiguous-width characters.
125+
private static final int[][] AMBIGUOUS_TABLE = {{
126+
{ambiguous}
127+
}};
128+
129+
// Unassigned characters.
130+
private static final int[][] UNASSIGNED_TABLE = {{
131+
{unassigned}
132+
}};
133+
134+
// Non-characters.
135+
private static final int[][] NON_CHARACTER_TABLE = {{
136+
{noncharacters}
137+
}};
138+
139+
// Characters that were widened from width 1 to 2 in Unicode 9.
140+
private static final int[][] WIDENED_IN_9_TABLE = {{
141+
{widenedin9}
142+
}};
143+
144+
}}

widechar_width.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
* The other hashes are simple `sha1sum` style hashes.
1212
* )
1313
*
14-
* generate.py: 6d63502e0a28f40351524953141ea802a79dced9
14+
* generate.py: 1d24de5a7caf6e8cc4e5a688ea83db972efe4538
1515
* template.js: 1249763c5b7c1e308aeb4ca64f1e15bce1fab9b3
1616
* UnicodeData.txt: 3e1900295af0978ad6be3153de4c97d55198ab4b
1717
* EastAsianWidth.txt: 2637ce61d024cb25c768023fa4d7594b53474919

0 commit comments

Comments
 (0)