Skip to content

Commit 9e8f721

Browse files
authored
Merge pull request #7 from typelevel/char-classes-top-level
Move character classes to top level
2 parents d6ab168 + 3f1390c commit 9e8f721

File tree

2 files changed

+397
-380
lines changed

2 files changed

+397
-380
lines changed
Lines changed: 391 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,391 @@
1+
/*
2+
* Copyright 2022 Typelevel
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.typelevel.scalacheck.xml
18+
19+
/** Character classes from XML 1.0, Fourth Edition, Appendix B, each
  * materialised as a sequence of individual characters.
  *
  * XML 1.0, Fifth Edition "orphans" these rules, but they remain the basis
  * of xerces2-j version 2.12.2.
  *
  * Every table below is a list of inclusive `(first, last)` code point
  * bounds; a single character is written with equal bounds. The order of
  * the resulting characters follows the order of the bounds as listed.
  *
  * @see
  *   https://www.w3.org/TR/xml/#CharClasses
  * @see
  *   https://xerces.apache.org/xerces2-j/
  */
object characterClasses {

  /** Expands inclusive `(first, last)` bounds into the characters they
    * cover, keeping the bounds in the order given.
    */
  private def expand(bounds: (Int, Int)*): Seq[Char] =
    bounds.toList.flatMap { case (first, last) =>
      (first to last).map(_.toChar)
    }

  /** The `Char` class, limited to the bounds listed here (up to 0xfffd). */
  val char: Seq[Char] =
    expand(
      (0x0009, 0x0009), (0x000a, 0x000a), (0x000d, 0x000d),
      (0x0020, 0xd7ff), (0xe000, 0xfffd)
    )

  /** The `BaseChar` class. */
  val baseChar: Seq[Char] =
    expand(
      (0x0041, 0x005a), (0x0061, 0x007a), (0x00c0, 0x00d6), (0x00d8, 0x00f6),
      (0x00f8, 0x00ff), (0x0100, 0x0131), (0x0134, 0x013e), (0x0141, 0x0148),
      (0x014a, 0x017e), (0x0180, 0x01c3), (0x01cd, 0x01f0), (0x01f4, 0x01f5),
      (0x01fa, 0x0217), (0x0250, 0x02a8), (0x02bb, 0x02c1), (0x0386, 0x0386),
      (0x0388, 0x038a), (0x038c, 0x038c), (0x038e, 0x03a1), (0x03a3, 0x03ce),
      (0x03d0, 0x03d6), (0x03da, 0x03da), (0x03dc, 0x03dc), (0x03de, 0x03de),
      (0x03e0, 0x03e0), (0x03e2, 0x03f3), (0x0401, 0x040c), (0x040e, 0x044f),
      (0x0451, 0x045c), (0x045e, 0x0481), (0x0490, 0x04c4), (0x04c7, 0x04c8),
      (0x04cb, 0x04cc), (0x04d0, 0x04eb), (0x04ee, 0x04f5), (0x04f8, 0x04f9),
      (0x0531, 0x0556), (0x0559, 0x0559), (0x0561, 0x0586), (0x05d0, 0x05ea),
      (0x05f0, 0x05f2), (0x0621, 0x063a), (0x0641, 0x064a), (0x0671, 0x06b7),
      (0x06ba, 0x06be), (0x06c0, 0x06ce), (0x06d0, 0x06d3), (0x06d5, 0x06d5),
      (0x06e5, 0x06e6), (0x0905, 0x0939), (0x093d, 0x093d), (0x0958, 0x0961),
      (0x0985, 0x098c), (0x098f, 0x0990), (0x0993, 0x09a8), (0x09aa, 0x09b0),
      (0x09b2, 0x09b2), (0x09b6, 0x09b9), (0x09dc, 0x09dd), (0x09df, 0x09e1),
      (0x09f0, 0x09f1), (0x0a05, 0x0a0a), (0x0a0f, 0x0a10), (0x0a13, 0x0a28),
      (0x0a2a, 0x0a30), (0x0a32, 0x0a33), (0x0a35, 0x0a36), (0x0a38, 0x0a39),
      (0x0a59, 0x0a5c), (0x0a5e, 0x0a5e), (0x0a72, 0x0a74), (0x0a85, 0x0a8b),
      (0x0a8d, 0x0a8d), (0x0a8f, 0x0a91), (0x0a93, 0x0aa8), (0x0aaa, 0x0ab0),
      (0x0ab2, 0x0ab3), (0x0ab5, 0x0ab9), (0x0abd, 0x0abd), (0x0ae0, 0x0ae0),
      (0x0b05, 0x0b0c), (0x0b0f, 0x0b10), (0x0b13, 0x0b28), (0x0b2a, 0x0b30),
      (0x0b32, 0x0b33), (0x0b36, 0x0b39), (0x0b3d, 0x0b3d), (0x0b5c, 0x0b5d),
      (0x0b5f, 0x0b61), (0x0b85, 0x0b8a), (0x0b8e, 0x0b90), (0x0b92, 0x0b95),
      (0x0b99, 0x0b9a), (0x0b9c, 0x0b9c), (0x0b9e, 0x0b9f), (0x0ba3, 0x0ba4),
      (0x0ba8, 0x0baa), (0x0bae, 0x0bb5), (0x0bb7, 0x0bb9), (0x0c05, 0x0c0c),
      (0x0c0e, 0x0c10), (0x0c12, 0x0c28), (0x0c2a, 0x0c33), (0x0c35, 0x0c39),
      (0x0c60, 0x0c61), (0x0c85, 0x0c8c), (0x0c8e, 0x0c90), (0x0c92, 0x0ca8),
      (0x0caa, 0x0cb3), (0x0cb5, 0x0cb9), (0x0cde, 0x0cde), (0x0ce0, 0x0ce1),
      (0x0d05, 0x0d0c), (0x0d0e, 0x0d10), (0x0d12, 0x0d28), (0x0d2a, 0x0d39),
      (0x0d60, 0x0d61), (0x0e01, 0x0e2e), (0x0e30, 0x0e30), (0x0e32, 0x0e33),
      (0x0e40, 0x0e45), (0x0e81, 0x0e82), (0x0e84, 0x0e84), (0x0e87, 0x0e88),
      (0x0e8a, 0x0e8a), (0x0e8d, 0x0e8d), (0x0e94, 0x0e97), (0x0e99, 0x0e9f),
      (0x0ea1, 0x0ea3), (0x0ea5, 0x0ea5), (0x0ea7, 0x0ea7), (0x0eaa, 0x0eab),
      (0x0ead, 0x0eae), (0x0eb0, 0x0eb0), (0x0eb2, 0x0eb3), (0x0ebd, 0x0ebd),
      (0x0ec0, 0x0ec4), (0x0f40, 0x0f47), (0x0f49, 0x0f69), (0x10a0, 0x10c5),
      (0x10d0, 0x10f6), (0x1100, 0x1100), (0x1102, 0x1103), (0x1105, 0x1107),
      (0x1109, 0x1109), (0x110b, 0x110c), (0x110e, 0x1112), (0x113c, 0x113c),
      (0x113e, 0x113e), (0x1140, 0x1140), (0x114c, 0x114c), (0x114e, 0x114e),
      (0x1150, 0x1150), (0x1154, 0x1155), (0x1159, 0x1159), (0x115f, 0x1161),
      (0x1163, 0x1163), (0x1165, 0x1165), (0x1167, 0x1167), (0x1169, 0x1169),
      (0x116d, 0x116e), (0x1172, 0x1173), (0x1175, 0x1175), (0x119e, 0x119e),
      (0x11a8, 0x11a8), (0x11ab, 0x11ab), (0x11ae, 0x11af), (0x11b7, 0x11b8),
      (0x11ba, 0x11ba), (0x11bc, 0x11c2), (0x11eb, 0x11eb), (0x11f0, 0x11f0),
      (0x11f9, 0x11f9), (0x1e00, 0x1e9b), (0x1ea0, 0x1ef9), (0x1f00, 0x1f15),
      (0x1f18, 0x1f1d), (0x1f20, 0x1f45), (0x1f48, 0x1f4d), (0x1f50, 0x1f57),
      (0x1f59, 0x1f59), (0x1f5b, 0x1f5b), (0x1f5d, 0x1f5d), (0x1f5f, 0x1f7d),
      (0x1f80, 0x1fb4), (0x1fb6, 0x1fbc), (0x1fbe, 0x1fbe), (0x1fc2, 0x1fc4),
      (0x1fc6, 0x1fcc), (0x1fd0, 0x1fd3), (0x1fd6, 0x1fdb), (0x1fe0, 0x1fec),
      (0x1ff2, 0x1ff4), (0x1ff6, 0x1ffc), (0x2126, 0x2126), (0x212a, 0x212b),
      (0x212e, 0x212e), (0x2180, 0x2182), (0x3041, 0x3094), (0x30a1, 0x30fa),
      (0x3105, 0x312c), (0xac00, 0xd7a3)
    )

  /** The `Ideographic` class. */
  val ideographic: Seq[Char] =
    expand(
      (0x4e00, 0x9fa5), (0x3007, 0x3007), (0x3021, 0x3029)
    )

  /** The `Letter` class: every base character followed by every ideograph. */
  val letter: Seq[Char] =
    baseChar ++ ideographic

  /** The `CombiningChar` class. */
  val combiningChar: Seq[Char] =
    expand(
      (0x0300, 0x0345), (0x0360, 0x0361), (0x0483, 0x0486), (0x0591, 0x05a1),
      (0x05a3, 0x05b9), (0x05bb, 0x05bd), (0x05bf, 0x05bf), (0x05c1, 0x05c2),
      (0x05c4, 0x05c4), (0x064b, 0x0652), (0x0670, 0x0670), (0x06d6, 0x06dc),
      (0x06dd, 0x06df), (0x06e0, 0x06e4), (0x06e7, 0x06e8), (0x06ea, 0x06ed),
      (0x0901, 0x0903), (0x093c, 0x093c), (0x093e, 0x094c), (0x094d, 0x094d),
      (0x0951, 0x0954), (0x0962, 0x0963), (0x0981, 0x0983), (0x09bc, 0x09bc),
      (0x09be, 0x09be), (0x09bf, 0x09bf), (0x09c0, 0x09c4), (0x09c7, 0x09c8),
      (0x09cb, 0x09cd), (0x09d7, 0x09d7), (0x09e2, 0x09e3), (0x0a02, 0x0a02),
      (0x0a3c, 0x0a3c), (0x0a3e, 0x0a3e), (0x0a3f, 0x0a3f), (0x0a40, 0x0a42),
      (0x0a47, 0x0a48), (0x0a4b, 0x0a4d), (0x0a70, 0x0a71), (0x0a81, 0x0a83),
      (0x0abc, 0x0abc), (0x0abe, 0x0ac5), (0x0ac7, 0x0ac9), (0x0acb, 0x0acd),
      (0x0b01, 0x0b03), (0x0b3c, 0x0b3c), (0x0b3e, 0x0b43), (0x0b47, 0x0b48),
      (0x0b4b, 0x0b4d), (0x0b56, 0x0b57), (0x0b82, 0x0b83), (0x0bbe, 0x0bc2),
      (0x0bc6, 0x0bc8), (0x0bca, 0x0bcd), (0x0bd7, 0x0bd7), (0x0c01, 0x0c03),
      (0x0c3e, 0x0c44), (0x0c46, 0x0c48), (0x0c4a, 0x0c4d), (0x0c55, 0x0c56),
      (0x0c82, 0x0c83), (0x0cbe, 0x0cc4), (0x0cc6, 0x0cc8), (0x0cca, 0x0ccd),
      (0x0cd5, 0x0cd6), (0x0d02, 0x0d03), (0x0d3e, 0x0d43), (0x0d46, 0x0d48),
      (0x0d4a, 0x0d4d), (0x0d57, 0x0d57), (0x0e31, 0x0e31), (0x0e34, 0x0e3a),
      (0x0e47, 0x0e4e), (0x0eb1, 0x0eb1), (0x0eb4, 0x0eb9), (0x0ebb, 0x0ebc),
      (0x0ec8, 0x0ecd), (0x0f18, 0x0f19), (0x0f35, 0x0f35), (0x0f37, 0x0f37),
      (0x0f39, 0x0f39), (0x0f3e, 0x0f3e), (0x0f3f, 0x0f3f), (0x0f71, 0x0f84),
      (0x0f86, 0x0f8b), (0x0f90, 0x0f95), (0x0f97, 0x0f97), (0x0f99, 0x0fad),
      (0x0fb1, 0x0fb7), (0x0fb9, 0x0fb9), (0x20d0, 0x20dc), (0x20e1, 0x20e1),
      (0x302a, 0x302f), (0x3099, 0x3099), (0x309a, 0x309a)
    )

  /** The `Digit` class. */
  val digit: Seq[Char] =
    expand(
      (0x0030, 0x0039), (0x0660, 0x0669), (0x06f0, 0x06f9), (0x0966, 0x096f),
      (0x09e6, 0x09ef), (0x0a66, 0x0a6f), (0x0ae6, 0x0aef), (0x0b66, 0x0b6f),
      (0x0be7, 0x0bef), (0x0c66, 0x0c6f), (0x0ce6, 0x0cef), (0x0d66, 0x0d6f),
      (0x0e50, 0x0e59), (0x0ed0, 0x0ed9), (0x0f20, 0x0f29)
    )

  /** The `Extender` class. */
  val extender: Seq[Char] =
    expand(
      (0x00b7, 0x00b7), (0x02d0, 0x02d0), (0x02d1, 0x02d1), (0x0387, 0x0387),
      (0x0640, 0x0640), (0x0e46, 0x0e46), (0x0ec6, 0x0ec6), (0x3005, 0x3005),
      (0x3031, 0x3035), (0x309d, 0x309e), (0x30fc, 0x30fe)
    )

  /** Characters that may begin an NCName: any letter, then the underscore. */
  val ncNameStartChar: Seq[Char] =
    letter :+ '_'

  /** Characters that may appear in an NCName, in this order: letters,
    * digits, the punctuation `.`, `-` and `_`, combining characters and
    * extenders.
    */
  val ncNameChar: Seq[Char] = {
    val punctuation = List('.', '-', '_')
    letter ++ digit ++ punctuation ++ combiningChar ++ extender
  }
}

0 commit comments

Comments
 (0)