|
| 1 | +/* |
| 2 | + * Copyright 2022 Typelevel |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | + |
| 17 | +package org.typelevel.scalacheck.xml |
| 18 | + |
/** XML character classes, expanded into explicit sequences of `Char`s.
 *
 * The code point ranges follow XML 1.0, Fourth Edition, Appendix B. XML 1.0,
 * Fifth Edition "orphans" these rules, but xerces2-j version 2.12.2 is still
 * based on them.
 *
 * @see
 *   https://www.w3.org/TR/xml/#CharClasses
 * @see
 *   https://xerces.apache.org/xerces2-j/
 */
object characterClasses {

  /** Expands inclusive `(first, last)` code point pairs into the characters
   * they cover, keeping the pairs in the order they were given.
   */
  private def expand(bounds: (Int, Int)*): Seq[Char] =
    bounds.toList.flatMap { case (first, last) =>
      (first to last).map(_.toChar)
    }

  /** Tab, line feed, carriage return, and the ranges 0x0020-0xd7ff and
   * 0xe000-0xfffd. Every entry fits in a single 16-bit `Char`; nothing above
   * 0xfffd is listed.
   */
  val char: Seq[Char] =
    expand(
      (0x0009, 0x0009), (0x000a, 0x000a), (0x000d, 0x000d), (0x0020, 0xd7ff),
      (0xe000, 0xfffd)
    )

  /** Characters of the `BaseChar` class. */
  val baseChar: Seq[Char] =
    expand(
      (0x0041, 0x005a), (0x0061, 0x007a), (0x00c0, 0x00d6), (0x00d8, 0x00f6),
      (0x00f8, 0x00ff), (0x0100, 0x0131), (0x0134, 0x013e), (0x0141, 0x0148),
      (0x014a, 0x017e), (0x0180, 0x01c3), (0x01cd, 0x01f0), (0x01f4, 0x01f5),
      (0x01fa, 0x0217), (0x0250, 0x02a8), (0x02bb, 0x02c1), (0x0386, 0x0386),
      (0x0388, 0x038a), (0x038c, 0x038c), (0x038e, 0x03a1), (0x03a3, 0x03ce),
      (0x03d0, 0x03d6), (0x03da, 0x03da), (0x03dc, 0x03dc), (0x03de, 0x03de),
      (0x03e0, 0x03e0), (0x03e2, 0x03f3), (0x0401, 0x040c), (0x040e, 0x044f),
      (0x0451, 0x045c), (0x045e, 0x0481), (0x0490, 0x04c4), (0x04c7, 0x04c8),
      (0x04cb, 0x04cc), (0x04d0, 0x04eb), (0x04ee, 0x04f5), (0x04f8, 0x04f9),
      (0x0531, 0x0556), (0x0559, 0x0559), (0x0561, 0x0586), (0x05d0, 0x05ea),
      (0x05f0, 0x05f2), (0x0621, 0x063a), (0x0641, 0x064a), (0x0671, 0x06b7),
      (0x06ba, 0x06be), (0x06c0, 0x06ce), (0x06d0, 0x06d3), (0x06d5, 0x06d5),
      (0x06e5, 0x06e6), (0x0905, 0x0939), (0x093d, 0x093d), (0x0958, 0x0961),
      (0x0985, 0x098c), (0x098f, 0x0990), (0x0993, 0x09a8), (0x09aa, 0x09b0),
      (0x09b2, 0x09b2), (0x09b6, 0x09b9), (0x09dc, 0x09dd), (0x09df, 0x09e1),
      (0x09f0, 0x09f1), (0x0a05, 0x0a0a), (0x0a0f, 0x0a10), (0x0a13, 0x0a28),
      (0x0a2a, 0x0a30), (0x0a32, 0x0a33), (0x0a35, 0x0a36), (0x0a38, 0x0a39),
      (0x0a59, 0x0a5c), (0x0a5e, 0x0a5e), (0x0a72, 0x0a74), (0x0a85, 0x0a8b),
      (0x0a8d, 0x0a8d), (0x0a8f, 0x0a91), (0x0a93, 0x0aa8), (0x0aaa, 0x0ab0),
      (0x0ab2, 0x0ab3), (0x0ab5, 0x0ab9), (0x0abd, 0x0abd), (0x0ae0, 0x0ae0),
      (0x0b05, 0x0b0c), (0x0b0f, 0x0b10), (0x0b13, 0x0b28), (0x0b2a, 0x0b30),
      (0x0b32, 0x0b33), (0x0b36, 0x0b39), (0x0b3d, 0x0b3d), (0x0b5c, 0x0b5d),
      (0x0b5f, 0x0b61), (0x0b85, 0x0b8a), (0x0b8e, 0x0b90), (0x0b92, 0x0b95),
      (0x0b99, 0x0b9a), (0x0b9c, 0x0b9c), (0x0b9e, 0x0b9f), (0x0ba3, 0x0ba4),
      (0x0ba8, 0x0baa), (0x0bae, 0x0bb5), (0x0bb7, 0x0bb9), (0x0c05, 0x0c0c),
      (0x0c0e, 0x0c10), (0x0c12, 0x0c28), (0x0c2a, 0x0c33), (0x0c35, 0x0c39),
      (0x0c60, 0x0c61), (0x0c85, 0x0c8c), (0x0c8e, 0x0c90), (0x0c92, 0x0ca8),
      (0x0caa, 0x0cb3), (0x0cb5, 0x0cb9), (0x0cde, 0x0cde), (0x0ce0, 0x0ce1),
      (0x0d05, 0x0d0c), (0x0d0e, 0x0d10), (0x0d12, 0x0d28), (0x0d2a, 0x0d39),
      (0x0d60, 0x0d61), (0x0e01, 0x0e2e), (0x0e30, 0x0e30), (0x0e32, 0x0e33),
      (0x0e40, 0x0e45), (0x0e81, 0x0e82), (0x0e84, 0x0e84), (0x0e87, 0x0e88),
      (0x0e8a, 0x0e8a), (0x0e8d, 0x0e8d), (0x0e94, 0x0e97), (0x0e99, 0x0e9f),
      (0x0ea1, 0x0ea3), (0x0ea5, 0x0ea5), (0x0ea7, 0x0ea7), (0x0eaa, 0x0eab),
      (0x0ead, 0x0eae), (0x0eb0, 0x0eb0), (0x0eb2, 0x0eb3), (0x0ebd, 0x0ebd),
      (0x0ec0, 0x0ec4), (0x0f40, 0x0f47), (0x0f49, 0x0f69), (0x10a0, 0x10c5),
      (0x10d0, 0x10f6), (0x1100, 0x1100), (0x1102, 0x1103), (0x1105, 0x1107),
      (0x1109, 0x1109), (0x110b, 0x110c), (0x110e, 0x1112), (0x113c, 0x113c),
      (0x113e, 0x113e), (0x1140, 0x1140), (0x114c, 0x114c), (0x114e, 0x114e),
      (0x1150, 0x1150), (0x1154, 0x1155), (0x1159, 0x1159), (0x115f, 0x1161),
      (0x1163, 0x1163), (0x1165, 0x1165), (0x1167, 0x1167), (0x1169, 0x1169),
      (0x116d, 0x116e), (0x1172, 0x1173), (0x1175, 0x1175), (0x119e, 0x119e),
      (0x11a8, 0x11a8), (0x11ab, 0x11ab), (0x11ae, 0x11af), (0x11b7, 0x11b8),
      (0x11ba, 0x11ba), (0x11bc, 0x11c2), (0x11eb, 0x11eb), (0x11f0, 0x11f0),
      (0x11f9, 0x11f9), (0x1e00, 0x1e9b), (0x1ea0, 0x1ef9), (0x1f00, 0x1f15),
      (0x1f18, 0x1f1d), (0x1f20, 0x1f45), (0x1f48, 0x1f4d), (0x1f50, 0x1f57),
      (0x1f59, 0x1f59), (0x1f5b, 0x1f5b), (0x1f5d, 0x1f5d), (0x1f5f, 0x1f7d),
      (0x1f80, 0x1fb4), (0x1fb6, 0x1fbc), (0x1fbe, 0x1fbe), (0x1fc2, 0x1fc4),
      (0x1fc6, 0x1fcc), (0x1fd0, 0x1fd3), (0x1fd6, 0x1fdb), (0x1fe0, 0x1fec),
      (0x1ff2, 0x1ff4), (0x1ff6, 0x1ffc), (0x2126, 0x2126), (0x212a, 0x212b),
      (0x212e, 0x212e), (0x2180, 0x2182), (0x3041, 0x3094), (0x30a1, 0x30fa),
      (0x3105, 0x312c), (0xac00, 0xd7a3)
    )

  /** Characters of the `Ideographic` class. */
  val ideographic: Seq[Char] =
    expand(
      (0x4e00, 0x9fa5), (0x3007, 0x3007), (0x3021, 0x3029)
    )

  /** All of [[baseChar]] followed by all of [[ideographic]]. */
  val letter: Seq[Char] =
    baseChar ++ ideographic

  /** Characters of the `CombiningChar` class. */
  val combiningChar: Seq[Char] =
    expand(
      (0x0300, 0x0345), (0x0360, 0x0361), (0x0483, 0x0486), (0x0591, 0x05a1),
      (0x05a3, 0x05b9), (0x05bb, 0x05bd), (0x05bf, 0x05bf), (0x05c1, 0x05c2),
      (0x05c4, 0x05c4), (0x064b, 0x0652), (0x0670, 0x0670), (0x06d6, 0x06dc),
      (0x06dd, 0x06df), (0x06e0, 0x06e4), (0x06e7, 0x06e8), (0x06ea, 0x06ed),
      (0x0901, 0x0903), (0x093c, 0x093c), (0x093e, 0x094c), (0x094d, 0x094d),
      (0x0951, 0x0954), (0x0962, 0x0963), (0x0981, 0x0983), (0x09bc, 0x09bc),
      (0x09be, 0x09be), (0x09bf, 0x09bf), (0x09c0, 0x09c4), (0x09c7, 0x09c8),
      (0x09cb, 0x09cd), (0x09d7, 0x09d7), (0x09e2, 0x09e3), (0x0a02, 0x0a02),
      (0x0a3c, 0x0a3c), (0x0a3e, 0x0a3e), (0x0a3f, 0x0a3f), (0x0a40, 0x0a42),
      (0x0a47, 0x0a48), (0x0a4b, 0x0a4d), (0x0a70, 0x0a71), (0x0a81, 0x0a83),
      (0x0abc, 0x0abc), (0x0abe, 0x0ac5), (0x0ac7, 0x0ac9), (0x0acb, 0x0acd),
      (0x0b01, 0x0b03), (0x0b3c, 0x0b3c), (0x0b3e, 0x0b43), (0x0b47, 0x0b48),
      (0x0b4b, 0x0b4d), (0x0b56, 0x0b57), (0x0b82, 0x0b83), (0x0bbe, 0x0bc2),
      (0x0bc6, 0x0bc8), (0x0bca, 0x0bcd), (0x0bd7, 0x0bd7), (0x0c01, 0x0c03),
      (0x0c3e, 0x0c44), (0x0c46, 0x0c48), (0x0c4a, 0x0c4d), (0x0c55, 0x0c56),
      (0x0c82, 0x0c83), (0x0cbe, 0x0cc4), (0x0cc6, 0x0cc8), (0x0cca, 0x0ccd),
      (0x0cd5, 0x0cd6), (0x0d02, 0x0d03), (0x0d3e, 0x0d43), (0x0d46, 0x0d48),
      (0x0d4a, 0x0d4d), (0x0d57, 0x0d57), (0x0e31, 0x0e31), (0x0e34, 0x0e3a),
      (0x0e47, 0x0e4e), (0x0eb1, 0x0eb1), (0x0eb4, 0x0eb9), (0x0ebb, 0x0ebc),
      (0x0ec8, 0x0ecd), (0x0f18, 0x0f19), (0x0f35, 0x0f35), (0x0f37, 0x0f37),
      (0x0f39, 0x0f39), (0x0f3e, 0x0f3e), (0x0f3f, 0x0f3f), (0x0f71, 0x0f84),
      (0x0f86, 0x0f8b), (0x0f90, 0x0f95), (0x0f97, 0x0f97), (0x0f99, 0x0fad),
      (0x0fb1, 0x0fb7), (0x0fb9, 0x0fb9), (0x20d0, 0x20dc), (0x20e1, 0x20e1),
      (0x302a, 0x302f), (0x3099, 0x3099), (0x309a, 0x309a)
    )

  /** Characters of the `Digit` class. */
  val digit: Seq[Char] =
    expand(
      (0x0030, 0x0039), (0x0660, 0x0669), (0x06f0, 0x06f9), (0x0966, 0x096f),
      (0x09e6, 0x09ef), (0x0a66, 0x0a6f), (0x0ae6, 0x0aef), (0x0b66, 0x0b6f),
      (0x0be7, 0x0bef), (0x0c66, 0x0c6f), (0x0ce6, 0x0cef), (0x0d66, 0x0d6f),
      (0x0e50, 0x0e59), (0x0ed0, 0x0ed9), (0x0f20, 0x0f29)
    )

  /** Characters of the `Extender` class. */
  val extender: Seq[Char] =
    expand(
      (0x00b7, 0x00b7), (0x02d0, 0x02d0), (0x02d1, 0x02d1), (0x0387, 0x0387),
      (0x0640, 0x0640), (0x0e46, 0x0e46), (0x0ec6, 0x0ec6), (0x3005, 0x3005),
      (0x3031, 0x3035), (0x309d, 0x309e), (0x30fc, 0x30fe)
    )

  /** Every [[letter]], followed by the underscore. */
  val ncNameStartChar: Seq[Char] =
    letter ++ "_"

  /** Every [[letter]] and [[digit]], then `.`, `-` and `_`, then every
   * [[combiningChar]] and [[extender]], concatenated in that order.
   */
  val ncNameChar: Seq[Char] =
    letter ++
      digit ++
      ".-_" ++
      combiningChar ++
      extender
}
0 commit comments